Vidyuth committed on
Commit
99b5842
1 Parent(s): 96946f8

Upload 23 files

Browse files
.gitattributes CHANGED
@@ -1,35 +1,27 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
README.md CHANGED
@@ -1,3 +1,71 @@
1
  ---
2
- license: other
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: apache-2.0
3
+ tags:
4
+ - summarization
5
+ - generated_from_trainer
6
+ metrics:
7
+ - rouge
8
+ model-index:
9
+ - name: mt5-small-finetuned-amazon-en-es
10
+ results: []
11
  ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # mt5-small-finetuned-amazon-en-es
17
+
18
+ This model is a fine-tuned version of [google/mt5-small](https://huggingface.co/google/mt5-small) on an unspecified dataset (the dataset name was not recorded by the Trainer).
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 3.0285
21
+ - Rouge1: 16.9728
22
+ - Rouge2: 8.2969
23
+ - Rougel: 16.8366
24
+ - Rougelsum: 16.8510
25
+ - Gen Len: 10.1597
26
+
27
+ ## Model description
28
+
29
+ More information needed
30
+
31
+ ## Intended uses & limitations
32
+
33
+ More information needed
34
+
35
+ ## Training and evaluation data
36
+
37
+ More information needed
38
+
39
+ ## Training procedure
40
+
41
+ ### Training hyperparameters
42
+
43
+ The following hyperparameters were used during training:
44
+ - learning_rate: 8e-05
45
+ - train_batch_size: 8
46
+ - eval_batch_size: 8
47
+ - seed: 42
48
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
+ - lr_scheduler_type: linear
50
+ - num_epochs: 8
51
+
52
+ ### Training results
53
+
54
+ | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
55
+ |:-------------:|:-----:|:----:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
56
+ | 6.4205 | 1.0 | 1209 | 3.3904 | 7.3124 | 2.1083 | 7.0649 | 7.0966 | 4.7269 |
57
+ | 3.7818 | 2.0 | 2418 | 3.1762 | 10.5437 | 3.0706 | 10.4618 | 10.4713 | 5.3697 |
58
+ | 3.4672 | 3.0 | 3627 | 3.1304 | 10.4674 | 3.0531 | 10.2156 | 10.2549 | 5.9748 |
59
+ | 3.3179 | 4.0 | 4836 | 3.1170 | 11.2847 | 3.3152 | 11.1387 | 11.146 | 6.1723 |
60
+ | 3.2048 | 5.0 | 6045 | 3.1069 | 11.5212 | 3.1957 | 11.2117 | 11.2044 | 6.042 |
61
+ | 3.1211 | 6.0 | 7254 | 3.1028 | 11.8104 | 3.6482 | 11.5535 | 11.5259 | 6.0462 |
62
+ | 3.0724 | 7.0 | 8463 | 3.1001 | 11.7336 | 3.6575 | 11.4403 | 11.4738 | 5.9454 |
63
+ | 3.0476 | 8.0 | 9672 | 3.0983 | 11.8061 | 3.6575 | 11.4999 | 11.5414 | 5.9286 |
64
+
65
+
66
+ ### Framework versions
67
+
68
+ - Transformers 4.12.0.dev0
69
+ - Pytorch 1.9.1+cu111
70
+ - Datasets 1.12.2.dev0
71
+ - Tokenizers 0.10.3
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/mt5-small",
3
+ "architectures": [
4
+ "MT5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 1024,
7
+ "d_kv": 64,
8
+ "d_model": 512,
9
+ "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.1,
11
+ "eos_token_id": 1,
12
+ "feed_forward_proj": "gated-gelu",
13
+ "initializer_factor": 1.0,
14
+ "is_encoder_decoder": true,
15
+ "layer_norm_epsilon": 1e-06,
16
+ "model_type": "mt5",
17
+ "num_decoder_layers": 8,
18
+ "num_heads": 6,
19
+ "num_layers": 8,
20
+ "pad_token_id": 0,
21
+ "relative_attention_num_buckets": 32,
22
+ "tie_word_embeddings": false,
23
+ "tokenizer_class": "T5Tokenizer",
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.12.0.dev0",
26
+ "use_cache": true,
27
+ "vocab_size": 250112
28
+ }
runs/Oct06_09-16-36_vorace/1633504871.3913915/events.out.tfevents.1633504871.vorace.3479400.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbab45aa8f1c991ab68afd8855499293cb3fdf0c3e099535c5ad1c09b48fd8d
3
+ size 4772
runs/Oct06_09-16-36_vorace/events.out.tfevents.1633504866.vorace.3479400.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d8dfa15417d257d1f5497db5add7dae4b543c480ba7ed21cdcc6947d36a081c
3
+ size 9574
runs/Oct06_09-16-36_vorace/events.out.tfevents.1633506798.vorace.3479400.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cce3d95927170b76849b32bed8f75be85eaf79dce65eb8b79fccef9ab9e8820
3
+ size 565
runs/Oct06_10-16-41_vorace/1633508491.8694715/events.out.tfevents.1633508491.vorace.3486085.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e177a560647f25af67a5579cff35bafb9622cb1d1bff6b6c6e2f3b60497e20
3
+ size 4784
runs/Oct06_10-16-41_vorace/events.out.tfevents.1633508491.vorace.3486085.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab899aac507830c8fc5e2d95d3da187be1b7bd696501a586ee6947c56994993
3
+ size 6172
runs/Oct06_10-50-57_vorace/1633510293.596135/events.out.tfevents.1633510293.vorace.3488926.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a64f07fc692ed077928a2edf28475bd4235b14d2c4da2b3eb1b062352639b2
3
+ size 4784
runs/Oct06_10-50-57_vorace/events.out.tfevents.1633510274.vorace.3488926.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:096a292b4f546b566677da652010cd498468523ad830119f476840c338c6ca4a
3
+ size 9582
runs/Oct06_10-50-57_vorace/events.out.tfevents.1633512457.vorace.3488926.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3694400e5c0e71ea6d2e553553ca8371dd4525dfbe611c68c3dae773961e718
3
+ size 565
runs/Sep29_14-23-20_vorace/1632918487.4283721/events.out.tfevents.1632918487.vorace.2673642.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a371d693df8c7de443878962b865d2c5d666bb8218d013ae6936ff06128fe65
3
+ size 4434
runs/Sep29_14-23-20_vorace/events.out.tfevents.1632918487.vorace.2673642.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b328dbf4d725d49d47961f7ac961dfe9b710674373705b97556491239cdaf478
3
+ size 3253
runs/Sep29_14-34-02_vorace/1632918914.4980953/events.out.tfevents.1632918914.vorace.2677037.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e61ea87cf71d6aea0fe0aa106bfb74fbe5493f1cba1f849860b3d0abdb62e7
3
+ size 4434
runs/Sep29_14-34-02_vorace/events.out.tfevents.1632918892.vorace.2677037.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba8e0d8321230d4ad06b7397eedf35b2e4fbce322f9bf6fa3b80c267b3012a33
3
+ size 488
runs/Sep29_14-34-02_vorace/events.out.tfevents.1632918914.vorace.2677037.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eee23e12afafaa563b4869dc01790d1bbb34436b14000d6b085748d4a8ee4de
3
+ size 8925
runs/Sep29_14-34-02_vorace/events.out.tfevents.1632921612.vorace.2677037.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe6e6fa0218a664455d145d09d44687038daf82a1cb0046c23f92c2506cb2805
3
+ size 575
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 0, "additional_special_tokens": null, "special_tokens_map_file": "/data/.cache/hf/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276", "name_or_path": "google/mt5-small", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d59497cdf8f09a222bc15b027c87ad54c5eee909b3281b62c14199af7fb0e405
3
+ size 2991