Upload 23 files
Browse files
- .gitattributes +3 -11
- .gitignore +1 -0
- README.md +69 -1
- config.json +28 -0
- runs/Oct06_09-16-36_vorace/1633504871.3913915/events.out.tfevents.1633504871.vorace.3479400.1 +3 -0
- runs/Oct06_09-16-36_vorace/events.out.tfevents.1633504866.vorace.3479400.0 +3 -0
- runs/Oct06_09-16-36_vorace/events.out.tfevents.1633506798.vorace.3479400.2 +3 -0
- runs/Oct06_10-16-41_vorace/1633508491.8694715/events.out.tfevents.1633508491.vorace.3486085.1 +3 -0
- runs/Oct06_10-16-41_vorace/events.out.tfevents.1633508491.vorace.3486085.0 +3 -0
- runs/Oct06_10-50-57_vorace/1633510293.596135/events.out.tfevents.1633510293.vorace.3488926.1 +3 -0
- runs/Oct06_10-50-57_vorace/events.out.tfevents.1633510274.vorace.3488926.0 +3 -0
- runs/Oct06_10-50-57_vorace/events.out.tfevents.1633512457.vorace.3488926.2 +3 -0
- runs/Sep29_14-23-20_vorace/1632918487.4283721/events.out.tfevents.1632918487.vorace.2673642.1 +3 -0
- runs/Sep29_14-23-20_vorace/events.out.tfevents.1632918487.vorace.2673642.0 +3 -0
- runs/Sep29_14-34-02_vorace/1632918914.4980953/events.out.tfevents.1632918914.vorace.2677037.2 +3 -0
- runs/Sep29_14-34-02_vorace/events.out.tfevents.1632918892.vorace.2677037.0 +3 -0
- runs/Sep29_14-34-02_vorace/events.out.tfevents.1632918914.vorace.2677037.1 +3 -0
- runs/Sep29_14-34-02_vorace/events.out.tfevents.1632921612.vorace.2677037.3 +3 -0
- special_tokens_map.json +1 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
@@ -1,35 +1,27 @@
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
5 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
|
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
11 |
*.model filter=lfs diff=lfs merge=lfs -text
|
12 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
13 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
14 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
15 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
17 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
18 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
19 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
20 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
|
21 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
22 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
23 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
|
|
24 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
README.md
CHANGED
@@ -1,3 +1,71 @@
|
|
1 |
---
|
2 |
-
license:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
license: apache-2.0
|
3 |
+
tags:
|
4 |
+
- summarization
|
5 |
+
- generated_from_trainer
|
6 |
+
metrics:
|
7 |
+
- rouge
|
8 |
+
model-index:
|
9 |
+
- name: mt5-small-finetuned-amazon-en-es
|
10 |
+
results: []
|
11 |
---
|
12 |
+
|
13 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
14 |
+
should probably proofread and complete it, then remove this comment. -->
|
15 |
+
|
16 |
+
# mt5-small-finetuned-amazon-en-es
|
17 |
+
|
18 |
+
This model is a fine-tuned version of [google/mt5-small](https://huggingface.co/google/mt5-small) on an unspecified dataset.
|
19 |
+
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 3.0285
|
21 |
+
- Rouge1: 16.9728
|
22 |
+
- Rouge2: 8.2969
|
23 |
+
- Rougel: 16.8366
|
24 |
+
- Rougelsum: 16.8510
|
25 |
+
- Gen Len: 10.1597
|
26 |
+
|
27 |
+
## Model description
|
28 |
+
|
29 |
+
More information needed
|
30 |
+
|
31 |
+
## Intended uses & limitations
|
32 |
+
|
33 |
+
More information needed
|
34 |
+
|
35 |
+
## Training and evaluation data
|
36 |
+
|
37 |
+
More information needed
|
38 |
+
|
39 |
+
## Training procedure
|
40 |
+
|
41 |
+
### Training hyperparameters
|
42 |
+
|
43 |
+
The following hyperparameters were used during training:
|
44 |
+
- learning_rate: 8e-05
|
45 |
+
- train_batch_size: 8
|
46 |
+
- eval_batch_size: 8
|
47 |
+
- seed: 42
|
48 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
49 |
+
- lr_scheduler_type: linear
|
50 |
+
- num_epochs: 8
|
51 |
+
|
52 |
+
### Training results
|
53 |
+
|
54 |
+
| Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
|
55 |
+
|:-------------:|:-----:|:----:|:---------------:|:-------:|:------:|:-------:|:---------:|:-------:|
|
56 |
+
| 6.4205 | 1.0 | 1209 | 3.3904 | 7.3124 | 2.1083 | 7.0649 | 7.0966 | 4.7269 |
|
57 |
+
| 3.7818 | 2.0 | 2418 | 3.1762 | 10.5437 | 3.0706 | 10.4618 | 10.4713 | 5.3697 |
|
58 |
+
| 3.4672 | 3.0 | 3627 | 3.1304 | 10.4674 | 3.0531 | 10.2156 | 10.2549 | 5.9748 |
|
59 |
+
| 3.3179 | 4.0 | 4836 | 3.1170 | 11.2847 | 3.3152 | 11.1387 | 11.146 | 6.1723 |
|
60 |
+
| 3.2048 | 5.0 | 6045 | 3.1069 | 11.5212 | 3.1957 | 11.2117 | 11.2044 | 6.042 |
|
61 |
+
| 3.1211 | 6.0 | 7254 | 3.1028 | 11.8104 | 3.6482 | 11.5535 | 11.5259 | 6.0462 |
|
62 |
+
| 3.0724 | 7.0 | 8463 | 3.1001 | 11.7336 | 3.6575 | 11.4403 | 11.4738 | 5.9454 |
|
63 |
+
| 3.0476 | 8.0 | 9672 | 3.0983 | 11.8061 | 3.6575 | 11.4999 | 11.5414 | 5.9286 |
|
64 |
+
|
65 |
+
|
66 |
+
### Framework versions
|
67 |
+
|
68 |
+
- Transformers 4.12.0.dev0
|
69 |
+
- Pytorch 1.9.1+cu111
|
70 |
+
- Datasets 1.12.2.dev0
|
71 |
+
- Tokenizers 0.10.3
|
config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "google/mt5-small",
|
3 |
+
"architectures": [
|
4 |
+
"MT5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"d_ff": 1024,
|
7 |
+
"d_kv": 64,
|
8 |
+
"d_model": 512,
|
9 |
+
"decoder_start_token_id": 0,
|
10 |
+
"dropout_rate": 0.1,
|
11 |
+
"eos_token_id": 1,
|
12 |
+
"feed_forward_proj": "gated-gelu",
|
13 |
+
"initializer_factor": 1.0,
|
14 |
+
"is_encoder_decoder": true,
|
15 |
+
"layer_norm_epsilon": 1e-06,
|
16 |
+
"model_type": "mt5",
|
17 |
+
"num_decoder_layers": 8,
|
18 |
+
"num_heads": 6,
|
19 |
+
"num_layers": 8,
|
20 |
+
"pad_token_id": 0,
|
21 |
+
"relative_attention_num_buckets": 32,
|
22 |
+
"tie_word_embeddings": false,
|
23 |
+
"tokenizer_class": "T5Tokenizer",
|
24 |
+
"torch_dtype": "float32",
|
25 |
+
"transformers_version": "4.12.0.dev0",
|
26 |
+
"use_cache": true,
|
27 |
+
"vocab_size": 250112
|
28 |
+
}
|
runs/Oct06_09-16-36_vorace/1633504871.3913915/events.out.tfevents.1633504871.vorace.3479400.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dbab45aa8f1c991ab68afd8855499293cb3fdf0c3e099535c5ad1c09b48fd8d
|
3 |
+
size 4772
|
runs/Oct06_09-16-36_vorace/events.out.tfevents.1633504866.vorace.3479400.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d8dfa15417d257d1f5497db5add7dae4b543c480ba7ed21cdcc6947d36a081c
|
3 |
+
size 9574
|
runs/Oct06_09-16-36_vorace/events.out.tfevents.1633506798.vorace.3479400.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cce3d95927170b76849b32bed8f75be85eaf79dce65eb8b79fccef9ab9e8820
|
3 |
+
size 565
|
runs/Oct06_10-16-41_vorace/1633508491.8694715/events.out.tfevents.1633508491.vorace.3486085.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44e177a560647f25af67a5579cff35bafb9622cb1d1bff6b6c6e2f3b60497e20
|
3 |
+
size 4784
|
runs/Oct06_10-16-41_vorace/events.out.tfevents.1633508491.vorace.3486085.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aab899aac507830c8fc5e2d95d3da187be1b7bd696501a586ee6947c56994993
|
3 |
+
size 6172
|
runs/Oct06_10-50-57_vorace/1633510293.596135/events.out.tfevents.1633510293.vorace.3488926.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71a64f07fc692ed077928a2edf28475bd4235b14d2c4da2b3eb1b062352639b2
|
3 |
+
size 4784
|
runs/Oct06_10-50-57_vorace/events.out.tfevents.1633510274.vorace.3488926.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:096a292b4f546b566677da652010cd498468523ad830119f476840c338c6ca4a
|
3 |
+
size 9582
|
runs/Oct06_10-50-57_vorace/events.out.tfevents.1633512457.vorace.3488926.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3694400e5c0e71ea6d2e553553ca8371dd4525dfbe611c68c3dae773961e718
|
3 |
+
size 565
|
runs/Sep29_14-23-20_vorace/1632918487.4283721/events.out.tfevents.1632918487.vorace.2673642.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a371d693df8c7de443878962b865d2c5d666bb8218d013ae6936ff06128fe65
|
3 |
+
size 4434
|
runs/Sep29_14-23-20_vorace/events.out.tfevents.1632918487.vorace.2673642.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b328dbf4d725d49d47961f7ac961dfe9b710674373705b97556491239cdaf478
|
3 |
+
size 3253
|
runs/Sep29_14-34-02_vorace/1632918914.4980953/events.out.tfevents.1632918914.vorace.2677037.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10e61ea87cf71d6aea0fe0aa106bfb74fbe5493f1cba1f849860b3d0abdb62e7
|
3 |
+
size 4434
|
runs/Sep29_14-34-02_vorace/events.out.tfevents.1632918892.vorace.2677037.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba8e0d8321230d4ad06b7397eedf35b2e4fbce322f9bf6fa3b80c267b3012a33
|
3 |
+
size 488
|
runs/Sep29_14-34-02_vorace/events.out.tfevents.1632918914.vorace.2677037.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5eee23e12afafaa563b4869dc01790d1bbb34436b14000d6b085748d4a8ee4de
|
3 |
+
size 8925
|
runs/Sep29_14-34-02_vorace/events.out.tfevents.1632921612.vorace.2677037.3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe6e6fa0218a664455d145d09d44687038daf82a1cb0046c23f92c2506cb2805
|
3 |
+
size 575
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
|
3 |
+
size 4309802
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 0, "additional_special_tokens": null, "special_tokens_map_file": "/data/.cache/hf/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276", "name_or_path": "google/mt5-small", "sp_model_kwargs": {}, "tokenizer_class": "T5Tokenizer"}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d59497cdf8f09a222bc15b027c87ad54c5eee909b3281b62c14199af7fb0e405
|
3 |
+
size 2991
|