AmalNlal committed on
Commit
a506f23
·
verified ·
1 Parent(s): 6e3d59c

End of training

Browse files
README.md CHANGED
@@ -1,74 +1,48 @@
1
- ---
2
- tags:
3
- - generated_from_trainer
4
- model-index:
5
- - name: Bert-MLM
6
- results: []
7
- ---
8
-
9
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
- should probably proofread and complete it, then remove this comment. -->
11
-
12
- # Bert-MLM
13
-
14
- This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
15
- It achieves the following results on the evaluation set:
16
- - Loss: 7.7544
17
-
18
- ## Model description
19
-
20
- More information needed
21
-
22
- ## Intended uses & limitations
23
-
24
- More information needed
25
-
26
- ## Training and evaluation data
27
-
28
- More information needed
29
-
30
- ## Training procedure
31
-
32
- ### Training hyperparameters
33
-
34
- The following hyperparameters were used during training:
35
- - learning_rate: 5e-05
36
- - train_batch_size: 8
37
- - eval_batch_size: 8
38
- - seed: 42
39
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
40
- - lr_scheduler_type: linear
41
- - training_steps: 1000
42
-
43
- ### Training results
44
-
45
- | Training Loss | Epoch | Step | Validation Loss |
46
- |:-------------:|:-----:|:----:|:---------------:|
47
- | No log | 0.02 | 50 | 9.3840 |
48
- | 9.3687 | 0.03 | 100 | 8.6014 |
49
- | 9.3687 | 0.05 | 150 | 8.2440 |
50
- | 8.0254 | 0.06 | 200 | 8.0843 |
51
- | 8.0254 | 0.08 | 250 | 8.0234 |
52
- | 7.8649 | 0.09 | 300 | 7.9828 |
53
- | 7.8649 | 0.11 | 350 | 7.9550 |
54
- | 7.732 | 0.12 | 400 | 7.9101 |
55
- | 7.732 | 0.14 | 450 | 7.8946 |
56
- | 7.6192 | 0.15 | 500 | 7.8525 |
57
- | 7.6192 | 0.17 | 550 | 7.8461 |
58
- | 7.6378 | 0.18 | 600 | 7.8285 |
59
- | 7.6378 | 0.2 | 650 | 7.8182 |
60
- | 7.6338 | 0.22 | 700 | 7.7917 |
61
- | 7.6338 | 0.23 | 750 | nan |
62
- | 7.5994 | 0.25 | 800 | 7.7837 |
63
- | 7.5994 | 0.26 | 850 | 7.7596 |
64
- | 7.5323 | 0.28 | 900 | 7.7634 |
65
- | 7.5323 | 0.29 | 950 | 7.7750 |
66
- | 7.5914 | 0.31 | 1000 | 7.7544 |
67
-
68
-
69
- ### Framework versions
70
-
71
- - Transformers 4.36.2
72
- - Pytorch 2.1.2+cu118
73
- - Datasets 2.16.0
74
- - Tokenizers 0.15.0
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: AmalNlal/my_awesome_eli5_mlm_model
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: Bert-MLM
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # Bert-MLM
15
+
16
+ This model is a fine-tuned version of [AmalNlal/my_awesome_eli5_mlm_model](https://huggingface.co/AmalNlal/my_awesome_eli5_mlm_model) on an unspecified dataset.
17
+
18
+ ## Model description
19
+
20
+ More information needed
21
+
22
+ ## Intended uses & limitations
23
+
24
+ More information needed
25
+
26
+ ## Training and evaluation data
27
+
28
+ More information needed
29
+
30
+ ## Training procedure
31
+
32
+ ### Training hyperparameters
33
+
34
+ The following hyperparameters were used during training:
35
+ - learning_rate: 2
36
+ - train_batch_size: 4
37
+ - eval_batch_size: 8
38
+ - seed: 42
39
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
40
+ - lr_scheduler_type: linear
41
+ - num_epochs: 1
42
+
43
+ ### Framework versions
44
+
45
+ - Transformers 4.36.2
46
+ - Pytorch 2.1.2+cu118
47
+ - Datasets 2.16.0
48
+ - Tokenizers 0.15.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,24 +1,27 @@
1
- {
2
- "architectures": [
3
- "BertForMaskedLM"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "classifier_dropout": null,
7
- "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1,
9
- "hidden_size": 504,
10
- "initializer_range": 0.02,
11
- "intermediate_size": 1024,
12
- "layer_norm_eps": 1e-12,
13
- "max_position_embeddings": 256,
14
- "model_type": "bert",
15
- "num_attention_heads": 12,
16
- "num_hidden_layers": 12,
17
- "pad_token_id": 0,
18
- "position_embedding_type": "absolute",
19
- "torch_dtype": "float32",
20
- "transformers_version": "4.36.2",
21
- "type_vocab_size": 2,
22
- "use_cache": true,
23
- "vocab_size": 50000
24
- }
 
 
 
 
1
+ {
2
+ "_name_or_path": "AmalNlal/my_awesome_eli5_mlm_model",
3
+ "architectures": [
4
+ "RobertaForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 6,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.36.2",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 50265
27
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe2d982cc67a1f6b1d059e5a1590aee15b846bd470839309ba1e54c81f3b5976
3
- size 201153152
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6680570641a2e5ae2f4df9dffb4cd40625ef41f382153604f34769bff561ef10
3
+ size 328693404
runs/May01_20-01-50_Amal/events.out.tfevents.1714586517.Amal.31556.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64bfb5562d8aaf7e17f679f425427d171f3e550d06bcb947e97bb8959c21f0e
3
+ size 4350
runs/May01_20-02-18_Amal/events.out.tfevents.1714586539.Amal.31556.10 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c1e579fb725222e9906cedcb80dc460f25627670df5c9aab73902dabcf5206a
3
+ size 4346
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63cdc91d4d15fc792951e823f4a5979a46afbd3fa7eb9a7873a2efc58156f9ac
3
- size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126327bbd43737948cb3e9c1c05ae62aed137a50d1f673f3fcde64c5b0fc7318
3
+ size 4792