Shresthadev403 committed on
Commit
bf15c68
·
1 Parent(s): 776e1f5

End of training

Browse files
README.md CHANGED
@@ -14,8 +14,6 @@ should probably proofread and complete it, then remove this comment. -->
14
  # bert-base-banking77-pt2
15
 
16
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 1.7084
19
 
20
  ## Model description
21
 
@@ -35,24 +33,17 @@ More information needed
35
 
36
  The following hyperparameters were used during training:
37
  - learning_rate: 5e-05
38
- - train_batch_size: 1
39
- - eval_batch_size: 8
40
  - seed: 42
 
 
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
  - num_epochs: 7
44
 
45
  ### Training results
46
 
47
- | Training Loss | Epoch | Step | Validation Loss |
48
- |:-------------:|:-----:|:----:|:---------------:|
49
- | 16.994 | 1.0 | 10 | 4.2000 |
50
- | 3.8266 | 2.0 | 20 | 3.1920 |
51
- | 3.0646 | 3.0 | 30 | 2.2112 |
52
- | 2.3667 | 4.0 | 40 | 1.9768 |
53
- | 2.0815 | 5.0 | 50 | 1.8096 |
54
- | 1.9727 | 6.0 | 60 | 1.7462 |
55
- | 1.8305 | 7.0 | 70 | 1.7084 |
56
 
57
 
58
  ### Framework versions
 
14
  # bert-base-banking77-pt2
15
 
16
  This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
 
 
17
 
18
  ## Model description
19
 
 
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 5e-05
36
+ - train_batch_size: 32
37
+ - eval_batch_size: 32
38
  - seed: 42
39
+ - gradient_accumulation_steps: 20
40
+ - total_train_batch_size: 640
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
  - num_epochs: 7
44
 
45
  ### Training results
46
 
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  ### Framework versions
logs/events.out.tfevents.1702224308.a2c88cfea1e4.588.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d951358e05c804da4cbbca25400d25cc6154bed5bb666c8cc60609ca655774b
3
- size 6134
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbe5dcff01b4fc9abfd8ce67e62c005d3562f64b797cda1e32abca33554c47b
3
+ size 6288
logs/events.out.tfevents.1702224578.a2c88cfea1e4.588.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:597d92f5bc0ad1cc956dd20900424d45d5c6b6870be86287e923f511d85e1567
3
+ size 4803
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95570e5a301362c930fde7b10ee66a21c8f42e3c240dc4cf312d40a02bdcfb5d
3
  size 497814144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbdf21c65e42ecf425af13ee6cb1bc584fb68ab844200b542e0174b7ee5e4cb4
3
  size 497814144
trainer_state.json CHANGED
@@ -1,126 +1,28 @@
1
  {
2
- "best_metric": 1.7083709239959717,
3
- "best_model_checkpoint": "bert-base-banking77-pt2/checkpoint-70",
4
- "epoch": 7.0,
5
  "eval_steps": 10,
6
- "global_step": 70,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "learning_rate": 4.2857142857142856e-05,
14
- "loss": 16.994,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 1.0,
19
- "eval_loss": 4.199960231781006,
20
- "eval_runtime": 0.1251,
21
- "eval_samples_per_second": 79.92,
22
- "eval_steps_per_second": 15.984,
23
- "step": 10
24
- },
25
- {
26
- "epoch": 2.0,
27
- "learning_rate": 3.571428571428572e-05,
28
- "loss": 3.8266,
29
- "step": 20
30
- },
31
- {
32
- "epoch": 2.0,
33
- "eval_loss": 3.1919631958007812,
34
- "eval_runtime": 0.1628,
35
- "eval_samples_per_second": 61.412,
36
- "eval_steps_per_second": 12.282,
37
- "step": 20
38
- },
39
- {
40
- "epoch": 3.0,
41
- "learning_rate": 2.857142857142857e-05,
42
- "loss": 3.0646,
43
- "step": 30
44
- },
45
- {
46
- "epoch": 3.0,
47
- "eval_loss": 2.2111892700195312,
48
- "eval_runtime": 0.1271,
49
- "eval_samples_per_second": 78.703,
50
- "eval_steps_per_second": 15.741,
51
- "step": 30
52
- },
53
- {
54
- "epoch": 4.0,
55
- "learning_rate": 2.1428571428571428e-05,
56
- "loss": 2.3667,
57
- "step": 40
58
- },
59
- {
60
- "epoch": 4.0,
61
- "eval_loss": 1.9768412113189697,
62
- "eval_runtime": 0.1291,
63
- "eval_samples_per_second": 77.443,
64
- "eval_steps_per_second": 15.489,
65
- "step": 40
66
- },
67
- {
68
- "epoch": 5.0,
69
- "learning_rate": 1.4285714285714285e-05,
70
- "loss": 2.0815,
71
- "step": 50
72
- },
73
- {
74
- "epoch": 5.0,
75
- "eval_loss": 1.8095529079437256,
76
- "eval_runtime": 0.1244,
77
- "eval_samples_per_second": 80.397,
78
- "eval_steps_per_second": 16.079,
79
- "step": 50
80
- },
81
- {
82
- "epoch": 6.0,
83
- "learning_rate": 7.142857142857143e-06,
84
- "loss": 1.9727,
85
- "step": 60
86
- },
87
- {
88
- "epoch": 6.0,
89
- "eval_loss": 1.7462323904037476,
90
- "eval_runtime": 0.1467,
91
- "eval_samples_per_second": 68.173,
92
- "eval_steps_per_second": 13.635,
93
- "step": 60
94
- },
95
- {
96
- "epoch": 7.0,
97
- "learning_rate": 0.0,
98
- "loss": 1.8305,
99
- "step": 70
100
- },
101
- {
102
- "epoch": 7.0,
103
- "eval_loss": 1.7083709239959717,
104
- "eval_runtime": 0.1559,
105
- "eval_samples_per_second": 64.127,
106
- "eval_steps_per_second": 12.825,
107
- "step": 70
108
- },
109
- {
110
- "epoch": 7.0,
111
- "step": 70,
112
- "total_flos": 4572610560000.0,
113
- "train_loss": 4.5909277507237025,
114
- "train_runtime": 92.0015,
115
- "train_samples_per_second": 0.761,
116
- "train_steps_per_second": 0.761
117
  }
118
  ],
119
  "logging_steps": 10,
120
- "max_steps": 70,
121
  "num_train_epochs": 7,
122
  "save_steps": 10,
123
- "total_flos": 4572610560000.0,
124
  "trial_name": null,
125
  "trial_params": null
126
  }
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.375,
5
  "eval_steps": 10,
6
+ "global_step": 7,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 4.38,
13
+ "step": 7,
14
+ "total_flos": 286376067072000.0,
15
+ "train_loss": 23.388562883649552,
16
+ "train_runtime": 165.8541,
17
+ "train_samples_per_second": 42.206,
18
+ "train_steps_per_second": 0.042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 10,
22
+ "max_steps": 7,
23
  "num_train_epochs": 7,
24
  "save_steps": 10,
25
+ "total_flos": 286376067072000.0,
26
  "trial_name": null,
27
  "trial_params": null
28
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:927eb5b40dd0e5373dd84ce242021c3db8b3eefba4ad8144befc58a13e51b4a7
3
  size 4600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3120a79b950bf9a79bef5fab066d940b2c8afce2622e34f82e0c5edb68126598
3
  size 4600