boumehdi committed on
Commit
9e72bec
1 Parent(s): 3d37992

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +110 -17
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:589f092724206cdcff9df9c709f552ab03a6bce7a3af87738df3364f8205eb31
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe7fd1764c7c90f6d43cdd67295e5233d46cdc1193418c56bd9a019183019dc7
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87cfc3d39029c9690dd86b4f0275f4111954d33b4e1c685fd7f36825e00a3eae
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7438d7620c8202515f2afa81498da17b53ec1debae3c4f7fa871d57a678ca488
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d78fbb1a5438682b3e56cdcbcd670790d52471dfc41d3e1df54743de1874ab2
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cdaf50a0753d3a6e101916319bd71a8a4deca5c897164903ee046122c35005a
3
  size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc40a4be6a52cee4d7658df4041e660ffa02b0d8b5bd143bb8bb397f7b71b1a5
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4e827e327ff1e038aa2027dda2c3ed3be5c6a6d7a0f9578e32510d4dcaf26a3
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cd51f54c3c9ede3156fe9d077aa80e3c39fad9c7aefb825e83dd16225db78dd
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:387faccb2a2f1652b597f93e0dfe377728691d7dafb8a403a4470c4646c76cd5
3
  size 627
trainer_state.json CHANGED
@@ -1,37 +1,130 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.379540400296516,
5
- "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.89,
12
- "learning_rate": 9.999702363235908e-06,
13
- "loss": 0.0354,
14
  "step": 150
15
  },
16
  {
17
- "epoch": 1.78,
18
- "learning_rate": 9.99880945294363e-06,
19
- "loss": 0.0336,
20
  "step": 300
21
  },
22
  {
23
- "epoch": 2.38,
24
- "eval_loss": 0.19874997437000275,
25
- "eval_runtime": 421.2238,
26
- "eval_samples_per_second": 11.438,
27
- "eval_steps_per_second": 1.432,
28
- "eval_wer": 0.15169214199908868,
29
- "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
31
  ],
32
- "max_steps": 1680000,
33
  "num_train_epochs": 10000,
34
- "total_flos": 9.154271872958712e+18,
35
  "trial_name": null,
36
  "trial_params": null
37
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.791738382099828,
5
+ "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.03,
12
+ "learning_rate": 9.99965514863094e-07,
13
+ "loss": 0.0333,
14
  "step": 150
15
  },
16
  {
17
+ "epoch": 2.07,
18
+ "learning_rate": 9.99862059452376e-07,
19
+ "loss": 0.0332,
20
  "step": 300
21
  },
22
  {
23
+ "epoch": 3.1,
24
+ "learning_rate": 9.99758604041658e-07,
25
+ "loss": 0.0336,
26
+ "step": 450
27
+ },
28
+ {
29
+ "epoch": 3.45,
30
+ "eval_loss": 0.21229705214500427,
31
+ "eval_runtime": 487.1939,
32
+ "eval_samples_per_second": 10.649,
33
+ "eval_steps_per_second": 1.332,
34
+ "eval_wer": 0.1448122092799754,
35
+ "step": 500
36
+ },
37
+ {
38
+ "epoch": 4.14,
39
+ "learning_rate": 9.9965514863094e-07,
40
+ "loss": 0.0329,
41
+ "step": 600
42
+ },
43
+ {
44
+ "epoch": 5.17,
45
+ "learning_rate": 9.995516932202221e-07,
46
+ "loss": 0.0323,
47
+ "step": 750
48
+ },
49
+ {
50
+ "epoch": 6.21,
51
+ "learning_rate": 9.99448237809504e-07,
52
+ "loss": 0.0317,
53
+ "step": 900
54
+ },
55
+ {
56
+ "epoch": 6.9,
57
+ "eval_loss": 0.20801204442977905,
58
+ "eval_runtime": 282.77,
59
+ "eval_samples_per_second": 18.347,
60
+ "eval_steps_per_second": 2.295,
61
+ "eval_wer": 0.14550417099142737,
62
+ "step": 1000
63
+ },
64
+ {
65
+ "epoch": 7.24,
66
+ "learning_rate": 9.993454721015242e-07,
67
+ "loss": 0.0323,
68
+ "step": 1050
69
+ },
70
+ {
71
+ "epoch": 8.28,
72
+ "learning_rate": 9.992420166908062e-07,
73
+ "loss": 0.031,
74
+ "step": 1200
75
+ },
76
+ {
77
+ "epoch": 9.31,
78
+ "learning_rate": 9.991385612800883e-07,
79
+ "loss": 0.0314,
80
+ "step": 1350
81
+ },
82
+ {
83
+ "epoch": 10.34,
84
+ "learning_rate": 9.990351058693703e-07,
85
+ "loss": 0.0317,
86
+ "step": 1500
87
+ },
88
+ {
89
+ "epoch": 10.34,
90
+ "eval_loss": 0.2071654200553894,
91
+ "eval_runtime": 303.1157,
92
+ "eval_samples_per_second": 17.116,
93
+ "eval_steps_per_second": 2.141,
94
+ "eval_wer": 0.14465844001076386,
95
+ "step": 1500
96
+ },
97
+ {
98
+ "epoch": 11.38,
99
+ "learning_rate": 9.989316504586522e-07,
100
+ "loss": 0.031,
101
+ "step": 1650
102
+ },
103
+ {
104
+ "epoch": 12.41,
105
+ "learning_rate": 9.988281950479342e-07,
106
+ "loss": 0.0318,
107
+ "step": 1800
108
+ },
109
+ {
110
+ "epoch": 13.45,
111
+ "learning_rate": 9.987247396372162e-07,
112
+ "loss": 0.0314,
113
+ "step": 1950
114
+ },
115
+ {
116
+ "epoch": 13.79,
117
+ "eval_loss": 0.20706616342067719,
118
+ "eval_runtime": 287.3918,
119
+ "eval_samples_per_second": 18.052,
120
+ "eval_steps_per_second": 2.258,
121
+ "eval_wer": 0.1442355745204321,
122
+ "step": 2000
123
  }
124
  ],
125
+ "max_steps": 1450000,
126
  "num_train_epochs": 10000,
127
+ "total_flos": 5.652539665735567e+19,
128
  "trial_name": null,
129
  "trial_params": null
130
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:baa3cfd4e4131e4651786c3cda673f577d5bd99579c07be55652efaf86dc1600
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61dd66333a3a0d68eedaaf6d552d4521e71274edd381e8de2a88579575bba8c
3
  size 3323