timewanderer committed on
Commit 40e9c14 · verified · 1 Parent(s): 90d0e06

Training in progress, step 3000

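The checkpoint layout (run-0/checkpoint-3000/), the "Training in progress, step 3000" commit message, and the save_steps/logging_steps values recorded in trainer_state.json below match what the transformers Trainer pushes when it checkpoints during training. As a rough, hypothetical sketch only (the actual training script is not part of this commit), a TrainingArguments consistent with the recorded values could look like this; every value is read off the state file or explicitly assumed.

# Hypothetical sketch, not the author's script: TrainingArguments consistent
# with run-0/checkpoint-3000/trainer_state.json in the diff below.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="run-0",              # checkpoints are written to run-0/checkpoint-<step>/
    save_steps=500,                  # "save_steps": 500 -> step 3000 is the sixth save
    logging_steps=500,               # "logging_steps": 500
    per_device_train_batch_size=48,  # "train_batch_size": 48 (assuming a single device)
    num_train_epochs=11,             # trial_params value after this commit (was 10 before)
    push_to_hub=True,                # each save yields a "Training in progress, step N" commit
)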
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8b669e1bd7ff55a9121797a8e4a756a52cfbba25a8136fcf2b7646dfe31f152
+oid sha256:eed8dbca2945508ed4ac89d4bd5cb30c0dfef2c41a89fb1da33d35d9f95d297f
 size 268290900
run-0/checkpoint-3000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70a66fd616aed35b39bc6bb24990cc3e1133adcba9f957399b18b8e0b7fe0ac1
+oid sha256:eed8dbca2945508ed4ac89d4bd5cb30c0dfef2c41a89fb1da33d35d9f95d297f
 size 268290900
run-0/checkpoint-3000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:449551f59bff91ad71ee3cee3bc164d266215234ab3e197650a1bf6f4451ccbe
+oid sha256:051d15c448b0310509870729e149f9764897cb069c35beb10ac3e570e876e589
 size 536643898
run-0/checkpoint-3000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9548b29c3da5cefcad3a24e99f04bf8fb9b742e0c4db1bfc7dbdc86bbedf4f42
+oid sha256:9359efd64229560bd90e80fe6ffa6de38090f6b7f033862212653f3ccf150d93
 size 1064
run-0/checkpoint-3000/trainer_state.json CHANGED
@@ -10,132 +10,132 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.5938709677419355,
-      "eval_loss": 0.19997140765190125,
-      "eval_runtime": 5.5473,
-      "eval_samples_per_second": 558.834,
-      "eval_steps_per_second": 11.717,
+      "eval_accuracy": 0.6187096774193548,
+      "eval_loss": 0.2273363471031189,
+      "eval_runtime": 5.4212,
+      "eval_samples_per_second": 571.832,
+      "eval_steps_per_second": 11.99,
       "step": 318
     },
     {
       "epoch": 1.5723270440251573,
-      "grad_norm": 0.5329213738441467,
-      "learning_rate": 1.685534591194969e-05,
-      "loss": 0.3229,
+      "grad_norm": 0.5702099204063416,
+      "learning_rate": 1.7141223556317898e-05,
+      "loss": 0.3672,
       "step": 500
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.8238709677419355,
-      "eval_loss": 0.09559512138366699,
-      "eval_runtime": 5.7978,
-      "eval_samples_per_second": 534.683,
-      "eval_steps_per_second": 11.211,
+      "eval_accuracy": 0.8416129032258064,
+      "eval_loss": 0.10179296880960464,
+      "eval_runtime": 5.7619,
+      "eval_samples_per_second": 538.015,
+      "eval_steps_per_second": 11.281,
       "step": 636
     },
     {
       "epoch": 3.0,
-      "eval_accuracy": 0.8812903225806452,
-      "eval_loss": 0.06311405450105667,
-      "eval_runtime": 5.4988,
-      "eval_samples_per_second": 563.759,
-      "eval_steps_per_second": 11.821,
+      "eval_accuracy": 0.8906451612903226,
+      "eval_loss": 0.06358441710472107,
+      "eval_runtime": 5.4704,
+      "eval_samples_per_second": 566.684,
+      "eval_steps_per_second": 11.882,
       "step": 954
     },
     {
       "epoch": 3.1446540880503147,
-      "grad_norm": 0.44504690170288086,
-      "learning_rate": 1.371069182389937e-05,
-      "loss": 0.1111,
+      "grad_norm": 0.48276859521865845,
+      "learning_rate": 1.4282447112635793e-05,
+      "loss": 0.1191,
       "step": 1000
     },
     {
       "epoch": 4.0,
-      "eval_accuracy": 0.9006451612903226,
-      "eval_loss": 0.048286207020282745,
-      "eval_runtime": 5.5577,
-      "eval_samples_per_second": 557.787,
-      "eval_steps_per_second": 11.696,
+      "eval_accuracy": 0.905483870967742,
+      "eval_loss": 0.04695529490709305,
+      "eval_runtime": 5.4589,
+      "eval_samples_per_second": 567.879,
+      "eval_steps_per_second": 11.907,
       "step": 1272
     },
     {
       "epoch": 4.716981132075472,
-      "grad_norm": 0.3021944761276245,
-      "learning_rate": 1.0566037735849058e-05,
-      "loss": 0.0699,
+      "grad_norm": 0.3197612166404724,
+      "learning_rate": 1.1423670668953687e-05,
+      "loss": 0.0712,
       "step": 1500
     },
     {
       "epoch": 5.0,
-      "eval_accuracy": 0.9103225806451613,
-      "eval_loss": 0.03919174522161484,
-      "eval_runtime": 5.6898,
-      "eval_samples_per_second": 544.834,
-      "eval_steps_per_second": 11.424,
+      "eval_accuracy": 0.9167741935483871,
+      "eval_loss": 0.03740057349205017,
+      "eval_runtime": 5.721,
+      "eval_samples_per_second": 541.86,
+      "eval_steps_per_second": 11.362,
       "step": 1590
     },
     {
       "epoch": 6.0,
-      "eval_accuracy": 0.9132258064516129,
-      "eval_loss": 0.03407873958349228,
-      "eval_runtime": 5.5185,
-      "eval_samples_per_second": 561.746,
-      "eval_steps_per_second": 11.779,
+      "eval_accuracy": 0.9241935483870968,
+      "eval_loss": 0.032310228794813156,
+      "eval_runtime": 5.4558,
+      "eval_samples_per_second": 568.208,
+      "eval_steps_per_second": 11.914,
       "step": 1908
     },
     {
       "epoch": 6.289308176100629,
-      "grad_norm": 0.2565140724182129,
-      "learning_rate": 7.421383647798742e-06,
-      "loss": 0.0542,
+      "grad_norm": 0.2629503309726715,
+      "learning_rate": 8.564894225271586e-06,
+      "loss": 0.0539,
       "step": 2000
     },
     {
       "epoch": 7.0,
-      "eval_accuracy": 0.9229032258064516,
-      "eval_loss": 0.03131110221147537,
-      "eval_runtime": 5.6225,
-      "eval_samples_per_second": 551.352,
-      "eval_steps_per_second": 11.561,
+      "eval_accuracy": 0.9283870967741935,
+      "eval_loss": 0.029569072648882866,
+      "eval_runtime": 5.5263,
+      "eval_samples_per_second": 560.95,
+      "eval_steps_per_second": 11.762,
       "step": 2226
     },
     {
       "epoch": 7.861635220125786,
-      "grad_norm": 0.27681249380111694,
-      "learning_rate": 4.276729559748428e-06,
-      "loss": 0.0468,
+      "grad_norm": 0.29348886013031006,
+      "learning_rate": 5.706117781589479e-06,
+      "loss": 0.046,
       "step": 2500
     },
     {
       "epoch": 8.0,
-      "eval_accuracy": 0.9270967741935484,
-      "eval_loss": 0.028976548463106155,
-      "eval_runtime": 5.7358,
-      "eval_samples_per_second": 540.464,
-      "eval_steps_per_second": 11.332,
+      "eval_accuracy": 0.9309677419354838,
+      "eval_loss": 0.02714126743376255,
+      "eval_runtime": 5.6894,
+      "eval_samples_per_second": 544.869,
+      "eval_steps_per_second": 11.425,
       "step": 2544
     },
     {
       "epoch": 9.0,
-      "eval_accuracy": 0.9283870967741935,
-      "eval_loss": 0.027913924306631088,
-      "eval_runtime": 5.5425,
-      "eval_samples_per_second": 559.315,
-      "eval_steps_per_second": 11.728,
+      "eval_accuracy": 0.9335483870967742,
+      "eval_loss": 0.02584407664835453,
+      "eval_runtime": 5.4141,
+      "eval_samples_per_second": 572.575,
+      "eval_steps_per_second": 12.006,
       "step": 2862
     },
     {
       "epoch": 9.433962264150944,
-      "grad_norm": 0.2555118501186371,
-      "learning_rate": 1.1320754716981133e-06,
-      "loss": 0.0434,
+      "grad_norm": 0.2581123113632202,
+      "learning_rate": 2.847341337907376e-06,
+      "loss": 0.042,
       "step": 3000
     }
   ],
   "logging_steps": 500,
-  "max_steps": 3180,
+  "max_steps": 3498,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
+  "num_train_epochs": 11,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -153,8 +153,8 @@
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.2755961419937909,
-    "num_train_epochs": 10,
-    "temperature": 10
+    "alpha": 0.5264187954467557,
+    "num_train_epochs": 11,
+    "temperature": 5
   }
 }
run-0/checkpoint-3000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2da5d40335246ac682533d0ca6c52c386b58cb072bae3a30bf87a5e28562733
+oid sha256:0589ed65fdaae56362b9a8760696297fdd9596b2ddfb3d7051a76a6dfd129716
 size 5240
runs/Oct12_06-40-39_b76c1be2ae55/events.out.tfevents.1728715792.b76c1be2ae55.1423.1 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:946866cbedfb96b502f27e9c02d9c9bbd82dccb02e40d3b77bbcf740c7017d98
-size 16081
+oid sha256:fa61ebe05e1bdad6a73f3fc5960eabaad1cdbde610420e3a274161635895904d
+size 16615