joelniklaus committed on
Commit e8120ef
1 Parent(s): be74cf2

Training in progress, step 700000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80276c82f803233f5fdb661f073deaa493a9da9622e517afb26a84ad5e426889
+oid sha256:dbe29aa02358e2143d72f205b3a4a91a5018b93605acb79f66ed604b5f19383d
 size 3480942553
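
Each CHANGED block in this commit rewrites only a Git LFS pointer: `oid` is the SHA-256 of the actual binary blob and `size` is its byte count, so an unchanged size with a new oid means the checkpoint file was overwritten in place. A minimal sketch (assuming the blob has been fetched locally, e.g. via `git lfs pull`; the path is illustrative) for checking a downloaded file against its pointer:

```python
import hashlib

# Minimal sketch: verify a pulled LFS object against the oid in its pointer file.
# The path and expected hash below come from the optimizer.pt pointer above.
def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "dbe29aa02358e2143d72f205b3a4a91a5018b93605acb79f66ed604b5f19383d"
print(sha256_of("last-checkpoint/optimizer.pt") == expected)
```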
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:010ff15e7c5b4944d147f08e3ca39a7a13d144f0ac2b46d7be37da6a2832f71d
+oid sha256:bb5b90c98009d51696b36759831d22a584a9950a286ec1f19cf3456f91a65bb8
 size 1740493675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9938c710ed3be6b7a7709c622a2ddd00b0b3538d188f7c24757e3a01a9d961a9
+oid sha256:a144c4622f8cf43db68e969e7f66a4be23d7f871fcf2af4c5f77f6f71f88ad58
 size 13611
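
The eight `rng_state_0.pth` … `rng_state_7.pth` files are the per-process random-number-generator snapshots the Hugging Face Trainer saves alongside a checkpoint, one per worker, so a resumed run reproduces the same shuffling and dropout streams. A minimal sketch for peeking at one of them; the exact dictionary keys depend on the transformers version and backend:

```python
import torch

# Minimal sketch: inspect one per-process RNG snapshot from the checkpoint.
# Key names (e.g. "python", "numpy", "cpu") vary with the transformers version/backend.
state = torch.load("last-checkpoint/rng_state_0.pth")
print(type(state), list(state.keys()) if isinstance(state, dict) else None)
```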
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d7fa20411577666fac76fe76348b4f9231439cc2e524d6e3185910c258591e9
+oid sha256:d2d6ab5c2c0046e2c66cf435ff00463ff3da7208ddb35b5b6f19c87d94f3623b
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.65,
-  "global_step": 650000,
+  "epoch": 0.7,
+  "global_step": 700000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4010,11 +4010,319 @@
       "eval_samples_per_second": 29.262,
       "eval_steps_per_second": 0.462,
       "step": 650000
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 2.976391850971065e-05,
+      "loss": 0.8019,
+      "step": 651000
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 2.9612829550614836e-05,
+      "loss": 0.8151,
+      "step": 652000
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 2.9461963542348737e-05,
+      "loss": 0.8159,
+      "step": 653000
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 2.931132213475884e-05,
+      "loss": 0.8109,
+      "step": 654000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.916090697523549e-05,
+      "loss": 0.8056,
+      "step": 655000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.9010719708694722e-05,
+      "loss": 0.7921,
+      "step": 656000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.8860761977560436e-05,
+      "loss": 0.7977,
+      "step": 657000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.8711035421746367e-05,
+      "loss": 0.8184,
+      "step": 658000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.8561541678638142e-05,
+      "loss": 0.7923,
+      "step": 659000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.8412282383075363e-05,
+      "loss": 0.8042,
+      "step": 660000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.8263259167333777e-05,
+      "loss": 0.7837,
+      "step": 661000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.811447366110741e-05,
+      "loss": 0.7822,
+      "step": 662000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.7965927491490705e-05,
+      "loss": 0.7886,
+      "step": 663000
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 2.7817622282960815e-05,
+      "loss": 0.8035,
+      "step": 664000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.766955965735968e-05,
+      "loss": 0.8047,
+      "step": 665000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.7521741233876496e-05,
+      "loss": 0.7887,
+      "step": 666000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.7374168629029813e-05,
+      "loss": 0.786,
+      "step": 667000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.7226843456650037e-05,
+      "loss": 0.796,
+      "step": 668000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.707976732786166e-05,
+      "loss": 0.7884,
+      "step": 669000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.693294185106562e-05,
+      "loss": 0.8067,
+      "step": 670000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.6786368631921836e-05,
+      "loss": 0.7903,
+      "step": 671000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.6640049273331515e-05,
+      "loss": 0.8027,
+      "step": 672000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.6493985375419778e-05,
+      "loss": 0.796,
+      "step": 673000
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 2.6348178535517966e-05,
+      "loss": 0.8048,
+      "step": 674000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.6202630348146324e-05,
+      "loss": 0.7818,
+      "step": 675000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.6057342404996522e-05,
+      "loss": 0.7887,
+      "step": 676000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.591231629491423e-05,
+      "loss": 0.8113,
+      "step": 677000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.5767553603881767e-05,
+      "loss": 0.8007,
+      "step": 678000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.562305591500069e-05,
+      "loss": 0.7915,
+      "step": 679000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.547882480847461e-05,
+      "loss": 0.8059,
+      "step": 680000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.5334861861591753e-05,
+      "loss": 0.8149,
+      "step": 681000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.5191168648707887e-05,
+      "loss": 0.8027,
+      "step": 682000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.5047746741228978e-05,
+      "loss": 0.8,
+      "step": 683000
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 2.490459770759398e-05,
+      "loss": 0.784,
+      "step": 684000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.476172311325783e-05,
+      "loss": 0.7895,
+      "step": 685000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.4619124520674146e-05,
+      "loss": 0.7837,
+      "step": 686000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.447680348927837e-05,
+      "loss": 0.7905,
+      "step": 687000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.433476157547044e-05,
+      "loss": 0.7852,
+      "step": 688000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.419300033259798e-05,
+      "loss": 0.7815,
+      "step": 689000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.405152131093926e-05,
+      "loss": 0.7644,
+      "step": 690000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.3910326057686127e-05,
+      "loss": 0.7401,
+      "step": 691000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.3769416116927335e-05,
+      "loss": 0.744,
+      "step": 692000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.362879302963135e-05,
+      "loss": 0.7276,
+      "step": 693000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.3488458333629777e-05,
+      "loss": 0.7099,
+      "step": 694000
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 2.3348413563600325e-05,
+      "loss": 0.7142,
+      "step": 695000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.3208660251050158e-05,
+      "loss": 0.7357,
+      "step": 696000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.3069199924299174e-05,
+      "loss": 0.7397,
+      "step": 697000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.29300341084631e-05,
+      "loss": 0.7371,
+      "step": 698000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.279116432543705e-05,
+      "loss": 0.7174,
+      "step": 699000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 2.2652592093878666e-05,
+      "loss": 0.7433,
+      "step": 700000
+    },
+    {
+      "epoch": 0.7,
+      "eval_loss": 0.3951902687549591,
+      "eval_runtime": 184.5949,
+      "eval_samples_per_second": 27.086,
+      "eval_steps_per_second": 0.428,
+      "step": 700000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 3.87847022444544e+19,
+  "total_flos": 4.17681408786432e+19,
   "trial_name": null,
   "trial_params": null
 }
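
The trainer_state.json hunk appends fifty 1,000-step training log entries (steps 651,000–700,000, with the loss drifting from about 0.80 down to about 0.74) plus one evaluation entry (eval_loss ≈ 0.395 at step 700,000), and updates epoch, global_step, and total_flos accordingly. A minimal sketch, assuming the checkpoint directory has been downloaded locally, for pulling the loss curve out of `log_history`:

```python
import json

# Minimal sketch: extract the training- and eval-loss series from trainer_state.json.
# The path is illustrative; point it at the downloaded checkpoint directory.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print(state["global_step"], state["epoch"])  # 700000 0.7
print(train[-1])                             # (700000, 0.7433)
print(evals[-1])                             # (700000, 0.3951902687549591)
```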
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db82a1b292273e59a4cde694eb5b29617673aa0dcee2fd4598267bfb6eaa669f
+oid sha256:c2b92bb609d314cf22d9efbf8583c1eb78e2d6778c97eddc20b95b0c856addb3
 size 3439
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:010ff15e7c5b4944d147f08e3ca39a7a13d144f0ac2b46d7be37da6a2832f71d
+oid sha256:bb5b90c98009d51696b36759831d22a584a9950a286ec1f19cf3456f91a65bb8
 size 1740493675
runs/Mar22_03-02-10_t1v-n-ae339136-w-0/1679454966.5971136/events.out.tfevents.1679454966.t1v-n-ae339136-w-0.10622.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9332a482b85987bb1c463d7cfbb9a61379e7115b6d0ee845488113f42c11ff6d
+size 5479
runs/Mar22_03-02-10_t1v-n-ae339136-w-0/events.out.tfevents.1679454966.t1v-n-ae339136-w-0.10622.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7aace6ac9dd370a01161ccd7df8ab06deeb0d1f70aae54fc815d42c71c0f138
+size 12089
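
The two ADDED files under `runs/Mar22_03-02-10_t1v-n-ae339136-w-0/` are TensorBoard event logs written for this run (the one in the timestamped subdirectory appears to hold the hyperparameter dump, the other the training scalars). A minimal sketch, assuming `tensorboard` is installed and the run directory has been downloaded, for listing the logged scalar tags:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Minimal sketch: list scalar tags from the run directory added in this commit.
# The path mirrors the repo layout; adjust to your local copy.
acc = EventAccumulator("runs/Mar22_03-02-10_t1v-n-ae339136-w-0")
acc.Reload()
print(acc.Tags()["scalars"])  # tag names depend on how the Trainer's logger names them
```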
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db82a1b292273e59a4cde694eb5b29617673aa0dcee2fd4598267bfb6eaa669f
+oid sha256:c2b92bb609d314cf22d9efbf8583c1eb78e2d6778c97eddc20b95b0c856addb3
 size 3439
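
`training_args.bin` is a pickled TrainingArguments object, and the `last-checkpoint/` directory as a whole is what `Trainer.train(resume_from_checkpoint=...)` consumes to continue from step 700,000 with optimizer, scheduler, and RNG state restored. A minimal sketch for inspecting the saved arguments, assuming a transformers version compatible with the one that wrote the file (unpickling needs the class definition):

```python
import torch

# Minimal sketch: training_args.bin is a pickled TrainingArguments object;
# loading it requires a transformers version that can still construct the class.
args = torch.load("training_args.bin")
print(args.max_steps, args.learning_rate, args.per_device_train_batch_size)
```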