DuongTrongChi
commited on
Commit
•
7f67ab7
1
Parent(s):
e203dcd
Training in progress, step 635, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100198584
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e6916f7e36c566c02cf5b2c732bbba7342ba9b478d907f6a86bacf7cb2c0150
|
3 |
size 100198584
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50675604
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a06e698c180e26ad266a872f2b31ce48f7f30f3e994431201bfae0f415a7c1a3
|
3 |
size 50675604
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:673f846db062d0e3d1543ca2e762819fe3771910bf6b5b7e181954499f0e9914
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4067,6 +4067,391 @@
|
|
4067 |
"learning_rate": 3.5616438356164386e-06,
|
4068 |
"loss": 1.1487,
|
4069 |
"step": 580
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4070 |
}
|
4071 |
],
|
4072 |
"logging_steps": 1,
|
@@ -4086,7 +4471,7 @@
|
|
4086 |
"attributes": {}
|
4087 |
}
|
4088 |
},
|
4089 |
-
"total_flos":
|
4090 |
"train_batch_size": 4,
|
4091 |
"trial_name": null,
|
4092 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9277691535019633,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 635,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4067 |
"learning_rate": 3.5616438356164386e-06,
|
4068 |
"loss": 1.1487,
|
4069 |
"step": 580
|
4070 |
+
},
|
4071 |
+
{
|
4072 |
+
"epoch": 0.8488722491096703,
|
4073 |
+
"grad_norm": 0.12069284915924072,
|
4074 |
+
"learning_rate": 3.527397260273973e-06,
|
4075 |
+
"loss": 1.1901,
|
4076 |
+
"step": 581
|
4077 |
+
},
|
4078 |
+
{
|
4079 |
+
"epoch": 0.8503333028947128,
|
4080 |
+
"grad_norm": 0.11841464787721634,
|
4081 |
+
"learning_rate": 3.4931506849315072e-06,
|
4082 |
+
"loss": 1.1714,
|
4083 |
+
"step": 582
|
4084 |
+
},
|
4085 |
+
{
|
4086 |
+
"epoch": 0.8517943566797552,
|
4087 |
+
"grad_norm": 0.12204127013683319,
|
4088 |
+
"learning_rate": 3.458904109589041e-06,
|
4089 |
+
"loss": 1.1501,
|
4090 |
+
"step": 583
|
4091 |
+
},
|
4092 |
+
{
|
4093 |
+
"epoch": 0.8532554104647977,
|
4094 |
+
"grad_norm": 0.13133108615875244,
|
4095 |
+
"learning_rate": 3.4246575342465754e-06,
|
4096 |
+
"loss": 1.2586,
|
4097 |
+
"step": 584
|
4098 |
+
},
|
4099 |
+
{
|
4100 |
+
"epoch": 0.8547164642498402,
|
4101 |
+
"grad_norm": 0.11903316527605057,
|
4102 |
+
"learning_rate": 3.39041095890411e-06,
|
4103 |
+
"loss": 1.2399,
|
4104 |
+
"step": 585
|
4105 |
+
},
|
4106 |
+
{
|
4107 |
+
"epoch": 0.8561775180348826,
|
4108 |
+
"grad_norm": 0.1073235422372818,
|
4109 |
+
"learning_rate": 3.356164383561644e-06,
|
4110 |
+
"loss": 1.2036,
|
4111 |
+
"step": 586
|
4112 |
+
},
|
4113 |
+
{
|
4114 |
+
"epoch": 0.8576385718199251,
|
4115 |
+
"grad_norm": 0.1356821060180664,
|
4116 |
+
"learning_rate": 3.3219178082191784e-06,
|
4117 |
+
"loss": 1.1168,
|
4118 |
+
"step": 587
|
4119 |
+
},
|
4120 |
+
{
|
4121 |
+
"epoch": 0.8590996256049676,
|
4122 |
+
"grad_norm": 0.1314031332731247,
|
4123 |
+
"learning_rate": 3.2876712328767123e-06,
|
4124 |
+
"loss": 1.1407,
|
4125 |
+
"step": 588
|
4126 |
+
},
|
4127 |
+
{
|
4128 |
+
"epoch": 0.86056067939001,
|
4129 |
+
"grad_norm": 0.1098315566778183,
|
4130 |
+
"learning_rate": 3.253424657534247e-06,
|
4131 |
+
"loss": 1.1737,
|
4132 |
+
"step": 589
|
4133 |
+
},
|
4134 |
+
{
|
4135 |
+
"epoch": 0.8620217331750525,
|
4136 |
+
"grad_norm": 0.12484846264123917,
|
4137 |
+
"learning_rate": 3.2191780821917813e-06,
|
4138 |
+
"loss": 1.29,
|
4139 |
+
"step": 590
|
4140 |
+
},
|
4141 |
+
{
|
4142 |
+
"epoch": 0.863482786960095,
|
4143 |
+
"grad_norm": 0.12023176997900009,
|
4144 |
+
"learning_rate": 3.184931506849315e-06,
|
4145 |
+
"loss": 1.2455,
|
4146 |
+
"step": 591
|
4147 |
+
},
|
4148 |
+
{
|
4149 |
+
"epoch": 0.8649438407451374,
|
4150 |
+
"grad_norm": 0.1124570220708847,
|
4151 |
+
"learning_rate": 3.1506849315068495e-06,
|
4152 |
+
"loss": 1.1792,
|
4153 |
+
"step": 592
|
4154 |
+
},
|
4155 |
+
{
|
4156 |
+
"epoch": 0.8664048945301799,
|
4157 |
+
"grad_norm": 0.11319974809885025,
|
4158 |
+
"learning_rate": 3.1164383561643843e-06,
|
4159 |
+
"loss": 1.1708,
|
4160 |
+
"step": 593
|
4161 |
+
},
|
4162 |
+
{
|
4163 |
+
"epoch": 0.8678659483152223,
|
4164 |
+
"grad_norm": 0.11844311654567719,
|
4165 |
+
"learning_rate": 3.082191780821918e-06,
|
4166 |
+
"loss": 1.1962,
|
4167 |
+
"step": 594
|
4168 |
+
},
|
4169 |
+
{
|
4170 |
+
"epoch": 0.8693270021002648,
|
4171 |
+
"grad_norm": 0.12392130494117737,
|
4172 |
+
"learning_rate": 3.0479452054794525e-06,
|
4173 |
+
"loss": 1.1748,
|
4174 |
+
"step": 595
|
4175 |
+
},
|
4176 |
+
{
|
4177 |
+
"epoch": 0.8707880558853073,
|
4178 |
+
"grad_norm": 0.1305224448442459,
|
4179 |
+
"learning_rate": 3.0136986301369864e-06,
|
4180 |
+
"loss": 1.2388,
|
4181 |
+
"step": 596
|
4182 |
+
},
|
4183 |
+
{
|
4184 |
+
"epoch": 0.8722491096703497,
|
4185 |
+
"grad_norm": 0.11098136007785797,
|
4186 |
+
"learning_rate": 2.979452054794521e-06,
|
4187 |
+
"loss": 1.2109,
|
4188 |
+
"step": 597
|
4189 |
+
},
|
4190 |
+
{
|
4191 |
+
"epoch": 0.8737101634553922,
|
4192 |
+
"grad_norm": 0.11488241702318192,
|
4193 |
+
"learning_rate": 2.945205479452055e-06,
|
4194 |
+
"loss": 1.2222,
|
4195 |
+
"step": 598
|
4196 |
+
},
|
4197 |
+
{
|
4198 |
+
"epoch": 0.8751712172404347,
|
4199 |
+
"grad_norm": 0.11296083778142929,
|
4200 |
+
"learning_rate": 2.9109589041095893e-06,
|
4201 |
+
"loss": 1.2425,
|
4202 |
+
"step": 599
|
4203 |
+
},
|
4204 |
+
{
|
4205 |
+
"epoch": 0.8766322710254771,
|
4206 |
+
"grad_norm": 0.11494144797325134,
|
4207 |
+
"learning_rate": 2.876712328767123e-06,
|
4208 |
+
"loss": 1.1589,
|
4209 |
+
"step": 600
|
4210 |
+
},
|
4211 |
+
{
|
4212 |
+
"epoch": 0.8780933248105196,
|
4213 |
+
"grad_norm": 0.11837083101272583,
|
4214 |
+
"learning_rate": 2.842465753424658e-06,
|
4215 |
+
"loss": 1.1556,
|
4216 |
+
"step": 601
|
4217 |
+
},
|
4218 |
+
{
|
4219 |
+
"epoch": 0.8795543785955621,
|
4220 |
+
"grad_norm": 0.12680098414421082,
|
4221 |
+
"learning_rate": 2.8082191780821922e-06,
|
4222 |
+
"loss": 1.1742,
|
4223 |
+
"step": 602
|
4224 |
+
},
|
4225 |
+
{
|
4226 |
+
"epoch": 0.8810154323806045,
|
4227 |
+
"grad_norm": 0.12157981842756271,
|
4228 |
+
"learning_rate": 2.773972602739726e-06,
|
4229 |
+
"loss": 1.1715,
|
4230 |
+
"step": 603
|
4231 |
+
},
|
4232 |
+
{
|
4233 |
+
"epoch": 0.882476486165647,
|
4234 |
+
"grad_norm": 0.11859599500894547,
|
4235 |
+
"learning_rate": 2.7397260273972604e-06,
|
4236 |
+
"loss": 1.1988,
|
4237 |
+
"step": 604
|
4238 |
+
},
|
4239 |
+
{
|
4240 |
+
"epoch": 0.8839375399506895,
|
4241 |
+
"grad_norm": 0.11733684688806534,
|
4242 |
+
"learning_rate": 2.705479452054795e-06,
|
4243 |
+
"loss": 1.2507,
|
4244 |
+
"step": 605
|
4245 |
+
},
|
4246 |
+
{
|
4247 |
+
"epoch": 0.8853985937357319,
|
4248 |
+
"grad_norm": 0.11254255473613739,
|
4249 |
+
"learning_rate": 2.671232876712329e-06,
|
4250 |
+
"loss": 1.1843,
|
4251 |
+
"step": 606
|
4252 |
+
},
|
4253 |
+
{
|
4254 |
+
"epoch": 0.8868596475207744,
|
4255 |
+
"grad_norm": 0.11433933675289154,
|
4256 |
+
"learning_rate": 2.6369863013698634e-06,
|
4257 |
+
"loss": 1.1129,
|
4258 |
+
"step": 607
|
4259 |
+
},
|
4260 |
+
{
|
4261 |
+
"epoch": 0.8883207013058169,
|
4262 |
+
"grad_norm": 0.11361224204301834,
|
4263 |
+
"learning_rate": 2.6027397260273973e-06,
|
4264 |
+
"loss": 1.1883,
|
4265 |
+
"step": 608
|
4266 |
+
},
|
4267 |
+
{
|
4268 |
+
"epoch": 0.8897817550908593,
|
4269 |
+
"grad_norm": 0.11042333394289017,
|
4270 |
+
"learning_rate": 2.568493150684932e-06,
|
4271 |
+
"loss": 1.2322,
|
4272 |
+
"step": 609
|
4273 |
+
},
|
4274 |
+
{
|
4275 |
+
"epoch": 0.8912428088759018,
|
4276 |
+
"grad_norm": 0.1262194812297821,
|
4277 |
+
"learning_rate": 2.534246575342466e-06,
|
4278 |
+
"loss": 1.1767,
|
4279 |
+
"step": 610
|
4280 |
+
},
|
4281 |
+
{
|
4282 |
+
"epoch": 0.8927038626609443,
|
4283 |
+
"grad_norm": 0.10622208565473557,
|
4284 |
+
"learning_rate": 2.5e-06,
|
4285 |
+
"loss": 1.2355,
|
4286 |
+
"step": 611
|
4287 |
+
},
|
4288 |
+
{
|
4289 |
+
"epoch": 0.8941649164459866,
|
4290 |
+
"grad_norm": 0.11434955894947052,
|
4291 |
+
"learning_rate": 2.4657534246575345e-06,
|
4292 |
+
"loss": 1.1571,
|
4293 |
+
"step": 612
|
4294 |
+
},
|
4295 |
+
{
|
4296 |
+
"epoch": 0.8956259702310291,
|
4297 |
+
"grad_norm": 0.11948630958795547,
|
4298 |
+
"learning_rate": 2.431506849315069e-06,
|
4299 |
+
"loss": 1.1345,
|
4300 |
+
"step": 613
|
4301 |
+
},
|
4302 |
+
{
|
4303 |
+
"epoch": 0.8970870240160715,
|
4304 |
+
"grad_norm": 0.119502492249012,
|
4305 |
+
"learning_rate": 2.3972602739726027e-06,
|
4306 |
+
"loss": 1.1296,
|
4307 |
+
"step": 614
|
4308 |
+
},
|
4309 |
+
{
|
4310 |
+
"epoch": 0.898548077801114,
|
4311 |
+
"grad_norm": 0.1145474910736084,
|
4312 |
+
"learning_rate": 2.363013698630137e-06,
|
4313 |
+
"loss": 1.2316,
|
4314 |
+
"step": 615
|
4315 |
+
},
|
4316 |
+
{
|
4317 |
+
"epoch": 0.9000091315861565,
|
4318 |
+
"grad_norm": 0.11403004080057144,
|
4319 |
+
"learning_rate": 2.3287671232876713e-06,
|
4320 |
+
"loss": 1.1536,
|
4321 |
+
"step": 616
|
4322 |
+
},
|
4323 |
+
{
|
4324 |
+
"epoch": 0.9014701853711989,
|
4325 |
+
"grad_norm": 0.11280905455350876,
|
4326 |
+
"learning_rate": 2.2945205479452057e-06,
|
4327 |
+
"loss": 1.1922,
|
4328 |
+
"step": 617
|
4329 |
+
},
|
4330 |
+
{
|
4331 |
+
"epoch": 0.9029312391562414,
|
4332 |
+
"grad_norm": 0.10832927376031876,
|
4333 |
+
"learning_rate": 2.26027397260274e-06,
|
4334 |
+
"loss": 1.2386,
|
4335 |
+
"step": 618
|
4336 |
+
},
|
4337 |
+
{
|
4338 |
+
"epoch": 0.9043922929412839,
|
4339 |
+
"grad_norm": 0.11291555315256119,
|
4340 |
+
"learning_rate": 2.2260273972602743e-06,
|
4341 |
+
"loss": 1.1788,
|
4342 |
+
"step": 619
|
4343 |
+
},
|
4344 |
+
{
|
4345 |
+
"epoch": 0.9058533467263263,
|
4346 |
+
"grad_norm": 0.1250094771385193,
|
4347 |
+
"learning_rate": 2.191780821917808e-06,
|
4348 |
+
"loss": 1.2142,
|
4349 |
+
"step": 620
|
4350 |
+
},
|
4351 |
+
{
|
4352 |
+
"epoch": 0.9073144005113688,
|
4353 |
+
"grad_norm": 0.11631559580564499,
|
4354 |
+
"learning_rate": 2.1575342465753425e-06,
|
4355 |
+
"loss": 1.2121,
|
4356 |
+
"step": 621
|
4357 |
+
},
|
4358 |
+
{
|
4359 |
+
"epoch": 0.9087754542964113,
|
4360 |
+
"grad_norm": 0.11511734873056412,
|
4361 |
+
"learning_rate": 2.123287671232877e-06,
|
4362 |
+
"loss": 1.1137,
|
4363 |
+
"step": 622
|
4364 |
+
},
|
4365 |
+
{
|
4366 |
+
"epoch": 0.9102365080814537,
|
4367 |
+
"grad_norm": 0.12305217236280441,
|
4368 |
+
"learning_rate": 2.089041095890411e-06,
|
4369 |
+
"loss": 1.1745,
|
4370 |
+
"step": 623
|
4371 |
+
},
|
4372 |
+
{
|
4373 |
+
"epoch": 0.9116975618664962,
|
4374 |
+
"grad_norm": 0.14875584840774536,
|
4375 |
+
"learning_rate": 2.0547945205479454e-06,
|
4376 |
+
"loss": 1.1986,
|
4377 |
+
"step": 624
|
4378 |
+
},
|
4379 |
+
{
|
4380 |
+
"epoch": 0.9131586156515387,
|
4381 |
+
"grad_norm": 0.13255374133586884,
|
4382 |
+
"learning_rate": 2.0205479452054797e-06,
|
4383 |
+
"loss": 1.2716,
|
4384 |
+
"step": 625
|
4385 |
+
},
|
4386 |
+
{
|
4387 |
+
"epoch": 0.9146196694365811,
|
4388 |
+
"grad_norm": 0.13747917115688324,
|
4389 |
+
"learning_rate": 1.9863013698630136e-06,
|
4390 |
+
"loss": 1.1092,
|
4391 |
+
"step": 626
|
4392 |
+
},
|
4393 |
+
{
|
4394 |
+
"epoch": 0.9160807232216236,
|
4395 |
+
"grad_norm": 0.11340590566396713,
|
4396 |
+
"learning_rate": 1.952054794520548e-06,
|
4397 |
+
"loss": 1.2085,
|
4398 |
+
"step": 627
|
4399 |
+
},
|
4400 |
+
{
|
4401 |
+
"epoch": 0.9175417770066661,
|
4402 |
+
"grad_norm": 0.11387283354997635,
|
4403 |
+
"learning_rate": 1.9178082191780823e-06,
|
4404 |
+
"loss": 1.2549,
|
4405 |
+
"step": 628
|
4406 |
+
},
|
4407 |
+
{
|
4408 |
+
"epoch": 0.9190028307917085,
|
4409 |
+
"grad_norm": 0.11490115523338318,
|
4410 |
+
"learning_rate": 1.8835616438356166e-06,
|
4411 |
+
"loss": 1.1344,
|
4412 |
+
"step": 629
|
4413 |
+
},
|
4414 |
+
{
|
4415 |
+
"epoch": 0.920463884576751,
|
4416 |
+
"grad_norm": 0.10832976549863815,
|
4417 |
+
"learning_rate": 1.8493150684931507e-06,
|
4418 |
+
"loss": 1.2395,
|
4419 |
+
"step": 630
|
4420 |
+
},
|
4421 |
+
{
|
4422 |
+
"epoch": 0.9219249383617935,
|
4423 |
+
"grad_norm": 0.12825772166252136,
|
4424 |
+
"learning_rate": 1.8150684931506852e-06,
|
4425 |
+
"loss": 1.2296,
|
4426 |
+
"step": 631
|
4427 |
+
},
|
4428 |
+
{
|
4429 |
+
"epoch": 0.9233859921468359,
|
4430 |
+
"grad_norm": 0.1021864116191864,
|
4431 |
+
"learning_rate": 1.7808219178082193e-06,
|
4432 |
+
"loss": 1.2589,
|
4433 |
+
"step": 632
|
4434 |
+
},
|
4435 |
+
{
|
4436 |
+
"epoch": 0.9248470459318784,
|
4437 |
+
"grad_norm": 0.12429718673229218,
|
4438 |
+
"learning_rate": 1.7465753424657536e-06,
|
4439 |
+
"loss": 1.1167,
|
4440 |
+
"step": 633
|
4441 |
+
},
|
4442 |
+
{
|
4443 |
+
"epoch": 0.9263080997169209,
|
4444 |
+
"grad_norm": 0.10847421735525131,
|
4445 |
+
"learning_rate": 1.7123287671232877e-06,
|
4446 |
+
"loss": 1.1975,
|
4447 |
+
"step": 634
|
4448 |
+
},
|
4449 |
+
{
|
4450 |
+
"epoch": 0.9277691535019633,
|
4451 |
+
"grad_norm": 0.11923690140247345,
|
4452 |
+
"learning_rate": 1.678082191780822e-06,
|
4453 |
+
"loss": 1.1595,
|
4454 |
+
"step": 635
|
4455 |
}
|
4456 |
],
|
4457 |
"logging_steps": 1,
|
|
|
4471 |
"attributes": {}
|
4472 |
}
|
4473 |
},
|
4474 |
+
"total_flos": 7.18235035100799e+17,
|
4475 |
"train_batch_size": 4,
|
4476 |
"trial_name": null,
|
4477 |
"trial_params": null
|