DuongTrongChi committed
Training in progress, step 911, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:77c90016a866284a2715dd96997296bc544f7f98889e7c3a425100f87afad0cf
 size 100198584
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b4a7bafef733afd7ab58bb79bbd7b589528548bcb6cb645b64505a525cc544ab
 size 50675604
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:04e2e9566872b589840ca5036c48f962e578ce55c36abdd70f84617113393bbb
 size 1064
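The three binary files above are tracked with Git LFS, so this commit only rewrites their pointer files (version, oid, size). Below is a minimal sketch, in Python, of how the new sha256 oids could be checked against locally pulled blobs; the local paths assume the repository has been cloned and git lfs pull has been run.

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream the file in chunks and return its hex-encoded SHA-256 digest.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oids copied from the new LFS pointer files in this commit.
expected = {
    "last-checkpoint/adapter_model.safetensors": "77c90016a866284a2715dd96997296bc544f7f98889e7c3a425100f87afad0cf",
    "last-checkpoint/optimizer.pt": "b4a7bafef733afd7ab58bb79bbd7b589528548bcb6cb645b64505a525cc544ab",
    "last-checkpoint/scheduler.pt": "04e2e9566872b589840ca5036c48f962e578ce55c36abdd70f84617113393bbb",
}

for name, oid in expected.items():
    actual = sha256_of(Path(name))  # assumes the blob exists locally
    print(name, "ok" if actual == oid else "mismatch")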
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9997256515775035,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 911,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6216,6 +6216,174 @@
       "learning_rate": 5.91861898890259e-07,
       "loss": 1.1123,
       "step": 887
+    },
+    {
+      "epoch": 0.974485596707819,
+      "grad_norm": 0.13672898709774017,
+      "learning_rate": 5.672009864364982e-07,
+      "loss": 1.1418,
+      "step": 888
+    },
+    {
+      "epoch": 0.9755829903978052,
+      "grad_norm": 0.14941120147705078,
+      "learning_rate": 5.425400739827374e-07,
+      "loss": 1.1893,
+      "step": 889
+    },
+    {
+      "epoch": 0.9766803840877915,
+      "grad_norm": 0.13195830583572388,
+      "learning_rate": 5.178791615289766e-07,
+      "loss": 1.1235,
+      "step": 890
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 0.13043729960918427,
+      "learning_rate": 4.932182490752158e-07,
+      "loss": 1.1237,
+      "step": 891
+    },
+    {
+      "epoch": 0.978875171467764,
+      "grad_norm": 0.1450997292995453,
+      "learning_rate": 4.6855733662145503e-07,
+      "loss": 1.1434,
+      "step": 892
+    },
+    {
+      "epoch": 0.9799725651577503,
+      "grad_norm": 0.12888318300247192,
+      "learning_rate": 4.438964241676942e-07,
+      "loss": 1.1535,
+      "step": 893
+    },
+    {
+      "epoch": 0.9810699588477366,
+      "grad_norm": 0.1358352154493332,
+      "learning_rate": 4.1923551171393343e-07,
+      "loss": 1.0979,
+      "step": 894
+    },
+    {
+      "epoch": 0.9821673525377229,
+      "grad_norm": 0.1400756686925888,
+      "learning_rate": 3.9457459926017265e-07,
+      "loss": 1.0588,
+      "step": 895
+    },
+    {
+      "epoch": 0.9832647462277092,
+      "grad_norm": 0.1559644639492035,
+      "learning_rate": 3.699136868064119e-07,
+      "loss": 1.137,
+      "step": 896
+    },
+    {
+      "epoch": 0.9843621399176955,
+      "grad_norm": 0.13535606861114502,
+      "learning_rate": 3.4525277435265105e-07,
+      "loss": 1.1953,
+      "step": 897
+    },
+    {
+      "epoch": 0.9854595336076818,
+      "grad_norm": 0.1321636438369751,
+      "learning_rate": 3.205918618988903e-07,
+      "loss": 1.0905,
+      "step": 898
+    },
+    {
+      "epoch": 0.9865569272976681,
+      "grad_norm": 0.14353665709495544,
+      "learning_rate": 2.959309494451295e-07,
+      "loss": 1.126,
+      "step": 899
+    },
+    {
+      "epoch": 0.9876543209876543,
+      "grad_norm": 0.1350976675748825,
+      "learning_rate": 2.712700369913687e-07,
+      "loss": 1.1387,
+      "step": 900
+    },
+    {
+      "epoch": 0.9887517146776406,
+      "grad_norm": 0.13206009566783905,
+      "learning_rate": 2.466091245376079e-07,
+      "loss": 1.0879,
+      "step": 901
+    },
+    {
+      "epoch": 0.9898491083676269,
+      "grad_norm": 0.13680791854858398,
+      "learning_rate": 2.219482120838471e-07,
+      "loss": 1.1285,
+      "step": 902
+    },
+    {
+      "epoch": 0.9909465020576131,
+      "grad_norm": 0.17720668017864227,
+      "learning_rate": 1.9728729963008633e-07,
+      "loss": 1.0461,
+      "step": 903
+    },
+    {
+      "epoch": 0.9920438957475994,
+      "grad_norm": 0.14731575548648834,
+      "learning_rate": 1.7262638717632553e-07,
+      "loss": 1.0347,
+      "step": 904
+    },
+    {
+      "epoch": 0.9931412894375857,
+      "grad_norm": 0.14285138249397278,
+      "learning_rate": 1.4796547472256475e-07,
+      "loss": 1.0603,
+      "step": 905
+    },
+    {
+      "epoch": 0.994238683127572,
+      "grad_norm": 0.1308155655860901,
+      "learning_rate": 1.2330456226880395e-07,
+      "loss": 1.101,
+      "step": 906
+    },
+    {
+      "epoch": 0.9953360768175583,
+      "grad_norm": 0.1355922371149063,
+      "learning_rate": 9.864364981504316e-08,
+      "loss": 1.2046,
+      "step": 907
+    },
+    {
+      "epoch": 0.9964334705075446,
+      "grad_norm": 0.13389693200588226,
+      "learning_rate": 7.398273736128238e-08,
+      "loss": 1.1953,
+      "step": 908
+    },
+    {
+      "epoch": 0.9975308641975309,
+      "grad_norm": 0.1277182251214981,
+      "learning_rate": 4.932182490752158e-08,
+      "loss": 1.1153,
+      "step": 909
+    },
+    {
+      "epoch": 0.9986282578875172,
+      "grad_norm": 0.1549104005098343,
+      "learning_rate": 2.466091245376079e-08,
+      "loss": 1.0726,
+      "step": 910
+    },
+    {
+      "epoch": 0.9997256515775035,
+      "grad_norm": 0.1443055421113968,
+      "learning_rate": 0.0,
+      "loss": 1.1051,
+      "step": 911
     }
   ],
   "logging_steps": 1,
@@ -6230,12 +6398,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.
+  "total_flos": 9.455125939404595e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
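For reference, a minimal sketch of how the updated trainer_state.json could be inspected locally to confirm where training stopped. It assumes the standard Hugging Face Trainer layout, in which the per-step entries shown in the diff sit under a "log_history" list (that key name lies outside the visible diff context and is an assumption here).

import json
from pathlib import Path

state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global_step:", state["global_step"])   # 911 in this commit
print("epoch:", state["epoch"])               # ~0.9997
print("total_flos:", state["total_flos"])     # ~9.455e+17

# Last few logged entries; "log_history" is the assumed key for the list in the diff.
for entry in state.get("log_history", [])[-3:]:
    print(entry.get("step"), entry.get("loss"), entry.get("learning_rate"))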