Training in progress, step 20000
Browse files- model.safetensors +1 -1
- trainer_log.jsonl +155 -0
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 50161688
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80c755988a144e87b15013585038690885a0c219d5bd1afc8f491a9e16b1b25d
|
3 |
size 50161688
|
trainer_log.jsonl
CHANGED
@@ -469,3 +469,158 @@
|
|
469 |
{"current_steps": 15008, "total_steps": 20000, "loss": 3.2985, "eval_loss": null, "predict_loss": null, "learning_rate": 7.017883949439288e-05, "epoch": 0.8548157430084866, "percentage": 75.04}
|
470 |
{"current_steps": 15040, "total_steps": 20000, "loss": 3.4314, "eval_loss": null, "predict_loss": null, "learning_rate": 6.969216655409388e-05, "epoch": 0.8566383778549866, "percentage": 75.2}
|
471 |
{"current_steps": 15072, "total_steps": 20000, "loss": 3.3598, "eval_loss": null, "predict_loss": null, "learning_rate": 6.92079505675697e-05, "epoch": 0.8584610127014866, "percentage": 75.36}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
469 |
{"current_steps": 15008, "total_steps": 20000, "loss": 3.2985, "eval_loss": null, "predict_loss": null, "learning_rate": 7.017883949439288e-05, "epoch": 0.8548157430084866, "percentage": 75.04}
|
470 |
{"current_steps": 15040, "total_steps": 20000, "loss": 3.4314, "eval_loss": null, "predict_loss": null, "learning_rate": 6.969216655409388e-05, "epoch": 0.8566383778549866, "percentage": 75.2}
|
471 |
{"current_steps": 15072, "total_steps": 20000, "loss": 3.3598, "eval_loss": null, "predict_loss": null, "learning_rate": 6.92079505675697e-05, "epoch": 0.8584610127014866, "percentage": 75.36}
|
472 |
+
{"current_steps": 15104, "total_steps": 20000, "loss": 3.3572, "eval_loss": null, "predict_loss": null, "learning_rate": 6.872620401749094e-05, "epoch": 0.8602836475479866, "percentage": 75.52}
|
473 |
+
{"current_steps": 15136, "total_steps": 20000, "loss": 3.5778, "eval_loss": null, "predict_loss": null, "learning_rate": 6.824693932286834e-05, "epoch": 0.8621062823944865, "percentage": 75.68}
|
474 |
+
{"current_steps": 15168, "total_steps": 20000, "loss": 3.418, "eval_loss": null, "predict_loss": null, "learning_rate": 6.777016883873253e-05, "epoch": 0.8639289172409865, "percentage": 75.84}
|
475 |
+
{"current_steps": 15200, "total_steps": 20000, "loss": 3.417, "eval_loss": null, "predict_loss": null, "learning_rate": 6.729590485581552e-05, "epoch": 0.8657515520874864, "percentage": 76.0}
|
476 |
+
{"current_steps": 15232, "total_steps": 20000, "loss": 3.4092, "eval_loss": null, "predict_loss": null, "learning_rate": 6.68241596002339e-05, "epoch": 0.8675741869339865, "percentage": 76.16}
|
477 |
+
{"current_steps": 15264, "total_steps": 20000, "loss": 3.3916, "eval_loss": null, "predict_loss": null, "learning_rate": 6.63549452331737e-05, "epoch": 0.8693968217804864, "percentage": 76.32}
|
478 |
+
{"current_steps": 15296, "total_steps": 20000, "loss": 3.5273, "eval_loss": null, "predict_loss": null, "learning_rate": 6.588827385057663e-05, "epoch": 0.8712194566269864, "percentage": 76.48}
|
479 |
+
{"current_steps": 15328, "total_steps": 20000, "loss": 3.5554, "eval_loss": null, "predict_loss": null, "learning_rate": 6.542415748282867e-05, "epoch": 0.8730420914734863, "percentage": 76.64}
|
480 |
+
{"current_steps": 15360, "total_steps": 20000, "loss": 3.3427, "eval_loss": null, "predict_loss": null, "learning_rate": 6.496260809444961e-05, "epoch": 0.8748647263199864, "percentage": 76.8}
|
481 |
+
{"current_steps": 15392, "total_steps": 20000, "loss": 3.2446, "eval_loss": null, "predict_loss": null, "learning_rate": 6.45036375837848e-05, "epoch": 0.8766873611664863, "percentage": 76.96}
|
482 |
+
{"current_steps": 15424, "total_steps": 20000, "loss": 3.4997, "eval_loss": null, "predict_loss": null, "learning_rate": 6.404725778269821e-05, "epoch": 0.8785099960129863, "percentage": 77.12}
|
483 |
+
{"current_steps": 15456, "total_steps": 20000, "loss": 3.3077, "eval_loss": null, "predict_loss": null, "learning_rate": 6.359348045626768e-05, "epoch": 0.8803326308594862, "percentage": 77.28}
|
484 |
+
{"current_steps": 15488, "total_steps": 20000, "loss": 3.3685, "eval_loss": null, "predict_loss": null, "learning_rate": 6.314231730248144e-05, "epoch": 0.8821552657059862, "percentage": 77.44}
|
485 |
+
{"current_steps": 15520, "total_steps": 20000, "loss": 3.2327, "eval_loss": null, "predict_loss": null, "learning_rate": 6.269377995193662e-05, "epoch": 0.8839779005524862, "percentage": 77.6}
|
486 |
+
{"current_steps": 15552, "total_steps": 20000, "loss": 3.3271, "eval_loss": null, "predict_loss": null, "learning_rate": 6.224787996753946e-05, "epoch": 0.8858005353989862, "percentage": 77.76}
|
487 |
+
{"current_steps": 15584, "total_steps": 20000, "loss": 3.5036, "eval_loss": null, "predict_loss": null, "learning_rate": 6.180462884420702e-05, "epoch": 0.8876231702454861, "percentage": 77.92}
|
488 |
+
{"current_steps": 15616, "total_steps": 20000, "loss": 3.3916, "eval_loss": null, "predict_loss": null, "learning_rate": 6.136403800857113e-05, "epoch": 0.8894458050919861, "percentage": 78.08}
|
489 |
+
{"current_steps": 15648, "total_steps": 20000, "loss": 3.3146, "eval_loss": null, "predict_loss": null, "learning_rate": 6.092611881868369e-05, "epoch": 0.891268439938486, "percentage": 78.24}
|
490 |
+
{"current_steps": 15680, "total_steps": 20000, "loss": 3.2211, "eval_loss": null, "predict_loss": null, "learning_rate": 6.049088256372386e-05, "epoch": 0.8930910747849861, "percentage": 78.4}
|
491 |
+
{"current_steps": 15712, "total_steps": 20000, "loss": 3.1301, "eval_loss": null, "predict_loss": null, "learning_rate": 6.005834046370704e-05, "epoch": 0.894913709631486, "percentage": 78.56}
|
492 |
+
{"current_steps": 15744, "total_steps": 20000, "loss": 3.4451, "eval_loss": null, "predict_loss": null, "learning_rate": 5.962850366919566e-05, "epoch": 0.896736344477986, "percentage": 78.72}
|
493 |
+
{"current_steps": 15776, "total_steps": 20000, "loss": 3.332, "eval_loss": null, "predict_loss": null, "learning_rate": 5.9201383261011636e-05, "epoch": 0.8985589793244859, "percentage": 78.88}
|
494 |
+
{"current_steps": 15808, "total_steps": 20000, "loss": 3.2727, "eval_loss": null, "predict_loss": null, "learning_rate": 5.877699024995088e-05, "epoch": 0.9003816141709859, "percentage": 79.04}
|
495 |
+
{"current_steps": 15840, "total_steps": 20000, "loss": 3.301, "eval_loss": null, "predict_loss": null, "learning_rate": 5.835533557649932e-05, "epoch": 0.9022042490174859, "percentage": 79.2}
|
496 |
+
{"current_steps": 15872, "total_steps": 20000, "loss": 3.3599, "eval_loss": null, "predict_loss": null, "learning_rate": 5.793643011055091e-05, "epoch": 0.9040268838639859, "percentage": 79.36}
|
497 |
+
{"current_steps": 15904, "total_steps": 20000, "loss": 3.2228, "eval_loss": null, "predict_loss": null, "learning_rate": 5.752028465112736e-05, "epoch": 0.9058495187104858, "percentage": 79.52}
|
498 |
+
{"current_steps": 15936, "total_steps": 20000, "loss": 3.3492, "eval_loss": null, "predict_loss": null, "learning_rate": 5.710690992609983e-05, "epoch": 0.9076721535569858, "percentage": 79.68}
|
499 |
+
{"current_steps": 15968, "total_steps": 20000, "loss": 3.2855, "eval_loss": null, "predict_loss": null, "learning_rate": 5.6696316591912355e-05, "epoch": 0.9094947884034857, "percentage": 79.84}
|
500 |
+
{"current_steps": 16000, "total_steps": 20000, "loss": 3.3544, "eval_loss": null, "predict_loss": null, "learning_rate": 5.628851523330708e-05, "epoch": 0.9113174232499858, "percentage": 80.0}
|
501 |
+
{"current_steps": 16032, "total_steps": 20000, "loss": 3.4634, "eval_loss": null, "predict_loss": null, "learning_rate": 5.588351636305146e-05, "epoch": 0.9131400580964857, "percentage": 80.16}
|
502 |
+
{"current_steps": 16064, "total_steps": 20000, "loss": 3.3, "eval_loss": null, "predict_loss": null, "learning_rate": 5.548133042166714e-05, "epoch": 0.9149626929429857, "percentage": 80.32}
|
503 |
+
{"current_steps": 16096, "total_steps": 20000, "loss": 3.493, "eval_loss": null, "predict_loss": null, "learning_rate": 5.5081967777161005e-05, "epoch": 0.9167853277894856, "percentage": 80.48}
|
504 |
+
{"current_steps": 16128, "total_steps": 20000, "loss": 3.4965, "eval_loss": null, "predict_loss": null, "learning_rate": 5.468543872475766e-05, "epoch": 0.9186079626359857, "percentage": 80.64}
|
505 |
+
{"current_steps": 16160, "total_steps": 20000, "loss": 3.3215, "eval_loss": null, "predict_loss": null, "learning_rate": 5.42917534866342e-05, "epoch": 0.9204305974824856, "percentage": 80.8}
|
506 |
+
{"current_steps": 16192, "total_steps": 20000, "loss": 3.3324, "eval_loss": null, "predict_loss": null, "learning_rate": 5.390092221165669e-05, "epoch": 0.9222532323289856, "percentage": 80.96}
|
507 |
+
{"current_steps": 16224, "total_steps": 20000, "loss": 3.2286, "eval_loss": null, "predict_loss": null, "learning_rate": 5.3512954975118384e-05, "epoch": 0.9240758671754855, "percentage": 81.12}
|
508 |
+
{"current_steps": 16256, "total_steps": 20000, "loss": 3.4596, "eval_loss": null, "predict_loss": null, "learning_rate": 5.3127861778480155e-05, "epoch": 0.9258985020219855, "percentage": 81.28}
|
509 |
+
{"current_steps": 16288, "total_steps": 20000, "loss": 3.5627, "eval_loss": null, "predict_loss": null, "learning_rate": 5.274565254911261e-05, "epoch": 0.9277211368684855, "percentage": 81.44}
|
510 |
+
{"current_steps": 16320, "total_steps": 20000, "loss": 3.5229, "eval_loss": null, "predict_loss": null, "learning_rate": 5.236633714004014e-05, "epoch": 0.9295437717149855, "percentage": 81.6}
|
511 |
+
{"current_steps": 16352, "total_steps": 20000, "loss": 3.3047, "eval_loss": null, "predict_loss": null, "learning_rate": 5.1989925329686985e-05, "epoch": 0.9313664065614854, "percentage": 81.76}
|
512 |
+
{"current_steps": 16384, "total_steps": 20000, "loss": 3.2693, "eval_loss": null, "predict_loss": null, "learning_rate": 5.161642682162506e-05, "epoch": 0.9331890414079854, "percentage": 81.92}
|
513 |
+
{"current_steps": 16416, "total_steps": 20000, "loss": 3.4454, "eval_loss": null, "predict_loss": null, "learning_rate": 5.1245851244323926e-05, "epoch": 0.9350116762544853, "percentage": 82.08}
|
514 |
+
{"current_steps": 16448, "total_steps": 20000, "loss": 3.4168, "eval_loss": null, "predict_loss": null, "learning_rate": 5.087820815090239e-05, "epoch": 0.9368343111009854, "percentage": 82.24}
|
515 |
+
{"current_steps": 16480, "total_steps": 20000, "loss": 3.3262, "eval_loss": null, "predict_loss": null, "learning_rate": 5.0513507018882495e-05, "epoch": 0.9386569459474854, "percentage": 82.4}
|
516 |
+
{"current_steps": 16512, "total_steps": 20000, "loss": 3.349, "eval_loss": null, "predict_loss": null, "learning_rate": 5.015175724994498e-05, "epoch": 0.9404795807939853, "percentage": 82.56}
|
517 |
+
{"current_steps": 16544, "total_steps": 20000, "loss": 3.5162, "eval_loss": null, "predict_loss": null, "learning_rate": 4.979296816968697e-05, "epoch": 0.9423022156404853, "percentage": 82.72}
|
518 |
+
{"current_steps": 16576, "total_steps": 20000, "loss": 3.2785, "eval_loss": null, "predict_loss": null, "learning_rate": 4.943714902738163e-05, "epoch": 0.9441248504869852, "percentage": 82.88}
|
519 |
+
{"current_steps": 16608, "total_steps": 20000, "loss": 3.5329, "eval_loss": null, "predict_loss": null, "learning_rate": 4.908430899573967e-05, "epoch": 0.9459474853334853, "percentage": 83.04}
|
520 |
+
{"current_steps": 16640, "total_steps": 20000, "loss": 3.3971, "eval_loss": null, "predict_loss": null, "learning_rate": 4.873445717067292e-05, "epoch": 0.9477701201799852, "percentage": 83.2}
|
521 |
+
{"current_steps": 16672, "total_steps": 20000, "loss": 3.2827, "eval_loss": null, "predict_loss": null, "learning_rate": 4.838760257105979e-05, "epoch": 0.9495927550264852, "percentage": 83.36}
|
522 |
+
{"current_steps": 16704, "total_steps": 20000, "loss": 3.4696, "eval_loss": null, "predict_loss": null, "learning_rate": 4.8043754138512794e-05, "epoch": 0.9514153898729851, "percentage": 83.52}
|
523 |
+
{"current_steps": 16736, "total_steps": 20000, "loss": 3.3106, "eval_loss": null, "predict_loss": null, "learning_rate": 4.770292073714807e-05, "epoch": 0.9532380247194852, "percentage": 83.68}
|
524 |
+
{"current_steps": 16768, "total_steps": 20000, "loss": 3.1202, "eval_loss": null, "predict_loss": null, "learning_rate": 4.73651111533569e-05, "epoch": 0.9550606595659851, "percentage": 83.84}
|
525 |
+
{"current_steps": 16800, "total_steps": 20000, "loss": 3.4183, "eval_loss": null, "predict_loss": null, "learning_rate": 4.703033409557903e-05, "epoch": 0.9568832944124851, "percentage": 84.0}
|
526 |
+
{"current_steps": 16832, "total_steps": 20000, "loss": 3.2588, "eval_loss": null, "predict_loss": null, "learning_rate": 4.669859819407844e-05, "epoch": 0.958705929258985, "percentage": 84.16}
|
527 |
+
{"current_steps": 16864, "total_steps": 20000, "loss": 3.2761, "eval_loss": null, "predict_loss": null, "learning_rate": 4.63699120007206e-05, "epoch": 0.960528564105485, "percentage": 84.32}
|
528 |
+
{"current_steps": 16896, "total_steps": 20000, "loss": 3.3405, "eval_loss": null, "predict_loss": null, "learning_rate": 4.6044283988752214e-05, "epoch": 0.962351198951985, "percentage": 84.48}
|
529 |
+
{"current_steps": 16928, "total_steps": 20000, "loss": 3.4917, "eval_loss": null, "predict_loss": null, "learning_rate": 4.572172255258268e-05, "epoch": 0.964173833798485, "percentage": 84.64}
|
530 |
+
{"current_steps": 16960, "total_steps": 20000, "loss": 3.3823, "eval_loss": null, "predict_loss": null, "learning_rate": 4.540223600756775e-05, "epoch": 0.9659964686449849, "percentage": 84.8}
|
531 |
+
{"current_steps": 16992, "total_steps": 20000, "loss": 3.2799, "eval_loss": null, "predict_loss": null, "learning_rate": 4.508583258979507e-05, "epoch": 0.9678191034914849, "percentage": 84.96}
|
532 |
+
{"current_steps": 17024, "total_steps": 20000, "loss": 3.3658, "eval_loss": null, "predict_loss": null, "learning_rate": 4.4772520455871974e-05, "epoch": 0.9696417383379848, "percentage": 85.12}
|
533 |
+
{"current_steps": 17056, "total_steps": 20000, "loss": 3.2603, "eval_loss": null, "predict_loss": null, "learning_rate": 4.446230768271513e-05, "epoch": 0.9714643731844849, "percentage": 85.28}
|
534 |
+
{"current_steps": 17088, "total_steps": 20000, "loss": 3.4131, "eval_loss": null, "predict_loss": null, "learning_rate": 4.415520226734242e-05, "epoch": 0.9732870080309848, "percentage": 85.44}
|
535 |
+
{"current_steps": 17120, "total_steps": 20000, "loss": 3.4021, "eval_loss": null, "predict_loss": null, "learning_rate": 4.385121212666663e-05, "epoch": 0.9751096428774848, "percentage": 85.6}
|
536 |
+
{"current_steps": 17152, "total_steps": 20000, "loss": 3.2652, "eval_loss": null, "predict_loss": null, "learning_rate": 4.355034509729152e-05, "epoch": 0.9769322777239847, "percentage": 85.76}
|
537 |
+
{"current_steps": 17184, "total_steps": 20000, "loss": 3.332, "eval_loss": null, "predict_loss": null, "learning_rate": 4.325260893530965e-05, "epoch": 0.9787549125704847, "percentage": 85.92}
|
538 |
+
{"current_steps": 17216, "total_steps": 20000, "loss": 3.3896, "eval_loss": null, "predict_loss": null, "learning_rate": 4.295801131610265e-05, "epoch": 0.9805775474169847, "percentage": 86.08}
|
539 |
+
{"current_steps": 17248, "total_steps": 20000, "loss": 3.1189, "eval_loss": null, "predict_loss": null, "learning_rate": 4.266655983414312e-05, "epoch": 0.9824001822634847, "percentage": 86.24}
|
540 |
+
{"current_steps": 17280, "total_steps": 20000, "loss": 3.3313, "eval_loss": null, "predict_loss": null, "learning_rate": 4.237826200279898e-05, "epoch": 0.9842228171099846, "percentage": 86.4}
|
541 |
+
{"current_steps": 17312, "total_steps": 20000, "loss": 3.3465, "eval_loss": null, "predict_loss": null, "learning_rate": 4.209312525413978e-05, "epoch": 0.9860454519564846, "percentage": 86.56}
|
542 |
+
{"current_steps": 17344, "total_steps": 20000, "loss": 3.2805, "eval_loss": null, "predict_loss": null, "learning_rate": 4.1811156938745036e-05, "epoch": 0.9878680868029845, "percentage": 86.72}
|
543 |
+
{"current_steps": 17376, "total_steps": 20000, "loss": 3.288, "eval_loss": null, "predict_loss": null, "learning_rate": 4.153236432551488e-05, "epoch": 0.9896907216494846, "percentage": 86.88}
|
544 |
+
{"current_steps": 17408, "total_steps": 20000, "loss": 3.492, "eval_loss": null, "predict_loss": null, "learning_rate": 4.125675460148243e-05, "epoch": 0.9915133564959845, "percentage": 87.04}
|
545 |
+
{"current_steps": 17440, "total_steps": 20000, "loss": 3.3222, "eval_loss": null, "predict_loss": null, "learning_rate": 4.09843348716288e-05, "epoch": 0.9933359913424845, "percentage": 87.2}
|
546 |
+
{"current_steps": 17472, "total_steps": 20000, "loss": 3.244, "eval_loss": null, "predict_loss": null, "learning_rate": 4.071511215869974e-05, "epoch": 0.9951586261889844, "percentage": 87.36}
|
547 |
+
{"current_steps": 17504, "total_steps": 20000, "loss": 3.27, "eval_loss": null, "predict_loss": null, "learning_rate": 4.044909340302465e-05, "epoch": 0.9969812610354845, "percentage": 87.52}
|
548 |
+
{"current_steps": 17536, "total_steps": 20000, "loss": 3.2318, "eval_loss": null, "predict_loss": null, "learning_rate": 4.018628546233774e-05, "epoch": 0.9988038958819844, "percentage": 87.68}
|
549 |
+
{"current_steps": 17568, "total_steps": 20000, "loss": 3.4984, "eval_loss": null, "predict_loss": null, "learning_rate": 3.992669511160111e-05, "epoch": 1.0006265307284843, "percentage": 87.84}
|
550 |
+
{"current_steps": 17600, "total_steps": 20000, "loss": 3.2318, "eval_loss": null, "predict_loss": null, "learning_rate": 3.9670329042830205e-05, "epoch": 1.0024491655749843, "percentage": 88.0}
|
551 |
+
{"current_steps": 17632, "total_steps": 20000, "loss": 3.4537, "eval_loss": null, "predict_loss": null, "learning_rate": 3.941719386492123e-05, "epoch": 1.0042718004214843, "percentage": 88.16}
|
552 |
+
{"current_steps": 17664, "total_steps": 20000, "loss": 3.2246, "eval_loss": null, "predict_loss": null, "learning_rate": 3.916729610348085e-05, "epoch": 1.0060944352679844, "percentage": 88.32}
|
553 |
+
{"current_steps": 17696, "total_steps": 20000, "loss": 3.3693, "eval_loss": null, "predict_loss": null, "learning_rate": 3.89206422006579e-05, "epoch": 1.0079170701144842, "percentage": 88.48}
|
554 |
+
{"current_steps": 17728, "total_steps": 20000, "loss": 3.3662, "eval_loss": null, "predict_loss": null, "learning_rate": 3.8677238514977316e-05, "epoch": 1.0097397049609842, "percentage": 88.64}
|
555 |
+
{"current_steps": 17760, "total_steps": 20000, "loss": 3.4177, "eval_loss": null, "predict_loss": null, "learning_rate": 3.843709132117625e-05, "epoch": 1.0115623398074842, "percentage": 88.8}
|
556 |
+
{"current_steps": 17792, "total_steps": 20000, "loss": 3.1474, "eval_loss": null, "predict_loss": null, "learning_rate": 3.8200206810042385e-05, "epoch": 1.0133849746539842, "percentage": 88.96}
|
557 |
+
{"current_steps": 17824, "total_steps": 20000, "loss": 3.5238, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7966591088254076e-05, "epoch": 1.015207609500484, "percentage": 89.12}
|
558 |
+
{"current_steps": 17856, "total_steps": 20000, "loss": 3.2886, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7736250178223284e-05, "epoch": 1.017030244346984, "percentage": 89.28}
|
559 |
+
{"current_steps": 17888, "total_steps": 20000, "loss": 3.2523, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7509190017940066e-05, "epoch": 1.018852879193484, "percentage": 89.44}
|
560 |
+
{"current_steps": 17920, "total_steps": 20000, "loss": 3.1801, "eval_loss": null, "predict_loss": null, "learning_rate": 3.728541646081958e-05, "epoch": 1.0206755140399841, "percentage": 89.6}
|
561 |
+
{"current_steps": 17952, "total_steps": 20000, "loss": 3.1789, "eval_loss": null, "predict_loss": null, "learning_rate": 3.7064935275551196e-05, "epoch": 1.022498148886484, "percentage": 89.76}
|
562 |
+
{"current_steps": 17984, "total_steps": 20000, "loss": 3.2932, "eval_loss": null, "predict_loss": null, "learning_rate": 3.6847752145949786e-05, "epoch": 1.024320783732984, "percentage": 89.92}
|
563 |
+
{"current_steps": 18016, "total_steps": 20000, "loss": 3.2778, "eval_loss": null, "predict_loss": null, "learning_rate": 3.66338726708092e-05, "epoch": 1.026143418579484, "percentage": 90.08}
|
564 |
+
{"current_steps": 18048, "total_steps": 20000, "loss": 3.3783, "eval_loss": null, "predict_loss": null, "learning_rate": 3.642330236375788e-05, "epoch": 1.027966053425984, "percentage": 90.24}
|
565 |
+
{"current_steps": 18080, "total_steps": 20000, "loss": 3.3264, "eval_loss": null, "predict_loss": null, "learning_rate": 3.6216046653116795e-05, "epoch": 1.0297886882724838, "percentage": 90.4}
|
566 |
+
{"current_steps": 18112, "total_steps": 20000, "loss": 3.4302, "eval_loss": null, "predict_loss": null, "learning_rate": 3.601211088175948e-05, "epoch": 1.0316113231189838, "percentage": 90.56}
|
567 |
+
{"current_steps": 18144, "total_steps": 20000, "loss": 3.2672, "eval_loss": null, "predict_loss": null, "learning_rate": 3.581150030697432e-05, "epoch": 1.0334339579654839, "percentage": 90.72}
|
568 |
+
{"current_steps": 18176, "total_steps": 20000, "loss": 3.3007, "eval_loss": null, "predict_loss": null, "learning_rate": 3.561422010032892e-05, "epoch": 1.035256592811984, "percentage": 90.88}
|
569 |
+
{"current_steps": 18208, "total_steps": 20000, "loss": 3.3528, "eval_loss": null, "predict_loss": null, "learning_rate": 3.5420275347536976e-05, "epoch": 1.0370792276584837, "percentage": 91.04}
|
570 |
+
{"current_steps": 18240, "total_steps": 20000, "loss": 3.3784, "eval_loss": null, "predict_loss": null, "learning_rate": 3.522967104832694e-05, "epoch": 1.0389018625049837, "percentage": 91.2}
|
571 |
+
{"current_steps": 18272, "total_steps": 20000, "loss": 3.3414, "eval_loss": null, "predict_loss": null, "learning_rate": 3.504241211631338e-05, "epoch": 1.0407244973514838, "percentage": 91.36}
|
572 |
+
{"current_steps": 18304, "total_steps": 20000, "loss": 3.1549, "eval_loss": null, "predict_loss": null, "learning_rate": 3.485850337887007e-05, "epoch": 1.0425471321979838, "percentage": 91.52}
|
573 |
+
{"current_steps": 18336, "total_steps": 20000, "loss": 3.2679, "eval_loss": null, "predict_loss": null, "learning_rate": 3.467794957700573e-05, "epoch": 1.0443697670444836, "percentage": 91.68}
|
574 |
+
{"current_steps": 18368, "total_steps": 20000, "loss": 3.3427, "eval_loss": null, "predict_loss": null, "learning_rate": 3.450075536524166e-05, "epoch": 1.0461924018909836, "percentage": 91.84}
|
575 |
+
{"current_steps": 18400, "total_steps": 20000, "loss": 3.3639, "eval_loss": null, "predict_loss": null, "learning_rate": 3.432692531149191e-05, "epoch": 1.0480150367374836, "percentage": 92.0}
|
576 |
+
{"current_steps": 18432, "total_steps": 20000, "loss": 3.2158, "eval_loss": null, "predict_loss": null, "learning_rate": 3.4156463896945356e-05, "epoch": 1.0498376715839837, "percentage": 92.16}
|
577 |
+
{"current_steps": 18464, "total_steps": 20000, "loss": 3.3309, "eval_loss": null, "predict_loss": null, "learning_rate": 3.398937551595037e-05, "epoch": 1.0516603064304835, "percentage": 92.32}
|
578 |
+
{"current_steps": 18496, "total_steps": 20000, "loss": 3.2401, "eval_loss": null, "predict_loss": null, "learning_rate": 3.382566447590126e-05, "epoch": 1.0534829412769835, "percentage": 92.48}
|
579 |
+
{"current_steps": 18528, "total_steps": 20000, "loss": 3.2267, "eval_loss": null, "predict_loss": null, "learning_rate": 3.366533499712757e-05, "epoch": 1.0553055761234835, "percentage": 92.64}
|
580 |
+
{"current_steps": 18560, "total_steps": 20000, "loss": 3.434, "eval_loss": null, "predict_loss": null, "learning_rate": 3.3508391212784984e-05, "epoch": 1.0571282109699836, "percentage": 92.8}
|
581 |
+
{"current_steps": 18592, "total_steps": 20000, "loss": 3.2164, "eval_loss": null, "predict_loss": null, "learning_rate": 3.3354837168748956e-05, "epoch": 1.0589508458164834, "percentage": 92.96}
|
582 |
+
{"current_steps": 18624, "total_steps": 20000, "loss": 3.1111, "eval_loss": null, "predict_loss": null, "learning_rate": 3.32046768235104e-05, "epoch": 1.0607734806629834, "percentage": 93.12}
|
583 |
+
{"current_steps": 18656, "total_steps": 20000, "loss": 3.4028, "eval_loss": null, "predict_loss": null, "learning_rate": 3.305791404807349e-05, "epoch": 1.0625961155094834, "percentage": 93.28}
|
584 |
+
{"current_steps": 18688, "total_steps": 20000, "loss": 3.3702, "eval_loss": null, "predict_loss": null, "learning_rate": 3.291455262585608e-05, "epoch": 1.0644187503559834, "percentage": 93.44}
|
585 |
+
{"current_steps": 18720, "total_steps": 20000, "loss": 3.2935, "eval_loss": null, "predict_loss": null, "learning_rate": 3.277459625259199e-05, "epoch": 1.0662413852024832, "percentage": 93.6}
|
586 |
+
{"current_steps": 18752, "total_steps": 20000, "loss": 3.2803, "eval_loss": null, "predict_loss": null, "learning_rate": 3.2638048536235925e-05, "epoch": 1.0680640200489833, "percentage": 93.76}
|
587 |
+
{"current_steps": 18784, "total_steps": 20000, "loss": 3.2631, "eval_loss": null, "predict_loss": null, "learning_rate": 3.2504912996870223e-05, "epoch": 1.0698866548954833, "percentage": 93.92}
|
588 |
+
{"current_steps": 18816, "total_steps": 20000, "loss": 3.1562, "eval_loss": null, "predict_loss": null, "learning_rate": 3.237519306661436e-05, "epoch": 1.0717092897419833, "percentage": 94.08}
|
589 |
+
{"current_steps": 18848, "total_steps": 20000, "loss": 3.3514, "eval_loss": null, "predict_loss": null, "learning_rate": 3.224889208953625e-05, "epoch": 1.0735319245884831, "percentage": 94.24}
|
590 |
+
{"current_steps": 18880, "total_steps": 20000, "loss": 3.4585, "eval_loss": null, "predict_loss": null, "learning_rate": 3.21260133215662e-05, "epoch": 1.0753545594349831, "percentage": 94.4}
|
591 |
+
{"current_steps": 18912, "total_steps": 20000, "loss": 3.3478, "eval_loss": null, "predict_loss": null, "learning_rate": 3.200655993041291e-05, "epoch": 1.0771771942814832, "percentage": 94.56}
|
592 |
+
{"current_steps": 18944, "total_steps": 20000, "loss": 3.4537, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1890534995481836e-05, "epoch": 1.0789998291279832, "percentage": 94.72}
|
593 |
+
{"current_steps": 18976, "total_steps": 20000, "loss": 3.2863, "eval_loss": null, "predict_loss": null, "learning_rate": 3.177794150779575e-05, "epoch": 1.0808224639744832, "percentage": 94.88}
|
594 |
+
{"current_steps": 19008, "total_steps": 20000, "loss": 3.1949, "eval_loss": null, "predict_loss": null, "learning_rate": 3.166878236991767e-05, "epoch": 1.082645098820983, "percentage": 95.04}
|
595 |
+
{"current_steps": 19040, "total_steps": 20000, "loss": 3.1133, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1563060395876074e-05, "epoch": 1.084467733667483, "percentage": 95.2}
|
596 |
+
{"current_steps": 19072, "total_steps": 20000, "loss": 3.2552, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1460778311092306e-05, "epoch": 1.086290368513983, "percentage": 95.36}
|
597 |
+
{"current_steps": 19104, "total_steps": 20000, "loss": 3.2694, "eval_loss": null, "predict_loss": null, "learning_rate": 3.136193875231033e-05, "epoch": 1.0881130033604829, "percentage": 95.52}
|
598 |
+
{"current_steps": 19136, "total_steps": 20000, "loss": 3.2414, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1266544267528746e-05, "epoch": 1.089935638206983, "percentage": 95.68}
|
599 |
+
{"current_steps": 19168, "total_steps": 20000, "loss": 3.2347, "eval_loss": null, "predict_loss": null, "learning_rate": 3.117459731593514e-05, "epoch": 1.091758273053483, "percentage": 95.84}
|
600 |
+
{"current_steps": 19200, "total_steps": 20000, "loss": 3.2764, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1086100267842626e-05, "epoch": 1.093580907899983, "percentage": 96.0}
|
601 |
+
{"current_steps": 19232, "total_steps": 20000, "loss": 3.2749, "eval_loss": null, "predict_loss": null, "learning_rate": 3.1001055404628825e-05, "epoch": 1.095403542746483, "percentage": 96.16}
|
602 |
+
{"current_steps": 19264, "total_steps": 20000, "loss": 3.3168, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0919464918676995e-05, "epoch": 1.0972261775929828, "percentage": 96.32}
|
603 |
+
{"current_steps": 19296, "total_steps": 20000, "loss": 3.2748, "eval_loss": null, "predict_loss": null, "learning_rate": 3.084133091331949e-05, "epoch": 1.0990488124394828, "percentage": 96.48}
|
604 |
+
{"current_steps": 19328, "total_steps": 20000, "loss": 3.2654, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0766655402783616e-05, "epoch": 1.1008714472859829, "percentage": 96.64}
|
605 |
+
{"current_steps": 19360, "total_steps": 20000, "loss": 3.3248, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0695440312139664e-05, "epoch": 1.1026940821324827, "percentage": 96.8}
|
606 |
+
{"current_steps": 19392, "total_steps": 20000, "loss": 3.3433, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0627687477251264e-05, "epoch": 1.1045167169789827, "percentage": 96.96}
|
607 |
+
{"current_steps": 19424, "total_steps": 20000, "loss": 3.3813, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0563398644728066e-05, "epoch": 1.1063393518254827, "percentage": 97.12}
|
608 |
+
{"current_steps": 19456, "total_steps": 20000, "loss": 3.3254, "eval_loss": null, "predict_loss": null, "learning_rate": 3.050257547188077e-05, "epoch": 1.1081619866719827, "percentage": 97.28}
|
609 |
+
{"current_steps": 19488, "total_steps": 20000, "loss": 3.2212, "eval_loss": null, "predict_loss": null, "learning_rate": 3.044521952667833e-05, "epoch": 1.1099846215184828, "percentage": 97.44}
|
610 |
+
{"current_steps": 19520, "total_steps": 20000, "loss": 3.2429, "eval_loss": null, "predict_loss": null, "learning_rate": 3.039133228770754e-05, "epoch": 1.1118072563649826, "percentage": 97.6}
|
611 |
+
{"current_steps": 19552, "total_steps": 20000, "loss": 3.3407, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0340915144134993e-05, "epoch": 1.1136298912114826, "percentage": 97.76}
|
612 |
+
{"current_steps": 19584, "total_steps": 20000, "loss": 3.189, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0293969395671157e-05, "epoch": 1.1154525260579826, "percentage": 97.92}
|
613 |
+
{"current_steps": 19616, "total_steps": 20000, "loss": 3.3041, "eval_loss": null, "predict_loss": null, "learning_rate": 3.025049625253697e-05, "epoch": 1.1172751609044826, "percentage": 98.08}
|
614 |
+
{"current_steps": 19648, "total_steps": 20000, "loss": 3.3413, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0210496835432564e-05, "epoch": 1.1190977957509824, "percentage": 98.24}
|
615 |
+
{"current_steps": 19680, "total_steps": 20000, "loss": 3.506, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0173972175508445e-05, "epoch": 1.1209204305974825, "percentage": 98.4}
|
616 |
+
{"current_steps": 19712, "total_steps": 20000, "loss": 3.4766, "eval_loss": null, "predict_loss": null, "learning_rate": 3.014092321433883e-05, "epoch": 1.1227430654439825, "percentage": 98.56}
|
617 |
+
{"current_steps": 19744, "total_steps": 20000, "loss": 3.2175, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0111350803897467e-05, "epoch": 1.1245657002904825, "percentage": 98.72}
|
618 |
+
{"current_steps": 19776, "total_steps": 20000, "loss": 3.1741, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0085255706535594e-05, "epoch": 1.1263883351369823, "percentage": 98.88}
|
619 |
+
{"current_steps": 19808, "total_steps": 20000, "loss": 3.2176, "eval_loss": null, "predict_loss": null, "learning_rate": 3.006263859496231e-05, "epoch": 1.1282109699834824, "percentage": 99.04}
|
620 |
+
{"current_steps": 19840, "total_steps": 20000, "loss": 3.2439, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0043500052227267e-05, "epoch": 1.1300336048299824, "percentage": 99.2}
|
621 |
+
{"current_steps": 19872, "total_steps": 20000, "loss": 3.2651, "eval_loss": null, "predict_loss": null, "learning_rate": 3.002784057170561e-05, "epoch": 1.1318562396764824, "percentage": 99.36}
|
622 |
+
{"current_steps": 19904, "total_steps": 20000, "loss": 3.1567, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0015660557085206e-05, "epoch": 1.1336788745229822, "percentage": 99.52}
|
623 |
+
{"current_steps": 19936, "total_steps": 20000, "loss": 3.2084, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0006960322356355e-05, "epoch": 1.1355015093694822, "percentage": 99.68}
|
624 |
+
{"current_steps": 19968, "total_steps": 20000, "loss": 3.2765, "eval_loss": null, "predict_loss": null, "learning_rate": 3.0001740091803596e-05, "epoch": 1.1373241442159823, "percentage": 99.84}
|
625 |
+
{"current_steps": 20000, "total_steps": 20000, "loss": 3.3661, "eval_loss": null, "predict_loss": null, "learning_rate": 2.9999999999999997e-05, "epoch": 1.1391467790624823, "percentage": 100.0}
|
626 |
+
{"current_steps": 20000, "total_steps": 20000, "loss": null, "eval_loss": null, "predict_loss": null, "learning_rate": null, "epoch": 1.1391467790624823, "percentage": 100.0}
|