{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 0.886165976524353, "learning_rate": 6.666666666666667e-05, "loss": 0.8569, "step": 1 }, { "epoch": 0.2, "grad_norm": 0.9352289438247681, "learning_rate": 0.00013333333333333334, "loss": 0.9157, "step": 2 }, { "epoch": 0.3, "grad_norm": 0.6887379288673401, "learning_rate": 0.0002, "loss": 0.8368, "step": 3 }, { "epoch": 0.4, "grad_norm": 0.5438244938850403, "learning_rate": 0.00019977668786231534, "loss": 0.7759, "step": 4 }, { "epoch": 0.5, "grad_norm": 0.4049173593521118, "learning_rate": 0.000199107748815478, "loss": 0.7108, "step": 5 }, { "epoch": 0.6, "grad_norm": 0.3374454379081726, "learning_rate": 0.0001979961705036587, "loss": 0.6949, "step": 6 }, { "epoch": 0.7, "grad_norm": 0.31860846281051636, "learning_rate": 0.00019644691750543767, "loss": 0.6725, "step": 7 }, { "epoch": 0.8, "grad_norm": 0.3116016089916229, "learning_rate": 0.0001944669091607919, "loss": 0.6814, "step": 8 }, { "epoch": 0.9, "grad_norm": 0.3020254969596863, "learning_rate": 0.00019206498866764288, "loss": 0.6628, "step": 9 }, { "epoch": 1.0, "grad_norm": 0.3751624524593353, "learning_rate": 0.00018925188358598813, "loss": 0.6491, "step": 10 }, { "epoch": 1.1, "grad_norm": 0.33873409032821655, "learning_rate": 0.00018604015792601396, "loss": 0.5887, "step": 11 }, { "epoch": 1.2, "grad_norm": 0.3233942985534668, "learning_rate": 0.00018244415603417603, "loss": 0.5414, "step": 12 }, { "epoch": 1.3, "grad_norm": 0.33184298872947693, "learning_rate": 0.0001784799385278661, "loss": 0.5534, "step": 13 }, { "epoch": 1.4, "grad_norm": 0.3251623511314392, "learning_rate": 0.00017416521056479577, "loss": 0.5303, "step": 14 }, { "epoch": 1.5, "grad_norm": 0.30667614936828613, "learning_rate": 0.00016951924276746425, "loss": 0.5261, "step": 15 }, { "epoch": 1.6, "grad_norm": 0.30262428522109985, "learning_rate": 0.00016456278515588024, "loss": 0.5101, "step": 16 }, { "epoch": 1.7, "grad_norm": 0.30721279978752136, "learning_rate": 0.00015931797447293552, "loss": 0.5059, "step": 17 }, { "epoch": 1.8, "grad_norm": 0.2939946949481964, "learning_rate": 0.00015380823531633729, "loss": 0.4949, "step": 18 }, { "epoch": 1.9, "grad_norm": 0.28288447856903076, "learning_rate": 0.00014805817551866838, "loss": 0.4869, "step": 19 }, { "epoch": 2.0, "grad_norm": 0.2906552255153656, "learning_rate": 0.0001420934762428335, "loss": 0.4605, "step": 20 }, { "epoch": 2.1, "grad_norm": 0.2544306218624115, "learning_rate": 0.00013594077728375128, "loss": 0.4436, "step": 21 }, { "epoch": 2.2, "grad_norm": 0.2552658021450043, "learning_rate": 0.00012962755808856342, "loss": 0.4192, "step": 22 }, { "epoch": 2.3, "grad_norm": 0.2546628713607788, "learning_rate": 0.00012318201502675285, "loss": 0.4106, "step": 23 }, { "epoch": 2.4, "grad_norm": 0.23741765320301056, "learning_rate": 0.00011663293545831302, "loss": 0.3938, "step": 24 }, { "epoch": 2.5, "grad_norm": 0.2525660991668701, "learning_rate": 0.00011000956916240985, "loss": 0.4017, "step": 25 }, { "epoch": 2.6, "grad_norm": 0.2451285570859909, "learning_rate": 0.00010334149770076747, "loss": 0.3794, "step": 26 }, { "epoch": 2.7, "grad_norm": 0.2524901032447815, "learning_rate": 9.665850229923258e-05, "loss": 0.3855, "step": 27 }, { "epoch": 2.8, "grad_norm": 0.24502427875995636, "learning_rate": 8.999043083759017e-05, "loss": 0.3616, "step": 28 }, { "epoch": 2.9, "grad_norm": 0.2539559602737427, "learning_rate": 8.336706454168701e-05, "loss": 0.3763, "step": 29 }, { "epoch": 3.0, "grad_norm": 0.24636436998844147, "learning_rate": 7.681798497324716e-05, "loss": 0.3517, "step": 30 }, { "epoch": 3.1, "grad_norm": 0.23345255851745605, "learning_rate": 7.037244191143661e-05, "loss": 0.3284, "step": 31 }, { "epoch": 3.2, "grad_norm": 0.2510325014591217, "learning_rate": 6.405922271624874e-05, "loss": 0.3301, "step": 32 }, { "epoch": 3.3, "grad_norm": 0.24276795983314514, "learning_rate": 5.790652375716652e-05, "loss": 0.3339, "step": 33 }, { "epoch": 3.4, "grad_norm": 0.23698894679546356, "learning_rate": 5.1941824481331626e-05, "loss": 0.3273, "step": 34 }, { "epoch": 3.5, "grad_norm": 0.24454469978809357, "learning_rate": 4.6191764683662744e-05, "loss": 0.3073, "step": 35 }, { "epoch": 3.6, "grad_norm": 0.25007879734039307, "learning_rate": 4.0682025527064486e-05, "loss": 0.3149, "step": 36 }, { "epoch": 3.7, "grad_norm": 0.25587430596351624, "learning_rate": 3.543721484411976e-05, "loss": 0.3081, "step": 37 }, { "epoch": 3.8, "grad_norm": 0.2517322599887848, "learning_rate": 3.0480757232535772e-05, "loss": 0.3131, "step": 38 }, { "epoch": 3.9, "grad_norm": 0.23721425235271454, "learning_rate": 2.5834789435204243e-05, "loss": 0.2987, "step": 39 }, { "epoch": 4.0, "grad_norm": 0.23976309597492218, "learning_rate": 2.1520061472133902e-05, "loss": 0.3148, "step": 40 }, { "epoch": 4.1, "grad_norm": 0.22691340744495392, "learning_rate": 1.7555843965823992e-05, "loss": 0.2777, "step": 41 }, { "epoch": 4.2, "grad_norm": 0.23945631086826324, "learning_rate": 1.3959842073986085e-05, "loss": 0.2954, "step": 42 }, { "epoch": 4.3, "grad_norm": 0.23303616046905518, "learning_rate": 1.0748116414011888e-05, "loss": 0.2882, "step": 43 }, { "epoch": 4.4, "grad_norm": 0.24104540050029755, "learning_rate": 7.935011332357112e-06, "loss": 0.2939, "step": 44 }, { "epoch": 4.5, "grad_norm": 0.23101571202278137, "learning_rate": 5.533090839208133e-06, "loss": 0.2789, "step": 45 }, { "epoch": 4.6, "grad_norm": 0.2310316413640976, "learning_rate": 3.5530824945623542e-06, "loss": 0.2925, "step": 46 }, { "epoch": 4.7, "grad_norm": 0.23140206933021545, "learning_rate": 2.003829496341325e-06, "loss": 0.2824, "step": 47 }, { "epoch": 4.8, "grad_norm": 0.22320057451725006, "learning_rate": 8.922511845219971e-07, "loss": 0.2975, "step": 48 }, { "epoch": 4.9, "grad_norm": 0.22732405364513397, "learning_rate": 2.2331213768468363e-07, "loss": 0.2891, "step": 49 }, { "epoch": 5.0, "grad_norm": 0.2486155927181244, "learning_rate": 0.0, "loss": 0.2815, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.457968683122688e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }