{ "best_metric": null, "best_model_checkpoint": null, "epoch": 30.0, "eval_steps": 500, "global_step": 1080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.0880818367004395, "learning_rate": 3.6e-05, "loss": 3.2849, "step": 36 }, { "epoch": 1.0, "eval_accuracy": 0.010123239436619719, "eval_bleu": 0.10518697359828132, "eval_f1": 0.010153581748448041, "eval_loss": 2.6219797134399414, "eval_precision": 0.010202794685557478, "eval_recall": 0.010123239436619719, "eval_runtime": 6.2453, "eval_samples_per_second": 22.737, "eval_steps_per_second": 2.882, "step": 36 }, { "epoch": 2.0, "grad_norm": 6.915263652801514, "learning_rate": 4.893203883495146e-05, "loss": 2.2637, "step": 72 }, { "epoch": 2.0, "eval_accuracy": 0.01248899647887324, "eval_bleu": 0.13792868891001447, "eval_f1": 0.010680838430589857, "eval_loss": 2.017970561981201, "eval_precision": 0.009509030569956453, "eval_recall": 0.012488996478873238, "eval_runtime": 7.0653, "eval_samples_per_second": 20.098, "eval_steps_per_second": 2.548, "step": 72 }, { "epoch": 3.0, "grad_norm": 7.544444561004639, "learning_rate": 4.718446601941748e-05, "loss": 1.789, "step": 108 }, { "epoch": 3.0, "eval_accuracy": 0.012654049295774648, "eval_bleu": 0.17566093139607744, "eval_f1": 0.011466852239797607, "eval_loss": 1.7966645956039429, "eval_precision": 0.01049697288064746, "eval_recall": 0.012654049295774647, "eval_runtime": 6.0435, "eval_samples_per_second": 23.496, "eval_steps_per_second": 2.978, "step": 108 }, { "epoch": 4.0, "grad_norm": 6.955804347991943, "learning_rate": 4.543689320388349e-05, "loss": 1.5522, "step": 144 }, { "epoch": 4.0, "eval_accuracy": 0.012378961267605635, "eval_bleu": 0.20413318737157596, "eval_f1": 0.011132478900133054, "eval_loss": 1.6657302379608154, "eval_precision": 0.010120661403149588, "eval_recall": 0.012378961267605633, "eval_runtime": 6.1905, "eval_samples_per_second": 22.938, "eval_steps_per_second": 2.908, "step": 144 }, { "epoch": 5.0, "grad_norm": 7.229798793792725, "learning_rate": 4.368932038834951e-05, "loss": 1.3885, "step": 180 }, { "epoch": 5.0, "eval_accuracy": 0.012378961267605635, "eval_bleu": 0.2305940937780942, "eval_f1": 0.011073979352246164, "eval_loss": 1.5827170610427856, "eval_precision": 0.010022195626080541, "eval_recall": 0.012378961267605633, "eval_runtime": 6.9109, "eval_samples_per_second": 20.547, "eval_steps_per_second": 2.605, "step": 180 }, { "epoch": 6.0, "grad_norm": 6.714682102203369, "learning_rate": 4.194174757281554e-05, "loss": 1.2561, "step": 216 }, { "epoch": 6.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.25012821385731565, "eval_f1": 0.01143102719262594, "eval_loss": 1.5294432640075684, "eval_precision": 0.010707247710344536, "eval_recall": 0.012268926056338027, "eval_runtime": 6.5697, "eval_samples_per_second": 21.614, "eval_steps_per_second": 2.74, "step": 216 }, { "epoch": 7.0, "grad_norm": 6.063908100128174, "learning_rate": 4.019417475728156e-05, "loss": 1.1466, "step": 252 }, { "epoch": 7.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.2719546447213355, "eval_f1": 0.01116903184495446, "eval_loss": 1.4926397800445557, "eval_precision": 0.010253161244814542, "eval_recall": 0.012268926056338027, "eval_runtime": 6.4161, "eval_samples_per_second": 22.132, "eval_steps_per_second": 2.805, "step": 252 }, { "epoch": 8.0, "grad_norm": 6.155008316040039, "learning_rate": 3.844660194174757e-05, "loss": 1.0616, "step": 288 }, { "epoch": 8.0, "eval_accuracy": 0.012378961267605635, "eval_bleu": 0.28742380325292655, "eval_f1": 0.01144092667248244, "eval_loss": 1.4650629758834839, "eval_precision": 0.010640060069611993, "eval_recall": 0.012378961267605633, "eval_runtime": 6.3235, "eval_samples_per_second": 22.456, "eval_steps_per_second": 2.847, "step": 288 }, { "epoch": 9.0, "grad_norm": 5.874176979064941, "learning_rate": 3.6699029126213596e-05, "loss": 0.9821, "step": 324 }, { "epoch": 9.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.30478814512361185, "eval_f1": 0.011331946253740536, "eval_loss": 1.4492626190185547, "eval_precision": 0.010531151474565863, "eval_recall": 0.012268926056338027, "eval_runtime": 6.2565, "eval_samples_per_second": 22.696, "eval_steps_per_second": 2.877, "step": 324 }, { "epoch": 10.0, "grad_norm": 5.705406665802002, "learning_rate": 3.4951456310679615e-05, "loss": 0.9071, "step": 360 }, { "epoch": 10.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.3250108739654091, "eval_f1": 0.011214281037053235, "eval_loss": 1.4369895458221436, "eval_precision": 0.010327542425543311, "eval_recall": 0.012268926056338027, "eval_runtime": 6.1165, "eval_samples_per_second": 23.216, "eval_steps_per_second": 2.943, "step": 360 }, { "epoch": 11.0, "grad_norm": 5.345541000366211, "learning_rate": 3.3203883495145634e-05, "loss": 0.8445, "step": 396 }, { "epoch": 11.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.32825465058506376, "eval_f1": 0.011419929314754076, "eval_loss": 1.4337056875228882, "eval_precision": 0.010683679060359908, "eval_recall": 0.012268926056338027, "eval_runtime": 6.9862, "eval_samples_per_second": 20.326, "eval_steps_per_second": 2.577, "step": 396 }, { "epoch": 12.0, "grad_norm": 4.928064346313477, "learning_rate": 3.145631067961165e-05, "loss": 0.7869, "step": 432 }, { "epoch": 12.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.34315751189373966, "eval_f1": 0.011425905613358798, "eval_loss": 1.431230902671814, "eval_precision": 0.010692912514740159, "eval_recall": 0.012268926056338027, "eval_runtime": 6.3375, "eval_samples_per_second": 22.406, "eval_steps_per_second": 2.84, "step": 432 }, { "epoch": 13.0, "grad_norm": 4.787758827209473, "learning_rate": 2.9708737864077673e-05, "loss": 0.7388, "step": 468 }, { "epoch": 13.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.3506530097105922, "eval_f1": 0.011567695105672023, "eval_loss": 1.432782530784607, "eval_precision": 0.010942929955906473, "eval_recall": 0.012268926056338027, "eval_runtime": 6.6203, "eval_samples_per_second": 21.449, "eval_steps_per_second": 2.719, "step": 468 }, { "epoch": 14.0, "grad_norm": 4.721966743469238, "learning_rate": 2.7961165048543692e-05, "loss": 0.6948, "step": 504 }, { "epoch": 14.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.36203576750790184, "eval_f1": 0.011535961695115268, "eval_loss": 1.4401381015777588, "eval_precision": 0.010887377543860504, "eval_recall": 0.012268926056338027, "eval_runtime": 6.8054, "eval_samples_per_second": 20.866, "eval_steps_per_second": 2.645, "step": 504 }, { "epoch": 15.0, "grad_norm": 3.970003128051758, "learning_rate": 2.6213592233009708e-05, "loss": 0.646, "step": 540 }, { "epoch": 15.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.37067819665088503, "eval_f1": 0.011355838282605724, "eval_loss": 1.4626511335372925, "eval_precision": 0.010738384191565671, "eval_recall": 0.012048855633802815, "eval_runtime": 6.6898, "eval_samples_per_second": 21.226, "eval_steps_per_second": 2.691, "step": 540 }, { "epoch": 16.0, "grad_norm": 3.967465877532959, "learning_rate": 2.446601941747573e-05, "loss": 0.6137, "step": 576 }, { "epoch": 16.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.3721186663899375, "eval_f1": 0.01150499351512691, "eval_loss": 1.460695505142212, "eval_precision": 0.01100817275358665, "eval_recall": 0.012048855633802815, "eval_runtime": 6.1473, "eval_samples_per_second": 23.1, "eval_steps_per_second": 2.928, "step": 576 }, { "epoch": 17.0, "grad_norm": 3.9431395530700684, "learning_rate": 2.2718446601941746e-05, "loss": 0.5833, "step": 612 }, { "epoch": 17.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.3782352067209992, "eval_f1": 0.011355838282605724, "eval_loss": 1.4740684032440186, "eval_precision": 0.010738384191565671, "eval_recall": 0.012048855633802815, "eval_runtime": 6.2111, "eval_samples_per_second": 22.862, "eval_steps_per_second": 2.898, "step": 612 }, { "epoch": 18.0, "grad_norm": 4.299834251403809, "learning_rate": 2.097087378640777e-05, "loss": 0.5523, "step": 648 }, { "epoch": 18.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.38158208167842117, "eval_f1": 0.011454837538514543, "eval_loss": 1.4804751873016357, "eval_precision": 0.010916734980993936, "eval_recall": 0.012048855633802815, "eval_runtime": 6.2563, "eval_samples_per_second": 22.697, "eval_steps_per_second": 2.877, "step": 648 }, { "epoch": 19.0, "grad_norm": 3.584228038787842, "learning_rate": 1.9223300970873785e-05, "loss": 0.5282, "step": 684 }, { "epoch": 19.0, "eval_accuracy": 0.012268926056338027, "eval_bleu": 0.38941442914279584, "eval_f1": 0.011665531630013295, "eval_loss": 1.4897931814193726, "eval_precision": 0.011119728166708294, "eval_recall": 0.012268926056338027, "eval_runtime": 6.1705, "eval_samples_per_second": 23.013, "eval_steps_per_second": 2.917, "step": 684 }, { "epoch": 20.0, "grad_norm": 3.575045108795166, "learning_rate": 1.7475728155339808e-05, "loss": 0.5047, "step": 720 }, { "epoch": 20.0, "eval_accuracy": 0.01210387323943662, "eval_bleu": 0.3904044704756756, "eval_f1": 0.011583122303403293, "eval_loss": 1.4997267723083496, "eval_precision": 0.011105397197031464, "eval_recall": 0.01210387323943662, "eval_runtime": 6.3359, "eval_samples_per_second": 22.412, "eval_steps_per_second": 2.841, "step": 720 }, { "epoch": 21.0, "grad_norm": 3.5939736366271973, "learning_rate": 1.5728155339805823e-05, "loss": 0.4854, "step": 756 }, { "epoch": 21.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.38824458290957736, "eval_f1": 0.011442367439933718, "eval_loss": 1.5054779052734375, "eval_precision": 0.010894114653359857, "eval_recall": 0.012048855633802815, "eval_runtime": 6.2963, "eval_samples_per_second": 22.553, "eval_steps_per_second": 2.859, "step": 756 }, { "epoch": 22.0, "grad_norm": 3.807431697845459, "learning_rate": 1.3980582524271846e-05, "loss": 0.4697, "step": 792 }, { "epoch": 22.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.3905756115063895, "eval_f1": 0.011442367439933718, "eval_loss": 1.5122706890106201, "eval_precision": 0.010894114653359857, "eval_recall": 0.012048855633802815, "eval_runtime": 7.012, "eval_samples_per_second": 20.251, "eval_steps_per_second": 2.567, "step": 792 }, { "epoch": 23.0, "grad_norm": 3.8207340240478516, "learning_rate": 1.2233009708737865e-05, "loss": 0.4547, "step": 828 }, { "epoch": 23.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.39316674027140547, "eval_f1": 0.011517601862123769, "eval_loss": 1.5183604955673218, "eval_precision": 0.011031274351852924, "eval_recall": 0.012048855633802815, "eval_runtime": 6.8261, "eval_samples_per_second": 20.803, "eval_steps_per_second": 2.637, "step": 828 }, { "epoch": 24.0, "grad_norm": 3.831852912902832, "learning_rate": 1.0485436893203885e-05, "loss": 0.4413, "step": 864 }, { "epoch": 24.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.3956605229111985, "eval_f1": 0.011392759914288347, "eval_loss": 1.5295231342315674, "eval_precision": 0.010804571945629947, "eval_recall": 0.012048855633802815, "eval_runtime": 6.289, "eval_samples_per_second": 22.579, "eval_steps_per_second": 2.862, "step": 864 }, { "epoch": 25.0, "grad_norm": 3.056539297103882, "learning_rate": 8.737864077669904e-06, "loss": 0.4325, "step": 900 }, { "epoch": 25.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.39496386454550636, "eval_f1": 0.0114798602280593, "eval_loss": 1.535691738128662, "eval_precision": 0.010962261366716413, "eval_recall": 0.012048855633802815, "eval_runtime": 6.1059, "eval_samples_per_second": 23.256, "eval_steps_per_second": 2.948, "step": 900 }, { "epoch": 26.0, "grad_norm": 3.1146154403686523, "learning_rate": 6.990291262135923e-06, "loss": 0.4188, "step": 936 }, { "epoch": 26.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.3960440421138887, "eval_f1": 0.011517601862123769, "eval_loss": 1.5396318435668945, "eval_precision": 0.011031274351852924, "eval_recall": 0.012048855633802815, "eval_runtime": 7.2756, "eval_samples_per_second": 19.517, "eval_steps_per_second": 2.474, "step": 936 }, { "epoch": 27.0, "grad_norm": 3.4233100414276123, "learning_rate": 5.242718446601942e-06, "loss": 0.4131, "step": 972 }, { "epoch": 27.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.4001388307186371, "eval_f1": 0.011492413001125629, "eval_loss": 1.5463387966156006, "eval_precision": 0.010985168630418547, "eval_recall": 0.012048855633802815, "eval_runtime": 7.2564, "eval_samples_per_second": 19.569, "eval_steps_per_second": 2.481, "step": 972 }, { "epoch": 28.0, "grad_norm": 2.70424485206604, "learning_rate": 3.4951456310679615e-06, "loss": 0.4089, "step": 1008 }, { "epoch": 28.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.39854032138633055, "eval_f1": 0.011492413001125629, "eval_loss": 1.544076681137085, "eval_precision": 0.010985168630418547, "eval_recall": 0.012048855633802815, "eval_runtime": 7.0435, "eval_samples_per_second": 20.16, "eval_steps_per_second": 2.556, "step": 1008 }, { "epoch": 29.0, "grad_norm": 3.052654266357422, "learning_rate": 1.7475728155339808e-06, "loss": 0.4026, "step": 1044 }, { "epoch": 29.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.3987203294842987, "eval_f1": 0.01150499351512691, "eval_loss": 1.5447367429733276, "eval_precision": 0.01100817275358665, "eval_recall": 0.012048855633802815, "eval_runtime": 6.3248, "eval_samples_per_second": 22.451, "eval_steps_per_second": 2.846, "step": 1044 }, { "epoch": 30.0, "grad_norm": 3.11586856842041, "learning_rate": 0.0, "loss": 0.3999, "step": 1080 }, { "epoch": 30.0, "eval_accuracy": 0.012048855633802816, "eval_bleu": 0.398637528263251, "eval_f1": 0.011517601862123769, "eval_loss": 1.5456310510635376, "eval_precision": 0.011031274351852924, "eval_recall": 0.012048855633802815, "eval_runtime": 6.2921, "eval_samples_per_second": 22.568, "eval_steps_per_second": 2.861, "step": 1080 } ], "logging_steps": 500, "max_steps": 1080, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1113104056320000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }