{ "best_metric": 0.7109387516975403, "best_model_checkpoint": "../KevinKibe/nllb-200-distilled-1.3B-finetuned-finetuned/checkpoint-800", "epoch": 1000.0, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 100.0, "grad_norm": 15.290511131286621, "learning_rate": 1.8060000000000003e-05, "loss": 6.4789, "step": 100 }, { "epoch": 100.0, "eval_gen_len": 32.5, "eval_loss": 5.883636951446533, "eval_rouge": 0.082, "eval_runtime": 2.1783, "eval_samples_per_second": 0.918, "eval_steps_per_second": 0.459, "step": 100 }, { "epoch": 200.0, "grad_norm": 15.802659034729004, "learning_rate": 1.6060000000000002e-05, "loss": 4.8961, "step": 200 }, { "epoch": 200.0, "eval_gen_len": 29.0, "eval_loss": 4.802582263946533, "eval_rouge": 0.1552, "eval_runtime": 2.0128, "eval_samples_per_second": 0.994, "eval_steps_per_second": 0.497, "step": 200 }, { "epoch": 300.0, "grad_norm": 15.621439933776855, "learning_rate": 1.4060000000000001e-05, "loss": 3.636, "step": 300 }, { "epoch": 300.0, "eval_gen_len": 63.0, "eval_loss": 3.5860304832458496, "eval_rouge": 0.0292, "eval_runtime": 5.5559, "eval_samples_per_second": 0.36, "eval_steps_per_second": 0.18, "step": 300 }, { "epoch": 400.0, "grad_norm": 14.424906730651855, "learning_rate": 1.2060000000000001e-05, "loss": 2.4528, "step": 400 }, { "epoch": 400.0, "eval_gen_len": 28.0, "eval_loss": 2.453127384185791, "eval_rouge": 0.2018, "eval_runtime": 1.6344, "eval_samples_per_second": 1.224, "eval_steps_per_second": 0.612, "step": 400 }, { "epoch": 500.0, "grad_norm": 11.111770629882812, "learning_rate": 1.006e-05, "loss": 1.4112, "step": 500 }, { "epoch": 500.0, "eval_gen_len": 70.0, "eval_loss": 1.6681925058364868, "eval_rouge": 0.1899, "eval_runtime": 6.0247, "eval_samples_per_second": 0.332, "eval_steps_per_second": 0.166, "step": 500 }, { "epoch": 600.0, "grad_norm": 6.4994120597839355, "learning_rate": 8.06e-06, "loss": 0.6738, "step": 600 }, { "epoch": 600.0, "eval_gen_len": 68.0, "eval_loss": 1.1857625246047974, "eval_rouge": 0.1907, "eval_runtime": 6.0626, "eval_samples_per_second": 0.33, "eval_steps_per_second": 0.165, "step": 600 }, { "epoch": 700.0, "grad_norm": 3.2670979499816895, "learning_rate": 6.0600000000000004e-06, "loss": 0.2921, "step": 700 }, { "epoch": 700.0, "eval_gen_len": 28.0, "eval_loss": 0.8545865416526794, "eval_rouge": 0.2776, "eval_runtime": 1.6454, "eval_samples_per_second": 1.215, "eval_steps_per_second": 0.608, "step": 700 }, { "epoch": 800.0, "grad_norm": 1.7595362663269043, "learning_rate": 4.060000000000001e-06, "loss": 0.1361, "step": 800 }, { "epoch": 800.0, "eval_gen_len": 59.5, "eval_loss": 0.7109387516975403, "eval_rouge": 0.3649, "eval_runtime": 10.8248, "eval_samples_per_second": 0.185, "eval_steps_per_second": 0.092, "step": 800 }, { "epoch": 900.0, "grad_norm": 1.1663947105407715, "learning_rate": 2.06e-06, "loss": 0.0764, "step": 900 }, { "epoch": 900.0, "eval_gen_len": 54.0, "eval_loss": 0.7293275594711304, "eval_rouge": 0.4568, "eval_runtime": 9.9208, "eval_samples_per_second": 0.202, "eval_steps_per_second": 0.101, "step": 900 }, { "epoch": 1000.0, "grad_norm": 0.9859239459037781, "learning_rate": 6.000000000000001e-08, "loss": 0.0559, "step": 1000 }, { "epoch": 1000.0, "eval_gen_len": 49.0, "eval_loss": 0.7133963704109192, "eval_rouge": 0.467, "eval_runtime": 9.5903, "eval_samples_per_second": 0.209, "eval_steps_per_second": 0.104, "step": 1000 } ], "logging_steps": 100, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 1000, "save_steps": 100, "total_flos": 2500287332352000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }