{ "best_metric": 0.4840739071369171, "best_model_checkpoint": "/mnt/beegfs/farid/mlora/outputs/xnli/aya-101/tr/rank4_lr5e-5/checkpoint-5500", "epoch": 0.24445893089960888, "eval_steps": 500, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020371577574967405, "grad_norm": 1.8574336767196655, "learning_rate": 4.166666666666667e-05, "loss": 1.1457, "step": 500 }, { "epoch": 0.020371577574967405, "eval_accuracy": 0.4429718875502008, "eval_f1": 0.4164069076365425, "eval_loss": 1.073067307472229, "eval_runtime": 413.6919, "eval_samples_per_second": 6.019, "eval_steps_per_second": 0.377, "step": 500 }, { "epoch": 0.04074315514993481, "grad_norm": 10.016312599182129, "learning_rate": 4.62962962962963e-05, "loss": 0.9193, "step": 1000 }, { "epoch": 0.04074315514993481, "eval_accuracy": 0.738152610441767, "eval_f1": 0.7369047817418783, "eval_loss": 0.6893119812011719, "eval_runtime": 411.7464, "eval_samples_per_second": 6.047, "eval_steps_per_second": 0.379, "step": 1000 }, { "epoch": 0.06111473272490222, "grad_norm": 11.630990028381348, "learning_rate": 4.166666666666667e-05, "loss": 0.755, "step": 1500 }, { "epoch": 0.06111473272490222, "eval_accuracy": 0.770281124497992, "eval_f1": 0.7692403135749393, "eval_loss": 0.6047824025154114, "eval_runtime": 431.4567, "eval_samples_per_second": 5.771, "eval_steps_per_second": 0.362, "step": 1500 }, { "epoch": 0.08148631029986962, "grad_norm": 11.266816139221191, "learning_rate": 3.7037037037037037e-05, "loss": 0.6935, "step": 2000 }, { "epoch": 0.08148631029986962, "eval_accuracy": 0.770281124497992, "eval_f1": 0.7716097568036839, "eval_loss": 0.5751612186431885, "eval_runtime": 411.4318, "eval_samples_per_second": 6.052, "eval_steps_per_second": 0.379, "step": 2000 }, { "epoch": 0.10185788787483703, "grad_norm": 5.681130886077881, "learning_rate": 3.240740740740741e-05, "loss": 0.6705, "step": 2500 }, { "epoch": 0.10185788787483703, "eval_accuracy": 0.7991967871485943, "eval_f1": 0.799397196834657, "eval_loss": 0.5203642845153809, "eval_runtime": 411.5367, "eval_samples_per_second": 6.05, "eval_steps_per_second": 0.379, "step": 2500 }, { "epoch": 0.12222946544980444, "grad_norm": 13.885538101196289, "learning_rate": 2.777777777777778e-05, "loss": 0.6535, "step": 3000 }, { "epoch": 0.12222946544980444, "eval_accuracy": 0.8080321285140563, "eval_f1": 0.8079778470492675, "eval_loss": 0.5077390670776367, "eval_runtime": 431.4507, "eval_samples_per_second": 5.771, "eval_steps_per_second": 0.362, "step": 3000 }, { "epoch": 0.14260104302477183, "grad_norm": 11.62227725982666, "learning_rate": 2.314814814814815e-05, "loss": 0.6344, "step": 3500 }, { "epoch": 0.14260104302477183, "eval_accuracy": 0.8056224899598393, "eval_f1": 0.8060120856906442, "eval_loss": 0.5052712559700012, "eval_runtime": 411.9037, "eval_samples_per_second": 6.045, "eval_steps_per_second": 0.379, "step": 3500 }, { "epoch": 0.16297262059973924, "grad_norm": 7.741968631744385, "learning_rate": 1.8518518518518518e-05, "loss": 0.6461, "step": 4000 }, { "epoch": 0.16297262059973924, "eval_accuracy": 0.8168674698795181, "eval_f1": 0.8171764533567928, "eval_loss": 0.48436030745506287, "eval_runtime": 431.8438, "eval_samples_per_second": 5.766, "eval_steps_per_second": 0.361, "step": 4000 }, { "epoch": 0.18334419817470665, "grad_norm": 5.995279788970947, "learning_rate": 1.388888888888889e-05, "loss": 0.6149, "step": 4500 }, { "epoch": 0.18334419817470665, "eval_accuracy": 0.8072289156626506, "eval_f1": 0.8077366231036467, "eval_loss": 0.49473461508750916, "eval_runtime": 411.5781, "eval_samples_per_second": 6.05, "eval_steps_per_second": 0.379, "step": 4500 }, { "epoch": 0.20371577574967406, "grad_norm": 8.767409324645996, "learning_rate": 9.259259259259259e-06, "loss": 0.6235, "step": 5000 }, { "epoch": 0.20371577574967406, "eval_accuracy": 0.8052208835341366, "eval_f1": 0.8055606645944181, "eval_loss": 0.4961791932582855, "eval_runtime": 411.3835, "eval_samples_per_second": 6.053, "eval_steps_per_second": 0.379, "step": 5000 }, { "epoch": 0.22408735332464147, "grad_norm": 7.734911918640137, "learning_rate": 4.6296296296296296e-06, "loss": 0.6145, "step": 5500 }, { "epoch": 0.22408735332464147, "eval_accuracy": 0.8112449799196787, "eval_f1": 0.8113855121951574, "eval_loss": 0.4840739071369171, "eval_runtime": 431.5261, "eval_samples_per_second": 5.77, "eval_steps_per_second": 0.362, "step": 5500 }, { "epoch": 0.24445893089960888, "grad_norm": 5.888601779937744, "learning_rate": 0.0, "loss": 0.6107, "step": 6000 }, { "epoch": 0.24445893089960888, "eval_accuracy": 0.8076305220883534, "eval_f1": 0.8079306262177152, "eval_loss": 0.48599937558174133, "eval_runtime": 411.7687, "eval_samples_per_second": 6.047, "eval_steps_per_second": 0.379, "step": 6000 } ], "logging_steps": 500, "max_steps": 6000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 8.03166870528e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }