{ "best_metric": 0.8829862754377662, "best_model_checkpoint": "bge-small-hotpotwa-matryoshka-fine-tuned-50/checkpoint-500", "epoch": 3.3655868742111905, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.33655868742111905, "grad_norm": 1.810544490814209, "learning_rate": 3.3783783783783788e-06, "loss": 19.5492, "step": 50 }, { "epoch": 0.33655868742111905, "eval_dim_128_cosine_accuracy": 0.9584713677236157, "eval_dim_128_dot_accuracy": 0.06874112636062471, "eval_dim_128_euclidean_accuracy": 0.9569332702318978, "eval_dim_128_manhattan_accuracy": 0.9557501183151916, "eval_dim_128_max_accuracy": 0.9584713677236157, "eval_dim_256_cosine_accuracy": 0.9656885944155229, "eval_dim_256_dot_accuracy": 0.03726928537624231, "eval_dim_256_euclidean_accuracy": 0.9667534311405585, "eval_dim_256_manhattan_accuracy": 0.9635589209654519, "eval_dim_256_max_accuracy": 0.9667534311405585, "eval_dim_384_cosine_accuracy": 0.966280170373876, "eval_dim_384_dot_accuracy": 0.03371982962612399, "eval_dim_384_euclidean_accuracy": 0.966280170373876, "eval_dim_384_manhattan_accuracy": 0.9650970184571699, "eval_dim_384_max_accuracy": 0.966280170373876, "eval_dim_64_cosine_accuracy": 0.943208707998107, "eval_dim_64_dot_accuracy": 0.1025792711784193, "eval_dim_64_euclidean_accuracy": 0.941670610506389, "eval_dim_64_manhattan_accuracy": 0.9364647420728821, "eval_dim_64_max_accuracy": 0.943208707998107, "eval_loss": 19.260427474975586, "eval_runtime": 113.0655, "eval_samples_per_second": 74.753, "eval_sequential_score": 0.943208707998107, "eval_steps_per_second": 2.344, "step": 50 }, { "epoch": 0.6731173748422381, "grad_norm": 1.9286330938339233, "learning_rate": 6.7567567567567575e-06, "loss": 19.1976, "step": 100 }, { "epoch": 0.6731173748422381, "eval_dim_128_cosine_accuracy": 0.9358731661145291, "eval_dim_128_dot_accuracy": 0.0698059630856602, "eval_dim_128_euclidean_accuracy": 0.9371746332229058, "eval_dim_128_manhattan_accuracy": 0.9326786559394227, "eval_dim_128_max_accuracy": 0.9371746332229058, "eval_dim_256_cosine_accuracy": 0.9391859914813062, "eval_dim_256_dot_accuracy": 0.06282536677709417, "eval_dim_256_euclidean_accuracy": 0.9390676762896356, "eval_dim_256_manhattan_accuracy": 0.9387127307146238, "eval_dim_256_max_accuracy": 0.9391859914813062, "eval_dim_384_cosine_accuracy": 0.9424988168480833, "eval_dim_384_dot_accuracy": 0.057501183151916706, "eval_dim_384_euclidean_accuracy": 0.9424988168480833, "eval_dim_384_manhattan_accuracy": 0.9403691433980123, "eval_dim_384_max_accuracy": 0.9424988168480833, "eval_dim_64_cosine_accuracy": 0.9275911026975864, "eval_dim_64_dot_accuracy": 0.08021769995267393, "eval_dim_64_euclidean_accuracy": 0.92830099384761, "eval_dim_64_manhattan_accuracy": 0.9260530052058684, "eval_dim_64_max_accuracy": 0.92830099384761, "eval_loss": 18.295848846435547, "eval_runtime": 112.3139, "eval_samples_per_second": 75.253, "eval_sequential_score": 0.9275911026975864, "eval_steps_per_second": 2.359, "step": 100 }, { "epoch": 1.0096760622633572, "grad_norm": 1.4801104068756104, "learning_rate": 1.0135135135135136e-05, "loss": 18.4746, "step": 150 }, { "epoch": 1.0096760622633572, "eval_dim_128_cosine_accuracy": 0.9053478466635116, "eval_dim_128_dot_accuracy": 0.09548035967818268, "eval_dim_128_euclidean_accuracy": 0.9085423568386181, "eval_dim_128_manhattan_accuracy": 0.9060577378135353, "eval_dim_128_max_accuracy": 0.9085423568386181, "eval_dim_256_cosine_accuracy": 0.9074775201135826, "eval_dim_256_dot_accuracy": 0.09264079507808802, "eval_dim_256_euclidean_accuracy": 0.9093705631803124, "eval_dim_256_manhattan_accuracy": 0.907950780880265, "eval_dim_256_max_accuracy": 0.9093705631803124, "eval_dim_384_cosine_accuracy": 0.9085423568386181, "eval_dim_384_dot_accuracy": 0.09145764316138193, "eval_dim_384_euclidean_accuracy": 0.9085423568386181, "eval_dim_384_manhattan_accuracy": 0.9081874112636062, "eval_dim_384_max_accuracy": 0.9085423568386181, "eval_dim_64_cosine_accuracy": 0.8995504022716517, "eval_dim_64_dot_accuracy": 0.10234264079507809, "eval_dim_64_euclidean_accuracy": 0.9028632276384287, "eval_dim_64_manhattan_accuracy": 0.900970184571699, "eval_dim_64_max_accuracy": 0.9028632276384287, "eval_loss": 16.984577178955078, "eval_runtime": 112.2207, "eval_samples_per_second": 75.316, "eval_sequential_score": 0.8995504022716517, "eval_steps_per_second": 2.361, "step": 150 }, { "epoch": 1.3462347496844762, "grad_norm": 1.4812753200531006, "learning_rate": 1.3513513513513515e-05, "loss": 18.0684, "step": 200 }, { "epoch": 1.3462347496844762, "eval_dim_128_cosine_accuracy": 0.9029815428300993, "eval_dim_128_dot_accuracy": 0.09737340274491245, "eval_dim_128_euclidean_accuracy": 0.900378608613346, "eval_dim_128_manhattan_accuracy": 0.9004969238050166, "eval_dim_128_max_accuracy": 0.9029815428300993, "eval_dim_256_cosine_accuracy": 0.9051112162801703, "eval_dim_256_dot_accuracy": 0.09512541410317085, "eval_dim_256_euclidean_accuracy": 0.9034548035967819, "eval_dim_256_manhattan_accuracy": 0.9021533364884051, "eval_dim_256_max_accuracy": 0.9051112162801703, "eval_dim_384_cosine_accuracy": 0.9048745858968291, "eval_dim_384_dot_accuracy": 0.09512541410317085, "eval_dim_384_euclidean_accuracy": 0.9048745858968291, "eval_dim_384_manhattan_accuracy": 0.9029815428300993, "eval_dim_384_max_accuracy": 0.9048745858968291, "eval_dim_64_cosine_accuracy": 0.8958826313298628, "eval_dim_64_dot_accuracy": 0.10506389020350212, "eval_dim_64_euclidean_accuracy": 0.8997870326549929, "eval_dim_64_manhattan_accuracy": 0.8989588263132986, "eval_dim_64_max_accuracy": 0.8997870326549929, "eval_loss": 16.686861038208008, "eval_runtime": 112.2576, "eval_samples_per_second": 75.291, "eval_sequential_score": 0.8958826313298628, "eval_steps_per_second": 2.361, "step": 200 }, { "epoch": 1.6827934371055953, "grad_norm": 1.361006736755371, "learning_rate": 1.6891891891891896e-05, "loss": 17.8979, "step": 250 }, { "epoch": 1.6827934371055953, "eval_dim_128_cosine_accuracy": 0.9016800757217227, "eval_dim_128_dot_accuracy": 0.09867486985328916, "eval_dim_128_euclidean_accuracy": 0.9020350212967345, "eval_dim_128_manhattan_accuracy": 0.9000236630383341, "eval_dim_128_max_accuracy": 0.9020350212967345, "eval_dim_256_cosine_accuracy": 0.9029815428300993, "eval_dim_256_dot_accuracy": 0.09831992427827733, "eval_dim_256_euclidean_accuracy": 0.900970184571699, "eval_dim_256_manhattan_accuracy": 0.9026265972550875, "eval_dim_256_max_accuracy": 0.9029815428300993, "eval_dim_384_cosine_accuracy": 0.9015617605300521, "eval_dim_384_dot_accuracy": 0.09843823946994794, "eval_dim_384_euclidean_accuracy": 0.9015617605300521, "eval_dim_384_manhattan_accuracy": 0.9032181732134406, "eval_dim_384_max_accuracy": 0.9032181732134406, "eval_dim_64_cosine_accuracy": 0.8954093705631803, "eval_dim_64_dot_accuracy": 0.10518220539517274, "eval_dim_64_euclidean_accuracy": 0.8995504022716517, "eval_dim_64_manhattan_accuracy": 0.8978939895882632, "eval_dim_64_max_accuracy": 0.8995504022716517, "eval_loss": 16.577987670898438, "eval_runtime": 112.319, "eval_samples_per_second": 75.25, "eval_sequential_score": 0.8954093705631803, "eval_steps_per_second": 2.359, "step": 250 }, { "epoch": 2.0193521245267143, "grad_norm": 1.656162142753601, "learning_rate": 1.9999888744757143e-05, "loss": 17.7545, "step": 300 }, { "epoch": 2.0193521245267143, "eval_dim_128_cosine_accuracy": 0.897657359204922, "eval_dim_128_dot_accuracy": 0.10328916232844297, "eval_dim_128_euclidean_accuracy": 0.8977756743965926, "eval_dim_128_manhattan_accuracy": 0.896710837671557, "eval_dim_128_max_accuracy": 0.8977756743965926, "eval_dim_256_cosine_accuracy": 0.8990771415049692, "eval_dim_256_dot_accuracy": 0.10210601041173686, "eval_dim_256_euclidean_accuracy": 0.8981306199716044, "eval_dim_256_manhattan_accuracy": 0.8968291528632276, "eval_dim_256_max_accuracy": 0.8990771415049692, "eval_dim_384_cosine_accuracy": 0.8983672503549456, "eval_dim_384_dot_accuracy": 0.10163274964505442, "eval_dim_384_euclidean_accuracy": 0.8983672503549456, "eval_dim_384_manhattan_accuracy": 0.897657359204922, "eval_dim_384_max_accuracy": 0.8983672503549456, "eval_dim_64_cosine_accuracy": 0.8924514907714151, "eval_dim_64_dot_accuracy": 0.10884997633696167, "eval_dim_64_euclidean_accuracy": 0.8923331755797445, "eval_dim_64_manhattan_accuracy": 0.8930430667297681, "eval_dim_64_max_accuracy": 0.8930430667297681, "eval_loss": 16.513458251953125, "eval_runtime": 111.7362, "eval_samples_per_second": 75.642, "eval_sequential_score": 0.8924514907714151, "eval_steps_per_second": 2.372, "step": 300 }, { "epoch": 2.3559108119478336, "grad_norm": 2.0629849433898926, "learning_rate": 1.9979730545608128e-05, "loss": 17.6046, "step": 350 }, { "epoch": 2.3559108119478336, "eval_dim_128_cosine_accuracy": 0.8893752957879791, "eval_dim_128_dot_accuracy": 0.11074301940369144, "eval_dim_128_euclidean_accuracy": 0.8889020350212967, "eval_dim_128_manhattan_accuracy": 0.8898485565546617, "eval_dim_128_max_accuracy": 0.8898485565546617, "eval_dim_256_cosine_accuracy": 0.8893752957879791, "eval_dim_256_dot_accuracy": 0.11086133459536204, "eval_dim_256_euclidean_accuracy": 0.8896119261713203, "eval_dim_256_manhattan_accuracy": 0.8904401325130147, "eval_dim_256_max_accuracy": 0.8904401325130147, "eval_dim_384_cosine_accuracy": 0.8906767628963559, "eval_dim_384_dot_accuracy": 0.10932323710364411, "eval_dim_384_euclidean_accuracy": 0.8906767628963559, "eval_dim_384_manhattan_accuracy": 0.8912683388547089, "eval_dim_384_max_accuracy": 0.8912683388547089, "eval_dim_64_cosine_accuracy": 0.8861807856128727, "eval_dim_64_dot_accuracy": 0.11784193090392807, "eval_dim_64_euclidean_accuracy": 0.8860624704212021, "eval_dim_64_manhattan_accuracy": 0.8861807856128727, "eval_dim_64_max_accuracy": 0.8861807856128727, "eval_loss": 16.491697311401367, "eval_runtime": 112.5812, "eval_samples_per_second": 75.075, "eval_sequential_score": 0.8861807856128727, "eval_steps_per_second": 2.354, "step": 350 }, { "epoch": 2.6924694993689524, "grad_norm": 2.6013519763946533, "learning_rate": 1.992488554155135e-05, "loss": 17.4434, "step": 400 }, { "epoch": 2.6924694993689524, "eval_dim_128_cosine_accuracy": 0.8873639375295788, "eval_dim_128_dot_accuracy": 0.11512068149550403, "eval_dim_128_euclidean_accuracy": 0.8857075248461902, "eval_dim_128_manhattan_accuracy": 0.8874822527212494, "eval_dim_128_max_accuracy": 0.8874822527212494, "eval_dim_256_cosine_accuracy": 0.8861807856128727, "eval_dim_256_dot_accuracy": 0.11393752957879792, "eval_dim_256_euclidean_accuracy": 0.8868906767628963, "eval_dim_256_manhattan_accuracy": 0.8862991008045433, "eval_dim_256_max_accuracy": 0.8868906767628963, "eval_dim_384_cosine_accuracy": 0.8874822527212494, "eval_dim_384_dot_accuracy": 0.1125177472787506, "eval_dim_384_euclidean_accuracy": 0.8874822527212494, "eval_dim_384_manhattan_accuracy": 0.8865357311878845, "eval_dim_384_max_accuracy": 0.8874822527212494, "eval_dim_64_cosine_accuracy": 0.8858258400378609, "eval_dim_64_dot_accuracy": 0.12068149550402271, "eval_dim_64_euclidean_accuracy": 0.8855892096545196, "eval_dim_64_manhattan_accuracy": 0.885470894462849, "eval_dim_64_max_accuracy": 0.8858258400378609, "eval_loss": 16.492637634277344, "eval_runtime": 111.0561, "eval_samples_per_second": 76.106, "eval_sequential_score": 0.8858258400378609, "eval_steps_per_second": 2.386, "step": 400 }, { "epoch": 3.0290281867900717, "grad_norm": 3.0714638233184814, "learning_rate": 1.983554435877128e-05, "loss": 17.3278, "step": 450 }, { "epoch": 3.0290281867900717, "eval_dim_128_cosine_accuracy": 0.8853525792711784, "eval_dim_128_dot_accuracy": 0.1160672030288689, "eval_dim_128_euclidean_accuracy": 0.8867723615712257, "eval_dim_128_manhattan_accuracy": 0.8855892096545196, "eval_dim_128_max_accuracy": 0.8867723615712257, "eval_dim_256_cosine_accuracy": 0.8860624704212021, "eval_dim_256_dot_accuracy": 0.11429247515380975, "eval_dim_256_euclidean_accuracy": 0.885470894462849, "eval_dim_256_manhattan_accuracy": 0.8864174159962139, "eval_dim_256_max_accuracy": 0.8864174159962139, "eval_dim_384_cosine_accuracy": 0.8868906767628963, "eval_dim_384_dot_accuracy": 0.11310932323710364, "eval_dim_384_euclidean_accuracy": 0.8868906767628963, "eval_dim_384_manhattan_accuracy": 0.8867723615712257, "eval_dim_384_max_accuracy": 0.8868906767628963, "eval_dim_64_cosine_accuracy": 0.8859441552295315, "eval_dim_64_dot_accuracy": 0.12091812588736393, "eval_dim_64_euclidean_accuracy": 0.884287742546143, "eval_dim_64_manhattan_accuracy": 0.8847610033128254, "eval_dim_64_max_accuracy": 0.8859441552295315, "eval_loss": 16.475650787353516, "eval_runtime": 112.3325, "eval_samples_per_second": 75.241, "eval_sequential_score": 0.8859441552295315, "eval_steps_per_second": 2.359, "step": 450 }, { "epoch": 3.3655868742111905, "grad_norm": 3.793804407119751, "learning_rate": 1.9712017522703764e-05, "loss": 17.247, "step": 500 }, { "epoch": 3.3655868742111905, "eval_dim_128_cosine_accuracy": 0.8829862754377662, "eval_dim_128_dot_accuracy": 0.11831519167061051, "eval_dim_128_euclidean_accuracy": 0.8836961665877898, "eval_dim_128_manhattan_accuracy": 0.8823946994794132, "eval_dim_128_max_accuracy": 0.8836961665877898, "eval_dim_256_cosine_accuracy": 0.8840511121628017, "eval_dim_256_dot_accuracy": 0.11571225745385708, "eval_dim_256_euclidean_accuracy": 0.8841694273544723, "eval_dim_256_manhattan_accuracy": 0.8851159488878372, "eval_dim_256_max_accuracy": 0.8851159488878372, "eval_dim_384_cosine_accuracy": 0.8853525792711784, "eval_dim_384_dot_accuracy": 0.11464742072882159, "eval_dim_384_euclidean_accuracy": 0.8853525792711784, "eval_dim_384_manhattan_accuracy": 0.8862991008045433, "eval_dim_384_max_accuracy": 0.8862991008045433, "eval_dim_64_cosine_accuracy": 0.8815664931377188, "eval_dim_64_dot_accuracy": 0.12434926644581164, "eval_dim_64_euclidean_accuracy": 0.88180312352106, "eval_dim_64_manhattan_accuracy": 0.88180312352106, "eval_dim_64_max_accuracy": 0.88180312352106, "eval_loss": 16.47345542907715, "eval_runtime": 111.9611, "eval_samples_per_second": 75.49, "eval_sequential_score": 0.8815664931377188, "eval_steps_per_second": 2.367, "step": 500 } ], "logging_steps": 50, "max_steps": 2960, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }