|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.864197530864196, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.28051668405532837, |
|
"eval_runtime": 198.6071, |
|
"eval_samples_per_second": 16.374, |
|
"eval_steps_per_second": 2.049, |
|
"eval_wer": 0.2381097005406062, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.987639060568604e-05, |
|
"loss": 0.047, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.34423157572746277, |
|
"eval_runtime": 147.7154, |
|
"eval_samples_per_second": 22.015, |
|
"eval_steps_per_second": 2.755, |
|
"eval_wer": 0.2334325457085586, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.975278121137207e-05, |
|
"loss": 0.0409, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 0.3597787916660309, |
|
"eval_runtime": 156.092, |
|
"eval_samples_per_second": 20.834, |
|
"eval_steps_per_second": 2.607, |
|
"eval_wer": 0.23124582396889995, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 9.96291718170581e-05, |
|
"loss": 0.0413, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 0.35860675573349, |
|
"eval_runtime": 151.5312, |
|
"eval_samples_per_second": 21.461, |
|
"eval_steps_per_second": 2.686, |
|
"eval_wer": 0.2421794326671931, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 9.950556242274414e-05, |
|
"loss": 0.0388, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"eval_loss": 0.3748931586742401, |
|
"eval_runtime": 154.1563, |
|
"eval_samples_per_second": 21.095, |
|
"eval_steps_per_second": 2.64, |
|
"eval_wer": 0.23914231913988945, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 9.938195302843017e-05, |
|
"loss": 0.0383, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 0.36109668016433716, |
|
"eval_runtime": 163.2656, |
|
"eval_samples_per_second": 19.918, |
|
"eval_steps_per_second": 2.493, |
|
"eval_wer": 0.2352548138249408, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.92583436341162e-05, |
|
"loss": 0.0381, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"eval_loss": 0.3883003294467926, |
|
"eval_runtime": 164.0468, |
|
"eval_samples_per_second": 19.824, |
|
"eval_steps_per_second": 2.481, |
|
"eval_wer": 0.23683411285913866, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"learning_rate": 9.913473423980223e-05, |
|
"loss": 0.0379, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_loss": 0.3676028549671173, |
|
"eval_runtime": 174.6686, |
|
"eval_samples_per_second": 18.618, |
|
"eval_steps_per_second": 2.33, |
|
"eval_wer": 0.2324606693798214, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 9.901112484548825e-05, |
|
"loss": 0.0364, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"eval_loss": 0.37265580892562866, |
|
"eval_runtime": 150.5621, |
|
"eval_samples_per_second": 21.599, |
|
"eval_steps_per_second": 2.703, |
|
"eval_wer": 0.23331106116746644, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 9.88875154511743e-05, |
|
"loss": 0.0355, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"eval_loss": 0.3740839958190918, |
|
"eval_runtime": 180.5633, |
|
"eval_samples_per_second": 18.01, |
|
"eval_steps_per_second": 2.254, |
|
"eval_wer": 0.23282512300309785, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 9.876390605686032e-05, |
|
"loss": 0.0365, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"eval_loss": 0.37167179584503174, |
|
"eval_runtime": 177.8674, |
|
"eval_samples_per_second": 18.283, |
|
"eval_steps_per_second": 2.288, |
|
"eval_wer": 0.23580149425985544, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 9.864029666254637e-05, |
|
"loss": 0.0343, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"eval_loss": 0.3842860460281372, |
|
"eval_runtime": 182.4399, |
|
"eval_samples_per_second": 17.825, |
|
"eval_steps_per_second": 2.231, |
|
"eval_wer": 0.23580149425985544, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"learning_rate": 9.851668726823239e-05, |
|
"loss": 0.0358, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 16.05, |
|
"eval_loss": 0.3594246208667755, |
|
"eval_runtime": 180.3747, |
|
"eval_samples_per_second": 18.029, |
|
"eval_steps_per_second": 2.256, |
|
"eval_wer": 0.23616594788313186, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 9.839307787391843e-05, |
|
"loss": 0.0343, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"eval_loss": 0.4051465690135956, |
|
"eval_runtime": 180.0644, |
|
"eval_samples_per_second": 18.06, |
|
"eval_steps_per_second": 2.26, |
|
"eval_wer": 0.2351940715543947, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"learning_rate": 9.826946847960445e-05, |
|
"loss": 0.0363, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 18.52, |
|
"eval_loss": 0.3952256739139557, |
|
"eval_runtime": 185.3946, |
|
"eval_samples_per_second": 17.541, |
|
"eval_steps_per_second": 2.195, |
|
"eval_wer": 0.23962825730425805, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"learning_rate": 9.814585908529048e-05, |
|
"loss": 0.0382, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"eval_loss": 0.3582073450088501, |
|
"eval_runtime": 179.8373, |
|
"eval_samples_per_second": 18.083, |
|
"eval_steps_per_second": 2.263, |
|
"eval_wer": 0.23756302010569155, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"learning_rate": 9.802224969097652e-05, |
|
"loss": 0.0337, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_loss": 0.3877179026603699, |
|
"eval_runtime": 183.3138, |
|
"eval_samples_per_second": 17.74, |
|
"eval_steps_per_second": 2.22, |
|
"eval_wer": 0.2347081333900261, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"learning_rate": 9.789864029666255e-05, |
|
"loss": 0.0331, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 22.22, |
|
"eval_loss": 0.3826364576816559, |
|
"eval_runtime": 174.2395, |
|
"eval_samples_per_second": 18.664, |
|
"eval_steps_per_second": 2.336, |
|
"eval_wer": 0.2383526696227905, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 23.46, |
|
"learning_rate": 9.777503090234858e-05, |
|
"loss": 0.0321, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 23.46, |
|
"eval_loss": 0.3872096538543701, |
|
"eval_runtime": 181.6396, |
|
"eval_samples_per_second": 17.904, |
|
"eval_steps_per_second": 2.241, |
|
"eval_wer": 0.23847415416388265, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 24.69, |
|
"learning_rate": 9.765142150803462e-05, |
|
"loss": 0.0342, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 24.69, |
|
"eval_loss": 0.4173298180103302, |
|
"eval_runtime": 185.9384, |
|
"eval_samples_per_second": 17.49, |
|
"eval_steps_per_second": 2.189, |
|
"eval_wer": 0.24266537083156167, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 25.93, |
|
"learning_rate": 9.752781211372065e-05, |
|
"loss": 0.0348, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 25.93, |
|
"eval_loss": 0.36838769912719727, |
|
"eval_runtime": 184.1892, |
|
"eval_samples_per_second": 17.656, |
|
"eval_steps_per_second": 2.21, |
|
"eval_wer": 0.23671262831804654, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 27.16, |
|
"learning_rate": 9.740420271940668e-05, |
|
"loss": 0.0332, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 27.16, |
|
"eval_loss": 0.3941015899181366, |
|
"eval_runtime": 184.3624, |
|
"eval_samples_per_second": 17.639, |
|
"eval_steps_per_second": 2.208, |
|
"eval_wer": 0.23233918483872928, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 9.728059332509271e-05, |
|
"loss": 0.0339, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"eval_loss": 0.3854130506515503, |
|
"eval_runtime": 186.7325, |
|
"eval_samples_per_second": 17.415, |
|
"eval_steps_per_second": 2.18, |
|
"eval_wer": 0.24254388629046955, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 29.63, |
|
"learning_rate": 9.715698393077875e-05, |
|
"loss": 0.0349, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 29.63, |
|
"eval_loss": 0.39330288767814636, |
|
"eval_runtime": 183.7314, |
|
"eval_samples_per_second": 17.7, |
|
"eval_steps_per_second": 2.215, |
|
"eval_wer": 0.23774524691732976, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 30.86, |
|
"learning_rate": 9.703337453646477e-05, |
|
"loss": 0.0327, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 30.86, |
|
"eval_loss": 0.3882978558540344, |
|
"eval_runtime": 186.0182, |
|
"eval_samples_per_second": 17.482, |
|
"eval_steps_per_second": 2.188, |
|
"eval_wer": 0.2347081333900261, |
|
"step": 2500 |
|
} |
|
], |
|
"max_steps": 81000, |
|
"num_train_epochs": 1000, |
|
"total_flos": 7.954928320372541e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|