{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 49.99819168173599,
  "global_step": 6900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.72,
      "learning_rate": 1.485e-05,
      "loss": 10.7673,
      "step": 100
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.985e-05,
      "loss": 4.4331,
      "step": 200
    },
    {
      "epoch": 2.17,
      "learning_rate": 4.484999999999999e-05,
      "loss": 3.4707,
      "step": 300
    },
    {
      "epoch": 2.9,
      "learning_rate": 5.985e-05,
      "loss": 3.0807,
      "step": 400
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.484999999999999e-05,
      "loss": 2.9736,
      "step": 500
    },
    {
      "epoch": 3.62,
      "eval_cer": 1.0,
      "eval_loss": 2.9508235454559326,
      "eval_runtime": 114.1579,
      "eval_samples_per_second": 33.664,
      "eval_steps_per_second": 4.213,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 4.35,
      "learning_rate": 8.984999999999999e-05,
      "loss": 2.9229,
      "step": 600
    },
    {
      "epoch": 5.07,
      "learning_rate": 0.00010484999999999999,
      "loss": 2.8971,
      "step": 700
    },
    {
      "epoch": 5.8,
      "learning_rate": 0.00011985,
      "loss": 2.7156,
      "step": 800
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.00013485,
      "loss": 1.8057,
      "step": 900
    },
    {
      "epoch": 7.25,
      "learning_rate": 0.00014984999999999998,
      "loss": 1.3293,
      "step": 1000
    },
    {
      "epoch": 7.25,
      "eval_cer": 0.08615393306828052,
      "eval_loss": 0.33302000164985657,
      "eval_runtime": 114.4981,
      "eval_samples_per_second": 33.564,
      "eval_steps_per_second": 4.201,
      "eval_wer": 0.8407494145199064,
      "step": 1000
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.00016485,
      "loss": 1.1212,
      "step": 1100
    },
    {
      "epoch": 8.69,
      "learning_rate": 0.00017984999999999998,
      "loss": 1.0403,
      "step": 1200
    },
    {
      "epoch": 9.42,
      "learning_rate": 0.00019484999999999997,
      "loss": 0.9903,
      "step": 1300
    },
    {
      "epoch": 10.14,
      "learning_rate": 0.00020984999999999998,
      "loss": 0.9766,
      "step": 1400
    },
    {
      "epoch": 10.87,
      "learning_rate": 0.00022485,
      "loss": 0.956,
      "step": 1500
    },
    {
      "epoch": 10.87,
      "eval_cer": 0.06022301157428658,
      "eval_loss": 0.2042141854763031,
      "eval_runtime": 114.0353,
      "eval_samples_per_second": 33.7,
      "eval_steps_per_second": 4.218,
      "eval_wer": 0.6872235232890971,
      "step": 1500
    },
    {
      "epoch": 11.59,
      "learning_rate": 0.00023984999999999998,
      "loss": 0.9616,
      "step": 1600
    },
    {
      "epoch": 12.32,
      "learning_rate": 0.00025485,
      "loss": 0.9653,
      "step": 1700
    },
    {
      "epoch": 13.04,
      "learning_rate": 0.00026984999999999997,
      "loss": 0.9653,
      "step": 1800
    },
    {
      "epoch": 13.77,
      "learning_rate": 0.00028484999999999996,
      "loss": 0.9486,
      "step": 1900
    },
    {
      "epoch": 14.49,
      "learning_rate": 0.00029985,
      "loss": 0.9509,
      "step": 2000
    },
    {
      "epoch": 14.49,
      "eval_cer": 0.06515214643343903,
      "eval_loss": 0.21843121945858002,
      "eval_runtime": 115.7242,
      "eval_samples_per_second": 33.208,
      "eval_steps_per_second": 4.156,
      "eval_wer": 0.7088212334113974,
      "step": 2000
    },
    {
      "epoch": 15.22,
      "learning_rate": 0.00029393877551020406,
      "loss": 0.9402,
      "step": 2100
    },
    {
      "epoch": 15.94,
      "learning_rate": 0.0002878163265306122,
      "loss": 0.9357,
      "step": 2200
    },
    {
      "epoch": 16.67,
      "learning_rate": 0.0002816938775510204,
      "loss": 0.9301,
      "step": 2300
    },
    {
      "epoch": 17.39,
      "learning_rate": 0.00027563265306122445,
      "loss": 0.9268,
      "step": 2400
    },
    {
      "epoch": 18.12,
      "learning_rate": 0.00026951020408163266,
      "loss": 0.9272,
      "step": 2500
    },
    {
      "epoch": 18.12,
      "eval_cer": 0.07027901163078096,
      "eval_loss": 0.2312462031841278,
      "eval_runtime": 107.6714,
      "eval_samples_per_second": 35.692,
      "eval_steps_per_second": 4.467,
      "eval_wer": 0.7210512620348686,
      "step": 2500
    },
    {
      "epoch": 18.84,
      "learning_rate": 0.0002633877551020408,
      "loss": 0.9191,
      "step": 2600
    },
    {
      "epoch": 19.56,
      "learning_rate": 0.00025726530612244896,
      "loss": 0.9016,
      "step": 2700
    },
    {
      "epoch": 20.29,
      "learning_rate": 0.0002511428571428571,
      "loss": 0.9105,
      "step": 2800
    },
    {
      "epoch": 21.01,
      "learning_rate": 0.00024502040816326527,
      "loss": 0.8923,
      "step": 2900
    },
    {
      "epoch": 21.74,
      "learning_rate": 0.00023889795918367345,
      "loss": 0.8561,
      "step": 3000
    },
    {
      "epoch": 21.74,
      "eval_cer": 0.06309716327582676,
      "eval_loss": 0.21578675508499146,
      "eval_runtime": 108.0601,
      "eval_samples_per_second": 35.564,
      "eval_steps_per_second": 4.451,
      "eval_wer": 0.6838407494145199,
      "step": 3000
    },
    {
      "epoch": 22.46,
      "learning_rate": 0.0002327755102040816,
      "loss": 0.8562,
      "step": 3100
    },
    {
      "epoch": 23.19,
      "learning_rate": 0.00022665306122448975,
      "loss": 0.855,
      "step": 3200
    },
    {
      "epoch": 23.91,
      "learning_rate": 0.00022053061224489796,
      "loss": 0.8398,
      "step": 3300
    },
    {
      "epoch": 24.64,
      "learning_rate": 0.0002144081632653061,
      "loss": 0.827,
      "step": 3400
    },
    {
      "epoch": 25.36,
      "learning_rate": 0.00020828571428571426,
      "loss": 0.8258,
      "step": 3500
    },
    {
      "epoch": 25.36,
      "eval_cer": 0.060060590225059496,
      "eval_loss": 0.19697847962379456,
      "eval_runtime": 107.078,
      "eval_samples_per_second": 35.89,
      "eval_steps_per_second": 4.492,
      "eval_wer": 0.6843611761644548,
      "step": 3500
    },
    {
      "epoch": 26.09,
      "learning_rate": 0.00020216326530612242,
      "loss": 0.8283,
      "step": 3600
    },
    {
      "epoch": 26.81,
      "learning_rate": 0.00019604081632653057,
      "loss": 0.8212,
      "step": 3700
    },
    {
      "epoch": 27.54,
      "learning_rate": 0.00018991836734693878,
      "loss": 0.8332,
      "step": 3800
    },
    {
      "epoch": 28.26,
      "learning_rate": 0.00018379591836734693,
      "loss": 0.8324,
      "step": 3900
    },
    {
      "epoch": 28.98,
      "learning_rate": 0.00017767346938775508,
      "loss": 0.7993,
      "step": 4000
    },
    {
      "epoch": 28.98,
      "eval_cer": 0.05770901156016298,
      "eval_loss": 0.18949392437934875,
      "eval_runtime": 106.6069,
      "eval_samples_per_second": 36.048,
      "eval_steps_per_second": 4.512,
      "eval_wer": 0.6697892271662763,
      "step": 4000
    },
    {
      "epoch": 29.71,
      "learning_rate": 0.00017155102040816324,
      "loss": 0.7777,
      "step": 4100
    },
    {
      "epoch": 30.43,
      "learning_rate": 0.0001654285714285714,
      "loss": 0.7764,
      "step": 4200
    },
    {
      "epoch": 31.16,
      "learning_rate": 0.0001593061224489796,
      "loss": 0.7748,
      "step": 4300
    },
    {
      "epoch": 31.88,
      "learning_rate": 0.00015318367346938775,
      "loss": 0.7587,
      "step": 4400
    },
    {
      "epoch": 32.61,
      "learning_rate": 0.00014712244897959183,
      "loss": 0.7525,
      "step": 4500
    },
    {
      "epoch": 32.61,
      "eval_cer": 0.05496197221888748,
      "eval_loss": 0.18448850512504578,
      "eval_runtime": 106.7665,
      "eval_samples_per_second": 35.994,
      "eval_steps_per_second": 4.505,
      "eval_wer": 0.6453291699193339,
      "step": 4500
    },
    {
      "epoch": 33.33,
      "learning_rate": 0.00014099999999999998,
      "loss": 0.7558,
      "step": 4600
    },
    {
      "epoch": 34.06,
      "learning_rate": 0.00013487755102040816,
      "loss": 0.7444,
      "step": 4700
    },
    {
      "epoch": 34.78,
      "learning_rate": 0.00012875510204081632,
      "loss": 0.7302,
      "step": 4800
    },
    {
      "epoch": 35.51,
      "learning_rate": 0.0001226326530612245,
      "loss": 0.7391,
      "step": 4900
    },
    {
      "epoch": 36.23,
      "learning_rate": 0.00011657142857142856,
      "loss": 0.7211,
      "step": 5000
    },
    {
      "epoch": 36.23,
      "eval_cer": 0.05309765760167223,
      "eval_loss": 0.17814987897872925,
      "eval_runtime": 106.0316,
      "eval_samples_per_second": 36.244,
      "eval_steps_per_second": 4.536,
      "eval_wer": 0.6273744470465782,
      "step": 5000
    },
    {
      "epoch": 36.95,
      "learning_rate": 0.00011044897959183672,
      "loss": 0.7074,
      "step": 5100
    },
    {
      "epoch": 37.68,
      "learning_rate": 0.00010432653061224489,
      "loss": 0.6976,
      "step": 5200
    },
    {
      "epoch": 38.41,
      "learning_rate": 9.820408163265305e-05,
      "loss": 0.6933,
      "step": 5300
    },
    {
      "epoch": 39.13,
      "learning_rate": 9.214285714285714e-05,
      "loss": 0.6845,
      "step": 5400
    },
    {
      "epoch": 39.85,
      "learning_rate": 8.602040816326529e-05,
      "loss": 0.677,
      "step": 5500
    },
    {
      "epoch": 39.85,
      "eval_cer": 0.05139576433368407,
      "eval_loss": 0.17315863072872162,
      "eval_runtime": 106.2239,
      "eval_samples_per_second": 36.178,
      "eval_steps_per_second": 4.528,
      "eval_wer": 0.6187874056726516,
      "step": 5500
    },
    {
      "epoch": 40.58,
      "learning_rate": 7.989795918367346e-05,
      "loss": 0.6951,
      "step": 5600
    },
    {
      "epoch": 41.3,
      "learning_rate": 7.377551020408162e-05,
      "loss": 0.6814,
      "step": 5700
    },
    {
      "epoch": 42.03,
      "learning_rate": 6.765306122448979e-05,
      "loss": 0.6822,
      "step": 5800
    },
    {
      "epoch": 42.75,
      "learning_rate": 6.153061224489796e-05,
      "loss": 0.6658,
      "step": 5900
    },
    {
      "epoch": 43.48,
      "learning_rate": 5.5408163265306116e-05,
      "loss": 0.6517,
      "step": 6000
    },
    {
      "epoch": 43.48,
      "eval_cer": 0.05030824747364184,
      "eval_loss": 0.1690707802772522,
      "eval_runtime": 105.9054,
      "eval_samples_per_second": 36.287,
      "eval_steps_per_second": 4.542,
      "eval_wer": 0.6177465521727816,
      "step": 6000
    },
    {
      "epoch": 44.2,
      "learning_rate": 4.928571428571428e-05,
      "loss": 0.6327,
      "step": 6100
    },
    {
      "epoch": 44.93,
      "learning_rate": 4.316326530612245e-05,
      "loss": 0.6355,
      "step": 6200
    },
    {
      "epoch": 45.65,
      "learning_rate": 3.704081632653061e-05,
      "loss": 0.6328,
      "step": 6300
    },
    {
      "epoch": 46.38,
      "learning_rate": 3.0918367346938774e-05,
      "loss": 0.6388,
      "step": 6400
    },
    {
      "epoch": 47.1,
      "learning_rate": 2.4795918367346937e-05,
      "loss": 0.6326,
      "step": 6500
    },
    {
      "epoch": 47.1,
      "eval_cer": 0.0478507418418581,
      "eval_loss": 0.1618689000606537,
      "eval_runtime": 108.5769,
      "eval_samples_per_second": 35.394,
      "eval_steps_per_second": 4.43,
      "eval_wer": 0.6044756700494406,
      "step": 6500
    },
    {
      "epoch": 47.82,
      "learning_rate": 1.86734693877551e-05,
      "loss": 0.6231,
      "step": 6600
    },
    {
      "epoch": 48.55,
      "learning_rate": 1.2551020408163265e-05,
      "loss": 0.6309,
      "step": 6700
    },
    {
      "epoch": 49.27,
      "learning_rate": 6.428571428571428e-06,
      "loss": 0.6341,
      "step": 6800
    },
    {
      "epoch": 50.0,
      "learning_rate": 3.0612244897959183e-07,
      "loss": 0.6141,
      "step": 6900
    },
    {
      "epoch": 50.0,
      "step": 6900,
      "total_flos": 4.523197640315366e+19,
      "train_loss": 1.2140741569408473,
      "train_runtime": 17169.0647,
      "train_samples_per_second": 25.747,
      "train_steps_per_second": 0.402
    }
  ],
  "max_steps": 6900,
  "num_train_epochs": 50,
  "total_flos": 4.523197640315366e+19,
  "trial_name": null,
  "trial_params": null
}