|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 20400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24509803921568626, |
|
"grad_norm": 2.2194137573242188, |
|
"learning_rate": 4.901960784313726e-06, |
|
"loss": 0.6351, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.49019607843137253, |
|
"grad_norm": 5.901831150054932, |
|
"learning_rate": 9.803921568627451e-06, |
|
"loss": 0.4992, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 5.434150695800781, |
|
"learning_rate": 1.4705882352941179e-05, |
|
"loss": 0.3933, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9803921568627451, |
|
"grad_norm": 6.714095115661621, |
|
"learning_rate": 1.9607843137254903e-05, |
|
"loss": 0.3433, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.795555184225917, |
|
"eval_f1": 0.758320987654321, |
|
"eval_precision": 0.9295399515738498, |
|
"eval_recall": 0.6403669724770642, |
|
"eval_runtime": 50.1487, |
|
"eval_samples_per_second": 238.67, |
|
"eval_steps_per_second": 3.749, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.2254901960784315, |
|
"grad_norm": 4.650435924530029, |
|
"learning_rate": 1.9501089324618738e-05, |
|
"loss": 0.2707, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4705882352941178, |
|
"grad_norm": 5.975987434387207, |
|
"learning_rate": 1.8956427015250548e-05, |
|
"loss": 0.2568, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.715686274509804, |
|
"grad_norm": 4.295902729034424, |
|
"learning_rate": 1.8413943355119828e-05, |
|
"loss": 0.2396, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.9607843137254903, |
|
"grad_norm": 4.352377414703369, |
|
"learning_rate": 1.7869281045751637e-05, |
|
"loss": 0.2253, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.821873172361935, |
|
"eval_f1": 0.7964094728800611, |
|
"eval_precision": 0.9314272950636587, |
|
"eval_recall": 0.69557964970809, |
|
"eval_runtime": 50.0719, |
|
"eval_samples_per_second": 239.036, |
|
"eval_steps_per_second": 3.755, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.2058823529411766, |
|
"grad_norm": 8.222220420837402, |
|
"learning_rate": 1.7324618736383444e-05, |
|
"loss": 0.1567, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.450980392156863, |
|
"grad_norm": 8.841856956481934, |
|
"learning_rate": 1.677995642701525e-05, |
|
"loss": 0.1406, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.696078431372549, |
|
"grad_norm": 5.413289546966553, |
|
"learning_rate": 1.6236383442265798e-05, |
|
"loss": 0.1379, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 6.019312381744385, |
|
"learning_rate": 1.5691721132897604e-05, |
|
"loss": 0.1406, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8236277049043362, |
|
"eval_f1": 0.7998103366524419, |
|
"eval_precision": 0.9268131868131868, |
|
"eval_recall": 0.7034195162635529, |
|
"eval_runtime": 50.1875, |
|
"eval_samples_per_second": 238.486, |
|
"eval_steps_per_second": 3.746, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 3.186274509803922, |
|
"grad_norm": 4.456413269042969, |
|
"learning_rate": 1.5148148148148149e-05, |
|
"loss": 0.0954, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.431372549019608, |
|
"grad_norm": 3.628704786300659, |
|
"learning_rate": 1.4603485838779959e-05, |
|
"loss": 0.0892, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.6764705882352944, |
|
"grad_norm": 2.9508347511291504, |
|
"learning_rate": 1.4058823529411765e-05, |
|
"loss": 0.0919, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.9215686274509802, |
|
"grad_norm": 6.59318208694458, |
|
"learning_rate": 1.3514161220043573e-05, |
|
"loss": 0.0873, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8282229091820537, |
|
"eval_f1": 0.808351976137211, |
|
"eval_precision": 0.9161208535812381, |
|
"eval_recall": 0.7232693911592994, |
|
"eval_runtime": 50.5224, |
|
"eval_samples_per_second": 236.905, |
|
"eval_steps_per_second": 3.721, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 2.5619020462036133, |
|
"learning_rate": 1.2969498910675382e-05, |
|
"loss": 0.0696, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.411764705882353, |
|
"grad_norm": 6.823647499084473, |
|
"learning_rate": 1.242483660130719e-05, |
|
"loss": 0.0564, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.6568627450980395, |
|
"grad_norm": 7.367131233215332, |
|
"learning_rate": 1.1880174291939e-05, |
|
"loss": 0.0608, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.901960784313726, |
|
"grad_norm": 1.920448660850525, |
|
"learning_rate": 1.1335511982570806e-05, |
|
"loss": 0.0642, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8349903918456011, |
|
"eval_f1": 0.8161593595829842, |
|
"eval_precision": 0.9233361415332771, |
|
"eval_recall": 0.7312760633861551, |
|
"eval_runtime": 50.2334, |
|
"eval_samples_per_second": 238.268, |
|
"eval_steps_per_second": 3.743, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 5.147058823529412, |
|
"grad_norm": 0.24896511435508728, |
|
"learning_rate": 1.0790849673202614e-05, |
|
"loss": 0.0484, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.392156862745098, |
|
"grad_norm": 0.1454085409641266, |
|
"learning_rate": 1.0248366013071896e-05, |
|
"loss": 0.0428, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.637254901960784, |
|
"grad_norm": 0.20693035423755646, |
|
"learning_rate": 9.703703703703703e-06, |
|
"loss": 0.0422, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.882352941176471, |
|
"grad_norm": 7.705965518951416, |
|
"learning_rate": 9.159041394335513e-06, |
|
"loss": 0.0439, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.82078703316902, |
|
"eval_f1": 0.7939283312518013, |
|
"eval_precision": 0.9361123697326688, |
|
"eval_recall": 0.6892410341951626, |
|
"eval_runtime": 50.1119, |
|
"eval_samples_per_second": 238.846, |
|
"eval_steps_per_second": 3.752, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 6.127450980392156, |
|
"grad_norm": 3.6634812355041504, |
|
"learning_rate": 8.614379084967321e-06, |
|
"loss": 0.0368, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.372549019607844, |
|
"grad_norm": 0.9099088311195374, |
|
"learning_rate": 8.069716775599129e-06, |
|
"loss": 0.0299, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.617647058823529, |
|
"grad_norm": 4.98677921295166, |
|
"learning_rate": 7.5250544662309376e-06, |
|
"loss": 0.0305, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.862745098039216, |
|
"grad_norm": 1.047827124595642, |
|
"learning_rate": 6.9803921568627454e-06, |
|
"loss": 0.0322, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8311471300860557, |
|
"eval_f1": 0.8085630387420669, |
|
"eval_precision": 0.9355545813239807, |
|
"eval_recall": 0.7119266055045872, |
|
"eval_runtime": 50.2076, |
|
"eval_samples_per_second": 238.39, |
|
"eval_steps_per_second": 3.744, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 7.107843137254902, |
|
"grad_norm": 4.889837741851807, |
|
"learning_rate": 6.435729847494554e-06, |
|
"loss": 0.0278, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.352941176470588, |
|
"grad_norm": 0.027428582310676575, |
|
"learning_rate": 5.891067538126363e-06, |
|
"loss": 0.0242, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.598039215686274, |
|
"grad_norm": 15.176671981811523, |
|
"learning_rate": 5.34640522875817e-06, |
|
"loss": 0.0216, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.8431372549019605, |
|
"grad_norm": 0.10369198769330978, |
|
"learning_rate": 4.801742919389979e-06, |
|
"loss": 0.0241, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8279722616759964, |
|
"eval_f1": 0.8052586777641162, |
|
"eval_precision": 0.9298820445609436, |
|
"eval_recall": 0.710091743119266, |
|
"eval_runtime": 50.123, |
|
"eval_samples_per_second": 238.792, |
|
"eval_steps_per_second": 3.751, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 8.088235294117647, |
|
"grad_norm": 6.414863109588623, |
|
"learning_rate": 4.2570806100217874e-06, |
|
"loss": 0.0223, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.333333333333334, |
|
"grad_norm": 7.71334981918335, |
|
"learning_rate": 3.7135076252723314e-06, |
|
"loss": 0.0161, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.57843137254902, |
|
"grad_norm": 6.453121662139893, |
|
"learning_rate": 3.1688453159041397e-06, |
|
"loss": 0.0191, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.823529411764707, |
|
"grad_norm": 17.10112762451172, |
|
"learning_rate": 2.624183006535948e-06, |
|
"loss": 0.0182, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8256328849527947, |
|
"eval_f1": 0.8022738038844149, |
|
"eval_precision": 0.9285087719298246, |
|
"eval_recall": 0.706255212677231, |
|
"eval_runtime": 50.0398, |
|
"eval_samples_per_second": 239.19, |
|
"eval_steps_per_second": 3.757, |
|
"step": 18360 |
|
}, |
|
{ |
|
"epoch": 9.068627450980392, |
|
"grad_norm": 0.027658773586153984, |
|
"learning_rate": 2.0795206971677563e-06, |
|
"loss": 0.0179, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 9.313725490196079, |
|
"grad_norm": 0.012034358456730843, |
|
"learning_rate": 1.5348583877995642e-06, |
|
"loss": 0.0142, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 9.558823529411764, |
|
"grad_norm": 0.053181909024715424, |
|
"learning_rate": 9.923747276688455e-07, |
|
"loss": 0.0117, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.803921568627452, |
|
"grad_norm": 0.0060155196115374565, |
|
"learning_rate": 4.477124183006536e-07, |
|
"loss": 0.0125, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.831397777592113, |
|
"eval_f1": 0.8103383458646617, |
|
"eval_precision": 0.9280947255113025, |
|
"eval_recall": 0.7190992493744788, |
|
"eval_runtime": 50.2235, |
|
"eval_samples_per_second": 238.315, |
|
"eval_steps_per_second": 3.743, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 20400, |
|
"total_flos": 3.433678205764608e+17, |
|
"train_loss": 0.1127255682851754, |
|
"train_runtime": 14662.5965, |
|
"train_samples_per_second": 89.004, |
|
"train_steps_per_second": 1.391 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 20400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.433678205764608e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|