{
  "best_metric": 0.7707859873771667,
  "best_model_checkpoint": "bert-balanced-v1/checkpoint-204",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 204,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04411764705882353,
      "grad_norm": 11.873502731323242,
      "learning_rate": 7.142857142857143e-06,
      "loss": 1.1054,
      "step": 3
    },
    {
      "epoch": 0.08823529411764706,
      "grad_norm": 6.837568283081055,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 1.1623,
      "step": 6
    },
    {
      "epoch": 0.1323529411764706,
      "grad_norm": 6.079131126403809,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 1.1727,
      "step": 9
    },
    {
      "epoch": 0.17647058823529413,
      "grad_norm": 7.644276142120361,
      "learning_rate": 2.857142857142857e-05,
      "loss": 1.1962,
      "step": 12
    },
    {
      "epoch": 0.22058823529411764,
      "grad_norm": 6.449653625488281,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.1334,
      "step": 15
    },
    {
      "epoch": 0.2647058823529412,
      "grad_norm": 3.3152713775634766,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 1.0897,
      "step": 18
    },
    {
      "epoch": 0.3088235294117647,
      "grad_norm": 6.6442036628723145,
      "learning_rate": 5e-05,
      "loss": 1.1438,
      "step": 21
    },
    {
      "epoch": 0.35294117647058826,
      "grad_norm": 3.3698086738586426,
      "learning_rate": 4.918032786885246e-05,
      "loss": 1.2006,
      "step": 24
    },
    {
      "epoch": 0.39705882352941174,
      "grad_norm": 7.311341762542725,
      "learning_rate": 4.836065573770492e-05,
      "loss": 1.0572,
      "step": 27
    },
    {
      "epoch": 0.4411764705882353,
      "grad_norm": 8.757562637329102,
      "learning_rate": 4.754098360655738e-05,
      "loss": 1.0659,
      "step": 30
    },
    {
      "epoch": 0.4852941176470588,
      "grad_norm": 7.169471263885498,
      "learning_rate": 4.672131147540984e-05,
      "loss": 1.1263,
      "step": 33
    },
    {
      "epoch": 0.5294117647058824,
      "grad_norm": 6.60593843460083,
      "learning_rate": 4.59016393442623e-05,
      "loss": 1.0724,
      "step": 36
    },
    {
      "epoch": 0.5735294117647058,
      "grad_norm": 5.335102558135986,
      "learning_rate": 4.508196721311476e-05,
      "loss": 1.0535,
      "step": 39
    },
    {
      "epoch": 0.6176470588235294,
      "grad_norm": 5.638067245483398,
      "learning_rate": 4.426229508196721e-05,
      "loss": 1.0671,
      "step": 42
    },
    {
      "epoch": 0.6617647058823529,
      "grad_norm": 3.3761277198791504,
      "learning_rate": 4.3442622950819674e-05,
      "loss": 1.0608,
      "step": 45
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 3.7545318603515625,
      "learning_rate": 4.262295081967213e-05,
      "loss": 1.036,
      "step": 48
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.6131749153137207,
      "learning_rate": 4.1803278688524595e-05,
      "loss": 1.0085,
      "step": 51
    },
    {
      "epoch": 0.7941176470588235,
      "grad_norm": 10.428824424743652,
      "learning_rate": 4.098360655737705e-05,
      "loss": 0.9663,
      "step": 54
    },
    {
      "epoch": 0.8382352941176471,
      "grad_norm": 9.748165130615234,
      "learning_rate": 4.016393442622951e-05,
      "loss": 0.9838,
      "step": 57
    },
    {
      "epoch": 0.8823529411764706,
      "grad_norm": 7.34658145904541,
      "learning_rate": 3.934426229508197e-05,
      "loss": 0.914,
      "step": 60
    },
    {
      "epoch": 0.9264705882352942,
      "grad_norm": 9.839737892150879,
      "learning_rate": 3.8524590163934424e-05,
      "loss": 1.0774,
      "step": 63
    },
    {
      "epoch": 0.9705882352941176,
      "grad_norm": 6.513272762298584,
      "learning_rate": 3.7704918032786885e-05,
      "loss": 0.9173,
      "step": 66
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6074074074074074,
      "eval_f1_macro": 0.5499705029992206,
      "eval_f1_micro": 0.6074074074074074,
      "eval_f1_weighted": 0.5499705029992207,
      "eval_loss": 0.9292003512382507,
      "eval_precision_macro": 0.6655672369958084,
      "eval_precision_micro": 0.6074074074074074,
      "eval_precision_weighted": 0.6655672369958084,
      "eval_recall_macro": 0.6074074074074074,
      "eval_recall_micro": 0.6074074074074074,
      "eval_recall_weighted": 0.6074074074074074,
      "eval_runtime": 134.9252,
      "eval_samples_per_second": 1.001,
      "eval_steps_per_second": 0.067,
      "step": 68
    },
    {
      "epoch": 1.0147058823529411,
      "grad_norm": 6.397573947906494,
      "learning_rate": 3.6885245901639346e-05,
      "loss": 1.1385,
      "step": 69
    },
    {
      "epoch": 1.0588235294117647,
      "grad_norm": 8.004409790039062,
      "learning_rate": 3.6065573770491806e-05,
      "loss": 0.8346,
      "step": 72
    },
    {
      "epoch": 1.1029411764705883,
      "grad_norm": 9.197546005249023,
      "learning_rate": 3.524590163934427e-05,
      "loss": 0.7762,
      "step": 75
    },
    {
      "epoch": 1.1470588235294117,
      "grad_norm": 15.531254768371582,
      "learning_rate": 3.442622950819672e-05,
      "loss": 1.0938,
      "step": 78
    },
    {
      "epoch": 1.1911764705882353,
      "grad_norm": 9.45050048828125,
      "learning_rate": 3.360655737704918e-05,
      "loss": 0.7187,
      "step": 81
    },
    {
      "epoch": 1.2352941176470589,
      "grad_norm": 7.438510417938232,
      "learning_rate": 3.2786885245901635e-05,
      "loss": 0.6956,
      "step": 84
    },
    {
      "epoch": 1.2794117647058822,
      "grad_norm": 7.671905994415283,
      "learning_rate": 3.19672131147541e-05,
      "loss": 0.8591,
      "step": 87
    },
    {
      "epoch": 1.3235294117647058,
      "grad_norm": 5.862545013427734,
      "learning_rate": 3.114754098360656e-05,
      "loss": 0.7878,
      "step": 90
    },
    {
      "epoch": 1.3676470588235294,
      "grad_norm": 12.08448314666748,
      "learning_rate": 3.0327868852459017e-05,
      "loss": 0.922,
      "step": 93
    },
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 25.875642776489258,
      "learning_rate": 2.9508196721311478e-05,
      "loss": 0.806,
      "step": 96
    },
    {
      "epoch": 1.4558823529411764,
      "grad_norm": 7.914124488830566,
      "learning_rate": 2.8688524590163935e-05,
      "loss": 0.7999,
      "step": 99
    },
    {
      "epoch": 1.5,
      "grad_norm": 7.366003513336182,
      "learning_rate": 2.7868852459016392e-05,
      "loss": 0.8753,
      "step": 102
    },
    {
      "epoch": 1.5441176470588234,
      "grad_norm": 7.287258625030518,
      "learning_rate": 2.7049180327868856e-05,
      "loss": 0.75,
      "step": 105
    },
    {
      "epoch": 1.5882352941176472,
      "grad_norm": 6.466761112213135,
      "learning_rate": 2.6229508196721314e-05,
      "loss": 0.7331,
      "step": 108
    },
    {
      "epoch": 1.6323529411764706,
      "grad_norm": 11.213692665100098,
      "learning_rate": 2.540983606557377e-05,
      "loss": 0.9052,
      "step": 111
    },
    {
      "epoch": 1.6764705882352942,
      "grad_norm": 10.924762725830078,
      "learning_rate": 2.459016393442623e-05,
      "loss": 1.0455,
      "step": 114
    },
    {
      "epoch": 1.7205882352941178,
      "grad_norm": 8.067938804626465,
      "learning_rate": 2.377049180327869e-05,
      "loss": 0.8233,
      "step": 117
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 6.702548027038574,
      "learning_rate": 2.295081967213115e-05,
      "loss": 1.0777,
      "step": 120
    },
    {
      "epoch": 1.8088235294117647,
      "grad_norm": 5.71842098236084,
      "learning_rate": 2.2131147540983607e-05,
      "loss": 0.6789,
      "step": 123
    },
    {
      "epoch": 1.8529411764705883,
      "grad_norm": 8.926857948303223,
      "learning_rate": 2.1311475409836064e-05,
      "loss": 0.6347,
      "step": 126
    },
    {
      "epoch": 1.8970588235294117,
      "grad_norm": 21.031246185302734,
      "learning_rate": 2.0491803278688525e-05,
      "loss": 0.7921,
      "step": 129
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 12.221565246582031,
      "learning_rate": 1.9672131147540985e-05,
      "loss": 0.8242,
      "step": 132
    },
    {
      "epoch": 1.9852941176470589,
      "grad_norm": 4.548130035400391,
      "learning_rate": 1.8852459016393442e-05,
      "loss": 0.5987,
      "step": 135
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6888888888888889,
      "eval_f1_macro": 0.6845081103855066,
      "eval_f1_micro": 0.6888888888888889,
      "eval_f1_weighted": 0.6845081103855067,
      "eval_loss": 0.811522901058197,
      "eval_precision_macro": 0.7045918367346938,
      "eval_precision_micro": 0.6888888888888889,
      "eval_precision_weighted": 0.7045918367346938,
      "eval_recall_macro": 0.6888888888888888,
      "eval_recall_micro": 0.6888888888888889,
      "eval_recall_weighted": 0.6888888888888889,
      "eval_runtime": 131.5615,
      "eval_samples_per_second": 1.026,
      "eval_steps_per_second": 0.068,
      "step": 136
    },
    {
      "epoch": 2.0294117647058822,
      "grad_norm": 7.7553510665893555,
      "learning_rate": 1.8032786885245903e-05,
      "loss": 0.6751,
      "step": 138
    },
    {
      "epoch": 2.073529411764706,
      "grad_norm": 3.660639762878418,
      "learning_rate": 1.721311475409836e-05,
      "loss": 0.637,
      "step": 141
    },
    {
      "epoch": 2.1176470588235294,
      "grad_norm": 7.876177787780762,
      "learning_rate": 1.6393442622950818e-05,
      "loss": 0.6454,
      "step": 144
    },
    {
      "epoch": 2.161764705882353,
      "grad_norm": 8.173316955566406,
      "learning_rate": 1.557377049180328e-05,
      "loss": 0.5053,
      "step": 147
    },
    {
      "epoch": 2.2058823529411766,
      "grad_norm": 7.380105018615723,
      "learning_rate": 1.4754098360655739e-05,
      "loss": 0.64,
      "step": 150
    },
    {
      "epoch": 2.25,
      "grad_norm": 13.381223678588867,
      "learning_rate": 1.3934426229508196e-05,
      "loss": 0.7163,
      "step": 153
    },
    {
      "epoch": 2.2941176470588234,
      "grad_norm": 21.8228759765625,
      "learning_rate": 1.3114754098360657e-05,
      "loss": 0.8269,
      "step": 156
    },
    {
      "epoch": 2.338235294117647,
      "grad_norm": 6.742076396942139,
      "learning_rate": 1.2295081967213116e-05,
      "loss": 0.5206,
      "step": 159
    },
    {
      "epoch": 2.3823529411764706,
      "grad_norm": 8.277242660522461,
      "learning_rate": 1.1475409836065575e-05,
      "loss": 0.5042,
      "step": 162
    },
    {
      "epoch": 2.426470588235294,
      "grad_norm": 9.727288246154785,
      "learning_rate": 1.0655737704918032e-05,
      "loss": 0.6658,
      "step": 165
    },
    {
      "epoch": 2.4705882352941178,
      "grad_norm": 18.33995819091797,
      "learning_rate": 9.836065573770493e-06,
      "loss": 0.6377,
      "step": 168
    },
    {
      "epoch": 2.514705882352941,
      "grad_norm": 8.533559799194336,
      "learning_rate": 9.016393442622952e-06,
      "loss": 0.8016,
      "step": 171
    },
    {
      "epoch": 2.5588235294117645,
      "grad_norm": 22.230371475219727,
      "learning_rate": 8.196721311475409e-06,
      "loss": 0.6268,
      "step": 174
    },
    {
      "epoch": 2.6029411764705883,
      "grad_norm": 7.892202854156494,
      "learning_rate": 7.3770491803278695e-06,
      "loss": 0.732,
      "step": 177
    },
    {
      "epoch": 2.6470588235294117,
      "grad_norm": 6.714171886444092,
      "learning_rate": 6.557377049180328e-06,
      "loss": 0.6977,
      "step": 180
    },
    {
      "epoch": 2.6911764705882355,
      "grad_norm": 5.960449695587158,
      "learning_rate": 5.737704918032787e-06,
      "loss": 0.3479,
      "step": 183
    },
    {
      "epoch": 2.735294117647059,
      "grad_norm": 5.625789165496826,
      "learning_rate": 4.918032786885246e-06,
      "loss": 0.5536,
      "step": 186
    },
    {
      "epoch": 2.7794117647058822,
      "grad_norm": 3.9824419021606445,
      "learning_rate": 4.098360655737704e-06,
      "loss": 0.6288,
      "step": 189
    },
    {
      "epoch": 2.8235294117647056,
      "grad_norm": 4.617522716522217,
      "learning_rate": 3.278688524590164e-06,
      "loss": 0.4954,
      "step": 192
    },
    {
      "epoch": 2.8676470588235294,
      "grad_norm": 5.714521408081055,
      "learning_rate": 2.459016393442623e-06,
      "loss": 0.4388,
      "step": 195
    },
    {
      "epoch": 2.911764705882353,
      "grad_norm": 11.925094604492188,
      "learning_rate": 1.639344262295082e-06,
      "loss": 0.871,
      "step": 198
    },
    {
      "epoch": 2.9558823529411766,
      "grad_norm": 9.523457527160645,
      "learning_rate": 8.19672131147541e-07,
      "loss": 0.5172,
      "step": 201
    },
    {
      "epoch": 3.0,
      "grad_norm": 19.425655364990234,
      "learning_rate": 0.0,
      "loss": 0.4496,
      "step": 204
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.674074074074074,
      "eval_f1_macro": 0.6722359201219389,
      "eval_f1_micro": 0.674074074074074,
      "eval_f1_weighted": 0.6722359201219389,
      "eval_loss": 0.7707859873771667,
      "eval_precision_macro": 0.6750194250194249,
      "eval_precision_micro": 0.674074074074074,
      "eval_precision_weighted": 0.675019425019425,
      "eval_recall_macro": 0.674074074074074,
      "eval_recall_micro": 0.674074074074074,
      "eval_recall_weighted": 0.674074074074074,
      "eval_runtime": 139.3011,
      "eval_samples_per_second": 0.969,
      "eval_steps_per_second": 0.065,
      "step": 204
    }
  ],
  "logging_steps": 3,
  "max_steps": 204,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 423875715959808.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}