|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 21190, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.952807928268051e-05, |
|
"loss": 0.1097, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.905615856536102e-05, |
|
"loss": 0.0885, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.858423784804153e-05, |
|
"loss": 0.1291, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.8112317130722038e-05, |
|
"loss": 0.1202, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_LOC": { |
|
"f1": 0.7736781419864931, |
|
"number": 12507, |
|
"precision": 0.7976564490107837, |
|
"recall": 0.751099384344767 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.7151848937844216, |
|
"number": 2816, |
|
"precision": 0.8015873015873016, |
|
"recall": 0.6455965909090909 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8523970985879222, |
|
"number": 10969, |
|
"precision": 0.805352798053528, |
|
"recall": 0.9052785121706628 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9086936163571093, |
|
"number": 28256, |
|
"precision": 0.8909103274730507, |
|
"recall": 0.9272013023782559 |
|
}, |
|
"eval_loss": 0.11920765787363052, |
|
"eval_overall_accuracy": 0.967201754087, |
|
"eval_overall_f1": 0.8581709417202936, |
|
"eval_overall_precision": 0.8486787852712344, |
|
"eval_overall_recall": 0.8678778323678228, |
|
"eval_runtime": 57.7447, |
|
"eval_samples_per_second": 73.357, |
|
"eval_steps_per_second": 9.178, |
|
"step": 2119 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.7640396413402548e-05, |
|
"loss": 0.0999, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.7168475696083062e-05, |
|
"loss": 0.0863, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.669655497876357e-05, |
|
"loss": 0.09, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.622463426144408e-05, |
|
"loss": 0.0881, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_LOC": { |
|
"f1": 0.8009789829454955, |
|
"number": 12507, |
|
"precision": 0.7789783889980354, |
|
"recall": 0.824258415287439 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.7636171337916445, |
|
"number": 2816, |
|
"precision": 0.7581379068953448, |
|
"recall": 0.7691761363636364 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8755772529357434, |
|
"number": 10969, |
|
"precision": 0.8458531611148878, |
|
"recall": 0.9074664964901085 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9198921794894381, |
|
"number": 28256, |
|
"precision": 0.9158744079985968, |
|
"recall": 0.9239453567383918 |
|
}, |
|
"eval_loss": 0.1091507151722908, |
|
"eval_overall_accuracy": 0.9707860445565364, |
|
"eval_overall_f1": 0.8752163877668782, |
|
"eval_overall_precision": 0.8611170250514513, |
|
"eval_overall_recall": 0.8897851433599765, |
|
"eval_runtime": 59.0952, |
|
"eval_samples_per_second": 71.681, |
|
"eval_steps_per_second": 8.969, |
|
"step": 4238 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.575271354412459e-05, |
|
"loss": 0.0739, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.52807928268051e-05, |
|
"loss": 0.0616, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.4808872109485608e-05, |
|
"loss": 0.062, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.4336951392166117e-05, |
|
"loss": 0.0629, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_LOC": { |
|
"f1": 0.7920257336880983, |
|
"number": 12507, |
|
"precision": 0.7868076376834464, |
|
"recall": 0.797313504437515 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.7715430861723446, |
|
"number": 2816, |
|
"precision": 0.8854645814167433, |
|
"recall": 0.68359375 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8747102811912363, |
|
"number": 10969, |
|
"precision": 0.8405614388972936, |
|
"recall": 0.9117512991156896 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9222061110043501, |
|
"number": 28256, |
|
"precision": 0.910608935656374, |
|
"recall": 0.9341024915062288 |
|
}, |
|
"eval_loss": 0.11280690133571625, |
|
"eval_overall_accuracy": 0.9706866387194256, |
|
"eval_overall_f1": 0.8758149783730356, |
|
"eval_overall_precision": 0.8665195313200912, |
|
"eval_overall_recall": 0.8853120187724572, |
|
"eval_runtime": 59.3431, |
|
"eval_samples_per_second": 71.381, |
|
"eval_steps_per_second": 8.931, |
|
"step": 6357 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.3865030674846627e-05, |
|
"loss": 0.0554, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.3393109957527137e-05, |
|
"loss": 0.0433, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.2921189240207645e-05, |
|
"loss": 0.0442, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.2449268522888157e-05, |
|
"loss": 0.0409, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_LOC": { |
|
"f1": 0.8152558604321939, |
|
"number": 12507, |
|
"precision": 0.8505647263249348, |
|
"recall": 0.7827616534740546 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.7983193277310924, |
|
"number": 2816, |
|
"precision": 0.8220466516177577, |
|
"recall": 0.7759232954545454 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.886389100524358, |
|
"number": 10969, |
|
"precision": 0.8717383638928068, |
|
"recall": 0.901540705624943 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9238697668005309, |
|
"number": 28256, |
|
"precision": 0.9004804623189867, |
|
"recall": 0.9485065118912798 |
|
}, |
|
"eval_loss": 0.11524520814418793, |
|
"eval_overall_accuracy": 0.9734700021585267, |
|
"eval_overall_f1": 0.8862442293508646, |
|
"eval_overall_precision": 0.8804161013116237, |
|
"eval_overall_recall": 0.8921500329984601, |
|
"eval_runtime": 59.3779, |
|
"eval_samples_per_second": 71.34, |
|
"eval_steps_per_second": 8.926, |
|
"step": 8476 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.1977347805568667e-05, |
|
"loss": 0.0461, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.1505427088249175e-05, |
|
"loss": 0.028, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.1033506370929685e-05, |
|
"loss": 0.0289, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.0561585653610193e-05, |
|
"loss": 0.0331, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 1.0089664936290703e-05, |
|
"loss": 0.0306, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_LOC": { |
|
"f1": 0.8143539881344759, |
|
"number": 12507, |
|
"precision": 0.8400339991500213, |
|
"recall": 0.7901974894059327 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.8030969558331867, |
|
"number": 2816, |
|
"precision": 0.7959539588419952, |
|
"recall": 0.8103693181818182 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.893543068490709, |
|
"number": 10969, |
|
"precision": 0.8739539748953975, |
|
"recall": 0.9140304494484456 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9285937771597428, |
|
"number": 28256, |
|
"precision": 0.9124479059916649, |
|
"recall": 0.9453213476783692 |
|
}, |
|
"eval_loss": 0.1234506368637085, |
|
"eval_overall_accuracy": 0.9740408756802199, |
|
"eval_overall_f1": 0.8897258155486418, |
|
"eval_overall_precision": 0.8830582541803604, |
|
"eval_overall_recall": 0.8964948302412554, |
|
"eval_runtime": 58.1348, |
|
"eval_samples_per_second": 72.865, |
|
"eval_steps_per_second": 9.117, |
|
"step": 10595 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 9.617744218971213e-06, |
|
"loss": 0.0237, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 9.145823501651723e-06, |
|
"loss": 0.023, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 8.673902784332233e-06, |
|
"loss": 0.0246, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 8.201982067012742e-06, |
|
"loss": 0.0184, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_LOC": { |
|
"f1": 0.8221609305672143, |
|
"number": 12507, |
|
"precision": 0.822062350119904, |
|
"recall": 0.8222595346605901 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.8192371475953565, |
|
"number": 2816, |
|
"precision": 0.8513979318268863, |
|
"recall": 0.7894176136363636 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8961138379790156, |
|
"number": 10969, |
|
"precision": 0.8853888592276206, |
|
"recall": 0.9071018324368675 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9297248123720718, |
|
"number": 28256, |
|
"precision": 0.9192873205327798, |
|
"recall": 0.9404020385050963 |
|
}, |
|
"eval_loss": 0.1410222351551056, |
|
"eval_overall_accuracy": 0.9745975483680401, |
|
"eval_overall_f1": 0.8929625177576221, |
|
"eval_overall_precision": 0.8871779096699479, |
|
"eval_overall_recall": 0.8988230549241035, |
|
"eval_runtime": 60.1835, |
|
"eval_samples_per_second": 70.385, |
|
"eval_steps_per_second": 8.806, |
|
"step": 12714 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 7.730061349693252e-06, |
|
"loss": 0.0181, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 7.258140632373761e-06, |
|
"loss": 0.014, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 6.786219915054272e-06, |
|
"loss": 0.0152, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 6.314299197734781e-06, |
|
"loss": 0.0144, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_LOC": { |
|
"f1": 0.8194483254161512, |
|
"number": 12507, |
|
"precision": 0.8182397959183674, |
|
"recall": 0.8206604301591109 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.8199495858840475, |
|
"number": 2816, |
|
"precision": 0.8316289262235208, |
|
"recall": 0.80859375 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8962966345495228, |
|
"number": 10969, |
|
"precision": 0.8978956999085087, |
|
"recall": 0.8947032546266752 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9302089886069755, |
|
"number": 28256, |
|
"precision": 0.9187146210133922, |
|
"recall": 0.9419946206115515 |
|
}, |
|
"eval_loss": 0.15106713771820068, |
|
"eval_overall_accuracy": 0.9748702043784011, |
|
"eval_overall_f1": 0.8925746363868333, |
|
"eval_overall_precision": 0.8874311394607133, |
|
"eval_overall_recall": 0.8977781036884945, |
|
"eval_runtime": 57.4606, |
|
"eval_samples_per_second": 73.72, |
|
"eval_steps_per_second": 9.224, |
|
"step": 14833 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 5.84237848041529e-06, |
|
"loss": 0.0142, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 5.3704577630958e-06, |
|
"loss": 0.0113, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 4.89853704577631e-06, |
|
"loss": 0.0094, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 4.42661632845682e-06, |
|
"loss": 0.0118, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_LOC": { |
|
"f1": 0.8198295522746369, |
|
"number": 12507, |
|
"precision": 0.8205189812590101, |
|
"recall": 0.8191412808827057 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.8218876239441792, |
|
"number": 2816, |
|
"precision": 0.8509505703422053, |
|
"recall": 0.7947443181818182 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8973384030418251, |
|
"number": 10969, |
|
"precision": 0.8911264946507237, |
|
"recall": 0.9036375239310785 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9298639775676194, |
|
"number": 28256, |
|
"precision": 0.9154349987997669, |
|
"recall": 0.9447550962627407 |
|
}, |
|
"eval_loss": 0.16531305015087128, |
|
"eval_overall_accuracy": 0.9748900855458232, |
|
"eval_overall_f1": 0.8929675846763926, |
|
"eval_overall_precision": 0.886101083032491, |
|
"eval_overall_recall": 0.8999413360709834, |
|
"eval_runtime": 59.2269, |
|
"eval_samples_per_second": 71.522, |
|
"eval_steps_per_second": 8.949, |
|
"step": 16952 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 3.954695611137329e-06, |
|
"loss": 0.009, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 3.482774893817839e-06, |
|
"loss": 0.007, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 3.0108541764983485e-06, |
|
"loss": 0.0072, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 2.5389334591788585e-06, |
|
"loss": 0.009, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 2.0670127418593677e-06, |
|
"loss": 0.0076, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_LOC": { |
|
"f1": 0.8171711433904082, |
|
"number": 12507, |
|
"precision": 0.8295576241865062, |
|
"recall": 0.805149116494763 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.8233618233618233, |
|
"number": 2816, |
|
"precision": 0.8257142857142857, |
|
"recall": 0.8210227272727273 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8973920863309351, |
|
"number": 10969, |
|
"precision": 0.8853695324283559, |
|
"recall": 0.9097456468228644 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9303293743112526, |
|
"number": 28256, |
|
"precision": 0.9197592778335005, |
|
"recall": 0.9411452434881087 |
|
}, |
|
"eval_loss": 0.16768833994865417, |
|
"eval_overall_accuracy": 0.9747083148722493, |
|
"eval_overall_f1": 0.8927428399485734, |
|
"eval_overall_precision": 0.8880866425992779, |
|
"eval_overall_recall": 0.8974481190877759, |
|
"eval_runtime": 59.1217, |
|
"eval_samples_per_second": 71.649, |
|
"eval_steps_per_second": 8.965, |
|
"step": 19071 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 1.5950920245398775e-06, |
|
"loss": 0.0063, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 1.123171307220387e-06, |
|
"loss": 0.005, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 6.512505899008966e-07, |
|
"loss": 0.0046, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 1.7932987258140632e-07, |
|
"loss": 0.0056, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_LOC": { |
|
"f1": 0.8228770339188861, |
|
"number": 12507, |
|
"precision": 0.833265021723092, |
|
"recall": 0.812744862876789 |
|
}, |
|
"eval_MISC": { |
|
"f1": 0.8222182275750494, |
|
"number": 2816, |
|
"precision": 0.8325445941026575, |
|
"recall": 0.8121448863636364 |
|
}, |
|
"eval_NORP": { |
|
"f1": 0.8972602739726027, |
|
"number": 10969, |
|
"precision": 0.8871068341798093, |
|
"recall": 0.907648828516729 |
|
}, |
|
"eval_PER": { |
|
"f1": 0.9305083267939662, |
|
"number": 28256, |
|
"precision": 0.917185190278112, |
|
"recall": 0.9442242355605889 |
|
}, |
|
"eval_loss": 0.17190925776958466, |
|
"eval_overall_accuracy": 0.9750008520500324, |
|
"eval_overall_f1": 0.8940859333734041, |
|
"eval_overall_precision": 0.888341959535271, |
|
"eval_overall_recall": 0.899904671115348, |
|
"eval_runtime": 57.6029, |
|
"eval_samples_per_second": 73.538, |
|
"eval_steps_per_second": 9.201, |
|
"step": 21190 |
|
} |
|
], |
|
"max_steps": 21190, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.5668822780351856e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|