finetuned-MBart50-en-hi / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 563,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.017761989342806393,
"grad_norm": 10.114191055297852,
"learning_rate": 1.9644760213143874e-05,
"loss": 3.1416,
"step": 10
},
{
"epoch": 0.035523978685612786,
"grad_norm": 4.123176097869873,
"learning_rate": 1.9289520426287745e-05,
"loss": 2.5072,
"step": 20
},
{
"epoch": 0.05328596802841918,
"grad_norm": 5.617274761199951,
"learning_rate": 1.8934280639431617e-05,
"loss": 2.5089,
"step": 30
},
{
"epoch": 0.07104795737122557,
"grad_norm": 5.457020282745361,
"learning_rate": 1.857904085257549e-05,
"loss": 2.4729,
"step": 40
},
{
"epoch": 0.08880994671403197,
"grad_norm": 5.344240665435791,
"learning_rate": 1.822380106571936e-05,
"loss": 2.2751,
"step": 50
},
{
"epoch": 0.10657193605683836,
"grad_norm": 4.997218132019043,
"learning_rate": 1.7868561278863233e-05,
"loss": 2.1903,
"step": 60
},
{
"epoch": 0.12433392539964476,
"grad_norm": 5.089644908905029,
"learning_rate": 1.751332149200711e-05,
"loss": 2.2842,
"step": 70
},
{
"epoch": 0.14209591474245115,
"grad_norm": 4.242591857910156,
"learning_rate": 1.7158081705150977e-05,
"loss": 2.3067,
"step": 80
},
{
"epoch": 0.15985790408525755,
"grad_norm": 4.736593246459961,
"learning_rate": 1.680284191829485e-05,
"loss": 2.2627,
"step": 90
},
{
"epoch": 0.17761989342806395,
"grad_norm": 4.923036575317383,
"learning_rate": 1.644760213143872e-05,
"loss": 2.3868,
"step": 100
},
{
"epoch": 0.19538188277087035,
"grad_norm": 3.8926291465759277,
"learning_rate": 1.6092362344582596e-05,
"loss": 2.2509,
"step": 110
},
{
"epoch": 0.21314387211367672,
"grad_norm": 4.4710373878479,
"learning_rate": 1.5737122557726468e-05,
"loss": 2.2431,
"step": 120
},
{
"epoch": 0.23090586145648312,
"grad_norm": 4.363410949707031,
"learning_rate": 1.5381882770870337e-05,
"loss": 2.197,
"step": 130
},
{
"epoch": 0.24866785079928952,
"grad_norm": 4.414468288421631,
"learning_rate": 1.502664298401421e-05,
"loss": 2.1243,
"step": 140
},
{
"epoch": 0.2664298401420959,
"grad_norm": 5.004277229309082,
"learning_rate": 1.4671403197158082e-05,
"loss": 2.2229,
"step": 150
},
{
"epoch": 0.2841918294849023,
"grad_norm": 5.9036431312561035,
"learning_rate": 1.4316163410301956e-05,
"loss": 2.35,
"step": 160
},
{
"epoch": 0.3019538188277087,
"grad_norm": 3.8498282432556152,
"learning_rate": 1.3960923623445828e-05,
"loss": 2.197,
"step": 170
},
{
"epoch": 0.3197158081705151,
"grad_norm": 4.546940326690674,
"learning_rate": 1.3605683836589698e-05,
"loss": 2.2107,
"step": 180
},
{
"epoch": 0.33747779751332146,
"grad_norm": 4.567042350769043,
"learning_rate": 1.3250444049733571e-05,
"loss": 2.1656,
"step": 190
},
{
"epoch": 0.3552397868561279,
"grad_norm": 4.599060535430908,
"learning_rate": 1.2895204262877443e-05,
"loss": 2.097,
"step": 200
},
{
"epoch": 0.37300177619893427,
"grad_norm": 4.4139084815979,
"learning_rate": 1.2539964476021315e-05,
"loss": 2.2216,
"step": 210
},
{
"epoch": 0.3907637655417407,
"grad_norm": 4.82399320602417,
"learning_rate": 1.2184724689165189e-05,
"loss": 2.2686,
"step": 220
},
{
"epoch": 0.40852575488454707,
"grad_norm": 5.282867431640625,
"learning_rate": 1.1829484902309059e-05,
"loss": 2.2976,
"step": 230
},
{
"epoch": 0.42628774422735344,
"grad_norm": 4.4112229347229,
"learning_rate": 1.1474245115452931e-05,
"loss": 2.0539,
"step": 240
},
{
"epoch": 0.44404973357015987,
"grad_norm": 4.809335708618164,
"learning_rate": 1.1119005328596803e-05,
"loss": 2.0071,
"step": 250
},
{
"epoch": 0.46181172291296624,
"grad_norm": 4.342746734619141,
"learning_rate": 1.0763765541740677e-05,
"loss": 2.2071,
"step": 260
},
{
"epoch": 0.47957371225577267,
"grad_norm": 4.607499599456787,
"learning_rate": 1.0408525754884548e-05,
"loss": 2.1116,
"step": 270
},
{
"epoch": 0.49733570159857904,
"grad_norm": 4.707973957061768,
"learning_rate": 1.0053285968028419e-05,
"loss": 2.3467,
"step": 280
},
{
"epoch": 0.5150976909413855,
"grad_norm": 5.3809814453125,
"learning_rate": 9.698046181172292e-06,
"loss": 2.265,
"step": 290
},
{
"epoch": 0.5328596802841918,
"grad_norm": 4.981960296630859,
"learning_rate": 9.342806394316164e-06,
"loss": 2.239,
"step": 300
},
{
"epoch": 0.5506216696269982,
"grad_norm": 4.437684059143066,
"learning_rate": 8.987566607460036e-06,
"loss": 2.2065,
"step": 310
},
{
"epoch": 0.5683836589698046,
"grad_norm": 4.121458530426025,
"learning_rate": 8.632326820603908e-06,
"loss": 2.2214,
"step": 320
},
{
"epoch": 0.5861456483126111,
"grad_norm": 4.4555768966674805,
"learning_rate": 8.27708703374778e-06,
"loss": 2.0929,
"step": 330
},
{
"epoch": 0.6039076376554174,
"grad_norm": 4.929543972015381,
"learning_rate": 7.921847246891654e-06,
"loss": 1.9948,
"step": 340
},
{
"epoch": 0.6216696269982238,
"grad_norm": 4.537841320037842,
"learning_rate": 7.566607460035525e-06,
"loss": 2.0598,
"step": 350
},
{
"epoch": 0.6394316163410302,
"grad_norm": 4.358743667602539,
"learning_rate": 7.2113676731793965e-06,
"loss": 2.2486,
"step": 360
},
{
"epoch": 0.6571936056838366,
"grad_norm": 5.218609809875488,
"learning_rate": 6.8561278863232685e-06,
"loss": 2.1021,
"step": 370
},
{
"epoch": 0.6749555950266429,
"grad_norm": 4.320308685302734,
"learning_rate": 6.500888099467141e-06,
"loss": 2.0445,
"step": 380
},
{
"epoch": 0.6927175843694494,
"grad_norm": 5.573863506317139,
"learning_rate": 6.145648312611013e-06,
"loss": 2.0654,
"step": 390
},
{
"epoch": 0.7104795737122558,
"grad_norm": 4.323981761932373,
"learning_rate": 5.790408525754885e-06,
"loss": 1.988,
"step": 400
},
{
"epoch": 0.7282415630550622,
"grad_norm": 5.24467658996582,
"learning_rate": 5.435168738898757e-06,
"loss": 2.2422,
"step": 410
},
{
"epoch": 0.7460035523978685,
"grad_norm": 4.423385143280029,
"learning_rate": 5.079928952042629e-06,
"loss": 2.2142,
"step": 420
},
{
"epoch": 0.7637655417406749,
"grad_norm": 4.1253275871276855,
"learning_rate": 4.724689165186501e-06,
"loss": 2.0602,
"step": 430
},
{
"epoch": 0.7815275310834814,
"grad_norm": 5.4505815505981445,
"learning_rate": 4.3694493783303736e-06,
"loss": 2.1685,
"step": 440
},
{
"epoch": 0.7992895204262878,
"grad_norm": 4.528077602386475,
"learning_rate": 4.0142095914742455e-06,
"loss": 2.1555,
"step": 450
},
{
"epoch": 0.8170515097690941,
"grad_norm": 4.204817771911621,
"learning_rate": 3.658969804618118e-06,
"loss": 2.085,
"step": 460
},
{
"epoch": 0.8348134991119005,
"grad_norm": 5.724953651428223,
"learning_rate": 3.3037300177619897e-06,
"loss": 2.0761,
"step": 470
},
{
"epoch": 0.8525754884547069,
"grad_norm": 3.909616231918335,
"learning_rate": 2.9484902309058617e-06,
"loss": 2.1404,
"step": 480
},
{
"epoch": 0.8703374777975134,
"grad_norm": 4.555464744567871,
"learning_rate": 2.5932504440497336e-06,
"loss": 2.1344,
"step": 490
},
{
"epoch": 0.8880994671403197,
"grad_norm": 4.636111736297607,
"learning_rate": 2.238010657193606e-06,
"loss": 2.1713,
"step": 500
},
{
"epoch": 0.9058614564831261,
"grad_norm": 4.105838775634766,
"learning_rate": 1.882770870337478e-06,
"loss": 2.0844,
"step": 510
},
{
"epoch": 0.9236234458259325,
"grad_norm": 5.204021453857422,
"learning_rate": 1.52753108348135e-06,
"loss": 2.0752,
"step": 520
},
{
"epoch": 0.9413854351687388,
"grad_norm": 5.496901988983154,
"learning_rate": 1.172291296625222e-06,
"loss": 2.164,
"step": 530
},
{
"epoch": 0.9591474245115453,
"grad_norm": 4.677813529968262,
"learning_rate": 8.170515097690942e-07,
"loss": 1.9237,
"step": 540
},
{
"epoch": 0.9769094138543517,
"grad_norm": 4.969646453857422,
"learning_rate": 4.618117229129663e-07,
"loss": 2.134,
"step": 550
},
{
"epoch": 0.9946714031971581,
"grad_norm": 4.424323558807373,
"learning_rate": 1.0657193605683837e-07,
"loss": 2.2305,
"step": 560
}
],
"logging_steps": 10,
"max_steps": 563,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2438020988928000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}