{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 16.0,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"grad_norm": 1.0079981088638306,
"learning_rate": 4e-05,
"loss": 2.3624,
"step": 1
},
{
"epoch": 0.32,
"grad_norm": 1.0123796463012695,
"learning_rate": 8e-05,
"loss": 2.4117,
"step": 2
},
{
"epoch": 0.48,
"grad_norm": 1.0385504961013794,
"learning_rate": 0.00012,
"loss": 2.4351,
"step": 3
},
{
"epoch": 0.64,
"grad_norm": 0.7601240277290344,
"learning_rate": 0.00016,
"loss": 1.9867,
"step": 4
},
{
"epoch": 0.8,
"grad_norm": 0.9805667400360107,
"learning_rate": 0.0002,
"loss": 2.0655,
"step": 5
},
{
"epoch": 0.96,
"grad_norm": 1.6322834491729736,
"learning_rate": 0.00019789473684210526,
"loss": 1.8377,
"step": 6
},
{
"epoch": 1.12,
"grad_norm": 1.1208696365356445,
"learning_rate": 0.00019578947368421054,
"loss": 1.5558,
"step": 7
},
{
"epoch": 1.28,
"grad_norm": 1.3962080478668213,
"learning_rate": 0.0001936842105263158,
"loss": 1.4199,
"step": 8
},
{
"epoch": 1.44,
"grad_norm": 1.4532853364944458,
"learning_rate": 0.00019157894736842104,
"loss": 1.2996,
"step": 9
},
{
"epoch": 1.6,
"grad_norm": 2.3988616466522217,
"learning_rate": 0.00018947368421052632,
"loss": 1.2371,
"step": 10
},
{
"epoch": 1.76,
"grad_norm": 1.3582508563995361,
"learning_rate": 0.0001873684210526316,
"loss": 1.1839,
"step": 11
},
{
"epoch": 1.92,
"grad_norm": 1.2997236251831055,
"learning_rate": 0.00018526315789473685,
"loss": 0.983,
"step": 12
},
{
"epoch": 2.08,
"grad_norm": 1.1868802309036255,
"learning_rate": 0.0001831578947368421,
"loss": 0.7447,
"step": 13
},
{
"epoch": 2.24,
"grad_norm": 1.0286939144134521,
"learning_rate": 0.00018105263157894739,
"loss": 0.8524,
"step": 14
},
{
"epoch": 2.4,
"grad_norm": 1.00070321559906,
"learning_rate": 0.00017894736842105264,
"loss": 0.8649,
"step": 15
},
{
"epoch": 2.56,
"grad_norm": 1.4189987182617188,
"learning_rate": 0.0001768421052631579,
"loss": 0.8116,
"step": 16
},
{
"epoch": 2.7199999999999998,
"grad_norm": 1.2303727865219116,
"learning_rate": 0.00017473684210526317,
"loss": 0.8071,
"step": 17
},
{
"epoch": 2.88,
"grad_norm": 0.9925879240036011,
"learning_rate": 0.00017263157894736842,
"loss": 0.7081,
"step": 18
},
{
"epoch": 3.04,
"grad_norm": 1.0683646202087402,
"learning_rate": 0.0001705263157894737,
"loss": 0.5269,
"step": 19
},
{
"epoch": 3.2,
"grad_norm": 1.0474812984466553,
"learning_rate": 0.00016842105263157895,
"loss": 0.6947,
"step": 20
},
{
"epoch": 3.36,
"grad_norm": 1.0291672945022583,
"learning_rate": 0.00016631578947368423,
"loss": 0.5014,
"step": 21
},
{
"epoch": 3.52,
"grad_norm": 1.1327933073043823,
"learning_rate": 0.00016421052631578948,
"loss": 0.481,
"step": 22
},
{
"epoch": 3.68,
"grad_norm": 1.4890342950820923,
"learning_rate": 0.00016210526315789473,
"loss": 0.5253,
"step": 23
},
{
"epoch": 3.84,
"grad_norm": 1.532833456993103,
"learning_rate": 0.00016,
"loss": 0.4937,
"step": 24
},
{
"epoch": 4.0,
"grad_norm": 1.7453362941741943,
"learning_rate": 0.00015789473684210527,
"loss": 0.5188,
"step": 25
},
{
"epoch": 4.16,
"grad_norm": 1.2242546081542969,
"learning_rate": 0.00015578947368421052,
"loss": 0.1893,
"step": 26
},
{
"epoch": 4.32,
"grad_norm": 1.7437238693237305,
"learning_rate": 0.0001536842105263158,
"loss": 0.433,
"step": 27
},
{
"epoch": 4.48,
"grad_norm": 1.4618209600448608,
"learning_rate": 0.00015157894736842108,
"loss": 0.3996,
"step": 28
},
{
"epoch": 4.64,
"grad_norm": 1.3685592412948608,
"learning_rate": 0.00014947368421052633,
"loss": 0.2189,
"step": 29
},
{
"epoch": 4.8,
"grad_norm": 1.741402268409729,
"learning_rate": 0.00014736842105263158,
"loss": 0.2934,
"step": 30
},
{
"epoch": 4.96,
"grad_norm": 1.5545222759246826,
"learning_rate": 0.00014526315789473686,
"loss": 0.2099,
"step": 31
},
{
"epoch": 5.12,
"grad_norm": 1.2092806100845337,
"learning_rate": 0.0001431578947368421,
"loss": 0.1916,
"step": 32
},
{
"epoch": 5.28,
"grad_norm": 1.7175395488739014,
"learning_rate": 0.00014105263157894736,
"loss": 0.2527,
"step": 33
},
{
"epoch": 5.44,
"grad_norm": 1.368059754371643,
"learning_rate": 0.00013894736842105264,
"loss": 0.114,
"step": 34
},
{
"epoch": 5.6,
"grad_norm": 1.6632587909698486,
"learning_rate": 0.0001368421052631579,
"loss": 0.1549,
"step": 35
},
{
"epoch": 5.76,
"grad_norm": 1.6607255935668945,
"learning_rate": 0.00013473684210526317,
"loss": 0.1171,
"step": 36
},
{
"epoch": 5.92,
"grad_norm": 2.4954917430877686,
"learning_rate": 0.00013263157894736842,
"loss": 0.1614,
"step": 37
},
{
"epoch": 6.08,
"grad_norm": 1.7216722965240479,
"learning_rate": 0.0001305263157894737,
"loss": 0.1459,
"step": 38
},
{
"epoch": 6.24,
"grad_norm": 0.9449135065078735,
"learning_rate": 0.00012842105263157895,
"loss": 0.1001,
"step": 39
},
{
"epoch": 6.4,
"grad_norm": 1.4137742519378662,
"learning_rate": 0.0001263157894736842,
"loss": 0.0859,
"step": 40
},
{
"epoch": 6.5600000000000005,
"grad_norm": 1.8110110759735107,
"learning_rate": 0.00012421052631578949,
"loss": 0.1404,
"step": 41
},
{
"epoch": 6.72,
"grad_norm": 1.1322952508926392,
"learning_rate": 0.00012210526315789474,
"loss": 0.0687,
"step": 42
},
{
"epoch": 6.88,
"grad_norm": 2.2961461544036865,
"learning_rate": 0.00012,
"loss": 0.1203,
"step": 43
},
{
"epoch": 7.04,
"grad_norm": 1.5652666091918945,
"learning_rate": 0.00011789473684210525,
"loss": 0.1299,
"step": 44
},
{
"epoch": 7.2,
"grad_norm": 0.7390972375869751,
"learning_rate": 0.00011578947368421053,
"loss": 0.0595,
"step": 45
},
{
"epoch": 7.36,
"grad_norm": 1.0376925468444824,
"learning_rate": 0.0001136842105263158,
"loss": 0.0578,
"step": 46
},
{
"epoch": 7.52,
"grad_norm": 0.9976247549057007,
"learning_rate": 0.00011157894736842105,
"loss": 0.0695,
"step": 47
},
{
"epoch": 7.68,
"grad_norm": 1.0853309631347656,
"learning_rate": 0.00010947368421052633,
"loss": 0.0985,
"step": 48
},
{
"epoch": 7.84,
"grad_norm": 1.3621833324432373,
"learning_rate": 0.00010736842105263158,
"loss": 0.1269,
"step": 49
},
{
"epoch": 8.0,
"grad_norm": 0.8868013024330139,
"learning_rate": 0.00010526315789473685,
"loss": 0.0641,
"step": 50
},
{
"epoch": 8.16,
"grad_norm": 0.6473409533500671,
"learning_rate": 0.00010315789473684211,
"loss": 0.0474,
"step": 51
},
{
"epoch": 8.32,
"grad_norm": 1.6032112836837769,
"learning_rate": 0.00010105263157894738,
"loss": 0.0597,
"step": 52
},
{
"epoch": 8.48,
"grad_norm": 1.120687484741211,
"learning_rate": 9.894736842105263e-05,
"loss": 0.0582,
"step": 53
},
{
"epoch": 8.64,
"grad_norm": 0.7064136862754822,
"learning_rate": 9.68421052631579e-05,
"loss": 0.0557,
"step": 54
},
{
"epoch": 8.8,
"grad_norm": 0.5838208794593811,
"learning_rate": 9.473684210526316e-05,
"loss": 0.0436,
"step": 55
},
{
"epoch": 8.96,
"grad_norm": 1.2315547466278076,
"learning_rate": 9.263157894736843e-05,
"loss": 0.063,
"step": 56
},
{
"epoch": 9.12,
"grad_norm": 0.3518936336040497,
"learning_rate": 9.052631578947369e-05,
"loss": 0.0311,
"step": 57
},
{
"epoch": 9.28,
"grad_norm": 0.6926944851875305,
"learning_rate": 8.842105263157894e-05,
"loss": 0.039,
"step": 58
},
{
"epoch": 9.44,
"grad_norm": 0.26300671696662903,
"learning_rate": 8.631578947368421e-05,
"loss": 0.0252,
"step": 59
},
{
"epoch": 9.6,
"grad_norm": 0.7903566360473633,
"learning_rate": 8.421052631578948e-05,
"loss": 0.0415,
"step": 60
},
{
"epoch": 9.76,
"grad_norm": 0.5427919626235962,
"learning_rate": 8.210526315789474e-05,
"loss": 0.0453,
"step": 61
},
{
"epoch": 9.92,
"grad_norm": 0.5827217698097229,
"learning_rate": 8e-05,
"loss": 0.0368,
"step": 62
},
{
"epoch": 10.08,
"grad_norm": 1.45575749874115,
"learning_rate": 7.789473684210526e-05,
"loss": 0.0736,
"step": 63
},
{
"epoch": 10.24,
"grad_norm": 0.32767948508262634,
"learning_rate": 7.578947368421054e-05,
"loss": 0.0316,
"step": 64
},
{
"epoch": 10.4,
"grad_norm": 0.30059218406677246,
"learning_rate": 7.368421052631579e-05,
"loss": 0.0277,
"step": 65
},
{
"epoch": 10.56,
"grad_norm": 0.4859299659729004,
"learning_rate": 7.157894736842105e-05,
"loss": 0.0313,
"step": 66
},
{
"epoch": 10.72,
"grad_norm": 0.4874284267425537,
"learning_rate": 6.947368421052632e-05,
"loss": 0.0322,
"step": 67
},
{
"epoch": 10.88,
"grad_norm": 0.41711848974227905,
"learning_rate": 6.736842105263159e-05,
"loss": 0.0389,
"step": 68
},
{
"epoch": 11.04,
"grad_norm": 0.8408872485160828,
"learning_rate": 6.526315789473685e-05,
"loss": 0.0312,
"step": 69
},
{
"epoch": 11.2,
"grad_norm": 0.32355204224586487,
"learning_rate": 6.31578947368421e-05,
"loss": 0.0328,
"step": 70
},
{
"epoch": 11.36,
"grad_norm": 0.42406928539276123,
"learning_rate": 6.105263157894737e-05,
"loss": 0.0277,
"step": 71
},
{
"epoch": 11.52,
"grad_norm": 0.7678600549697876,
"learning_rate": 5.894736842105263e-05,
"loss": 0.0329,
"step": 72
},
{
"epoch": 11.68,
"grad_norm": 0.29065871238708496,
"learning_rate": 5.68421052631579e-05,
"loss": 0.0297,
"step": 73
},
{
"epoch": 11.84,
"grad_norm": 0.5853772163391113,
"learning_rate": 5.4736842105263165e-05,
"loss": 0.0393,
"step": 74
},
{
"epoch": 12.0,
"grad_norm": 0.7088480591773987,
"learning_rate": 5.2631578947368424e-05,
"loss": 0.0344,
"step": 75
},
{
"epoch": 12.16,
"grad_norm": 0.19609542191028595,
"learning_rate": 5.052631578947369e-05,
"loss": 0.0232,
"step": 76
},
{
"epoch": 12.32,
"grad_norm": 0.31028512120246887,
"learning_rate": 4.842105263157895e-05,
"loss": 0.0273,
"step": 77
},
{
"epoch": 12.48,
"grad_norm": 0.4248906672000885,
"learning_rate": 4.6315789473684214e-05,
"loss": 0.0315,
"step": 78
},
{
"epoch": 12.64,
"grad_norm": 0.4214076101779938,
"learning_rate": 4.421052631578947e-05,
"loss": 0.0309,
"step": 79
},
{
"epoch": 12.8,
"grad_norm": 0.4250756502151489,
"learning_rate": 4.210526315789474e-05,
"loss": 0.0285,
"step": 80
},
{
"epoch": 12.96,
"grad_norm": 0.2500416934490204,
"learning_rate": 4e-05,
"loss": 0.0256,
"step": 81
},
{
"epoch": 13.12,
"grad_norm": 0.2516506314277649,
"learning_rate": 3.789473684210527e-05,
"loss": 0.0244,
"step": 82
},
{
"epoch": 13.28,
"grad_norm": 0.217052161693573,
"learning_rate": 3.578947368421053e-05,
"loss": 0.0241,
"step": 83
},
{
"epoch": 13.44,
"grad_norm": 0.4375220835208893,
"learning_rate": 3.368421052631579e-05,
"loss": 0.0308,
"step": 84
},
{
"epoch": 13.6,
"grad_norm": 0.23626229166984558,
"learning_rate": 3.157894736842105e-05,
"loss": 0.029,
"step": 85
},
{
"epoch": 13.76,
"grad_norm": 0.3816908001899719,
"learning_rate": 2.9473684210526314e-05,
"loss": 0.0251,
"step": 86
},
{
"epoch": 13.92,
"grad_norm": 0.17371943593025208,
"learning_rate": 2.7368421052631583e-05,
"loss": 0.0203,
"step": 87
},
{
"epoch": 14.08,
"grad_norm": 0.21958455443382263,
"learning_rate": 2.5263157894736845e-05,
"loss": 0.0265,
"step": 88
},
{
"epoch": 14.24,
"grad_norm": 0.2628728151321411,
"learning_rate": 2.3157894736842107e-05,
"loss": 0.0242,
"step": 89
},
{
"epoch": 14.4,
"grad_norm": 0.2763591408729553,
"learning_rate": 2.105263157894737e-05,
"loss": 0.0299,
"step": 90
},
{
"epoch": 14.56,
"grad_norm": 0.2944229245185852,
"learning_rate": 1.8947368421052634e-05,
"loss": 0.0244,
"step": 91
},
{
"epoch": 14.72,
"grad_norm": 0.28353527188301086,
"learning_rate": 1.6842105263157896e-05,
"loss": 0.0241,
"step": 92
},
{
"epoch": 14.88,
"grad_norm": 0.2161315530538559,
"learning_rate": 1.4736842105263157e-05,
"loss": 0.024,
"step": 93
},
{
"epoch": 15.04,
"grad_norm": 0.2228800654411316,
"learning_rate": 1.2631578947368422e-05,
"loss": 0.0263,
"step": 94
},
{
"epoch": 15.2,
"grad_norm": 0.17299261689186096,
"learning_rate": 1.0526315789473684e-05,
"loss": 0.0227,
"step": 95
},
{
"epoch": 15.36,
"grad_norm": 0.21846872568130493,
"learning_rate": 8.421052631578948e-06,
"loss": 0.0223,
"step": 96
},
{
"epoch": 15.52,
"grad_norm": 0.23234839737415314,
"learning_rate": 6.315789473684211e-06,
"loss": 0.0269,
"step": 97
},
{
"epoch": 15.68,
"grad_norm": 0.217283234000206,
"learning_rate": 4.210526315789474e-06,
"loss": 0.0259,
"step": 98
},
{
"epoch": 15.84,
"grad_norm": 0.2666471600532532,
"learning_rate": 2.105263157894737e-06,
"loss": 0.027,
"step": 99
},
{
"epoch": 16.0,
"grad_norm": 0.2889624536037445,
"learning_rate": 0.0,
"loss": 0.0248,
"step": 100
}
],
"logging_steps": 1,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 17,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2992005070258176.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}