{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 16.0,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "grad_norm": 1.0079981088638306,
      "learning_rate": 4e-05,
      "loss": 2.3624,
      "step": 1
    },
    {
      "epoch": 0.32,
      "grad_norm": 1.0123796463012695,
      "learning_rate": 8e-05,
      "loss": 2.4117,
      "step": 2
    },
    {
      "epoch": 0.48,
      "grad_norm": 1.0385504961013794,
      "learning_rate": 0.00012,
      "loss": 2.4351,
      "step": 3
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.7601240277290344,
      "learning_rate": 0.00016,
      "loss": 1.9867,
      "step": 4
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.9805667400360107,
      "learning_rate": 0.0002,
      "loss": 2.0655,
      "step": 5
    },
    {
      "epoch": 0.96,
      "grad_norm": 1.6322834491729736,
      "learning_rate": 0.00019789473684210526,
      "loss": 1.8377,
      "step": 6
    },
    {
      "epoch": 1.12,
      "grad_norm": 1.1208696365356445,
      "learning_rate": 0.00019578947368421054,
      "loss": 1.5558,
      "step": 7
    },
    {
      "epoch": 1.28,
      "grad_norm": 1.3962080478668213,
      "learning_rate": 0.0001936842105263158,
      "loss": 1.4199,
      "step": 8
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.4532853364944458,
      "learning_rate": 0.00019157894736842104,
      "loss": 1.2996,
      "step": 9
    },
    {
      "epoch": 1.6,
      "grad_norm": 2.3988616466522217,
      "learning_rate": 0.00018947368421052632,
      "loss": 1.2371,
      "step": 10
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.3582508563995361,
      "learning_rate": 0.0001873684210526316,
      "loss": 1.1839,
      "step": 11
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.2997236251831055,
      "learning_rate": 0.00018526315789473685,
      "loss": 0.983,
      "step": 12
    },
    {
      "epoch": 2.08,
      "grad_norm": 1.1868802309036255,
      "learning_rate": 0.0001831578947368421,
      "loss": 0.7447,
      "step": 13
    },
    {
      "epoch": 2.24,
      "grad_norm": 1.0286939144134521,
      "learning_rate": 0.00018105263157894739,
      "loss": 0.8524,
      "step": 14
    },
    {
      "epoch": 2.4,
      "grad_norm": 1.00070321559906,
      "learning_rate": 0.00017894736842105264,
      "loss": 0.8649,
      "step": 15
    },
    {
      "epoch": 2.56,
      "grad_norm": 1.4189987182617188,
      "learning_rate": 0.0001768421052631579,
      "loss": 0.8116,
      "step": 16
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 1.2303727865219116,
      "learning_rate": 0.00017473684210526317,
      "loss": 0.8071,
      "step": 17
    },
    {
      "epoch": 2.88,
      "grad_norm": 0.9925879240036011,
      "learning_rate": 0.00017263157894736842,
      "loss": 0.7081,
      "step": 18
    },
    {
      "epoch": 3.04,
      "grad_norm": 1.0683646202087402,
      "learning_rate": 0.0001705263157894737,
      "loss": 0.5269,
      "step": 19
    },
    {
      "epoch": 3.2,
      "grad_norm": 1.0474812984466553,
      "learning_rate": 0.00016842105263157895,
      "loss": 0.6947,
      "step": 20
    },
    {
      "epoch": 3.36,
      "grad_norm": 1.0291672945022583,
      "learning_rate": 0.00016631578947368423,
      "loss": 0.5014,
      "step": 21
    },
    {
      "epoch": 3.52,
      "grad_norm": 1.1327933073043823,
      "learning_rate": 0.00016421052631578948,
      "loss": 0.481,
      "step": 22
    },
    {
      "epoch": 3.68,
      "grad_norm": 1.4890342950820923,
      "learning_rate": 0.00016210526315789473,
      "loss": 0.5253,
      "step": 23
    },
    {
      "epoch": 3.84,
      "grad_norm": 1.532833456993103,
      "learning_rate": 0.00016,
      "loss": 0.4937,
      "step": 24
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.7453362941741943,
      "learning_rate": 0.00015789473684210527,
      "loss": 0.5188,
      "step": 25
    },
    {
      "epoch": 4.16,
      "grad_norm": 1.2242546081542969,
      "learning_rate": 0.00015578947368421052,
      "loss": 0.1893,
      "step": 26
    },
    {
      "epoch": 4.32,
      "grad_norm": 1.7437238693237305,
      "learning_rate": 0.0001536842105263158,
      "loss": 0.433,
      "step": 27
    },
    {
      "epoch": 4.48,
      "grad_norm": 1.4618209600448608,
      "learning_rate": 0.00015157894736842108,
      "loss": 0.3996,
      "step": 28
    },
    {
      "epoch": 4.64,
      "grad_norm": 1.3685592412948608,
      "learning_rate": 0.00014947368421052633,
      "loss": 0.2189,
      "step": 29
    },
    {
      "epoch": 4.8,
      "grad_norm": 1.741402268409729,
      "learning_rate": 0.00014736842105263158,
      "loss": 0.2934,
      "step": 30
    },
    {
      "epoch": 4.96,
      "grad_norm": 1.5545222759246826,
      "learning_rate": 0.00014526315789473686,
      "loss": 0.2099,
      "step": 31
    },
    {
      "epoch": 5.12,
      "grad_norm": 1.2092806100845337,
      "learning_rate": 0.0001431578947368421,
      "loss": 0.1916,
      "step": 32
    },
    {
      "epoch": 5.28,
      "grad_norm": 1.7175395488739014,
      "learning_rate": 0.00014105263157894736,
      "loss": 0.2527,
      "step": 33
    },
    {
      "epoch": 5.44,
      "grad_norm": 1.368059754371643,
      "learning_rate": 0.00013894736842105264,
      "loss": 0.114,
      "step": 34
    },
    {
      "epoch": 5.6,
      "grad_norm": 1.6632587909698486,
      "learning_rate": 0.0001368421052631579,
      "loss": 0.1549,
      "step": 35
    },
    {
      "epoch": 5.76,
      "grad_norm": 1.6607255935668945,
      "learning_rate": 0.00013473684210526317,
      "loss": 0.1171,
      "step": 36
    },
    {
      "epoch": 5.92,
      "grad_norm": 2.4954917430877686,
      "learning_rate": 0.00013263157894736842,
      "loss": 0.1614,
      "step": 37
    },
    {
      "epoch": 6.08,
      "grad_norm": 1.7216722965240479,
      "learning_rate": 0.0001305263157894737,
      "loss": 0.1459,
      "step": 38
    },
    {
      "epoch": 6.24,
      "grad_norm": 0.9449135065078735,
      "learning_rate": 0.00012842105263157895,
      "loss": 0.1001,
      "step": 39
    },
    {
      "epoch": 6.4,
      "grad_norm": 1.4137742519378662,
      "learning_rate": 0.0001263157894736842,
      "loss": 0.0859,
      "step": 40
    },
    {
      "epoch": 6.5600000000000005,
      "grad_norm": 1.8110110759735107,
      "learning_rate": 0.00012421052631578949,
      "loss": 0.1404,
      "step": 41
    },
    {
      "epoch": 6.72,
      "grad_norm": 1.1322952508926392,
      "learning_rate": 0.00012210526315789474,
      "loss": 0.0687,
      "step": 42
    },
    {
      "epoch": 6.88,
      "grad_norm": 2.2961461544036865,
      "learning_rate": 0.00012,
      "loss": 0.1203,
      "step": 43
    },
    {
      "epoch": 7.04,
      "grad_norm": 1.5652666091918945,
      "learning_rate": 0.00011789473684210525,
      "loss": 0.1299,
      "step": 44
    },
    {
      "epoch": 7.2,
      "grad_norm": 0.7390972375869751,
      "learning_rate": 0.00011578947368421053,
      "loss": 0.0595,
      "step": 45
    },
    {
      "epoch": 7.36,
      "grad_norm": 1.0376925468444824,
      "learning_rate": 0.0001136842105263158,
      "loss": 0.0578,
      "step": 46
    },
    {
      "epoch": 7.52,
      "grad_norm": 0.9976247549057007,
      "learning_rate": 0.00011157894736842105,
      "loss": 0.0695,
      "step": 47
    },
    {
      "epoch": 7.68,
      "grad_norm": 1.0853309631347656,
      "learning_rate": 0.00010947368421052633,
      "loss": 0.0985,
      "step": 48
    },
    {
      "epoch": 7.84,
      "grad_norm": 1.3621833324432373,
      "learning_rate": 0.00010736842105263158,
      "loss": 0.1269,
      "step": 49
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.8868013024330139,
      "learning_rate": 0.00010526315789473685,
      "loss": 0.0641,
      "step": 50
    },
    {
      "epoch": 8.16,
      "grad_norm": 0.6473409533500671,
      "learning_rate": 0.00010315789473684211,
      "loss": 0.0474,
      "step": 51
    },
    {
      "epoch": 8.32,
      "grad_norm": 1.6032112836837769,
      "learning_rate": 0.00010105263157894738,
      "loss": 0.0597,
      "step": 52
    },
    {
      "epoch": 8.48,
      "grad_norm": 1.120687484741211,
      "learning_rate": 9.894736842105263e-05,
      "loss": 0.0582,
      "step": 53
    },
    {
      "epoch": 8.64,
      "grad_norm": 0.7064136862754822,
      "learning_rate": 9.68421052631579e-05,
      "loss": 0.0557,
      "step": 54
    },
    {
      "epoch": 8.8,
      "grad_norm": 0.5838208794593811,
      "learning_rate": 9.473684210526316e-05,
      "loss": 0.0436,
      "step": 55
    },
    {
      "epoch": 8.96,
      "grad_norm": 1.2315547466278076,
      "learning_rate": 9.263157894736843e-05,
      "loss": 0.063,
      "step": 56
    },
    {
      "epoch": 9.12,
      "grad_norm": 0.3518936336040497,
      "learning_rate": 9.052631578947369e-05,
      "loss": 0.0311,
      "step": 57
    },
    {
      "epoch": 9.28,
      "grad_norm": 0.6926944851875305,
      "learning_rate": 8.842105263157894e-05,
      "loss": 0.039,
      "step": 58
    },
    {
      "epoch": 9.44,
      "grad_norm": 0.26300671696662903,
      "learning_rate": 8.631578947368421e-05,
      "loss": 0.0252,
      "step": 59
    },
    {
      "epoch": 9.6,
      "grad_norm": 0.7903566360473633,
      "learning_rate": 8.421052631578948e-05,
      "loss": 0.0415,
      "step": 60
    },
    {
      "epoch": 9.76,
      "grad_norm": 0.5427919626235962,
      "learning_rate": 8.210526315789474e-05,
      "loss": 0.0453,
      "step": 61
    },
    {
      "epoch": 9.92,
      "grad_norm": 0.5827217698097229,
      "learning_rate": 8e-05,
      "loss": 0.0368,
      "step": 62
    },
    {
      "epoch": 10.08,
      "grad_norm": 1.45575749874115,
      "learning_rate": 7.789473684210526e-05,
      "loss": 0.0736,
      "step": 63
    },
    {
      "epoch": 10.24,
      "grad_norm": 0.32767948508262634,
      "learning_rate": 7.578947368421054e-05,
      "loss": 0.0316,
      "step": 64
    },
    {
      "epoch": 10.4,
      "grad_norm": 0.30059218406677246,
      "learning_rate": 7.368421052631579e-05,
      "loss": 0.0277,
      "step": 65
    },
    {
      "epoch": 10.56,
      "grad_norm": 0.4859299659729004,
      "learning_rate": 7.157894736842105e-05,
      "loss": 0.0313,
      "step": 66
    },
    {
      "epoch": 10.72,
      "grad_norm": 0.4874284267425537,
      "learning_rate": 6.947368421052632e-05,
      "loss": 0.0322,
      "step": 67
    },
    {
      "epoch": 10.88,
      "grad_norm": 0.41711848974227905,
      "learning_rate": 6.736842105263159e-05,
      "loss": 0.0389,
      "step": 68
    },
    {
      "epoch": 11.04,
      "grad_norm": 0.8408872485160828,
      "learning_rate": 6.526315789473685e-05,
      "loss": 0.0312,
      "step": 69
    },
    {
      "epoch": 11.2,
      "grad_norm": 0.32355204224586487,
      "learning_rate": 6.31578947368421e-05,
      "loss": 0.0328,
      "step": 70
    },
    {
      "epoch": 11.36,
      "grad_norm": 0.42406928539276123,
      "learning_rate": 6.105263157894737e-05,
      "loss": 0.0277,
      "step": 71
    },
    {
      "epoch": 11.52,
      "grad_norm": 0.7678600549697876,
      "learning_rate": 5.894736842105263e-05,
      "loss": 0.0329,
      "step": 72
    },
    {
      "epoch": 11.68,
      "grad_norm": 0.29065871238708496,
      "learning_rate": 5.68421052631579e-05,
      "loss": 0.0297,
      "step": 73
    },
    {
      "epoch": 11.84,
      "grad_norm": 0.5853772163391113,
      "learning_rate": 5.4736842105263165e-05,
      "loss": 0.0393,
      "step": 74
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.7088480591773987,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 0.0344,
      "step": 75
    },
    {
      "epoch": 12.16,
      "grad_norm": 0.19609542191028595,
      "learning_rate": 5.052631578947369e-05,
      "loss": 0.0232,
      "step": 76
    },
    {
      "epoch": 12.32,
      "grad_norm": 0.31028512120246887,
      "learning_rate": 4.842105263157895e-05,
      "loss": 0.0273,
      "step": 77
    },
    {
      "epoch": 12.48,
      "grad_norm": 0.4248906672000885,
      "learning_rate": 4.6315789473684214e-05,
      "loss": 0.0315,
      "step": 78
    },
    {
      "epoch": 12.64,
      "grad_norm": 0.4214076101779938,
      "learning_rate": 4.421052631578947e-05,
      "loss": 0.0309,
      "step": 79
    },
    {
      "epoch": 12.8,
      "grad_norm": 0.4250756502151489,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.0285,
      "step": 80
    },
    {
      "epoch": 12.96,
      "grad_norm": 0.2500416934490204,
      "learning_rate": 4e-05,
      "loss": 0.0256,
      "step": 81
    },
    {
      "epoch": 13.12,
      "grad_norm": 0.2516506314277649,
      "learning_rate": 3.789473684210527e-05,
      "loss": 0.0244,
      "step": 82
    },
    {
      "epoch": 13.28,
      "grad_norm": 0.217052161693573,
      "learning_rate": 3.578947368421053e-05,
      "loss": 0.0241,
      "step": 83
    },
    {
      "epoch": 13.44,
      "grad_norm": 0.4375220835208893,
      "learning_rate": 3.368421052631579e-05,
      "loss": 0.0308,
      "step": 84
    },
    {
      "epoch": 13.6,
      "grad_norm": 0.23626229166984558,
      "learning_rate": 3.157894736842105e-05,
      "loss": 0.029,
      "step": 85
    },
    {
      "epoch": 13.76,
      "grad_norm": 0.3816908001899719,
      "learning_rate": 2.9473684210526314e-05,
      "loss": 0.0251,
      "step": 86
    },
    {
      "epoch": 13.92,
      "grad_norm": 0.17371943593025208,
      "learning_rate": 2.7368421052631583e-05,
      "loss": 0.0203,
      "step": 87
    },
    {
      "epoch": 14.08,
      "grad_norm": 0.21958455443382263,
      "learning_rate": 2.5263157894736845e-05,
      "loss": 0.0265,
      "step": 88
    },
    {
      "epoch": 14.24,
      "grad_norm": 0.2628728151321411,
      "learning_rate": 2.3157894736842107e-05,
      "loss": 0.0242,
      "step": 89
    },
    {
      "epoch": 14.4,
      "grad_norm": 0.2763591408729553,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.0299,
      "step": 90
    },
    {
      "epoch": 14.56,
      "grad_norm": 0.2944229245185852,
      "learning_rate": 1.8947368421052634e-05,
      "loss": 0.0244,
      "step": 91
    },
    {
      "epoch": 14.72,
      "grad_norm": 0.28353527188301086,
      "learning_rate": 1.6842105263157896e-05,
      "loss": 0.0241,
      "step": 92
    },
    {
      "epoch": 14.88,
      "grad_norm": 0.2161315530538559,
      "learning_rate": 1.4736842105263157e-05,
      "loss": 0.024,
      "step": 93
    },
    {
      "epoch": 15.04,
      "grad_norm": 0.2228800654411316,
      "learning_rate": 1.2631578947368422e-05,
      "loss": 0.0263,
      "step": 94
    },
    {
      "epoch": 15.2,
      "grad_norm": 0.17299261689186096,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 0.0227,
      "step": 95
    },
    {
      "epoch": 15.36,
      "grad_norm": 0.21846872568130493,
      "learning_rate": 8.421052631578948e-06,
      "loss": 0.0223,
      "step": 96
    },
    {
      "epoch": 15.52,
      "grad_norm": 0.23234839737415314,
      "learning_rate": 6.315789473684211e-06,
      "loss": 0.0269,
      "step": 97
    },
    {
      "epoch": 15.68,
      "grad_norm": 0.217283234000206,
      "learning_rate": 4.210526315789474e-06,
      "loss": 0.0259,
      "step": 98
    },
    {
      "epoch": 15.84,
      "grad_norm": 0.2666471600532532,
      "learning_rate": 2.105263157894737e-06,
      "loss": 0.027,
      "step": 99
    },
    {
      "epoch": 16.0,
      "grad_norm": 0.2889624536037445,
      "learning_rate": 0.0,
      "loss": 0.0248,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 17,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2992005070258176.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}