{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.238095238095237,
  "eval_steps": 500,
  "global_step": 160,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.19047619047619047,
      "grad_norm": 0.9942206144332886,
      "learning_rate": 0.00019750000000000003,
      "loss": 9.5613,
      "step": 2
    },
    {
      "epoch": 0.38095238095238093,
      "grad_norm": 1.2790788412094116,
      "learning_rate": 0.000195,
      "loss": 9.2339,
      "step": 4
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.9939367771148682,
      "learning_rate": 0.00019250000000000002,
      "loss": 8.7953,
      "step": 6
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 2.010485887527466,
      "learning_rate": 0.00019,
      "loss": 8.2168,
      "step": 8
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 1.352328896522522,
      "learning_rate": 0.0001875,
      "loss": 7.8941,
      "step": 10
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 0.9626594185829163,
      "learning_rate": 0.00018500000000000002,
      "loss": 7.6817,
      "step": 12
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 1.1568268537521362,
      "learning_rate": 0.0001825,
      "loss": 7.5131,
      "step": 14
    },
    {
      "epoch": 1.5238095238095237,
      "grad_norm": 1.0264520645141602,
      "learning_rate": 0.00018,
      "loss": 7.4247,
      "step": 16
    },
    {
      "epoch": 1.7142857142857144,
      "grad_norm": 0.9865540862083435,
      "learning_rate": 0.0001775,
      "loss": 7.4369,
      "step": 18
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 1.0182702541351318,
      "learning_rate": 0.000175,
      "loss": 7.3787,
      "step": 20
    },
    {
      "epoch": 2.0952380952380953,
      "grad_norm": 0.7922359108924866,
      "learning_rate": 0.00017250000000000002,
      "loss": 7.373,
      "step": 22
    },
    {
      "epoch": 2.2857142857142856,
      "grad_norm": 0.7033187747001648,
      "learning_rate": 0.00017,
      "loss": 7.3096,
      "step": 24
    },
    {
      "epoch": 2.4761904761904763,
      "grad_norm": 2.9758119583129883,
      "learning_rate": 0.0001675,
      "loss": 7.1991,
      "step": 26
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.7531760931015015,
      "learning_rate": 0.000165,
      "loss": 7.2661,
      "step": 28
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 1.3790533542633057,
      "learning_rate": 0.00016250000000000002,
      "loss": 7.2782,
      "step": 30
    },
    {
      "epoch": 3.0476190476190474,
      "grad_norm": 0.6538093686103821,
      "learning_rate": 0.00016,
      "loss": 7.2109,
      "step": 32
    },
    {
      "epoch": 3.238095238095238,
      "grad_norm": 0.6145215630531311,
      "learning_rate": 0.0001575,
      "loss": 7.2192,
      "step": 34
    },
    {
      "epoch": 3.4285714285714284,
      "grad_norm": 0.4128475785255432,
      "learning_rate": 0.000155,
      "loss": 7.2892,
      "step": 36
    },
    {
      "epoch": 3.619047619047619,
      "grad_norm": 1.0160013437271118,
      "learning_rate": 0.0001525,
      "loss": 7.2049,
      "step": 38
    },
    {
      "epoch": 3.8095238095238093,
      "grad_norm": 0.5834835171699524,
      "learning_rate": 0.00015000000000000001,
      "loss": 7.1672,
      "step": 40
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.4894554615020752,
      "learning_rate": 0.0001475,
      "loss": 7.1269,
      "step": 42
    },
    {
      "epoch": 4.190476190476191,
      "grad_norm": 0.593618631362915,
      "learning_rate": 0.000145,
      "loss": 7.0175,
      "step": 44
    },
    {
      "epoch": 4.380952380952381,
      "grad_norm": 1.6190487146377563,
      "learning_rate": 0.00014250000000000002,
      "loss": 7.2919,
      "step": 46
    },
    {
      "epoch": 4.571428571428571,
      "grad_norm": 0.755859911441803,
      "learning_rate": 0.00014,
      "loss": 7.1624,
      "step": 48
    },
    {
      "epoch": 4.761904761904762,
      "grad_norm": 0.46613645553588867,
      "learning_rate": 0.0001375,
      "loss": 7.2233,
      "step": 50
    },
    {
      "epoch": 4.9523809523809526,
      "grad_norm": 0.5973020792007446,
      "learning_rate": 0.00013500000000000003,
      "loss": 7.1642,
      "step": 52
    },
    {
      "epoch": 5.142857142857143,
      "grad_norm": 0.97837233543396,
      "learning_rate": 0.0001325,
      "loss": 7.1172,
      "step": 54
    },
    {
      "epoch": 5.333333333333333,
      "grad_norm": 0.9348046183586121,
      "learning_rate": 0.00013000000000000002,
      "loss": 7.1564,
      "step": 56
    },
    {
      "epoch": 5.523809523809524,
      "grad_norm": 0.6632198691368103,
      "learning_rate": 0.0001275,
      "loss": 7.0821,
      "step": 58
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 0.7776179909706116,
      "learning_rate": 0.000125,
      "loss": 7.2272,
      "step": 60
    },
    {
      "epoch": 5.904761904761905,
      "grad_norm": 0.6282438039779663,
      "learning_rate": 0.00012250000000000002,
      "loss": 7.0926,
      "step": 62
    },
    {
      "epoch": 6.095238095238095,
      "grad_norm": 0.6008353233337402,
      "learning_rate": 0.00012,
      "loss": 7.1073,
      "step": 64
    },
    {
      "epoch": 6.285714285714286,
      "grad_norm": 0.8796420097351074,
      "learning_rate": 0.00011750000000000001,
      "loss": 7.1737,
      "step": 66
    },
    {
      "epoch": 6.476190476190476,
      "grad_norm": 0.6400454640388489,
      "learning_rate": 0.00011499999999999999,
      "loss": 7.0924,
      "step": 68
    },
    {
      "epoch": 6.666666666666667,
      "grad_norm": 0.5479526519775391,
      "learning_rate": 0.00011250000000000001,
      "loss": 7.1275,
      "step": 70
    },
    {
      "epoch": 6.857142857142857,
      "grad_norm": 0.5992618203163147,
      "learning_rate": 0.00011000000000000002,
      "loss": 7.0599,
      "step": 72
    },
    {
      "epoch": 7.0476190476190474,
      "grad_norm": 0.5336684584617615,
      "learning_rate": 0.0001075,
      "loss": 7.0206,
      "step": 74
    },
    {
      "epoch": 7.238095238095238,
      "grad_norm": 0.3991040289402008,
      "learning_rate": 0.000105,
      "loss": 7.0123,
      "step": 76
    },
    {
      "epoch": 7.428571428571429,
      "grad_norm": 1.032917857170105,
      "learning_rate": 0.0001025,
      "loss": 7.0267,
      "step": 78
    },
    {
      "epoch": 7.619047619047619,
      "grad_norm": 0.5554404854774475,
      "learning_rate": 0.0001,
      "loss": 7.0203,
      "step": 80
    },
    {
      "epoch": 7.809523809523809,
      "grad_norm": 0.7755109667778015,
      "learning_rate": 9.75e-05,
      "loss": 7.1445,
      "step": 82
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.8295842409133911,
      "learning_rate": 9.5e-05,
      "loss": 7.0002,
      "step": 84
    },
    {
      "epoch": 8.19047619047619,
      "grad_norm": 1.4985620975494385,
      "learning_rate": 9.250000000000001e-05,
      "loss": 7.0613,
      "step": 86
    },
    {
      "epoch": 8.380952380952381,
      "grad_norm": 1.0733778476715088,
      "learning_rate": 9e-05,
      "loss": 7.0594,
      "step": 88
    },
    {
      "epoch": 8.571428571428571,
      "grad_norm": 0.7009026408195496,
      "learning_rate": 8.75e-05,
      "loss": 6.9432,
      "step": 90
    },
    {
      "epoch": 8.761904761904763,
      "grad_norm": 1.195196509361267,
      "learning_rate": 8.5e-05,
      "loss": 6.9266,
      "step": 92
    },
    {
      "epoch": 8.952380952380953,
      "grad_norm": 2.6835684776306152,
      "learning_rate": 8.25e-05,
      "loss": 6.9855,
      "step": 94
    },
    {
      "epoch": 9.142857142857142,
      "grad_norm": 0.7434377670288086,
      "learning_rate": 8e-05,
      "loss": 6.7975,
      "step": 96
    },
    {
      "epoch": 9.333333333333334,
      "grad_norm": 0.5993837118148804,
      "learning_rate": 7.75e-05,
      "loss": 7.0476,
      "step": 98
    },
    {
      "epoch": 9.523809523809524,
      "grad_norm": 0.4656153619289398,
      "learning_rate": 7.500000000000001e-05,
      "loss": 6.9894,
      "step": 100
    },
    {
      "epoch": 9.714285714285714,
      "grad_norm": 0.7926774621009827,
      "learning_rate": 7.25e-05,
      "loss": 6.9854,
      "step": 102
    },
    {
      "epoch": 9.904761904761905,
      "grad_norm": 1.0828678607940674,
      "learning_rate": 7e-05,
      "loss": 6.9185,
      "step": 104
    },
    {
      "epoch": 10.095238095238095,
      "grad_norm": 0.6923830509185791,
      "learning_rate": 6.750000000000001e-05,
      "loss": 6.9804,
      "step": 106
    },
    {
      "epoch": 10.285714285714286,
      "grad_norm": 0.5546735525131226,
      "learning_rate": 6.500000000000001e-05,
      "loss": 6.9273,
      "step": 108
    },
    {
      "epoch": 10.476190476190476,
      "grad_norm": 0.8265076875686646,
      "learning_rate": 6.25e-05,
      "loss": 6.9087,
      "step": 110
    },
    {
      "epoch": 10.666666666666666,
      "grad_norm": 0.3945198655128479,
      "learning_rate": 6e-05,
      "loss": 6.9375,
      "step": 112
    },
    {
      "epoch": 10.857142857142858,
      "grad_norm": 0.5948878526687622,
      "learning_rate": 5.7499999999999995e-05,
      "loss": 6.8764,
      "step": 114
    },
    {
      "epoch": 11.047619047619047,
      "grad_norm": 0.7741471529006958,
      "learning_rate": 5.500000000000001e-05,
      "loss": 6.7551,
      "step": 116
    },
    {
      "epoch": 11.238095238095237,
      "grad_norm": 0.32554784417152405,
      "learning_rate": 5.25e-05,
      "loss": 6.8862,
      "step": 118
    },
    {
      "epoch": 11.428571428571429,
      "grad_norm": 0.5033702850341797,
      "learning_rate": 5e-05,
      "loss": 6.7297,
      "step": 120
    },
    {
      "epoch": 11.619047619047619,
      "grad_norm": 0.5291158556938171,
      "learning_rate": 4.75e-05,
      "loss": 6.9826,
      "step": 122
    },
    {
      "epoch": 11.80952380952381,
      "grad_norm": 0.39498385787010193,
      "learning_rate": 4.5e-05,
      "loss": 6.837,
      "step": 124
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.4402136206626892,
      "learning_rate": 4.25e-05,
      "loss": 7.0434,
      "step": 126
    },
    {
      "epoch": 12.19047619047619,
      "grad_norm": 0.6476764678955078,
      "learning_rate": 4e-05,
      "loss": 6.8524,
      "step": 128
    },
    {
      "epoch": 12.380952380952381,
      "grad_norm": 0.330609530210495,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 6.8742,
      "step": 130
    },
    {
      "epoch": 12.571428571428571,
      "grad_norm": 0.5420040488243103,
      "learning_rate": 3.5e-05,
      "loss": 6.7931,
      "step": 132
    },
    {
      "epoch": 12.761904761904763,
      "grad_norm": 0.3482373356819153,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 6.883,
      "step": 134
    },
    {
      "epoch": 12.952380952380953,
      "grad_norm": 0.3476051092147827,
      "learning_rate": 3e-05,
      "loss": 6.9857,
      "step": 136
    },
    {
      "epoch": 13.142857142857142,
      "grad_norm": 0.43590274453163147,
      "learning_rate": 2.8749999999999997e-05,
      "loss": 8.116,
      "step": 138
    },
    {
      "epoch": 13.333333333333334,
      "grad_norm": 0.2993098497390747,
      "learning_rate": 2.625e-05,
      "loss": 6.657,
      "step": 140
    },
    {
      "epoch": 13.523809523809524,
      "grad_norm": 0.3477262556552887,
      "learning_rate": 2.375e-05,
      "loss": 6.9781,
      "step": 142
    },
    {
      "epoch": 13.714285714285714,
      "grad_norm": 0.47370073199272156,
      "learning_rate": 2.125e-05,
      "loss": 6.9277,
      "step": 144
    },
    {
      "epoch": 13.904761904761905,
      "grad_norm": 0.3924289345741272,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 6.8967,
      "step": 146
    },
    {
      "epoch": 14.095238095238095,
      "grad_norm": 0.5621922612190247,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 6.7197,
      "step": 148
    },
    {
      "epoch": 14.285714285714286,
      "grad_norm": 0.3454875349998474,
      "learning_rate": 1.3750000000000002e-05,
      "loss": 6.9314,
      "step": 150
    },
    {
      "epoch": 14.476190476190476,
      "grad_norm": 0.3146642744541168,
      "learning_rate": 1.125e-05,
      "loss": 6.9142,
      "step": 152
    },
    {
      "epoch": 14.666666666666666,
      "grad_norm": 0.3762160837650299,
      "learning_rate": 8.75e-06,
      "loss": 6.8759,
      "step": 154
    },
    {
      "epoch": 14.857142857142858,
      "grad_norm": 0.33906954526901245,
      "learning_rate": 6.25e-06,
      "loss": 6.8712,
      "step": 156
    },
    {
      "epoch": 15.047619047619047,
      "grad_norm": 0.3414846360683441,
      "learning_rate": 3.75e-06,
      "loss": 6.737,
      "step": 158
    },
    {
      "epoch": 15.238095238095237,
      "grad_norm": 0.4463809132575989,
      "learning_rate": 1.25e-06,
      "loss": 6.9144,
      "step": 160
    },
    {
      "epoch": 15.238095238095237,
      "step": 160,
      "total_flos": 800861569170024.0,
      "train_loss": 7.170098584890366,
      "train_runtime": 677.1666,
      "train_samples_per_second": 3.969,
      "train_steps_per_second": 0.236
    }
  ],
  "logging_steps": 2,
  "max_steps": 160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 16,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 800861569170024.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}