internlm-xcomposer2-7b / trainer_state.json
DLight1551's picture
update
194829f
raw
history blame
20.7 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.997545409916544,
"eval_steps": 500,
"global_step": 127,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": "0.0000e+00",
"loss": 3.1086,
"slid_loss": 3.1086,
"step": 1,
"time": 93.96
},
{
"epoch": 0.02,
"learning_rate": "5.0000e-06",
"loss": 2.8339,
"slid_loss": 2.9713,
"step": 2,
"time": 84.7
},
{
"epoch": 0.02,
"learning_rate": "5.0000e-06",
"loss": 2.8656,
"slid_loss": 2.936,
"step": 3,
"time": 79.97
},
{
"epoch": 0.03,
"learning_rate": "5.0000e-06",
"loss": 2.5524,
"slid_loss": 2.8401,
"step": 4,
"time": 76.7
},
{
"epoch": 0.04,
"learning_rate": "5.0000e-06",
"loss": 2.5858,
"slid_loss": 2.7892,
"step": 5,
"time": 81.12
},
{
"epoch": 0.05,
"learning_rate": "5.0000e-06",
"loss": 2.0771,
"slid_loss": 2.6706,
"step": 6,
"time": 78.35
},
{
"epoch": 0.05,
"learning_rate": "5.0000e-06",
"loss": 2.0975,
"slid_loss": 2.5887,
"step": 7,
"time": 81.78
},
{
"epoch": 0.06,
"learning_rate": "5.0000e-06",
"loss": 1.9181,
"slid_loss": 2.5049,
"step": 8,
"time": 81.35
},
{
"epoch": 0.07,
"learning_rate": "5.0000e-06",
"loss": 1.9339,
"slid_loss": 2.4414,
"step": 9,
"time": 83.78
},
{
"epoch": 0.08,
"learning_rate": "5.0000e-06",
"loss": 1.8134,
"slid_loss": 2.3786,
"step": 10,
"time": 79.06
},
{
"epoch": 0.09,
"learning_rate": "5.0000e-06",
"loss": 1.8346,
"slid_loss": 2.3292,
"step": 11,
"time": 80.19
},
{
"epoch": 0.09,
"learning_rate": "5.0000e-06",
"loss": 1.801,
"slid_loss": 2.2851,
"step": 12,
"time": 87.7
},
{
"epoch": 0.1,
"learning_rate": "5.0000e-06",
"loss": 1.8712,
"slid_loss": 2.2533,
"step": 13,
"time": 81.42
},
{
"epoch": 0.11,
"learning_rate": "5.0000e-06",
"loss": 1.8017,
"slid_loss": 2.221,
"step": 14,
"time": 81.39
},
{
"epoch": 0.12,
"learning_rate": "5.0000e-06",
"loss": 1.7378,
"slid_loss": 2.1888,
"step": 15,
"time": 80.29
},
{
"epoch": 0.13,
"learning_rate": "5.0000e-06",
"loss": 1.9118,
"slid_loss": 2.1715,
"step": 16,
"time": 83.04
},
{
"epoch": 0.13,
"learning_rate": "5.0000e-06",
"loss": 1.7942,
"slid_loss": 2.1493,
"step": 17,
"time": 75.98
},
{
"epoch": 0.14,
"learning_rate": "5.0000e-06",
"loss": 1.7516,
"slid_loss": 2.1272,
"step": 18,
"time": 88.91
},
{
"epoch": 0.15,
"learning_rate": "5.0000e-06",
"loss": 1.8129,
"slid_loss": 2.1107,
"step": 19,
"time": 80.76
},
{
"epoch": 0.16,
"learning_rate": "5.0000e-06",
"loss": 1.7673,
"slid_loss": 2.0935,
"step": 20,
"time": 81.28
},
{
"epoch": 0.16,
"learning_rate": "5.0000e-06",
"loss": 1.7984,
"slid_loss": 2.0795,
"step": 21,
"time": 85.37
},
{
"epoch": 0.17,
"learning_rate": "5.0000e-06",
"loss": 1.7733,
"slid_loss": 2.0655,
"step": 22,
"time": 80.72
},
{
"epoch": 0.18,
"learning_rate": "5.0000e-06",
"loss": 1.7691,
"slid_loss": 2.0527,
"step": 23,
"time": 80.11
},
{
"epoch": 0.19,
"learning_rate": "5.0000e-06",
"loss": 1.8212,
"slid_loss": 2.043,
"step": 24,
"time": 81.83
},
{
"epoch": 0.2,
"learning_rate": "5.0000e-06",
"loss": 1.9154,
"slid_loss": 2.0379,
"step": 25,
"time": 78.48
},
{
"epoch": 0.2,
"learning_rate": "5.0000e-06",
"loss": 1.8267,
"slid_loss": 2.0298,
"step": 26,
"time": 80.38
},
{
"epoch": 0.21,
"learning_rate": "5.0000e-06",
"loss": 1.9029,
"slid_loss": 2.0251,
"step": 27,
"time": 84.88
},
{
"epoch": 0.22,
"learning_rate": "5.0000e-06",
"loss": 1.8892,
"slid_loss": 2.0202,
"step": 28,
"time": 81.13
},
{
"epoch": 0.23,
"learning_rate": "5.0000e-06",
"loss": 1.7599,
"slid_loss": 2.0112,
"step": 29,
"time": 83.73
},
{
"epoch": 0.24,
"learning_rate": "5.0000e-06",
"loss": 1.7919,
"slid_loss": 2.0039,
"step": 30,
"time": 82.44
},
{
"epoch": 0.24,
"learning_rate": "5.0000e-06",
"loss": 1.8086,
"slid_loss": 1.9976,
"step": 31,
"time": 83.46
},
{
"epoch": 0.25,
"learning_rate": "5.0000e-06",
"loss": 1.8349,
"slid_loss": 1.9925,
"step": 32,
"time": 78.89
},
{
"epoch": 0.26,
"learning_rate": "5.0000e-06",
"loss": 1.8263,
"slid_loss": 1.9875,
"step": 33,
"time": 79.08
},
{
"epoch": 0.27,
"learning_rate": "5.0000e-06",
"loss": 1.9287,
"slid_loss": 1.9858,
"step": 34,
"time": 81.76
},
{
"epoch": 0.27,
"learning_rate": "5.0000e-06",
"loss": 1.785,
"slid_loss": 1.98,
"step": 35,
"time": 78.4
},
{
"epoch": 0.28,
"learning_rate": "5.0000e-06",
"loss": 1.8191,
"slid_loss": 1.9756,
"step": 36,
"time": 77.74
},
{
"epoch": 0.29,
"learning_rate": "5.0000e-06",
"loss": 1.8219,
"slid_loss": 1.9714,
"step": 37,
"time": 86.24
},
{
"epoch": 0.3,
"learning_rate": "5.0000e-06",
"loss": 1.8075,
"slid_loss": 1.9671,
"step": 38,
"time": 76.73
},
{
"epoch": 0.31,
"learning_rate": "5.0000e-06",
"loss": 1.7785,
"slid_loss": 1.9623,
"step": 39,
"time": 80.96
},
{
"epoch": 0.31,
"learning_rate": "5.0000e-06",
"loss": 1.8296,
"slid_loss": 1.959,
"step": 40,
"time": 83.93
},
{
"epoch": 0.32,
"learning_rate": "5.0000e-06",
"loss": 1.7834,
"slid_loss": 1.9547,
"step": 41,
"time": 77.98
},
{
"epoch": 0.33,
"learning_rate": "5.0000e-06",
"loss": 1.7894,
"slid_loss": 1.9507,
"step": 42,
"time": 82.3
},
{
"epoch": 0.34,
"learning_rate": "5.0000e-06",
"loss": 1.8013,
"slid_loss": 1.9473,
"step": 43,
"time": 84.45
},
{
"epoch": 0.35,
"learning_rate": "5.0000e-06",
"loss": 1.7882,
"slid_loss": 1.9436,
"step": 44,
"time": 78.67
},
{
"epoch": 0.35,
"learning_rate": "5.0000e-06",
"loss": 1.7633,
"slid_loss": 1.9396,
"step": 45,
"time": 79.58
},
{
"epoch": 0.36,
"learning_rate": "5.0000e-06",
"loss": 1.8443,
"slid_loss": 1.9376,
"step": 46,
"time": 79.13
},
{
"epoch": 0.37,
"learning_rate": "5.0000e-06",
"loss": 1.7184,
"slid_loss": 1.9329,
"step": 47,
"time": 78.73
},
{
"epoch": 0.38,
"learning_rate": "5.0000e-06",
"loss": 1.7869,
"slid_loss": 1.9299,
"step": 48,
"time": 77.96
},
{
"epoch": 0.38,
"learning_rate": "5.0000e-06",
"loss": 1.8581,
"slid_loss": 1.9284,
"step": 49,
"time": 84.5
},
{
"epoch": 0.39,
"learning_rate": "5.0000e-06",
"loss": 1.7501,
"slid_loss": 1.9248,
"step": 50,
"time": 79.17
},
{
"epoch": 0.4,
"learning_rate": "5.0000e-06",
"loss": 1.7825,
"slid_loss": 1.922,
"step": 51,
"time": 199.7
},
{
"epoch": 0.41,
"learning_rate": "5.0000e-06",
"loss": 1.863,
"slid_loss": 1.9209,
"step": 52,
"time": 81.1
},
{
"epoch": 0.42,
"learning_rate": "5.0000e-06",
"loss": 1.8452,
"slid_loss": 1.9195,
"step": 53,
"time": 77.93
},
{
"epoch": 0.42,
"learning_rate": "5.0000e-06",
"loss": 1.8031,
"slid_loss": 1.9173,
"step": 54,
"time": 78.92
},
{
"epoch": 0.43,
"learning_rate": "5.0000e-06",
"loss": 1.7708,
"slid_loss": 1.9147,
"step": 55,
"time": 77.85
},
{
"epoch": 0.44,
"learning_rate": "5.0000e-06",
"loss": 1.7764,
"slid_loss": 1.9122,
"step": 56,
"time": 76.81
},
{
"epoch": 0.45,
"learning_rate": "5.0000e-06",
"loss": 1.7854,
"slid_loss": 1.91,
"step": 57,
"time": 80.48
},
{
"epoch": 0.46,
"learning_rate": "5.0000e-06",
"loss": 1.7826,
"slid_loss": 1.9078,
"step": 58,
"time": 77.28
},
{
"epoch": 0.46,
"learning_rate": "5.0000e-06",
"loss": 1.8057,
"slid_loss": 1.906,
"step": 59,
"time": 77.73
},
{
"epoch": 0.47,
"learning_rate": "5.0000e-06",
"loss": 1.7842,
"slid_loss": 1.904,
"step": 60,
"time": 80.25
},
{
"epoch": 0.48,
"learning_rate": "5.0000e-06",
"loss": 1.8633,
"slid_loss": 1.9033,
"step": 61,
"time": 83.66
},
{
"epoch": 0.49,
"learning_rate": "5.0000e-06",
"loss": 1.7775,
"slid_loss": 1.9013,
"step": 62,
"time": 83.44
},
{
"epoch": 0.49,
"learning_rate": "5.0000e-06",
"loss": 1.7193,
"slid_loss": 1.8984,
"step": 63,
"time": 87.14
},
{
"epoch": 0.5,
"learning_rate": "5.0000e-06",
"loss": 1.7909,
"slid_loss": 1.8967,
"step": 64,
"time": 83.19
},
{
"epoch": 0.51,
"learning_rate": "5.0000e-06",
"loss": 1.7992,
"slid_loss": 1.8952,
"step": 65,
"time": 80.02
},
{
"epoch": 0.52,
"learning_rate": "5.0000e-06",
"loss": 1.8568,
"slid_loss": 1.8947,
"step": 66,
"time": 82.08
},
{
"epoch": 0.53,
"learning_rate": "5.0000e-06",
"loss": 1.8153,
"slid_loss": 1.8935,
"step": 67,
"time": 81.18
},
{
"epoch": 0.53,
"learning_rate": "5.0000e-06",
"loss": 1.8354,
"slid_loss": 1.8926,
"step": 68,
"time": 80.32
},
{
"epoch": 0.54,
"learning_rate": "5.0000e-06",
"loss": 1.8226,
"slid_loss": 1.8916,
"step": 69,
"time": 80.34
},
{
"epoch": 0.55,
"learning_rate": "5.0000e-06",
"loss": 1.7428,
"slid_loss": 1.8895,
"step": 70,
"time": 81.95
},
{
"epoch": 0.56,
"learning_rate": "5.0000e-06",
"loss": 1.7535,
"slid_loss": 1.8876,
"step": 71,
"time": 79.71
},
{
"epoch": 0.57,
"learning_rate": "5.0000e-06",
"loss": 1.7228,
"slid_loss": 1.8853,
"step": 72,
"time": 81.39
},
{
"epoch": 0.57,
"learning_rate": "5.0000e-06",
"loss": 1.768,
"slid_loss": 1.8837,
"step": 73,
"time": 83.67
},
{
"epoch": 0.58,
"learning_rate": "5.0000e-06",
"loss": 1.8065,
"slid_loss": 1.8826,
"step": 74,
"time": 84.41
},
{
"epoch": 0.59,
"learning_rate": "5.0000e-06",
"loss": 1.8008,
"slid_loss": 1.8815,
"step": 75,
"time": 80.91
},
{
"epoch": 0.6,
"learning_rate": "5.0000e-06",
"loss": 1.8525,
"slid_loss": 1.8812,
"step": 76,
"time": 84.8
},
{
"epoch": 0.6,
"learning_rate": "5.0000e-06",
"loss": 1.7307,
"slid_loss": 1.8792,
"step": 77,
"time": 78.98
},
{
"epoch": 0.61,
"learning_rate": "5.0000e-06",
"loss": 1.7338,
"slid_loss": 1.8773,
"step": 78,
"time": 84.67
},
{
"epoch": 0.62,
"learning_rate": "5.0000e-06",
"loss": 1.7968,
"slid_loss": 1.8763,
"step": 79,
"time": 78.49
},
{
"epoch": 0.63,
"learning_rate": "5.0000e-06",
"loss": 1.7787,
"slid_loss": 1.8751,
"step": 80,
"time": 82.89
},
{
"epoch": 0.64,
"learning_rate": "5.0000e-06",
"loss": 1.779,
"slid_loss": 1.8739,
"step": 81,
"time": 78.23
},
{
"epoch": 0.64,
"learning_rate": "5.0000e-06",
"loss": 1.7907,
"slid_loss": 1.8729,
"step": 82,
"time": 81.05
},
{
"epoch": 0.65,
"learning_rate": "5.0000e-06",
"loss": 1.7231,
"slid_loss": 1.8711,
"step": 83,
"time": 79.99
},
{
"epoch": 0.66,
"learning_rate": "5.0000e-06",
"loss": 1.7397,
"slid_loss": 1.8695,
"step": 84,
"time": 79.98
},
{
"epoch": 0.67,
"learning_rate": "5.0000e-06",
"loss": 1.7482,
"slid_loss": 1.8681,
"step": 85,
"time": 79.09
},
{
"epoch": 0.68,
"learning_rate": "5.0000e-06",
"loss": 1.7731,
"slid_loss": 1.867,
"step": 86,
"time": 76.58
},
{
"epoch": 0.68,
"learning_rate": "5.0000e-06",
"loss": 1.8358,
"slid_loss": 1.8666,
"step": 87,
"time": 81.05
},
{
"epoch": 0.69,
"learning_rate": "5.0000e-06",
"loss": 1.7569,
"slid_loss": 1.8654,
"step": 88,
"time": 78.52
},
{
"epoch": 0.7,
"learning_rate": "5.0000e-06",
"loss": 1.7772,
"slid_loss": 1.8644,
"step": 89,
"time": 80.4
},
{
"epoch": 0.71,
"learning_rate": "5.0000e-06",
"loss": 1.784,
"slid_loss": 1.8635,
"step": 90,
"time": 86.12
},
{
"epoch": 0.71,
"learning_rate": "5.0000e-06",
"loss": 1.7401,
"slid_loss": 1.8621,
"step": 91,
"time": 80.56
},
{
"epoch": 0.72,
"learning_rate": "5.0000e-06",
"loss": 1.7645,
"slid_loss": 1.8611,
"step": 92,
"time": 76.77
},
{
"epoch": 0.73,
"learning_rate": "5.0000e-06",
"loss": 1.7251,
"slid_loss": 1.8596,
"step": 93,
"time": 80.72
},
{
"epoch": 0.74,
"learning_rate": "5.0000e-06",
"loss": 1.792,
"slid_loss": 1.8589,
"step": 94,
"time": 80.45
},
{
"epoch": 0.75,
"learning_rate": "5.0000e-06",
"loss": 1.7834,
"slid_loss": 1.8581,
"step": 95,
"time": 80.28
},
{
"epoch": 0.75,
"learning_rate": "5.0000e-06",
"loss": 1.7851,
"slid_loss": 1.8574,
"step": 96,
"time": 81.67
},
{
"epoch": 0.76,
"learning_rate": "5.0000e-06",
"loss": 1.8255,
"slid_loss": 1.857,
"step": 97,
"time": 77.73
},
{
"epoch": 0.77,
"learning_rate": "5.0000e-06",
"loss": 1.8322,
"slid_loss": 1.8568,
"step": 98,
"time": 79.82
},
{
"epoch": 0.78,
"learning_rate": "5.0000e-06",
"loss": 1.7494,
"slid_loss": 1.8557,
"step": 99,
"time": 83.73
},
{
"epoch": 0.79,
"learning_rate": "5.0000e-06",
"loss": 1.8002,
"slid_loss": 1.8551,
"step": 100,
"time": 80.04
},
{
"epoch": 0.79,
"learning_rate": "5.0000e-06",
"loss": 1.6975,
"slid_loss": 1.841,
"step": 101,
"time": 205.33
},
{
"epoch": 0.8,
"learning_rate": "5.0000e-06",
"loss": 1.792,
"slid_loss": 1.8306,
"step": 102,
"time": 86.16
},
{
"epoch": 0.81,
"learning_rate": "5.0000e-06",
"loss": 1.8129,
"slid_loss": 1.8201,
"step": 103,
"time": 80.84
},
{
"epoch": 0.82,
"learning_rate": "5.0000e-06",
"loss": 1.7504,
"slid_loss": 1.8121,
"step": 104,
"time": 79.26
},
{
"epoch": 0.82,
"learning_rate": "5.0000e-06",
"loss": 1.688,
"slid_loss": 1.8031,
"step": 105,
"time": 99.38
},
{
"epoch": 0.83,
"learning_rate": "5.0000e-06",
"loss": 1.8118,
"slid_loss": 1.8004,
"step": 106,
"time": 85.96
},
{
"epoch": 0.84,
"learning_rate": "5.0000e-06",
"loss": 1.7048,
"slid_loss": 1.7965,
"step": 107,
"time": 84.68
},
{
"epoch": 0.85,
"learning_rate": "5.0000e-06",
"loss": 1.762,
"slid_loss": 1.7949,
"step": 108,
"time": 79.12
},
{
"epoch": 0.86,
"learning_rate": "5.0000e-06",
"loss": 1.799,
"slid_loss": 1.7936,
"step": 109,
"time": 85.23
},
{
"epoch": 0.86,
"learning_rate": "5.0000e-06",
"loss": 1.7582,
"slid_loss": 1.793,
"step": 110,
"time": 85.47
},
{
"epoch": 0.87,
"learning_rate": "5.0000e-06",
"loss": 1.7241,
"slid_loss": 1.7919,
"step": 111,
"time": 81.73
},
{
"epoch": 0.88,
"learning_rate": "5.0000e-06",
"loss": 1.7806,
"slid_loss": 1.7917,
"step": 112,
"time": 85.27
},
{
"epoch": 0.89,
"learning_rate": "5.0000e-06",
"loss": 1.7839,
"slid_loss": 1.7909,
"step": 113,
"time": 78.94
},
{
"epoch": 0.9,
"learning_rate": "5.0000e-06",
"loss": 1.789,
"slid_loss": 1.7907,
"step": 114,
"time": 80.05
},
{
"epoch": 0.9,
"learning_rate": "5.0000e-06",
"loss": 1.7831,
"slid_loss": 1.7912,
"step": 115,
"time": 82.11
},
{
"epoch": 0.91,
"learning_rate": "5.0000e-06",
"loss": 1.7619,
"slid_loss": 1.7897,
"step": 116,
"time": 78.06
},
{
"epoch": 0.92,
"learning_rate": "5.0000e-06",
"loss": 1.8384,
"slid_loss": 1.7901,
"step": 117,
"time": 77.8
},
{
"epoch": 0.93,
"learning_rate": "5.0000e-06",
"loss": 1.78,
"slid_loss": 1.7904,
"step": 118,
"time": 81.19
},
{
"epoch": 0.93,
"learning_rate": "5.0000e-06",
"loss": 1.7805,
"slid_loss": 1.7901,
"step": 119,
"time": 83.57
},
{
"epoch": 0.94,
"learning_rate": "5.0000e-06",
"loss": 1.7509,
"slid_loss": 1.7899,
"step": 120,
"time": 78.54
},
{
"epoch": 0.95,
"learning_rate": "5.0000e-06",
"loss": 1.7806,
"slid_loss": 1.7897,
"step": 121,
"time": 83.97
},
{
"epoch": 0.96,
"learning_rate": "5.0000e-06",
"loss": 1.7887,
"slid_loss": 1.7899,
"step": 122,
"time": 83.74
},
{
"epoch": 0.97,
"learning_rate": "5.0000e-06",
"loss": 1.7084,
"slid_loss": 1.7893,
"step": 123,
"time": 78.85
},
{
"epoch": 0.97,
"learning_rate": "5.0000e-06",
"loss": 1.7843,
"slid_loss": 1.7889,
"step": 124,
"time": 79.91
},
{
"epoch": 0.98,
"learning_rate": "5.0000e-06",
"loss": 1.8215,
"slid_loss": 1.788,
"step": 125,
"time": 77.9
},
{
"epoch": 0.99,
"learning_rate": "5.0000e-06",
"loss": 1.8352,
"slid_loss": 1.7881,
"step": 126,
"time": 82.06
},
{
"epoch": 1.0,
"learning_rate": "5.0000e-06",
"loss": 1.7802,
"slid_loss": 1.7868,
"step": 127,
"time": 80.15
},
{
"epoch": 1.0,
"step": 127,
"time": 0.01,
"total_flos": 0.0,
"train_loss": 1.837487073395196,
"train_runtime": 10569.7638,
"train_samples_per_second": 12.332,
"train_steps_per_second": 0.012
}
],
"logging_steps": 1.0,
"max_steps": 127,
"num_train_epochs": 1,
"save_steps": 50,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}