sedrickkeh commited on
Commit
41a2ff6
·
verified ·
1 Parent(s): d62472d

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bffbf89d7b18bd8df5e21657f4943256b108649461e1fa0c6d58c5ab2c05dfb7
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c7fb638823d2c5565fcd2dd3892942d48829e2196290b3bc57aa5e8f6c397e
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b80bfb13d4ac14cd288bc4ab048a0f2f38d60c1289819d4f02dd575e46fd1a9d
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6f058ca51bd35ddb7dba6d552d3840ff55724e8bc1677af96780467619ed8a
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:263a38288322d73d762267f14dd8c0a0c16f4de38246d597a090b550b9e64130
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8252d1b03b0659e1dc1e341e423385cb59aae3c9b3ed29c233781a12693f5d0
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71069954a69f848f2e7967881b64dbe64cb8349e203d682c1974385deeda329b
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad8580c92d189840b5991033a294d06d305ff92ec8ea19650d1a30fb1987189
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -64,3 +64,68 @@
64
  {"current_steps": 640, "total_steps": 1938, "loss": 0.8188, "lr": 5e-06, "epoch": 0.9893719806763285, "percentage": 33.02, "elapsed_time": "10:16:47", "remaining_time": "20:50:55"}
65
  {"current_steps": 646, "total_steps": 1938, "eval_loss": 0.8183467984199524, "epoch": 0.9986473429951691, "percentage": 33.33, "elapsed_time": "10:34:44", "remaining_time": "21:09:28"}
66
  {"current_steps": 650, "total_steps": 1938, "loss": 0.8235, "lr": 5e-06, "epoch": 1.0050241545893719, "percentage": 33.54, "elapsed_time": "10:39:07", "remaining_time": "21:06:26"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  {"current_steps": 640, "total_steps": 1938, "loss": 0.8188, "lr": 5e-06, "epoch": 0.9893719806763285, "percentage": 33.02, "elapsed_time": "10:16:47", "remaining_time": "20:50:55"}
65
  {"current_steps": 646, "total_steps": 1938, "eval_loss": 0.8183467984199524, "epoch": 0.9986473429951691, "percentage": 33.33, "elapsed_time": "10:34:44", "remaining_time": "21:09:28"}
66
  {"current_steps": 650, "total_steps": 1938, "loss": 0.8235, "lr": 5e-06, "epoch": 1.0050241545893719, "percentage": 33.54, "elapsed_time": "10:39:07", "remaining_time": "21:06:26"}
67
+ {"current_steps": 660, "total_steps": 1938, "loss": 0.7771, "lr": 5e-06, "epoch": 1.0204830917874397, "percentage": 34.06, "elapsed_time": "10:48:45", "remaining_time": "20:56:13"}
68
+ {"current_steps": 670, "total_steps": 1938, "loss": 0.7802, "lr": 5e-06, "epoch": 1.0359420289855072, "percentage": 34.57, "elapsed_time": "10:58:24", "remaining_time": "20:46:02"}
69
+ {"current_steps": 680, "total_steps": 1938, "loss": 0.7777, "lr": 5e-06, "epoch": 1.0514009661835748, "percentage": 35.09, "elapsed_time": "11:08:02", "remaining_time": "20:35:52"}
70
+ {"current_steps": 690, "total_steps": 1938, "loss": 0.7751, "lr": 5e-06, "epoch": 1.0668599033816426, "percentage": 35.6, "elapsed_time": "11:17:40", "remaining_time": "20:25:43"}
71
+ {"current_steps": 700, "total_steps": 1938, "loss": 0.7808, "lr": 5e-06, "epoch": 1.0823188405797102, "percentage": 36.12, "elapsed_time": "11:27:20", "remaining_time": "20:15:36"}
72
+ {"current_steps": 710, "total_steps": 1938, "loss": 0.7769, "lr": 5e-06, "epoch": 1.0977777777777777, "percentage": 36.64, "elapsed_time": "11:36:59", "remaining_time": "20:05:30"}
73
+ {"current_steps": 720, "total_steps": 1938, "loss": 0.7775, "lr": 5e-06, "epoch": 1.1132367149758453, "percentage": 37.15, "elapsed_time": "11:46:38", "remaining_time": "19:55:24"}
74
+ {"current_steps": 730, "total_steps": 1938, "loss": 0.7787, "lr": 5e-06, "epoch": 1.128695652173913, "percentage": 37.67, "elapsed_time": "11:56:17", "remaining_time": "19:45:19"}
75
+ {"current_steps": 740, "total_steps": 1938, "loss": 0.7816, "lr": 5e-06, "epoch": 1.1441545893719807, "percentage": 38.18, "elapsed_time": "12:05:56", "remaining_time": "19:35:14"}
76
+ {"current_steps": 750, "total_steps": 1938, "loss": 0.7811, "lr": 5e-06, "epoch": 1.1596135265700482, "percentage": 38.7, "elapsed_time": "12:15:35", "remaining_time": "19:25:10"}
77
+ {"current_steps": 760, "total_steps": 1938, "loss": 0.7763, "lr": 5e-06, "epoch": 1.175072463768116, "percentage": 39.22, "elapsed_time": "12:25:13", "remaining_time": "19:15:06"}
78
+ {"current_steps": 770, "total_steps": 1938, "loss": 0.7821, "lr": 5e-06, "epoch": 1.1905314009661836, "percentage": 39.73, "elapsed_time": "12:34:52", "remaining_time": "19:05:03"}
79
+ {"current_steps": 780, "total_steps": 1938, "loss": 0.7771, "lr": 5e-06, "epoch": 1.2059903381642512, "percentage": 40.25, "elapsed_time": "12:44:31", "remaining_time": "18:55:01"}
80
+ {"current_steps": 790, "total_steps": 1938, "loss": 0.7832, "lr": 5e-06, "epoch": 1.221449275362319, "percentage": 40.76, "elapsed_time": "12:54:10", "remaining_time": "18:44:59"}
81
+ {"current_steps": 800, "total_steps": 1938, "loss": 0.7795, "lr": 5e-06, "epoch": 1.2369082125603865, "percentage": 41.28, "elapsed_time": "13:03:49", "remaining_time": "18:34:59"}
82
+ {"current_steps": 810, "total_steps": 1938, "loss": 0.7767, "lr": 5e-06, "epoch": 1.252367149758454, "percentage": 41.8, "elapsed_time": "13:13:29", "remaining_time": "18:25:00"}
83
+ {"current_steps": 820, "total_steps": 1938, "loss": 0.7784, "lr": 5e-06, "epoch": 1.2678260869565217, "percentage": 42.31, "elapsed_time": "13:23:09", "remaining_time": "18:15:01"}
84
+ {"current_steps": 830, "total_steps": 1938, "loss": 0.7765, "lr": 5e-06, "epoch": 1.2832850241545894, "percentage": 42.83, "elapsed_time": "13:32:48", "remaining_time": "18:05:02"}
85
+ {"current_steps": 840, "total_steps": 1938, "loss": 0.7791, "lr": 5e-06, "epoch": 1.298743961352657, "percentage": 43.34, "elapsed_time": "13:42:26", "remaining_time": "17:55:02"}
86
+ {"current_steps": 850, "total_steps": 1938, "loss": 0.7775, "lr": 5e-06, "epoch": 1.3142028985507246, "percentage": 43.86, "elapsed_time": "13:52:05", "remaining_time": "17:45:04"}
87
+ {"current_steps": 860, "total_steps": 1938, "loss": 0.7743, "lr": 5e-06, "epoch": 1.3296618357487922, "percentage": 44.38, "elapsed_time": "14:01:44", "remaining_time": "17:35:06"}
88
+ {"current_steps": 870, "total_steps": 1938, "loss": 0.7768, "lr": 5e-06, "epoch": 1.34512077294686, "percentage": 44.89, "elapsed_time": "14:11:23", "remaining_time": "17:25:09"}
89
+ {"current_steps": 880, "total_steps": 1938, "loss": 0.773, "lr": 5e-06, "epoch": 1.3605797101449275, "percentage": 45.41, "elapsed_time": "14:21:03", "remaining_time": "17:15:13"}
90
+ {"current_steps": 890, "total_steps": 1938, "loss": 0.774, "lr": 5e-06, "epoch": 1.376038647342995, "percentage": 45.92, "elapsed_time": "14:30:40", "remaining_time": "17:05:15"}
91
+ {"current_steps": 900, "total_steps": 1938, "loss": 0.7802, "lr": 5e-06, "epoch": 1.3914975845410629, "percentage": 46.44, "elapsed_time": "14:40:19", "remaining_time": "16:55:18"}
92
+ {"current_steps": 910, "total_steps": 1938, "loss": 0.7787, "lr": 5e-06, "epoch": 1.4069565217391304, "percentage": 46.96, "elapsed_time": "14:49:58", "remaining_time": "16:45:22"}
93
+ {"current_steps": 920, "total_steps": 1938, "loss": 0.7738, "lr": 5e-06, "epoch": 1.422415458937198, "percentage": 47.47, "elapsed_time": "14:59:38", "remaining_time": "16:35:28"}
94
+ {"current_steps": 930, "total_steps": 1938, "loss": 0.7773, "lr": 5e-06, "epoch": 1.4378743961352658, "percentage": 47.99, "elapsed_time": "15:09:17", "remaining_time": "16:25:32"}
95
+ {"current_steps": 940, "total_steps": 1938, "loss": 0.7767, "lr": 5e-06, "epoch": 1.4533333333333334, "percentage": 48.5, "elapsed_time": "15:18:55", "remaining_time": "16:15:37"}
96
+ {"current_steps": 950, "total_steps": 1938, "loss": 0.7765, "lr": 5e-06, "epoch": 1.468792270531401, "percentage": 49.02, "elapsed_time": "15:28:34", "remaining_time": "16:05:43"}
97
+ {"current_steps": 960, "total_steps": 1938, "loss": 0.7728, "lr": 5e-06, "epoch": 1.4842512077294687, "percentage": 49.54, "elapsed_time": "15:38:13", "remaining_time": "15:55:48"}
98
+ {"current_steps": 970, "total_steps": 1938, "loss": 0.7777, "lr": 5e-06, "epoch": 1.4997101449275363, "percentage": 50.05, "elapsed_time": "15:47:52", "remaining_time": "15:45:55"}
99
+ {"current_steps": 980, "total_steps": 1938, "loss": 0.7747, "lr": 5e-06, "epoch": 1.5151690821256039, "percentage": 50.57, "elapsed_time": "15:57:31", "remaining_time": "15:36:02"}
100
+ {"current_steps": 990, "total_steps": 1938, "loss": 0.7786, "lr": 5e-06, "epoch": 1.5306280193236717, "percentage": 51.08, "elapsed_time": "16:07:10", "remaining_time": "15:26:08"}
101
+ {"current_steps": 1000, "total_steps": 1938, "loss": 0.7823, "lr": 5e-06, "epoch": 1.546086956521739, "percentage": 51.6, "elapsed_time": "16:16:49", "remaining_time": "15:16:15"}
102
+ {"current_steps": 1010, "total_steps": 1938, "loss": 0.7704, "lr": 5e-06, "epoch": 1.5615458937198068, "percentage": 52.12, "elapsed_time": "16:26:28", "remaining_time": "15:06:23"}
103
+ {"current_steps": 1020, "total_steps": 1938, "loss": 0.7758, "lr": 5e-06, "epoch": 1.5770048309178744, "percentage": 52.63, "elapsed_time": "16:36:07", "remaining_time": "14:56:30"}
104
+ {"current_steps": 1030, "total_steps": 1938, "loss": 0.7751, "lr": 5e-06, "epoch": 1.592463768115942, "percentage": 53.15, "elapsed_time": "16:45:46", "remaining_time": "14:46:38"}
105
+ {"current_steps": 1040, "total_steps": 1938, "loss": 0.7742, "lr": 5e-06, "epoch": 1.6079227053140097, "percentage": 53.66, "elapsed_time": "16:55:25", "remaining_time": "14:36:46"}
106
+ {"current_steps": 1050, "total_steps": 1938, "loss": 0.777, "lr": 5e-06, "epoch": 1.6233816425120773, "percentage": 54.18, "elapsed_time": "17:05:04", "remaining_time": "14:26:54"}
107
+ {"current_steps": 1060, "total_steps": 1938, "loss": 0.7757, "lr": 5e-06, "epoch": 1.6388405797101449, "percentage": 54.7, "elapsed_time": "17:14:43", "remaining_time": "14:17:03"}
108
+ {"current_steps": 1070, "total_steps": 1938, "loss": 0.7744, "lr": 5e-06, "epoch": 1.6542995169082126, "percentage": 55.21, "elapsed_time": "17:24:22", "remaining_time": "14:07:12"}
109
+ {"current_steps": 1080, "total_steps": 1938, "loss": 0.7753, "lr": 5e-06, "epoch": 1.6697584541062802, "percentage": 55.73, "elapsed_time": "17:34:02", "remaining_time": "13:57:22"}
110
+ {"current_steps": 1090, "total_steps": 1938, "loss": 0.7753, "lr": 5e-06, "epoch": 1.6852173913043478, "percentage": 56.24, "elapsed_time": "17:43:41", "remaining_time": "13:47:32"}
111
+ {"current_steps": 1100, "total_steps": 1938, "loss": 0.774, "lr": 5e-06, "epoch": 1.7006763285024156, "percentage": 56.76, "elapsed_time": "17:53:20", "remaining_time": "13:37:41"}
112
+ {"current_steps": 1110, "total_steps": 1938, "loss": 0.7772, "lr": 5e-06, "epoch": 1.7161352657004831, "percentage": 57.28, "elapsed_time": "18:02:59", "remaining_time": "13:27:51"}
113
+ {"current_steps": 1120, "total_steps": 1938, "loss": 0.781, "lr": 5e-06, "epoch": 1.7315942028985507, "percentage": 57.79, "elapsed_time": "18:12:38", "remaining_time": "13:18:01"}
114
+ {"current_steps": 1130, "total_steps": 1938, "loss": 0.7749, "lr": 5e-06, "epoch": 1.7470531400966185, "percentage": 58.31, "elapsed_time": "18:22:17", "remaining_time": "13:08:10"}
115
+ {"current_steps": 1140, "total_steps": 1938, "loss": 0.7719, "lr": 5e-06, "epoch": 1.7625120772946858, "percentage": 58.82, "elapsed_time": "18:31:55", "remaining_time": "12:58:20"}
116
+ {"current_steps": 1150, "total_steps": 1938, "loss": 0.7753, "lr": 5e-06, "epoch": 1.7779710144927536, "percentage": 59.34, "elapsed_time": "18:41:34", "remaining_time": "12:48:31"}
117
+ {"current_steps": 1160, "total_steps": 1938, "loss": 0.7777, "lr": 5e-06, "epoch": 1.7934299516908214, "percentage": 59.86, "elapsed_time": "18:51:14", "remaining_time": "12:38:42"}
118
+ {"current_steps": 1170, "total_steps": 1938, "loss": 0.7774, "lr": 5e-06, "epoch": 1.8088888888888888, "percentage": 60.37, "elapsed_time": "19:00:52", "remaining_time": "12:28:52"}
119
+ {"current_steps": 1180, "total_steps": 1938, "loss": 0.7734, "lr": 5e-06, "epoch": 1.8243478260869566, "percentage": 60.89, "elapsed_time": "19:10:31", "remaining_time": "12:19:04"}
120
+ {"current_steps": 1190, "total_steps": 1938, "loss": 0.7743, "lr": 5e-06, "epoch": 1.8398067632850241, "percentage": 61.4, "elapsed_time": "19:20:11", "remaining_time": "12:09:15"}
121
+ {"current_steps": 1200, "total_steps": 1938, "loss": 0.7728, "lr": 5e-06, "epoch": 1.8552657004830917, "percentage": 61.92, "elapsed_time": "19:29:49", "remaining_time": "11:59:26"}
122
+ {"current_steps": 1210, "total_steps": 1938, "loss": 0.774, "lr": 5e-06, "epoch": 1.8707246376811595, "percentage": 62.44, "elapsed_time": "19:39:28", "remaining_time": "11:49:38"}
123
+ {"current_steps": 1220, "total_steps": 1938, "loss": 0.7746, "lr": 5e-06, "epoch": 1.886183574879227, "percentage": 62.95, "elapsed_time": "19:49:07", "remaining_time": "11:39:49"}
124
+ {"current_steps": 1230, "total_steps": 1938, "loss": 0.7693, "lr": 5e-06, "epoch": 1.9016425120772946, "percentage": 63.47, "elapsed_time": "19:58:47", "remaining_time": "11:30:02"}
125
+ {"current_steps": 1240, "total_steps": 1938, "loss": 0.7717, "lr": 5e-06, "epoch": 1.9171014492753624, "percentage": 63.98, "elapsed_time": "20:08:25", "remaining_time": "11:20:13"}
126
+ {"current_steps": 1250, "total_steps": 1938, "loss": 0.7754, "lr": 5e-06, "epoch": 1.93256038647343, "percentage": 64.5, "elapsed_time": "20:18:04", "remaining_time": "11:10:25"}
127
+ {"current_steps": 1260, "total_steps": 1938, "loss": 0.7755, "lr": 5e-06, "epoch": 1.9480193236714975, "percentage": 65.02, "elapsed_time": "20:27:43", "remaining_time": "11:00:38"}
128
+ {"current_steps": 1270, "total_steps": 1938, "loss": 0.7722, "lr": 5e-06, "epoch": 1.9634782608695653, "percentage": 65.53, "elapsed_time": "20:37:23", "remaining_time": "10:50:50"}
129
+ {"current_steps": 1280, "total_steps": 1938, "loss": 0.7755, "lr": 5e-06, "epoch": 1.9789371980676327, "percentage": 66.05, "elapsed_time": "20:47:01", "remaining_time": "10:41:02"}
130
+ {"current_steps": 1290, "total_steps": 1938, "loss": 0.7709, "lr": 5e-06, "epoch": 1.9943961352657005, "percentage": 66.56, "elapsed_time": "20:56:39", "remaining_time": "10:31:15"}
131
+ {"current_steps": 1293, "total_steps": 1938, "eval_loss": 0.8051349520683289, "epoch": 1.9990338164251207, "percentage": 66.72, "elapsed_time": "21:11:26", "remaining_time": "10:34:14"}