sedrickkeh commited on
Commit
3cba1b4
·
verified ·
1 Parent(s): ed45bdd

Training in progress, epoch 2

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d7cfab624a23662566da50de12b753559547dcdc1ee9c2cdf6d864931297cb8
3
  size 4949453792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e71d26d5fa5f2853031fb83b2715d67e027b9e6a0bd878d50704df6c3c960b30
3
  size 4949453792
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6ac5d5c7dd00ce4fcc15608d8ff1a59c4dc91203263161ffa1db668f0acfd70
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ef2cf18953e31c5caecf91c8c238b0b5d9300803594f3869b1bf1a4b2aea130
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:768d3db59cd1aaf387aebf39951330f17144e64ca78829b1484f5cf2030a63fe
3
  size 4546807800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:719158224a12f480b84ca12f8851d924614f0f559916f89bae1040718a0587ce
3
  size 4546807800
trainer_log.jsonl CHANGED
@@ -133,3 +133,70 @@
133
  {"current_steps": 1320, "total_steps": 2001, "loss": 0.3788, "lr": 1.7318527015421476e-06, "epoch": 1.9786396852164136, "percentage": 65.97, "elapsed_time": "11:42:49", "remaining_time": "6:02:35"}
134
  {"current_steps": 1330, "total_steps": 2001, "loss": 0.3775, "lr": 1.6880213484367574e-06, "epoch": 1.9936293798013867, "percentage": 66.47, "elapsed_time": "11:48:06", "remaining_time": "5:57:14"}
135
  {"current_steps": 1334, "total_steps": 2001, "eval_loss": 0.055769529193639755, "epoch": 1.9996252576353757, "percentage": 66.67, "elapsed_time": "11:57:45", "remaining_time": "5:58:52"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  {"current_steps": 1320, "total_steps": 2001, "loss": 0.3788, "lr": 1.7318527015421476e-06, "epoch": 1.9786396852164136, "percentage": 65.97, "elapsed_time": "11:42:49", "remaining_time": "6:02:35"}
134
  {"current_steps": 1330, "total_steps": 2001, "loss": 0.3775, "lr": 1.6880213484367574e-06, "epoch": 1.9936293798013867, "percentage": 66.47, "elapsed_time": "11:48:06", "remaining_time": "5:57:14"}
135
  {"current_steps": 1334, "total_steps": 2001, "eval_loss": 0.055769529193639755, "epoch": 1.9996252576353757, "percentage": 66.67, "elapsed_time": "11:57:45", "remaining_time": "5:58:52"}
136
+ {"current_steps": 1340, "total_steps": 2001, "loss": 0.3289, "lr": 1.6444672852970194e-06, "epoch": 2.0086190743863592, "percentage": 66.97, "elapsed_time": "12:02:01", "remaining_time": "5:56:09"}
137
+ {"current_steps": 1350, "total_steps": 2001, "loss": 0.2871, "lr": 1.6012053857955943e-06, "epoch": 2.0236087689713322, "percentage": 67.47, "elapsed_time": "12:07:17", "remaining_time": "5:50:43"}
138
+ {"current_steps": 1360, "total_steps": 2001, "loss": 0.2842, "lr": 1.5582504238315293e-06, "epoch": 2.0385984635563053, "percentage": 67.97, "elapsed_time": "12:12:34", "remaining_time": "5:45:16"}
139
+ {"current_steps": 1370, "total_steps": 2001, "loss": 0.2841, "lr": 1.5156170684849844e-06, "epoch": 2.053588158141278, "percentage": 68.47, "elapsed_time": "12:17:50", "remaining_time": "5:39:50"}
140
+ {"current_steps": 1380, "total_steps": 2001, "loss": 0.2841, "lr": 1.4733198790077518e-06, "epoch": 2.068577852726251, "percentage": 68.97, "elapsed_time": "12:23:06", "remaining_time": "5:34:23"}
141
+ {"current_steps": 1390, "total_steps": 2001, "loss": 0.2841, "lr": 1.4313732998512927e-06, "epoch": 2.0835675473112234, "percentage": 69.47, "elapsed_time": "12:28:21", "remaining_time": "5:28:57"}
142
+ {"current_steps": 1400, "total_steps": 2001, "loss": 0.283, "lr": 1.389791655733959e-06, "epoch": 2.0985572418961964, "percentage": 69.97, "elapsed_time": "12:33:36", "remaining_time": "5:23:30"}
143
+ {"current_steps": 1410, "total_steps": 2001, "loss": 0.2822, "lr": 1.3485891467491247e-06, "epoch": 2.1135469364811694, "percentage": 70.46, "elapsed_time": "12:38:53", "remaining_time": "5:18:05"}
144
+ {"current_steps": 1420, "total_steps": 2001, "loss": 0.2858, "lr": 1.3077798435158615e-06, "epoch": 2.128536631066142, "percentage": 70.96, "elapsed_time": "12:44:09", "remaining_time": "5:12:39"}
145
+ {"current_steps": 1430, "total_steps": 2001, "loss": 0.2832, "lr": 1.2673776823738385e-06, "epoch": 2.143526325651115, "percentage": 71.46, "elapsed_time": "12:49:25", "remaining_time": "5:07:13"}
146
+ {"current_steps": 1440, "total_steps": 2001, "loss": 0.2849, "lr": 1.2273964606240718e-06, "epoch": 2.1585160202360876, "percentage": 71.96, "elapsed_time": "12:54:42", "remaining_time": "5:01:48"}
147
+ {"current_steps": 1450, "total_steps": 2001, "loss": 0.2814, "lr": 1.1878498318171641e-06, "epoch": 2.1735057148210606, "percentage": 72.46, "elapsed_time": "13:00:00", "remaining_time": "4:56:24"}
148
+ {"current_steps": 1460, "total_steps": 2001, "loss": 0.2832, "lr": 1.1487513010906232e-06, "epoch": 2.188495409406033, "percentage": 72.96, "elapsed_time": "13:05:17", "remaining_time": "4:50:59"}
149
+ {"current_steps": 1470, "total_steps": 2001, "loss": 0.2849, "lr": 1.1101142205568734e-06, "epoch": 2.203485103991006, "percentage": 73.46, "elapsed_time": "13:10:32", "remaining_time": "4:45:33"}
150
+ {"current_steps": 1480, "total_steps": 2001, "loss": 0.2866, "lr": 1.0719517847435218e-06, "epoch": 2.218474798575979, "percentage": 73.96, "elapsed_time": "13:15:48", "remaining_time": "4:40:08"}
151
+ {"current_steps": 1490, "total_steps": 2001, "loss": 0.2857, "lr": 1.0342770260874365e-06, "epoch": 2.2334644931609517, "percentage": 74.46, "elapsed_time": "13:21:02", "remaining_time": "4:34:43"}
152
+ {"current_steps": 1500, "total_steps": 2001, "loss": 0.2817, "lr": 9.971028104841869e-07, "epoch": 2.2484541877459248, "percentage": 74.96, "elapsed_time": "13:26:17", "remaining_time": "4:29:18"}
153
+ {"current_steps": 1510, "total_steps": 2001, "loss": 0.2821, "lr": 9.604418328943447e-07, "epoch": 2.2634438823308973, "percentage": 75.46, "elapsed_time": "13:31:32", "remaining_time": "4:23:53"}
154
+ {"current_steps": 1520, "total_steps": 2001, "loss": 0.2835, "lr": 9.243066130081713e-07, "epoch": 2.2784335769158703, "percentage": 75.96, "elapsed_time": "13:36:49", "remaining_time": "4:18:28"}
155
+ {"current_steps": 1530, "total_steps": 2001, "loss": 0.2834, "lr": 8.887094909701547e-07, "epoch": 2.2934232715008434, "percentage": 76.46, "elapsed_time": "13:42:03", "remaining_time": "4:13:03"}
156
+ {"current_steps": 1540, "total_steps": 2001, "loss": 0.2842, "lr": 8.536626231648595e-07, "epoch": 2.308412966085816, "percentage": 76.96, "elapsed_time": "13:47:17", "remaining_time": "4:07:39"}
157
+ {"current_steps": 1550, "total_steps": 2001, "loss": 0.2832, "lr": 8.191779780655354e-07, "epoch": 2.323402660670789, "percentage": 77.46, "elapsed_time": "13:52:33", "remaining_time": "4:02:14"}
158
+ {"current_steps": 1560, "total_steps": 2001, "loss": 0.2843, "lr": 7.852673321468918e-07, "epoch": 2.3383923552557615, "percentage": 77.96, "elapsed_time": "13:57:49", "remaining_time": "3:56:50"}
159
+ {"current_steps": 1570, "total_steps": 2001, "loss": 0.2823, "lr": 7.519422658634443e-07, "epoch": 2.3533820498407345, "percentage": 78.46, "elapsed_time": "14:03:04", "remaining_time": "3:51:26"}
160
+ {"current_steps": 1580, "total_steps": 2001, "loss": 0.2828, "lr": 7.19214159694799e-07, "epoch": 2.3683717444257075, "percentage": 78.96, "elapsed_time": "14:08:19", "remaining_time": "3:46:02"}
161
+ {"current_steps": 1590, "total_steps": 2001, "loss": 0.2825, "lr": 6.870941902592307e-07, "epoch": 2.38336143901068, "percentage": 79.46, "elapsed_time": "14:13:33", "remaining_time": "3:40:38"}
162
+ {"current_steps": 1600, "total_steps": 2001, "loss": 0.2822, "lr": 6.555933264968753e-07, "epoch": 2.398351133595653, "percentage": 79.96, "elapsed_time": "14:18:47", "remaining_time": "3:35:14"}
163
+ {"current_steps": 1610, "total_steps": 2001, "loss": 0.2836, "lr": 6.247223259238511e-07, "epoch": 2.4133408281806257, "percentage": 80.46, "elapsed_time": "14:24:04", "remaining_time": "3:29:50"}
164
+ {"current_steps": 1620, "total_steps": 2001, "loss": 0.2826, "lr": 5.944917309585721e-07, "epoch": 2.4283305227655987, "percentage": 80.96, "elapsed_time": "14:29:21", "remaining_time": "3:24:27"}
165
+ {"current_steps": 1630, "total_steps": 2001, "loss": 0.2812, "lr": 5.649118653215243e-07, "epoch": 2.4433202173505713, "percentage": 81.46, "elapsed_time": "14:34:39", "remaining_time": "3:19:04"}
166
+ {"current_steps": 1640, "total_steps": 2001, "loss": 0.2831, "lr": 5.359928305097245e-07, "epoch": 2.4583099119355443, "percentage": 81.96, "elapsed_time": "14:39:55", "remaining_time": "3:13:41"}
167
+ {"current_steps": 1650, "total_steps": 2001, "loss": 0.2811, "lr": 5.077445023470676e-07, "epoch": 2.4732996065205173, "percentage": 82.46, "elapsed_time": "14:45:12", "remaining_time": "3:08:18"}
168
+ {"current_steps": 1660, "total_steps": 2001, "loss": 0.2808, "lr": 4.801765276117437e-07, "epoch": 2.48828930110549, "percentage": 82.96, "elapsed_time": "14:50:26", "remaining_time": "3:02:54"}
169
+ {"current_steps": 1670, "total_steps": 2001, "loss": 0.2794, "lr": 4.5329832074186953e-07, "epoch": 2.503278995690463, "percentage": 83.46, "elapsed_time": "14:55:41", "remaining_time": "2:57:31"}
170
+ {"current_steps": 1680, "total_steps": 2001, "loss": 0.2826, "lr": 4.271190606204681e-07, "epoch": 2.518268690275436, "percentage": 83.96, "elapsed_time": "15:00:57", "remaining_time": "2:52:08"}
171
+ {"current_steps": 1690, "total_steps": 2001, "loss": 0.2828, "lr": 4.016476874408867e-07, "epoch": 2.5332583848604084, "percentage": 84.46, "elapsed_time": "15:06:12", "remaining_time": "2:46:45"}
172
+ {"current_steps": 1700, "total_steps": 2001, "loss": 0.2803, "lr": 3.768928996537319e-07, "epoch": 2.5482480794453815, "percentage": 84.96, "elapsed_time": "15:11:25", "remaining_time": "2:41:22"}
173
+ {"current_steps": 1710, "total_steps": 2001, "loss": 0.2799, "lr": 3.528631509963562e-07, "epoch": 2.563237774030354, "percentage": 85.46, "elapsed_time": "15:16:40", "remaining_time": "2:35:59"}
174
+ {"current_steps": 1720, "total_steps": 2001, "loss": 0.2829, "lr": 3.2956664760591143e-07, "epoch": 2.578227468615327, "percentage": 85.96, "elapsed_time": "15:21:56", "remaining_time": "2:30:37"}
175
+ {"current_steps": 1730, "total_steps": 2001, "loss": 0.2832, "lr": 3.0701134521696373e-07, "epoch": 2.5932171632002996, "percentage": 86.46, "elapsed_time": "15:27:11", "remaining_time": "2:25:14"}
176
+ {"current_steps": 1740, "total_steps": 2001, "loss": 0.2815, "lr": 2.852049464446158e-07, "epoch": 2.6082068577852726, "percentage": 86.96, "elapsed_time": "15:32:27", "remaining_time": "2:19:52"}
177
+ {"current_steps": 1750, "total_steps": 2001, "loss": 0.2818, "lr": 2.641548981540712e-07, "epoch": 2.6231965523702456, "percentage": 87.46, "elapsed_time": "15:37:42", "remaining_time": "2:14:29"}
178
+ {"current_steps": 1760, "total_steps": 2001, "loss": 0.2804, "lr": 2.4386838891753695e-07, "epoch": 2.638186246955218, "percentage": 87.96, "elapsed_time": "15:42:59", "remaining_time": "2:09:07"}
179
+ {"current_steps": 1770, "total_steps": 2001, "loss": 0.2802, "lr": 2.2435234655933363e-07, "epoch": 2.653175941540191, "percentage": 88.46, "elapsed_time": "15:48:15", "remaining_time": "2:03:45"}
180
+ {"current_steps": 1780, "total_steps": 2001, "loss": 0.2824, "lr": 2.0561343579004716e-07, "epoch": 2.668165636125164, "percentage": 88.96, "elapsed_time": "15:53:30", "remaining_time": "1:58:23"}
181
+ {"current_steps": 1790, "total_steps": 2001, "loss": 0.2818, "lr": 1.8765805593053855e-07, "epoch": 2.683155330710137, "percentage": 89.46, "elapsed_time": "15:58:44", "remaining_time": "1:53:00"}
182
+ {"current_steps": 1800, "total_steps": 2001, "loss": 0.2852, "lr": 1.7049233872658084e-07, "epoch": 2.6981450252951094, "percentage": 89.96, "elapsed_time": "16:03:59", "remaining_time": "1:47:38"}
183
+ {"current_steps": 1810, "total_steps": 2001, "loss": 0.2824, "lr": 1.5412214625487336e-07, "epoch": 2.7131347198800824, "percentage": 90.45, "elapsed_time": "16:09:14", "remaining_time": "1:42:16"}
184
+ {"current_steps": 1820, "total_steps": 2001, "loss": 0.2814, "lr": 1.3855306892114867e-07, "epoch": 2.7281244144650554, "percentage": 90.95, "elapsed_time": "16:14:28", "remaining_time": "1:36:54"}
185
+ {"current_steps": 1830, "total_steps": 2001, "loss": 0.2774, "lr": 1.2379042355105314e-07, "epoch": 2.743114109050028, "percentage": 91.45, "elapsed_time": "16:19:45", "remaining_time": "1:31:33"}
186
+ {"current_steps": 1840, "total_steps": 2001, "loss": 0.2836, "lr": 1.0983925157445674e-07, "epoch": 2.758103803635001, "percentage": 91.95, "elapsed_time": "16:25:02", "remaining_time": "1:26:11"}
187
+ {"current_steps": 1850, "total_steps": 2001, "loss": 0.2821, "lr": 9.670431730380847e-08, "epoch": 2.773093498219974, "percentage": 92.45, "elapsed_time": "16:30:19", "remaining_time": "1:20:49"}
188
+ {"current_steps": 1860, "total_steps": 2001, "loss": 0.2816, "lr": 8.439010630712841e-08, "epoch": 2.7880831928049465, "percentage": 92.95, "elapsed_time": "16:35:35", "remaining_time": "1:15:28"}
189
+ {"current_steps": 1870, "total_steps": 2001, "loss": 0.2808, "lr": 7.29008238761919e-08, "epoch": 2.8030728873899196, "percentage": 93.45, "elapsed_time": "16:40:50", "remaining_time": "1:10:06"}
190
+ {"current_steps": 1880, "total_steps": 2001, "loss": 0.2819, "lr": 6.224039359042284e-08, "epoch": 2.818062581974892, "percentage": 93.95, "elapsed_time": "16:46:03", "remaining_time": "1:04:45"}
191
+ {"current_steps": 1890, "total_steps": 2001, "loss": 0.2815, "lr": 5.2412455976998125e-08, "epoch": 2.833052276559865, "percentage": 94.45, "elapsed_time": "16:51:19", "remaining_time": "0:59:23"}
192
+ {"current_steps": 1900, "total_steps": 2001, "loss": 0.2823, "lr": 4.342036726760895e-08, "epoch": 2.8480419711448377, "percentage": 94.95, "elapsed_time": "16:56:34", "remaining_time": "0:54:02"}
193
+ {"current_steps": 1910, "total_steps": 2001, "loss": 0.2847, "lr": 3.5267198252312286e-08, "epoch": 2.8630316657298107, "percentage": 95.45, "elapsed_time": "17:01:50", "remaining_time": "0:48:41"}
194
+ {"current_steps": 1920, "total_steps": 2001, "loss": 0.2794, "lr": 2.795573323085721e-08, "epoch": 2.8780213603147837, "percentage": 95.95, "elapsed_time": "17:07:06", "remaining_time": "0:43:19"}
195
+ {"current_steps": 1930, "total_steps": 2001, "loss": 0.2798, "lr": 2.148846906185109e-08, "epoch": 2.8930110548997563, "percentage": 96.45, "elapsed_time": "17:12:22", "remaining_time": "0:37:58"}
196
+ {"current_steps": 1940, "total_steps": 2001, "loss": 0.2798, "lr": 1.586761431008249e-08, "epoch": 2.9080007494847293, "percentage": 96.95, "elapsed_time": "17:17:40", "remaining_time": "0:32:37"}
197
+ {"current_steps": 1950, "total_steps": 2001, "loss": 0.281, "lr": 1.109508849230001e-08, "epoch": 2.9229904440697023, "percentage": 97.45, "elapsed_time": "17:22:59", "remaining_time": "0:27:16"}
198
+ {"current_steps": 1960, "total_steps": 2001, "loss": 0.2805, "lr": 7.172521421698331e-09, "epoch": 2.937980138654675, "percentage": 97.95, "elapsed_time": "17:28:14", "remaining_time": "0:21:55"}
199
+ {"current_steps": 1970, "total_steps": 2001, "loss": 0.2816, "lr": 4.101252651338428e-09, "epoch": 2.952969833239648, "percentage": 98.45, "elapsed_time": "17:33:32", "remaining_time": "0:16:34"}
200
+ {"current_steps": 1980, "total_steps": 2001, "loss": 0.2806, "lr": 1.8823310166918297e-09, "epoch": 2.9679595278246205, "percentage": 98.95, "elapsed_time": "17:38:49", "remaining_time": "0:11:13"}
201
+ {"current_steps": 1990, "total_steps": 2001, "loss": 0.28, "lr": 5.165142774640752e-10, "epoch": 2.9829492224095935, "percentage": 99.45, "elapsed_time": "17:44:06", "remaining_time": "0:05:52"}
202
+ {"current_steps": 2000, "total_steps": 2001, "loss": 0.2791, "lr": 4.26885882032213e-12, "epoch": 2.997938916994566, "percentage": 99.95, "elapsed_time": "17:49:21", "remaining_time": "0:00:32"}