sedrickkeh commited on
Commit
3c454c6
·
verified ·
1 Parent(s): 512771c

Training in progress, epoch 3

Browse files
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e68d997793febb5c1418c2527adfac086d861d3ad5baa340d20474b2bf777be
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077cdc8724c09462dc15cc02420f50952d9f19d76c10eed1b6dc1d638fd15d66
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4827cd691ede4136d3efc496aea6a85016ead1033c4f5748f9b7495a25c698f2
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc00707882dca20b53ec7384fc8b26829a5df14ded3ce652ab75e6eda25782ac
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbbec7d672a39900c4c2a6770dd73d0c4d87526fca138082e0d62f35818c7845
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30392eba21f1a74f5a9d1fc52de5c0faa287ea71f9dbe2af1646718c67005a72
3
  size 4540516344
trainer_log.jsonl CHANGED
@@ -149,3 +149,52 @@
149
  {"current_steps": 1470, "total_steps": 1972, "loss": 0.403, "lr": 5e-06, "epoch": 2.978227848101266, "percentage": 74.54, "elapsed_time": "11:39:15", "remaining_time": "3:58:47"}
150
  {"current_steps": 1480, "total_steps": 1972, "loss": 0.4008, "lr": 5e-06, "epoch": 2.9984810126582278, "percentage": 75.05, "elapsed_time": "11:43:56", "remaining_time": "3:54:00"}
151
  {"current_steps": 1480, "total_steps": 1972, "eval_loss": 0.1451626569032669, "epoch": 2.9984810126582278, "percentage": 75.05, "elapsed_time": "11:48:23", "remaining_time": "3:55:29"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  {"current_steps": 1470, "total_steps": 1972, "loss": 0.403, "lr": 5e-06, "epoch": 2.978227848101266, "percentage": 74.54, "elapsed_time": "11:39:15", "remaining_time": "3:58:47"}
150
  {"current_steps": 1480, "total_steps": 1972, "loss": 0.4008, "lr": 5e-06, "epoch": 2.9984810126582278, "percentage": 75.05, "elapsed_time": "11:43:56", "remaining_time": "3:54:00"}
151
  {"current_steps": 1480, "total_steps": 1972, "eval_loss": 0.1451626569032669, "epoch": 2.9984810126582278, "percentage": 75.05, "elapsed_time": "11:48:23", "remaining_time": "3:55:29"}
152
+ {"current_steps": 1490, "total_steps": 1972, "loss": 0.3123, "lr": 5e-06, "epoch": 3.019240506329114, "percentage": 75.56, "elapsed_time": "11:53:44", "remaining_time": "3:50:53"}
153
+ {"current_steps": 1500, "total_steps": 1972, "loss": 0.2924, "lr": 5e-06, "epoch": 3.039493670886076, "percentage": 76.06, "elapsed_time": "11:58:25", "remaining_time": "3:46:04"}
154
+ {"current_steps": 1510, "total_steps": 1972, "loss": 0.2942, "lr": 5e-06, "epoch": 3.059746835443038, "percentage": 76.57, "elapsed_time": "12:03:07", "remaining_time": "3:41:14"}
155
+ {"current_steps": 1520, "total_steps": 1972, "loss": 0.2933, "lr": 5e-06, "epoch": 3.08, "percentage": 77.08, "elapsed_time": "12:07:48", "remaining_time": "3:36:25"}
156
+ {"current_steps": 1530, "total_steps": 1972, "loss": 0.2944, "lr": 5e-06, "epoch": 3.100253164556962, "percentage": 77.59, "elapsed_time": "12:12:30", "remaining_time": "3:31:36"}
157
+ {"current_steps": 1540, "total_steps": 1972, "loss": 0.292, "lr": 5e-06, "epoch": 3.120506329113924, "percentage": 78.09, "elapsed_time": "12:17:12", "remaining_time": "3:26:48"}
158
+ {"current_steps": 1550, "total_steps": 1972, "loss": 0.29, "lr": 5e-06, "epoch": 3.140759493670886, "percentage": 78.6, "elapsed_time": "12:21:53", "remaining_time": "3:21:59"}
159
+ {"current_steps": 1560, "total_steps": 1972, "loss": 0.2964, "lr": 5e-06, "epoch": 3.161012658227848, "percentage": 79.11, "elapsed_time": "12:26:35", "remaining_time": "3:17:10"}
160
+ {"current_steps": 1570, "total_steps": 1972, "loss": 0.2967, "lr": 5e-06, "epoch": 3.1812658227848103, "percentage": 79.61, "elapsed_time": "12:31:16", "remaining_time": "3:12:21"}
161
+ {"current_steps": 1580, "total_steps": 1972, "loss": 0.2999, "lr": 5e-06, "epoch": 3.201518987341772, "percentage": 80.12, "elapsed_time": "12:35:57", "remaining_time": "3:07:33"}
162
+ {"current_steps": 1590, "total_steps": 1972, "loss": 0.3003, "lr": 5e-06, "epoch": 3.221772151898734, "percentage": 80.63, "elapsed_time": "12:40:39", "remaining_time": "3:02:44"}
163
+ {"current_steps": 1600, "total_steps": 1972, "loss": 0.3007, "lr": 5e-06, "epoch": 3.2420253164556962, "percentage": 81.14, "elapsed_time": "12:45:20", "remaining_time": "2:57:56"}
164
+ {"current_steps": 1610, "total_steps": 1972, "loss": 0.2992, "lr": 5e-06, "epoch": 3.2622784810126584, "percentage": 81.64, "elapsed_time": "12:50:02", "remaining_time": "2:53:08"}
165
+ {"current_steps": 1620, "total_steps": 1972, "loss": 0.3016, "lr": 5e-06, "epoch": 3.28253164556962, "percentage": 82.15, "elapsed_time": "12:54:43", "remaining_time": "2:48:20"}
166
+ {"current_steps": 1630, "total_steps": 1972, "loss": 0.3046, "lr": 5e-06, "epoch": 3.302784810126582, "percentage": 82.66, "elapsed_time": "12:59:24", "remaining_time": "2:43:31"}
167
+ {"current_steps": 1640, "total_steps": 1972, "loss": 0.3014, "lr": 5e-06, "epoch": 3.3230379746835443, "percentage": 83.16, "elapsed_time": "13:04:05", "remaining_time": "2:38:43"}
168
+ {"current_steps": 1650, "total_steps": 1972, "loss": 0.3031, "lr": 5e-06, "epoch": 3.3432911392405065, "percentage": 83.67, "elapsed_time": "13:08:47", "remaining_time": "2:33:56"}
169
+ {"current_steps": 1660, "total_steps": 1972, "loss": 0.3052, "lr": 5e-06, "epoch": 3.363544303797468, "percentage": 84.18, "elapsed_time": "13:13:28", "remaining_time": "2:29:08"}
170
+ {"current_steps": 1670, "total_steps": 1972, "loss": 0.2994, "lr": 5e-06, "epoch": 3.3837974683544303, "percentage": 84.69, "elapsed_time": "13:18:10", "remaining_time": "2:24:20"}
171
+ {"current_steps": 1680, "total_steps": 1972, "loss": 0.2987, "lr": 5e-06, "epoch": 3.4040506329113924, "percentage": 85.19, "elapsed_time": "13:22:52", "remaining_time": "2:19:32"}
172
+ {"current_steps": 1690, "total_steps": 1972, "loss": 0.3028, "lr": 5e-06, "epoch": 3.4243037974683546, "percentage": 85.7, "elapsed_time": "13:27:33", "remaining_time": "2:14:45"}
173
+ {"current_steps": 1700, "total_steps": 1972, "loss": 0.3012, "lr": 5e-06, "epoch": 3.4445569620253167, "percentage": 86.21, "elapsed_time": "13:32:15", "remaining_time": "2:09:57"}
174
+ {"current_steps": 1710, "total_steps": 1972, "loss": 0.3033, "lr": 5e-06, "epoch": 3.4648101265822784, "percentage": 86.71, "elapsed_time": "13:36:56", "remaining_time": "2:05:10"}
175
+ {"current_steps": 1720, "total_steps": 1972, "loss": 0.3078, "lr": 5e-06, "epoch": 3.4850632911392405, "percentage": 87.22, "elapsed_time": "13:41:37", "remaining_time": "2:00:22"}
176
+ {"current_steps": 1730, "total_steps": 1972, "loss": 0.3076, "lr": 5e-06, "epoch": 3.5053164556962026, "percentage": 87.73, "elapsed_time": "13:46:19", "remaining_time": "1:55:35"}
177
+ {"current_steps": 1740, "total_steps": 1972, "loss": 0.3058, "lr": 5e-06, "epoch": 3.5255696202531643, "percentage": 88.24, "elapsed_time": "13:51:00", "remaining_time": "1:50:48"}
178
+ {"current_steps": 1750, "total_steps": 1972, "loss": 0.3092, "lr": 5e-06, "epoch": 3.5458227848101265, "percentage": 88.74, "elapsed_time": "13:55:42", "remaining_time": "1:46:00"}
179
+ {"current_steps": 1760, "total_steps": 1972, "loss": 0.3031, "lr": 5e-06, "epoch": 3.5660759493670886, "percentage": 89.25, "elapsed_time": "14:00:24", "remaining_time": "1:41:13"}
180
+ {"current_steps": 1770, "total_steps": 1972, "loss": 0.3097, "lr": 5e-06, "epoch": 3.5863291139240507, "percentage": 89.76, "elapsed_time": "14:05:05", "remaining_time": "1:36:26"}
181
+ {"current_steps": 1780, "total_steps": 1972, "loss": 0.3076, "lr": 5e-06, "epoch": 3.606582278481013, "percentage": 90.26, "elapsed_time": "14:09:47", "remaining_time": "1:31:39"}
182
+ {"current_steps": 1790, "total_steps": 1972, "loss": 0.3054, "lr": 5e-06, "epoch": 3.6268354430379746, "percentage": 90.77, "elapsed_time": "14:14:29", "remaining_time": "1:26:52"}
183
+ {"current_steps": 1800, "total_steps": 1972, "loss": 0.3089, "lr": 5e-06, "epoch": 3.6470886075949367, "percentage": 91.28, "elapsed_time": "14:19:10", "remaining_time": "1:22:05"}
184
+ {"current_steps": 1810, "total_steps": 1972, "loss": 0.3141, "lr": 5e-06, "epoch": 3.667341772151899, "percentage": 91.78, "elapsed_time": "14:23:52", "remaining_time": "1:17:19"}
185
+ {"current_steps": 1820, "total_steps": 1972, "loss": 0.3093, "lr": 5e-06, "epoch": 3.6875949367088605, "percentage": 92.29, "elapsed_time": "14:28:33", "remaining_time": "1:12:32"}
186
+ {"current_steps": 1830, "total_steps": 1972, "loss": 0.3105, "lr": 5e-06, "epoch": 3.7078481012658226, "percentage": 92.8, "elapsed_time": "14:33:14", "remaining_time": "1:07:45"}
187
+ {"current_steps": 1840, "total_steps": 1972, "loss": 0.3124, "lr": 5e-06, "epoch": 3.728101265822785, "percentage": 93.31, "elapsed_time": "14:37:56", "remaining_time": "1:02:58"}
188
+ {"current_steps": 1850, "total_steps": 1972, "loss": 0.3099, "lr": 5e-06, "epoch": 3.748354430379747, "percentage": 93.81, "elapsed_time": "14:42:38", "remaining_time": "0:58:12"}
189
+ {"current_steps": 1860, "total_steps": 1972, "loss": 0.3173, "lr": 5e-06, "epoch": 3.768607594936709, "percentage": 94.32, "elapsed_time": "14:47:19", "remaining_time": "0:53:25"}
190
+ {"current_steps": 1870, "total_steps": 1972, "loss": 0.3145, "lr": 5e-06, "epoch": 3.7888607594936707, "percentage": 94.83, "elapsed_time": "14:52:01", "remaining_time": "0:48:39"}
191
+ {"current_steps": 1880, "total_steps": 1972, "loss": 0.3154, "lr": 5e-06, "epoch": 3.809113924050633, "percentage": 95.33, "elapsed_time": "14:56:42", "remaining_time": "0:43:52"}
192
+ {"current_steps": 1890, "total_steps": 1972, "loss": 0.315, "lr": 5e-06, "epoch": 3.829367088607595, "percentage": 95.84, "elapsed_time": "15:01:24", "remaining_time": "0:39:06"}
193
+ {"current_steps": 1900, "total_steps": 1972, "loss": 0.3157, "lr": 5e-06, "epoch": 3.8496202531645567, "percentage": 96.35, "elapsed_time": "15:06:05", "remaining_time": "0:34:20"}
194
+ {"current_steps": 1910, "total_steps": 1972, "loss": 0.3117, "lr": 5e-06, "epoch": 3.869873417721519, "percentage": 96.86, "elapsed_time": "15:10:47", "remaining_time": "0:29:33"}
195
+ {"current_steps": 1920, "total_steps": 1972, "loss": 0.3172, "lr": 5e-06, "epoch": 3.890126582278481, "percentage": 97.36, "elapsed_time": "15:15:28", "remaining_time": "0:24:47"}
196
+ {"current_steps": 1930, "total_steps": 1972, "loss": 0.3147, "lr": 5e-06, "epoch": 3.910379746835443, "percentage": 97.87, "elapsed_time": "15:20:10", "remaining_time": "0:20:01"}
197
+ {"current_steps": 1940, "total_steps": 1972, "loss": 0.312, "lr": 5e-06, "epoch": 3.9306329113924052, "percentage": 98.38, "elapsed_time": "15:24:51", "remaining_time": "0:15:15"}
198
+ {"current_steps": 1950, "total_steps": 1972, "loss": 0.3164, "lr": 5e-06, "epoch": 3.9508860759493674, "percentage": 98.88, "elapsed_time": "15:29:33", "remaining_time": "0:10:29"}
199
+ {"current_steps": 1960, "total_steps": 1972, "loss": 0.3175, "lr": 5e-06, "epoch": 3.971139240506329, "percentage": 99.39, "elapsed_time": "15:34:15", "remaining_time": "0:05:43"}
200
+ {"current_steps": 1970, "total_steps": 1972, "loss": 0.3172, "lr": 5e-06, "epoch": 3.991392405063291, "percentage": 99.9, "elapsed_time": "15:38:56", "remaining_time": "0:00:57"}