sedrickkeh commited on
Commit
95ca10d
·
verified ·
1 Parent(s): e221b34

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e90be5805a711796e02b4a0ce61b371dd66eb865bd8c2e94c7ca2b3ed1a4602d
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e2e0d4ed10d23430ba789692ac46870bd29c4310a988d11e2888351b23db307
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:242883474fa766abd74812cb0276fb129859a7f1d851623ba3f5d0464729ad74
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ea4137c4b6030a62898d7fa61f01dd7d9c42c0d503e0e8ac6f4cb0f26293709
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a712913833b311647483e447a3a6461cb2ab442f31cd2a6ae1842ca4a8967780
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff59135a66dcd49f29f7f93094e6a6636dd2abf8ae387fada23bc41677b4f87
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a170d846f0bbff455eaaec994461bc763328244ccb7e473a660c885c380ece9d
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a083a56c7a700e7e4e986a49bc3eb3cdb2014ba5de0268ac80bc52da1be8ec33
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -67,3 +67,73 @@
67
  {"current_steps": 670, "total_steps": 2052, "loss": 0.8121, "lr": 5e-06, "epoch": 0.978280708158423, "percentage": 32.65, "elapsed_time": "10:44:41", "remaining_time": "22:09:47"}
68
  {"current_steps": 680, "total_steps": 2052, "loss": 0.8154, "lr": 5e-06, "epoch": 0.9928819127578025, "percentage": 33.14, "elapsed_time": "10:54:18", "remaining_time": "22:00:10"}
69
  {"current_steps": 684, "total_steps": 2052, "eval_loss": 0.8185040950775146, "epoch": 0.9987223945975543, "percentage": 33.33, "elapsed_time": "11:11:08", "remaining_time": "22:22:17"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  {"current_steps": 670, "total_steps": 2052, "loss": 0.8121, "lr": 5e-06, "epoch": 0.978280708158423, "percentage": 32.65, "elapsed_time": "10:44:41", "remaining_time": "22:09:47"}
68
  {"current_steps": 680, "total_steps": 2052, "loss": 0.8154, "lr": 5e-06, "epoch": 0.9928819127578025, "percentage": 33.14, "elapsed_time": "10:54:18", "remaining_time": "22:00:10"}
69
  {"current_steps": 684, "total_steps": 2052, "eval_loss": 0.8185040950775146, "epoch": 0.9987223945975543, "percentage": 33.33, "elapsed_time": "11:11:08", "remaining_time": "22:22:17"}
70
+ {"current_steps": 690, "total_steps": 2052, "loss": 0.8068, "lr": 5e-06, "epoch": 1.0074831173571819, "percentage": 33.63, "elapsed_time": "11:17:19", "remaining_time": "22:16:58"}
71
+ {"current_steps": 700, "total_steps": 2052, "loss": 0.7796, "lr": 5e-06, "epoch": 1.0220843219565614, "percentage": 34.11, "elapsed_time": "11:26:58", "remaining_time": "22:06:50"}
72
+ {"current_steps": 710, "total_steps": 2052, "loss": 0.7784, "lr": 5e-06, "epoch": 1.036685526555941, "percentage": 34.6, "elapsed_time": "11:36:37", "remaining_time": "21:56:42"}
73
+ {"current_steps": 720, "total_steps": 2052, "loss": 0.7789, "lr": 5e-06, "epoch": 1.0512867311553202, "percentage": 35.09, "elapsed_time": "11:46:15", "remaining_time": "21:46:34"}
74
+ {"current_steps": 730, "total_steps": 2052, "loss": 0.7785, "lr": 5e-06, "epoch": 1.0658879357546998, "percentage": 35.58, "elapsed_time": "11:55:52", "remaining_time": "21:36:25"}
75
+ {"current_steps": 740, "total_steps": 2052, "loss": 0.7788, "lr": 5e-06, "epoch": 1.0804891403540793, "percentage": 36.06, "elapsed_time": "12:05:31", "remaining_time": "21:26:20"}
76
+ {"current_steps": 750, "total_steps": 2052, "loss": 0.7791, "lr": 5e-06, "epoch": 1.0950903449534586, "percentage": 36.55, "elapsed_time": "12:15:09", "remaining_time": "21:16:14"}
77
+ {"current_steps": 760, "total_steps": 2052, "loss": 0.7772, "lr": 5e-06, "epoch": 1.109691549552838, "percentage": 37.04, "elapsed_time": "12:24:47", "remaining_time": "21:06:08"}
78
+ {"current_steps": 770, "total_steps": 2052, "loss": 0.778, "lr": 5e-06, "epoch": 1.1242927541522176, "percentage": 37.52, "elapsed_time": "12:34:25", "remaining_time": "20:56:04"}
79
+ {"current_steps": 780, "total_steps": 2052, "loss": 0.7797, "lr": 5e-06, "epoch": 1.138893958751597, "percentage": 38.01, "elapsed_time": "12:44:04", "remaining_time": "20:46:01"}
80
+ {"current_steps": 790, "total_steps": 2052, "loss": 0.7776, "lr": 5e-06, "epoch": 1.1534951633509765, "percentage": 38.5, "elapsed_time": "12:53:42", "remaining_time": "20:35:59"}
81
+ {"current_steps": 800, "total_steps": 2052, "loss": 0.7812, "lr": 5e-06, "epoch": 1.168096367950356, "percentage": 38.99, "elapsed_time": "13:03:21", "remaining_time": "20:25:57"}
82
+ {"current_steps": 810, "total_steps": 2052, "loss": 0.7742, "lr": 5e-06, "epoch": 1.1826975725497353, "percentage": 39.47, "elapsed_time": "13:13:00", "remaining_time": "20:15:56"}
83
+ {"current_steps": 820, "total_steps": 2052, "loss": 0.7821, "lr": 5e-06, "epoch": 1.1972987771491148, "percentage": 39.96, "elapsed_time": "13:22:38", "remaining_time": "20:05:54"}
84
+ {"current_steps": 830, "total_steps": 2052, "loss": 0.7832, "lr": 5e-06, "epoch": 1.2118999817484943, "percentage": 40.45, "elapsed_time": "13:32:16", "remaining_time": "19:55:54"}
85
+ {"current_steps": 840, "total_steps": 2052, "loss": 0.7764, "lr": 5e-06, "epoch": 1.2265011863478736, "percentage": 40.94, "elapsed_time": "13:41:54", "remaining_time": "19:45:53"}
86
+ {"current_steps": 850, "total_steps": 2052, "loss": 0.776, "lr": 5e-06, "epoch": 1.2411023909472532, "percentage": 41.42, "elapsed_time": "13:51:33", "remaining_time": "19:35:55"}
87
+ {"current_steps": 860, "total_steps": 2052, "loss": 0.7746, "lr": 5e-06, "epoch": 1.2557035955466325, "percentage": 41.91, "elapsed_time": "14:01:11", "remaining_time": "19:25:56"}
88
+ {"current_steps": 870, "total_steps": 2052, "loss": 0.7725, "lr": 5e-06, "epoch": 1.270304800146012, "percentage": 42.4, "elapsed_time": "14:10:50", "remaining_time": "19:15:57"}
89
+ {"current_steps": 880, "total_steps": 2052, "loss": 0.7787, "lr": 5e-06, "epoch": 1.2849060047453915, "percentage": 42.88, "elapsed_time": "14:20:29", "remaining_time": "19:06:00"}
90
+ {"current_steps": 890, "total_steps": 2052, "loss": 0.7712, "lr": 5e-06, "epoch": 1.299507209344771, "percentage": 43.37, "elapsed_time": "14:30:07", "remaining_time": "18:56:02"}
91
+ {"current_steps": 900, "total_steps": 2052, "loss": 0.777, "lr": 5e-06, "epoch": 1.3141084139441503, "percentage": 43.86, "elapsed_time": "14:39:45", "remaining_time": "18:46:04"}
92
+ {"current_steps": 910, "total_steps": 2052, "loss": 0.7791, "lr": 5e-06, "epoch": 1.3287096185435299, "percentage": 44.35, "elapsed_time": "14:49:23", "remaining_time": "18:36:08"}
93
+ {"current_steps": 920, "total_steps": 2052, "loss": 0.7813, "lr": 5e-06, "epoch": 1.3433108231429092, "percentage": 44.83, "elapsed_time": "14:59:02", "remaining_time": "18:26:12"}
94
+ {"current_steps": 930, "total_steps": 2052, "loss": 0.7718, "lr": 5e-06, "epoch": 1.3579120277422887, "percentage": 45.32, "elapsed_time": "15:08:40", "remaining_time": "18:16:16"}
95
+ {"current_steps": 940, "total_steps": 2052, "loss": 0.7716, "lr": 5e-06, "epoch": 1.3725132323416682, "percentage": 45.81, "elapsed_time": "15:18:18", "remaining_time": "18:06:20"}
96
+ {"current_steps": 950, "total_steps": 2052, "loss": 0.7801, "lr": 5e-06, "epoch": 1.3871144369410477, "percentage": 46.3, "elapsed_time": "15:27:58", "remaining_time": "17:56:26"}
97
+ {"current_steps": 960, "total_steps": 2052, "loss": 0.7749, "lr": 5e-06, "epoch": 1.401715641540427, "percentage": 46.78, "elapsed_time": "15:37:37", "remaining_time": "17:46:33"}
98
+ {"current_steps": 970, "total_steps": 2052, "loss": 0.7809, "lr": 5e-06, "epoch": 1.4163168461398066, "percentage": 47.27, "elapsed_time": "15:47:17", "remaining_time": "17:36:40"}
99
+ {"current_steps": 980, "total_steps": 2052, "loss": 0.7769, "lr": 5e-06, "epoch": 1.4309180507391859, "percentage": 47.76, "elapsed_time": "15:56:55", "remaining_time": "17:26:46"}
100
+ {"current_steps": 990, "total_steps": 2052, "loss": 0.7733, "lr": 5e-06, "epoch": 1.4455192553385654, "percentage": 48.25, "elapsed_time": "16:06:35", "remaining_time": "17:16:52"}
101
+ {"current_steps": 1000, "total_steps": 2052, "loss": 0.778, "lr": 5e-06, "epoch": 1.460120459937945, "percentage": 48.73, "elapsed_time": "16:16:14", "remaining_time": "17:06:59"}
102
+ {"current_steps": 1010, "total_steps": 2052, "loss": 0.7755, "lr": 5e-06, "epoch": 1.4747216645373245, "percentage": 49.22, "elapsed_time": "16:25:52", "remaining_time": "16:57:06"}
103
+ {"current_steps": 1020, "total_steps": 2052, "loss": 0.7762, "lr": 5e-06, "epoch": 1.4893228691367038, "percentage": 49.71, "elapsed_time": "16:35:31", "remaining_time": "16:47:14"}
104
+ {"current_steps": 1030, "total_steps": 2052, "loss": 0.775, "lr": 5e-06, "epoch": 1.5039240737360833, "percentage": 50.19, "elapsed_time": "16:45:09", "remaining_time": "16:37:21"}
105
+ {"current_steps": 1040, "total_steps": 2052, "loss": 0.7791, "lr": 5e-06, "epoch": 1.5185252783354626, "percentage": 50.68, "elapsed_time": "16:54:47", "remaining_time": "16:27:28"}
106
+ {"current_steps": 1050, "total_steps": 2052, "loss": 0.7732, "lr": 5e-06, "epoch": 1.533126482934842, "percentage": 51.17, "elapsed_time": "17:04:26", "remaining_time": "16:17:36"}
107
+ {"current_steps": 1060, "total_steps": 2052, "loss": 0.7734, "lr": 5e-06, "epoch": 1.5477276875342216, "percentage": 51.66, "elapsed_time": "17:14:04", "remaining_time": "16:07:44"}
108
+ {"current_steps": 1070, "total_steps": 2052, "loss": 0.7786, "lr": 5e-06, "epoch": 1.5623288921336012, "percentage": 52.14, "elapsed_time": "17:23:43", "remaining_time": "15:57:52"}
109
+ {"current_steps": 1080, "total_steps": 2052, "loss": 0.772, "lr": 5e-06, "epoch": 1.5769300967329805, "percentage": 52.63, "elapsed_time": "17:33:20", "remaining_time": "15:48:00"}
110
+ {"current_steps": 1090, "total_steps": 2052, "loss": 0.7788, "lr": 5e-06, "epoch": 1.5915313013323598, "percentage": 53.12, "elapsed_time": "17:42:58", "remaining_time": "15:38:08"}
111
+ {"current_steps": 1100, "total_steps": 2052, "loss": 0.7764, "lr": 5e-06, "epoch": 1.6061325059317393, "percentage": 53.61, "elapsed_time": "17:52:36", "remaining_time": "15:28:17"}
112
+ {"current_steps": 1110, "total_steps": 2052, "loss": 0.7698, "lr": 5e-06, "epoch": 1.6207337105311188, "percentage": 54.09, "elapsed_time": "18:02:15", "remaining_time": "15:18:27"}
113
+ {"current_steps": 1120, "total_steps": 2052, "loss": 0.7754, "lr": 5e-06, "epoch": 1.6353349151304983, "percentage": 54.58, "elapsed_time": "18:11:53", "remaining_time": "15:08:36"}
114
+ {"current_steps": 1130, "total_steps": 2052, "loss": 0.7754, "lr": 5e-06, "epoch": 1.6499361197298779, "percentage": 55.07, "elapsed_time": "18:21:32", "remaining_time": "14:58:46"}
115
+ {"current_steps": 1140, "total_steps": 2052, "loss": 0.7732, "lr": 5e-06, "epoch": 1.6645373243292572, "percentage": 55.56, "elapsed_time": "18:31:11", "remaining_time": "14:48:57"}
116
+ {"current_steps": 1150, "total_steps": 2052, "loss": 0.775, "lr": 5e-06, "epoch": 1.6791385289286365, "percentage": 56.04, "elapsed_time": "18:40:50", "remaining_time": "14:39:07"}
117
+ {"current_steps": 1160, "total_steps": 2052, "loss": 0.7734, "lr": 5e-06, "epoch": 1.693739733528016, "percentage": 56.53, "elapsed_time": "18:50:29", "remaining_time": "14:29:18"}
118
+ {"current_steps": 1170, "total_steps": 2052, "loss": 0.7776, "lr": 5e-06, "epoch": 1.7083409381273955, "percentage": 57.02, "elapsed_time": "19:00:07", "remaining_time": "14:19:28"}
119
+ {"current_steps": 1180, "total_steps": 2052, "loss": 0.7718, "lr": 5e-06, "epoch": 1.722942142726775, "percentage": 57.5, "elapsed_time": "19:09:46", "remaining_time": "14:09:39"}
120
+ {"current_steps": 1190, "total_steps": 2052, "loss": 0.778, "lr": 5e-06, "epoch": 1.7375433473261546, "percentage": 57.99, "elapsed_time": "19:19:23", "remaining_time": "13:59:49"}
121
+ {"current_steps": 1200, "total_steps": 2052, "loss": 0.7731, "lr": 5e-06, "epoch": 1.7521445519255339, "percentage": 58.48, "elapsed_time": "19:29:02", "remaining_time": "13:50:01"}
122
+ {"current_steps": 1210, "total_steps": 2052, "loss": 0.7762, "lr": 5e-06, "epoch": 1.7667457565249132, "percentage": 58.97, "elapsed_time": "19:38:41", "remaining_time": "13:40:12"}
123
+ {"current_steps": 1220, "total_steps": 2052, "loss": 0.7737, "lr": 5e-06, "epoch": 1.7813469611242927, "percentage": 59.45, "elapsed_time": "19:48:20", "remaining_time": "13:30:24"}
124
+ {"current_steps": 1230, "total_steps": 2052, "loss": 0.7752, "lr": 5e-06, "epoch": 1.7959481657236722, "percentage": 59.94, "elapsed_time": "19:57:59", "remaining_time": "13:20:36"}
125
+ {"current_steps": 1240, "total_steps": 2052, "loss": 0.7746, "lr": 5e-06, "epoch": 1.8105493703230517, "percentage": 60.43, "elapsed_time": "20:07:36", "remaining_time": "13:10:47"}
126
+ {"current_steps": 1250, "total_steps": 2052, "loss": 0.7758, "lr": 5e-06, "epoch": 1.825150574922431, "percentage": 60.92, "elapsed_time": "20:17:14", "remaining_time": "13:00:58"}
127
+ {"current_steps": 1260, "total_steps": 2052, "loss": 0.7737, "lr": 5e-06, "epoch": 1.8397517795218106, "percentage": 61.4, "elapsed_time": "20:26:52", "remaining_time": "12:51:10"}
128
+ {"current_steps": 1270, "total_steps": 2052, "loss": 0.7734, "lr": 5e-06, "epoch": 1.8543529841211899, "percentage": 61.89, "elapsed_time": "20:36:29", "remaining_time": "12:41:22"}
129
+ {"current_steps": 1280, "total_steps": 2052, "loss": 0.7702, "lr": 5e-06, "epoch": 1.8689541887205694, "percentage": 62.38, "elapsed_time": "20:46:09", "remaining_time": "12:31:35"}
130
+ {"current_steps": 1290, "total_steps": 2052, "loss": 0.7738, "lr": 5e-06, "epoch": 1.883555393319949, "percentage": 62.87, "elapsed_time": "20:55:48", "remaining_time": "12:21:48"}
131
+ {"current_steps": 1300, "total_steps": 2052, "loss": 0.7744, "lr": 5e-06, "epoch": 1.8981565979193284, "percentage": 63.35, "elapsed_time": "21:05:28", "remaining_time": "12:12:01"}
132
+ {"current_steps": 1310, "total_steps": 2052, "loss": 0.7707, "lr": 5e-06, "epoch": 1.9127578025187078, "percentage": 63.84, "elapsed_time": "21:15:06", "remaining_time": "12:02:14"}
133
+ {"current_steps": 1320, "total_steps": 2052, "loss": 0.7729, "lr": 5e-06, "epoch": 1.9273590071180873, "percentage": 64.33, "elapsed_time": "21:24:45", "remaining_time": "11:52:27"}
134
+ {"current_steps": 1330, "total_steps": 2052, "loss": 0.7704, "lr": 5e-06, "epoch": 1.9419602117174666, "percentage": 64.81, "elapsed_time": "21:34:24", "remaining_time": "11:42:40"}
135
+ {"current_steps": 1340, "total_steps": 2052, "loss": 0.7711, "lr": 5e-06, "epoch": 1.956561416316846, "percentage": 65.3, "elapsed_time": "21:44:03", "remaining_time": "11:32:54"}
136
+ {"current_steps": 1350, "total_steps": 2052, "loss": 0.7714, "lr": 5e-06, "epoch": 1.9711626209162256, "percentage": 65.79, "elapsed_time": "21:53:43", "remaining_time": "11:23:08"}
137
+ {"current_steps": 1360, "total_steps": 2052, "loss": 0.7681, "lr": 5e-06, "epoch": 1.9857638255156052, "percentage": 66.28, "elapsed_time": "22:03:22", "remaining_time": "11:13:21"}
138
+ {"current_steps": 1369, "total_steps": 2052, "eval_loss": 0.8045554161071777, "epoch": 1.9989049096550464, "percentage": 66.72, "elapsed_time": "22:24:50", "remaining_time": "11:10:56"}
139
+ {"current_steps": 1370, "total_steps": 2052, "loss": 0.7813, "lr": 5e-06, "epoch": 2.0003650301149847, "percentage": 66.76, "elapsed_time": "22:26:21", "remaining_time": "11:10:13"}