sedrickkeh commited on
Commit
e0ed9f8
·
verified ·
1 Parent(s): e429f25

Training in progress, epoch 3

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2ccbb61723cda6c0ee2508482ff08a31357909a83703157db62b1b9ce26940d
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:113648938e4edfee37db13f790643ac242e31e85779567acbca19ff4a3d6ab18
3
  size 4976698672
model-00001-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:174055128aaf2106387b3319d0d493a71ad8d812a5b4202e87ae272ccf6e1065
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30e68daca74171ca94d5f97a1c1367422ff6ec70e31c76c8b39e96b24dee20f2
3
  size 4999802720
model-00002-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9623abf9711da367b72a3680f129adb5fb2d1d3b0d9dfc21ca47f80d98fb68e
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d321367ce198da883c891037abb1e15896c50b1cd244b42a6c8f03b7afb44679
3
  size 4915916176
model-00003-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:583d89d62cdd9e73cb019ed0197ae36a7b4b466bcc16692b4704f038fc76dc3a
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f475c7943c4984ad3ef523b84c56fd7b369de7f13a7ce1514c7e56f1d82fbcc
3
  size 1168138808
model-00004-of-00004.safetensors.sagemaker-uploaded ADDED
File without changes
special_tokens_map.json.sagemaker-uploaded ADDED
File without changes
tokenizer.json.sagemaker-uploaded ADDED
File without changes
tokenizer_config.json.sagemaker-uploaded ADDED
File without changes
trainer_log.jsonl CHANGED
@@ -19,3 +19,45 @@
19
  {"current_steps": 190, "total_steps": 591, "loss": 0.5428, "learning_rate": 5e-06, "epoch": 0.9644670050761421, "percentage": 32.15, "elapsed_time": "0:54:27", "remaining_time": "1:54:56"}
20
  {"current_steps": 197, "total_steps": 591, "eval_loss": 0.545667827129364, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:57:36", "remaining_time": "1:55:12"}
21
  {"current_steps": 200, "total_steps": 591, "loss": 0.538, "learning_rate": 5e-06, "epoch": 1.015228426395939, "percentage": 33.84, "elapsed_time": "0:59:26", "remaining_time": "1:56:12"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  {"current_steps": 190, "total_steps": 591, "loss": 0.5428, "learning_rate": 5e-06, "epoch": 0.9644670050761421, "percentage": 32.15, "elapsed_time": "0:54:27", "remaining_time": "1:54:56"}
20
  {"current_steps": 197, "total_steps": 591, "eval_loss": 0.545667827129364, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:57:36", "remaining_time": "1:55:12"}
21
  {"current_steps": 200, "total_steps": 591, "loss": 0.538, "learning_rate": 5e-06, "epoch": 1.015228426395939, "percentage": 33.84, "elapsed_time": "0:59:26", "remaining_time": "1:56:12"}
22
+ {"current_steps": 210, "total_steps": 591, "loss": 0.5005, "learning_rate": 5e-06, "epoch": 1.0659898477157361, "percentage": 35.53, "elapsed_time": "1:02:18", "remaining_time": "1:53:01"}
23
+ {"current_steps": 220, "total_steps": 591, "loss": 0.5116, "learning_rate": 5e-06, "epoch": 1.116751269035533, "percentage": 37.23, "elapsed_time": "1:05:07", "remaining_time": "1:49:50"}
24
+ {"current_steps": 230, "total_steps": 591, "loss": 0.5039, "learning_rate": 5e-06, "epoch": 1.16751269035533, "percentage": 38.92, "elapsed_time": "1:07:58", "remaining_time": "1:46:41"}
25
+ {"current_steps": 240, "total_steps": 591, "loss": 0.4979, "learning_rate": 5e-06, "epoch": 1.218274111675127, "percentage": 40.61, "elapsed_time": "1:10:49", "remaining_time": "1:43:34"}
26
+ {"current_steps": 250, "total_steps": 591, "loss": 0.4977, "learning_rate": 5e-06, "epoch": 1.2690355329949239, "percentage": 42.3, "elapsed_time": "1:13:40", "remaining_time": "1:40:29"}
27
+ {"current_steps": 260, "total_steps": 591, "loss": 0.5015, "learning_rate": 5e-06, "epoch": 1.3197969543147208, "percentage": 43.99, "elapsed_time": "1:16:31", "remaining_time": "1:37:25"}
28
+ {"current_steps": 270, "total_steps": 591, "loss": 0.5052, "learning_rate": 5e-06, "epoch": 1.3705583756345177, "percentage": 45.69, "elapsed_time": "1:19:21", "remaining_time": "1:34:20"}
29
+ {"current_steps": 280, "total_steps": 591, "loss": 0.4974, "learning_rate": 5e-06, "epoch": 1.4213197969543148, "percentage": 47.38, "elapsed_time": "1:22:11", "remaining_time": "1:31:18"}
30
+ {"current_steps": 290, "total_steps": 591, "loss": 0.5044, "learning_rate": 5e-06, "epoch": 1.4720812182741116, "percentage": 49.07, "elapsed_time": "1:25:02", "remaining_time": "1:28:15"}
31
+ {"current_steps": 300, "total_steps": 591, "loss": 0.5134, "learning_rate": 5e-06, "epoch": 1.5228426395939088, "percentage": 50.76, "elapsed_time": "1:27:53", "remaining_time": "1:25:15"}
32
+ {"current_steps": 310, "total_steps": 591, "loss": 0.5022, "learning_rate": 5e-06, "epoch": 1.5736040609137056, "percentage": 52.45, "elapsed_time": "1:30:44", "remaining_time": "1:22:14"}
33
+ {"current_steps": 320, "total_steps": 591, "loss": 0.5001, "learning_rate": 5e-06, "epoch": 1.6243654822335025, "percentage": 54.15, "elapsed_time": "1:33:33", "remaining_time": "1:19:13"}
34
+ {"current_steps": 330, "total_steps": 591, "loss": 0.504, "learning_rate": 5e-06, "epoch": 1.6751269035532994, "percentage": 55.84, "elapsed_time": "1:36:23", "remaining_time": "1:16:14"}
35
+ {"current_steps": 340, "total_steps": 591, "loss": 0.5008, "learning_rate": 5e-06, "epoch": 1.7258883248730963, "percentage": 57.53, "elapsed_time": "1:39:13", "remaining_time": "1:13:15"}
36
+ {"current_steps": 350, "total_steps": 591, "loss": 0.5058, "learning_rate": 5e-06, "epoch": 1.7766497461928934, "percentage": 59.22, "elapsed_time": "1:42:04", "remaining_time": "1:10:17"}
37
+ {"current_steps": 360, "total_steps": 591, "loss": 0.5013, "learning_rate": 5e-06, "epoch": 1.8274111675126905, "percentage": 60.91, "elapsed_time": "1:44:55", "remaining_time": "1:07:19"}
38
+ {"current_steps": 370, "total_steps": 591, "loss": 0.505, "learning_rate": 5e-06, "epoch": 1.8781725888324874, "percentage": 62.61, "elapsed_time": "1:47:45", "remaining_time": "1:04:22"}
39
+ {"current_steps": 380, "total_steps": 591, "loss": 0.5029, "learning_rate": 5e-06, "epoch": 1.9289340101522843, "percentage": 64.3, "elapsed_time": "1:50:35", "remaining_time": "1:01:24"}
40
+ {"current_steps": 390, "total_steps": 591, "loss": 0.4991, "learning_rate": 5e-06, "epoch": 1.9796954314720812, "percentage": 65.99, "elapsed_time": "1:53:26", "remaining_time": "0:58:27"}
41
+ {"current_steps": 394, "total_steps": 591, "eval_loss": 0.5367357730865479, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:55:43", "remaining_time": "0:57:51"}
42
+ {"current_steps": 400, "total_steps": 591, "loss": 0.4785, "learning_rate": 5e-06, "epoch": 2.030456852791878, "percentage": 67.68, "elapsed_time": "1:58:31", "remaining_time": "0:56:35"}
43
+ {"current_steps": 410, "total_steps": 591, "loss": 0.4563, "learning_rate": 5e-06, "epoch": 2.081218274111675, "percentage": 69.37, "elapsed_time": "2:01:20", "remaining_time": "0:53:34"}
44
+ {"current_steps": 420, "total_steps": 591, "loss": 0.4564, "learning_rate": 5e-06, "epoch": 2.1319796954314723, "percentage": 71.07, "elapsed_time": "2:04:09", "remaining_time": "0:50:33"}
45
+ {"current_steps": 430, "total_steps": 591, "loss": 0.4559, "learning_rate": 5e-06, "epoch": 2.182741116751269, "percentage": 72.76, "elapsed_time": "2:06:59", "remaining_time": "0:47:32"}
46
+ {"current_steps": 440, "total_steps": 591, "loss": 0.454, "learning_rate": 5e-06, "epoch": 2.233502538071066, "percentage": 74.45, "elapsed_time": "2:09:47", "remaining_time": "0:44:32"}
47
+ {"current_steps": 450, "total_steps": 591, "loss": 0.4584, "learning_rate": 5e-06, "epoch": 2.284263959390863, "percentage": 76.14, "elapsed_time": "2:12:38", "remaining_time": "0:41:33"}
48
+ {"current_steps": 460, "total_steps": 591, "loss": 0.4603, "learning_rate": 5e-06, "epoch": 2.33502538071066, "percentage": 77.83, "elapsed_time": "2:15:29", "remaining_time": "0:38:35"}
49
+ {"current_steps": 470, "total_steps": 591, "loss": 0.4586, "learning_rate": 5e-06, "epoch": 2.3857868020304567, "percentage": 79.53, "elapsed_time": "2:18:19", "remaining_time": "0:35:36"}
50
+ {"current_steps": 480, "total_steps": 591, "loss": 0.4547, "learning_rate": 5e-06, "epoch": 2.436548223350254, "percentage": 81.22, "elapsed_time": "2:21:09", "remaining_time": "0:32:38"}
51
+ {"current_steps": 490, "total_steps": 591, "loss": 0.4553, "learning_rate": 5e-06, "epoch": 2.487309644670051, "percentage": 82.91, "elapsed_time": "2:24:00", "remaining_time": "0:29:41"}
52
+ {"current_steps": 500, "total_steps": 591, "loss": 0.4552, "learning_rate": 5e-06, "epoch": 2.5380710659898478, "percentage": 84.6, "elapsed_time": "2:26:51", "remaining_time": "0:26:43"}
53
+ {"current_steps": 510, "total_steps": 591, "loss": 0.4495, "learning_rate": 5e-06, "epoch": 2.5888324873096447, "percentage": 86.29, "elapsed_time": "2:29:40", "remaining_time": "0:23:46"}
54
+ {"current_steps": 520, "total_steps": 591, "loss": 0.4601, "learning_rate": 5e-06, "epoch": 2.6395939086294415, "percentage": 87.99, "elapsed_time": "2:32:31", "remaining_time": "0:20:49"}
55
+ {"current_steps": 530, "total_steps": 591, "loss": 0.4639, "learning_rate": 5e-06, "epoch": 2.6903553299492384, "percentage": 89.68, "elapsed_time": "2:35:21", "remaining_time": "0:17:52"}
56
+ {"current_steps": 540, "total_steps": 591, "loss": 0.4561, "learning_rate": 5e-06, "epoch": 2.7411167512690353, "percentage": 91.37, "elapsed_time": "2:38:10", "remaining_time": "0:14:56"}
57
+ {"current_steps": 550, "total_steps": 591, "loss": 0.458, "learning_rate": 5e-06, "epoch": 2.7918781725888326, "percentage": 93.06, "elapsed_time": "2:40:59", "remaining_time": "0:12:00"}
58
+ {"current_steps": 560, "total_steps": 591, "loss": 0.4582, "learning_rate": 5e-06, "epoch": 2.8426395939086295, "percentage": 94.75, "elapsed_time": "2:43:47", "remaining_time": "0:09:04"}
59
+ {"current_steps": 570, "total_steps": 591, "loss": 0.4535, "learning_rate": 5e-06, "epoch": 2.8934010152284264, "percentage": 96.45, "elapsed_time": "2:46:36", "remaining_time": "0:06:08"}
60
+ {"current_steps": 580, "total_steps": 591, "loss": 0.4609, "learning_rate": 5e-06, "epoch": 2.9441624365482233, "percentage": 98.14, "elapsed_time": "2:49:25", "remaining_time": "0:03:12"}
61
+ {"current_steps": 590, "total_steps": 591, "loss": 0.4636, "learning_rate": 5e-06, "epoch": 2.99492385786802, "percentage": 99.83, "elapsed_time": "2:52:15", "remaining_time": "0:00:17"}
62
+ {"current_steps": 591, "total_steps": 591, "eval_loss": 0.5386124849319458, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:54:45", "remaining_time": "0:00:00"}
63
+ {"current_steps": 591, "total_steps": 591, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:55:42", "remaining_time": "0:00:00"}
training_args.bin.sagemaker-uploaded ADDED
File without changes