hlillemark commited on
Commit
b30015c
·
verified ·
1 Parent(s): c5a79dd

Training in progress, step 500

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d90ded63025152b8423a07973a24997993d818a5559bcfad8342379d8f9141a6
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80a75922bbe71cf04eeae60f8faa44e32811e7a0dbef5da46903c9c052a49fa9
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5972e9cd2fc5219e905ecd50d6108e231ffbc0ecc081ef5869e109e17cb2e8f
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce1d7974b1ac53127e4393a19a887614a23d7ff63ae64b8cef4b45d0d5b9a48
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbb7ab1d4765bc00584fb9ad03428b1b64fe25ecbe1c1fe609aea57af2e6c2e9
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6b440d3810a01f165fa6acc0ba880bd85ac7d6c5dc570a254613a37cccb498e
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf22d9dfe31144dd9e91a6b79377b10228306b209f31b9e747a4385b4c43a127
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f277fabaf661bf9352705206db6161334a7daae21382c22e995adcf8e766bcdc
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,81 +1,67 @@
1
- {"current_steps": 10, "total_steps": 670, "loss": 1.921, "lr": 1.4925373134328358e-06, "epoch": 0.14925373134328357, "percentage": 1.49, "elapsed_time": "0:00:29", "remaining_time": "0:32:51"}
2
- {"current_steps": 20, "total_steps": 670, "loss": 1.1511, "lr": 2.9850746268656716e-06, "epoch": 0.29850746268656714, "percentage": 2.99, "elapsed_time": "0:00:57", "remaining_time": "0:31:00"}
3
- {"current_steps": 30, "total_steps": 670, "loss": 0.9965, "lr": 4.477611940298508e-06, "epoch": 0.44776119402985076, "percentage": 4.48, "elapsed_time": "0:01:25", "remaining_time": "0:30:16"}
4
- {"current_steps": 40, "total_steps": 670, "loss": 1.0133, "lr": 5.970149253731343e-06, "epoch": 0.5970149253731343, "percentage": 5.97, "elapsed_time": "0:01:52", "remaining_time": "0:29:36"}
5
- {"current_steps": 50, "total_steps": 670, "loss": 1.053, "lr": 7.46268656716418e-06, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:20", "remaining_time": "0:29:04"}
6
- {"current_steps": 50, "total_steps": 670, "eval_loss": 1.2667393684387207, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:21", "remaining_time": "0:29:16"}
7
- {"current_steps": 60, "total_steps": 670, "loss": 1.0093, "lr": 8.955223880597016e-06, "epoch": 0.8955223880597015, "percentage": 8.96, "elapsed_time": "0:02:49", "remaining_time": "0:28:39"}
8
- {"current_steps": 70, "total_steps": 670, "loss": 0.9774, "lr": 9.999389284703265e-06, "epoch": 1.044776119402985, "percentage": 10.45, "elapsed_time": "0:03:16", "remaining_time": "0:28:06"}
9
- {"current_steps": 80, "total_steps": 670, "loss": 0.7151, "lr": 9.988536273658876e-06, "epoch": 1.1940298507462686, "percentage": 11.94, "elapsed_time": "0:03:44", "remaining_time": "0:27:34"}
10
- {"current_steps": 90, "total_steps": 670, "loss": 0.7292, "lr": 9.964145714351633e-06, "epoch": 1.3432835820895521, "percentage": 13.43, "elapsed_time": "0:04:12", "remaining_time": "0:27:04"}
11
- {"current_steps": 100, "total_steps": 670, "loss": 0.8178, "lr": 9.926283796211796e-06, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:04:39", "remaining_time": "0:26:35"}
12
- {"current_steps": 100, "total_steps": 670, "eval_loss": 1.3229804039001465, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:04:40", "remaining_time": "0:26:40"}
13
- {"current_steps": 110, "total_steps": 670, "loss": 0.8059, "lr": 9.87505326632108e-06, "epoch": 1.6417910447761193, "percentage": 16.42, "elapsed_time": "0:05:08", "remaining_time": "0:26:10"}
14
- {"current_steps": 120, "total_steps": 670, "loss": 0.8046, "lr": 9.810593150584658e-06, "epoch": 1.7910447761194028, "percentage": 17.91, "elapsed_time": "0:05:36", "remaining_time": "0:25:40"}
15
- {"current_steps": 130, "total_steps": 670, "loss": 0.7796, "lr": 9.733078376452172e-06, "epoch": 1.9402985074626866, "percentage": 19.4, "elapsed_time": "0:06:03", "remaining_time": "0:25:10"}
16
- {"current_steps": 140, "total_steps": 670, "loss": 0.5014, "lr": 9.642719298211602e-06, "epoch": 2.08955223880597, "percentage": 20.9, "elapsed_time": "0:06:32", "remaining_time": "0:24:44"}
17
- {"current_steps": 150, "total_steps": 670, "loss": 0.3803, "lr": 9.539761126144193e-06, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:06:59", "remaining_time": "0:24:14"}
18
- {"current_steps": 150, "total_steps": 670, "eval_loss": 1.5490957498550415, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:07:00", "remaining_time": "0:24:17"}
19
- {"current_steps": 160, "total_steps": 670, "loss": 0.4, "lr": 9.424483261089584e-06, "epoch": 2.388059701492537, "percentage": 23.88, "elapsed_time": "0:07:28", "remaining_time": "0:23:48"}
20
- {"current_steps": 170, "total_steps": 670, "loss": 0.4118, "lr": 9.297198536226927e-06, "epoch": 2.5373134328358207, "percentage": 25.37, "elapsed_time": "0:07:55", "remaining_time": "0:23:19"}
21
- {"current_steps": 180, "total_steps": 670, "loss": 0.4074, "lr": 9.158252368129628e-06, "epoch": 2.6865671641791042, "percentage": 26.87, "elapsed_time": "0:08:23", "remaining_time": "0:22:51"}
22
- {"current_steps": 190, "total_steps": 670, "loss": 0.438, "lr": 9.008021819397488e-06, "epoch": 2.835820895522388, "percentage": 28.36, "elapsed_time": "0:08:51", "remaining_time": "0:22:23"}
23
- {"current_steps": 200, "total_steps": 670, "loss": 0.436, "lr": 8.846914575410035e-06, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:09:19", "remaining_time": "0:21:54"}
24
- {"current_steps": 200, "total_steps": 670, "eval_loss": 1.5159422159194946, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:09:20", "remaining_time": "0:21:57"}
25
- {"current_steps": 210, "total_steps": 670, "loss": 0.2353, "lr": 8.675367837977848e-06, "epoch": 3.1343283582089554, "percentage": 31.34, "elapsed_time": "0:09:48", "remaining_time": "0:21:29"}
26
- {"current_steps": 220, "total_steps": 670, "loss": 0.2442, "lr": 8.49384713889421e-06, "epoch": 3.283582089552239, "percentage": 32.84, "elapsed_time": "0:10:16", "remaining_time": "0:21:00"}
27
- {"current_steps": 230, "total_steps": 670, "loss": 0.264, "lr": 8.302845076606786e-06, "epoch": 3.4328358208955225, "percentage": 34.33, "elapsed_time": "0:10:44", "remaining_time": "0:20:32"}
28
- {"current_steps": 240, "total_steps": 670, "loss": 0.2486, "lr": 8.10287997943769e-06, "epoch": 3.582089552238806, "percentage": 35.82, "elapsed_time": "0:11:11", "remaining_time": "0:20:03"}
29
- {"current_steps": 250, "total_steps": 670, "loss": 0.2529, "lr": 7.894494498979558e-06, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:11:39", "remaining_time": "0:19:35"}
30
- {"current_steps": 250, "total_steps": 670, "eval_loss": 1.6716222763061523, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:11:40", "remaining_time": "0:19:36"}
31
- {"current_steps": 260, "total_steps": 670, "loss": 0.2382, "lr": 7.678254137484797e-06, "epoch": 3.8805970149253732, "percentage": 38.81, "elapsed_time": "0:12:08", "remaining_time": "0:19:08"}
32
- {"current_steps": 270, "total_steps": 670, "loss": 0.2137, "lr": 7.4547457132442895e-06, "epoch": 4.029850746268656, "percentage": 40.3, "elapsed_time": "0:12:35", "remaining_time": "0:18:39"}
33
- {"current_steps": 280, "total_steps": 670, "loss": 0.1201, "lr": 7.2245757681200835e-06, "epoch": 4.17910447761194, "percentage": 41.79, "elapsed_time": "0:13:03", "remaining_time": "0:18:11"}
34
- {"current_steps": 290, "total_steps": 670, "loss": 0.1246, "lr": 6.988368921553601e-06, "epoch": 4.3283582089552235, "percentage": 43.28, "elapsed_time": "0:13:30", "remaining_time": "0:17:42"}
35
- {"current_steps": 300, "total_steps": 670, "loss": 0.1555, "lr": 6.746766175516159e-06, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:13:58", "remaining_time": "0:17:13"}
36
- {"current_steps": 300, "total_steps": 670, "eval_loss": 1.847908616065979, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:13:59", "remaining_time": "0:17:15"}
37
- {"current_steps": 310, "total_steps": 670, "loss": 0.1391, "lr": 6.500423175001705e-06, "epoch": 4.6268656716417915, "percentage": 46.27, "elapsed_time": "0:14:26", "remaining_time": "0:16:46"}
38
- {"current_steps": 320, "total_steps": 670, "loss": 0.1576, "lr": 6.2500084287822925e-06, "epoch": 4.776119402985074, "percentage": 47.76, "elapsed_time": "0:14:54", "remaining_time": "0:16:18"}
39
- {"current_steps": 330, "total_steps": 670, "loss": 0.1441, "lr": 5.996201495254757e-06, "epoch": 4.925373134328359, "percentage": 49.25, "elapsed_time": "0:15:22", "remaining_time": "0:15:50"}
40
- {"current_steps": 340, "total_steps": 670, "loss": 0.0898, "lr": 5.73969113830165e-06, "epoch": 5.074626865671641, "percentage": 50.75, "elapsed_time": "0:15:50", "remaining_time": "0:15:22"}
41
- {"current_steps": 350, "total_steps": 670, "loss": 0.078, "lr": 5.481173458170952e-06, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:16:17", "remaining_time": "0:14:53"}
42
- {"current_steps": 350, "total_steps": 670, "eval_loss": 2.03751277923584, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:16:18", "remaining_time": "0:14:54"}
43
- {"current_steps": 360, "total_steps": 670, "loss": 0.0828, "lr": 5.221350002446882e-06, "epoch": 5.373134328358209, "percentage": 53.73, "elapsed_time": "0:16:46", "remaining_time": "0:14:26"}
44
- {"current_steps": 370, "total_steps": 670, "loss": 0.0734, "lr": 4.96092586223808e-06, "epoch": 5.522388059701493, "percentage": 55.22, "elapsed_time": "0:17:14", "remaining_time": "0:13:58"}
45
- {"current_steps": 380, "total_steps": 670, "loss": 0.0873, "lr": 4.700607758749626e-06, "epoch": 5.6716417910447765, "percentage": 56.72, "elapsed_time": "0:17:41", "remaining_time": "0:13:30"}
46
- {"current_steps": 390, "total_steps": 670, "loss": 0.0647, "lr": 4.441102125431398e-06, "epoch": 5.82089552238806, "percentage": 58.21, "elapsed_time": "0:18:09", "remaining_time": "0:13:02"}
47
- {"current_steps": 400, "total_steps": 670, "loss": 0.0726, "lr": 4.183113190907349e-06, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:18:36", "remaining_time": "0:12:33"}
48
- {"current_steps": 400, "total_steps": 670, "eval_loss": 1.8965630531311035, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:18:37", "remaining_time": "0:12:34"}
49
- {"current_steps": 410, "total_steps": 670, "loss": 0.0471, "lr": 3.927341067888065e-06, "epoch": 6.119402985074627, "percentage": 61.19, "elapsed_time": "0:19:05", "remaining_time": "0:12:06"}
50
- {"current_steps": 420, "total_steps": 670, "loss": 0.0332, "lr": 3.6744798532528137e-06, "epoch": 6.268656716417911, "percentage": 62.69, "elapsed_time": "0:19:33", "remaining_time": "0:11:38"}
51
- {"current_steps": 430, "total_steps": 670, "loss": 0.0302, "lr": 3.4252157444569478e-06, "epoch": 6.417910447761194, "percentage": 64.18, "elapsed_time": "0:20:01", "remaining_time": "0:11:10"}
52
- {"current_steps": 440, "total_steps": 670, "loss": 0.048, "lr": 3.1802251773762294e-06, "epoch": 6.567164179104478, "percentage": 65.67, "elapsed_time": "0:20:29", "remaining_time": "0:10:42"}
53
- {"current_steps": 450, "total_steps": 670, "loss": 0.0411, "lr": 2.9401729906414385e-06, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:20:56", "remaining_time": "0:10:14"}
54
- {"current_steps": 450, "total_steps": 670, "eval_loss": 2.1223175525665283, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:20:57", "remaining_time": "0:10:14"}
55
- {"current_steps": 460, "total_steps": 670, "loss": 0.0388, "lr": 2.7057106214448216e-06, "epoch": 6.865671641791045, "percentage": 68.66, "elapsed_time": "0:21:25", "remaining_time": "0:09:46"}
56
- {"current_steps": 470, "total_steps": 670, "loss": 0.0395, "lr": 2.4774743377144265e-06, "epoch": 7.014925373134329, "percentage": 70.15, "elapsed_time": "0:21:52", "remaining_time": "0:09:18"}
57
- {"current_steps": 480, "total_steps": 670, "loss": 0.0106, "lr": 2.256083511453747e-06, "epoch": 7.164179104477612, "percentage": 71.64, "elapsed_time": "0:22:20", "remaining_time": "0:08:50"}
58
- {"current_steps": 490, "total_steps": 670, "loss": 0.0252, "lr": 2.042138937932388e-06, "epoch": 7.313432835820896, "percentage": 73.13, "elapsed_time": "0:22:48", "remaining_time": "0:08:22"}
59
- {"current_steps": 500, "total_steps": 670, "loss": 0.0127, "lr": 1.8362212052889827e-06, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:23:15", "remaining_time": "0:07:54"}
60
- {"current_steps": 500, "total_steps": 670, "eval_loss": 2.1807539463043213, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:23:16", "remaining_time": "0:07:54"}
61
- {"current_steps": 510, "total_steps": 670, "loss": 0.017, "lr": 1.63888911897084e-06, "epoch": 7.611940298507463, "percentage": 76.12, "elapsed_time": "0:25:07", "remaining_time": "0:07:52"}
62
- {"current_steps": 520, "total_steps": 670, "loss": 0.0192, "lr": 1.4506781852859836e-06, "epoch": 7.7611940298507465, "percentage": 77.61, "elapsed_time": "0:25:35", "remaining_time": "0:07:22"}
63
- {"current_steps": 530, "total_steps": 670, "loss": 0.0102, "lr": 1.2720991581827852e-06, "epoch": 7.91044776119403, "percentage": 79.1, "elapsed_time": "0:26:03", "remaining_time": "0:06:52"}
64
- {"current_steps": 540, "total_steps": 670, "loss": 0.0119, "lr": 1.1036366532008552e-06, "epoch": 8.059701492537313, "percentage": 80.6, "elapsed_time": "0:26:30", "remaining_time": "0:06:23"}
65
- {"current_steps": 550, "total_steps": 670, "loss": 0.0083, "lr": 9.457478323545749e-07, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:26:58", "remaining_time": "0:05:53"}
66
- {"current_steps": 550, "total_steps": 670, "eval_loss": 2.2392308712005615, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:26:59", "remaining_time": "0:05:53"}
67
- {"current_steps": 560, "total_steps": 670, "loss": 0.0059, "lr": 7.988611635181099e-07, "epoch": 8.35820895522388, "percentage": 83.58, "elapsed_time": "0:27:27", "remaining_time": "0:05:23"}
68
- {"current_steps": 570, "total_steps": 670, "loss": 0.012, "lr": 6.633752576786251e-07, "epoch": 8.507462686567164, "percentage": 85.07, "elapsed_time": "0:27:54", "remaining_time": "0:04:53"}
69
- {"current_steps": 580, "total_steps": 670, "loss": 0.0071, "lr": 5.396577872130676e-07, "epoch": 8.656716417910447, "percentage": 86.57, "elapsed_time": "0:28:22", "remaining_time": "0:04:24"}
70
- {"current_steps": 590, "total_steps": 670, "loss": 0.0036, "lr": 4.2804448812404754e-07, "epoch": 8.805970149253731, "percentage": 88.06, "elapsed_time": "0:28:50", "remaining_time": "0:03:54"}
71
- {"current_steps": 600, "total_steps": 670, "loss": 0.0049, "lr": 3.288382489424502e-07, "epoch": 8.955223880597014, "percentage": 89.55, "elapsed_time": "0:29:18", "remaining_time": "0:03:25"}
72
- {"current_steps": 600, "total_steps": 670, "eval_loss": 2.3008365631103516, "epoch": 8.955223880597014, "percentage": 89.55, "elapsed_time": "0:29:19", "remaining_time": "0:03:25"}
73
- {"current_steps": 610, "total_steps": 670, "loss": 0.0051, "lr": 2.4230828876927293e-07, "epoch": 9.104477611940299, "percentage": 91.04, "elapsed_time": "0:29:47", "remaining_time": "0:02:55"}
74
- {"current_steps": 620, "total_steps": 670, "loss": 0.0032, "lr": 1.6868942668726408e-07, "epoch": 9.253731343283581, "percentage": 92.54, "elapsed_time": "0:30:14", "remaining_time": "0:02:26"}
75
- {"current_steps": 630, "total_steps": 670, "loss": 0.0063, "lr": 1.0818144452496293e-07, "epoch": 9.402985074626866, "percentage": 94.03, "elapsed_time": "0:30:41", "remaining_time": "0:01:56"}
76
- {"current_steps": 640, "total_steps": 670, "loss": 0.0024, "lr": 6.094854470245326e-08, "epoch": 9.552238805970148, "percentage": 95.52, "elapsed_time": "0:31:08", "remaining_time": "0:01:27"}
77
- {"current_steps": 650, "total_steps": 670, "loss": 0.0035, "lr": 2.711890463007405e-08, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:31:36", "remaining_time": "0:00:58"}
78
- {"current_steps": 650, "total_steps": 670, "eval_loss": 2.3232414722442627, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:31:37", "remaining_time": "0:00:58"}
79
- {"current_steps": 660, "total_steps": 670, "loss": 0.0035, "lr": 6.784328869339218e-09, "epoch": 9.850746268656717, "percentage": 98.51, "elapsed_time": "0:32:05", "remaining_time": "0:00:29"}
80
- {"current_steps": 670, "total_steps": 670, "loss": 0.0021, "lr": 0.0, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:32:33", "remaining_time": "0:00:00"}
81
- {"current_steps": 670, "total_steps": 670, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:35:28", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 670, "loss": 1.9213, "lr": 1.4925373134328358e-06, "epoch": 0.14925373134328357, "percentage": 1.49, "elapsed_time": "0:00:29", "remaining_time": "0:32:49"}
2
+ {"current_steps": 20, "total_steps": 670, "loss": 1.151, "lr": 2.9850746268656716e-06, "epoch": 0.29850746268656714, "percentage": 2.99, "elapsed_time": "0:00:57", "remaining_time": "0:30:55"}
3
+ {"current_steps": 30, "total_steps": 670, "loss": 0.9953, "lr": 4.477611940298508e-06, "epoch": 0.44776119402985076, "percentage": 4.48, "elapsed_time": "0:01:24", "remaining_time": "0:30:09"}
4
+ {"current_steps": 40, "total_steps": 670, "loss": 1.0143, "lr": 5.970149253731343e-06, "epoch": 0.5970149253731343, "percentage": 5.97, "elapsed_time": "0:01:52", "remaining_time": "0:29:28"}
5
+ {"current_steps": 50, "total_steps": 670, "loss": 1.0534, "lr": 7.46268656716418e-06, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:20", "remaining_time": "0:28:57"}
6
+ {"current_steps": 50, "total_steps": 670, "eval_loss": 1.2635215520858765, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:21", "remaining_time": "0:29:10"}
7
+ {"current_steps": 60, "total_steps": 670, "loss": 1.0096, "lr": 8.955223880597016e-06, "epoch": 0.8955223880597015, "percentage": 8.96, "elapsed_time": "0:02:48", "remaining_time": "0:28:33"}
8
+ {"current_steps": 70, "total_steps": 670, "loss": 0.9688, "lr": 9.999389284703265e-06, "epoch": 1.044776119402985, "percentage": 10.45, "elapsed_time": "0:03:16", "remaining_time": "0:28:00"}
9
+ {"current_steps": 80, "total_steps": 670, "loss": 0.711, "lr": 9.988536273658876e-06, "epoch": 1.1940298507462686, "percentage": 11.94, "elapsed_time": "0:03:43", "remaining_time": "0:27:28"}
10
+ {"current_steps": 90, "total_steps": 670, "loss": 0.7263, "lr": 9.964145714351633e-06, "epoch": 1.3432835820895521, "percentage": 13.43, "elapsed_time": "0:04:11", "remaining_time": "0:26:57"}
11
+ {"current_steps": 100, "total_steps": 670, "loss": 0.8118, "lr": 9.926283796211796e-06, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:04:38", "remaining_time": "0:26:28"}
12
+ {"current_steps": 100, "total_steps": 670, "eval_loss": 1.3804577589035034, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:04:39", "remaining_time": "0:26:34"}
13
+ {"current_steps": 110, "total_steps": 670, "loss": 0.8126, "lr": 9.87505326632108e-06, "epoch": 1.6417910447761193, "percentage": 16.42, "elapsed_time": "0:05:07", "remaining_time": "0:26:03"}
14
+ {"current_steps": 120, "total_steps": 670, "loss": 0.8099, "lr": 9.810593150584658e-06, "epoch": 1.7910447761194028, "percentage": 17.91, "elapsed_time": "0:05:34", "remaining_time": "0:25:34"}
15
+ {"current_steps": 130, "total_steps": 670, "loss": 0.804, "lr": 9.733078376452172e-06, "epoch": 1.9402985074626866, "percentage": 19.4, "elapsed_time": "0:06:02", "remaining_time": "0:25:03"}
16
+ {"current_steps": 140, "total_steps": 670, "loss": 0.4978, "lr": 9.642719298211602e-06, "epoch": 2.08955223880597, "percentage": 20.9, "elapsed_time": "0:06:30", "remaining_time": "0:24:38"}
17
+ {"current_steps": 150, "total_steps": 670, "loss": 0.3889, "lr": 9.539761126144193e-06, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:06:57", "remaining_time": "0:24:08"}
18
+ {"current_steps": 150, "total_steps": 670, "eval_loss": 1.6006965637207031, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:06:58", "remaining_time": "0:24:12"}
19
+ {"current_steps": 160, "total_steps": 670, "loss": 0.3846, "lr": 9.424483261089584e-06, "epoch": 2.388059701492537, "percentage": 23.88, "elapsed_time": "0:07:26", "remaining_time": "0:23:43"}
20
+ {"current_steps": 170, "total_steps": 670, "loss": 0.4125, "lr": 9.297198536226927e-06, "epoch": 2.5373134328358207, "percentage": 25.37, "elapsed_time": "0:07:54", "remaining_time": "0:23:14"}
21
+ {"current_steps": 180, "total_steps": 670, "loss": 0.4261, "lr": 9.158252368129628e-06, "epoch": 2.6865671641791042, "percentage": 26.87, "elapsed_time": "0:08:21", "remaining_time": "0:22:46"}
22
+ {"current_steps": 190, "total_steps": 670, "loss": 0.4488, "lr": 9.008021819397488e-06, "epoch": 2.835820895522388, "percentage": 28.36, "elapsed_time": "0:08:49", "remaining_time": "0:22:18"}
23
+ {"current_steps": 200, "total_steps": 670, "loss": 0.4361, "lr": 8.846914575410035e-06, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:09:17", "remaining_time": "0:21:50"}
24
+ {"current_steps": 200, "total_steps": 670, "eval_loss": 1.532719612121582, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:09:18", "remaining_time": "0:21:52"}
25
+ {"current_steps": 210, "total_steps": 670, "loss": 0.2399, "lr": 8.675367837977848e-06, "epoch": 3.1343283582089554, "percentage": 31.34, "elapsed_time": "0:09:46", "remaining_time": "0:21:24"}
26
+ {"current_steps": 220, "total_steps": 670, "loss": 0.2246, "lr": 8.49384713889421e-06, "epoch": 3.283582089552239, "percentage": 32.84, "elapsed_time": "0:10:14", "remaining_time": "0:20:56"}
27
+ {"current_steps": 230, "total_steps": 670, "loss": 0.2415, "lr": 8.302845076606786e-06, "epoch": 3.4328358208955225, "percentage": 34.33, "elapsed_time": "0:10:41", "remaining_time": "0:20:27"}
28
+ {"current_steps": 240, "total_steps": 670, "loss": 0.2615, "lr": 8.10287997943769e-06, "epoch": 3.582089552238806, "percentage": 35.82, "elapsed_time": "0:11:09", "remaining_time": "0:19:58"}
29
+ {"current_steps": 250, "total_steps": 670, "loss": 0.265, "lr": 7.894494498979558e-06, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:11:36", "remaining_time": "0:19:30"}
30
+ {"current_steps": 250, "total_steps": 670, "eval_loss": 1.6067496538162231, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:11:37", "remaining_time": "0:19:32"}
31
+ {"current_steps": 260, "total_steps": 670, "loss": 0.2371, "lr": 7.678254137484797e-06, "epoch": 3.8805970149253732, "percentage": 38.81, "elapsed_time": "0:12:05", "remaining_time": "0:19:03"}
32
+ {"current_steps": 270, "total_steps": 670, "loss": 0.2234, "lr": 7.4547457132442895e-06, "epoch": 4.029850746268656, "percentage": 40.3, "elapsed_time": "0:12:32", "remaining_time": "0:18:35"}
33
+ {"current_steps": 280, "total_steps": 670, "loss": 0.1273, "lr": 7.2245757681200835e-06, "epoch": 4.17910447761194, "percentage": 41.79, "elapsed_time": "0:13:00", "remaining_time": "0:18:07"}
34
+ {"current_steps": 290, "total_steps": 670, "loss": 0.1232, "lr": 6.988368921553601e-06, "epoch": 4.3283582089552235, "percentage": 43.28, "elapsed_time": "0:13:27", "remaining_time": "0:17:38"}
35
+ {"current_steps": 300, "total_steps": 670, "loss": 0.1347, "lr": 6.746766175516159e-06, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:13:55", "remaining_time": "0:17:10"}
36
+ {"current_steps": 300, "total_steps": 670, "eval_loss": 1.8177189826965332, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:13:56", "remaining_time": "0:17:11"}
37
+ {"current_steps": 310, "total_steps": 670, "loss": 0.1326, "lr": 6.500423175001705e-06, "epoch": 4.6268656716417915, "percentage": 46.27, "elapsed_time": "0:14:23", "remaining_time": "0:16:43"}
38
+ {"current_steps": 320, "total_steps": 670, "loss": 0.1424, "lr": 6.2500084287822925e-06, "epoch": 4.776119402985074, "percentage": 47.76, "elapsed_time": "0:14:51", "remaining_time": "0:16:14"}
39
+ {"current_steps": 330, "total_steps": 670, "loss": 0.1489, "lr": 5.996201495254757e-06, "epoch": 4.925373134328359, "percentage": 49.25, "elapsed_time": "0:15:18", "remaining_time": "0:15:46"}
40
+ {"current_steps": 340, "total_steps": 670, "loss": 0.0922, "lr": 5.73969113830165e-06, "epoch": 5.074626865671641, "percentage": 50.75, "elapsed_time": "0:15:46", "remaining_time": "0:15:18"}
41
+ {"current_steps": 350, "total_steps": 670, "loss": 0.0857, "lr": 5.481173458170952e-06, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:16:14", "remaining_time": "0:14:50"}
42
+ {"current_steps": 350, "total_steps": 670, "eval_loss": 1.977053165435791, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:16:15", "remaining_time": "0:14:51"}
43
+ {"current_steps": 360, "total_steps": 670, "loss": 0.0723, "lr": 5.221350002446882e-06, "epoch": 5.373134328358209, "percentage": 53.73, "elapsed_time": "0:16:42", "remaining_time": "0:14:23"}
44
+ {"current_steps": 370, "total_steps": 670, "loss": 0.0693, "lr": 4.96092586223808e-06, "epoch": 5.522388059701493, "percentage": 55.22, "elapsed_time": "0:17:10", "remaining_time": "0:13:55"}
45
+ {"current_steps": 380, "total_steps": 670, "loss": 0.0978, "lr": 4.700607758749626e-06, "epoch": 5.6716417910447765, "percentage": 56.72, "elapsed_time": "0:17:37", "remaining_time": "0:13:27"}
46
+ {"current_steps": 390, "total_steps": 670, "loss": 0.0792, "lr": 4.441102125431398e-06, "epoch": 5.82089552238806, "percentage": 58.21, "elapsed_time": "0:18:05", "remaining_time": "0:12:59"}
47
+ {"current_steps": 400, "total_steps": 670, "loss": 0.0709, "lr": 4.183113190907349e-06, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:18:32", "remaining_time": "0:12:31"}
48
+ {"current_steps": 400, "total_steps": 670, "eval_loss": 1.9007922410964966, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:18:33", "remaining_time": "0:12:31"}
49
+ {"current_steps": 410, "total_steps": 670, "loss": 0.0309, "lr": 3.927341067888065e-06, "epoch": 6.119402985074627, "percentage": 61.19, "elapsed_time": "0:19:01", "remaining_time": "0:12:03"}
50
+ {"current_steps": 420, "total_steps": 670, "loss": 0.0318, "lr": 3.6744798532528137e-06, "epoch": 6.268656716417911, "percentage": 62.69, "elapsed_time": "0:19:29", "remaining_time": "0:11:35"}
51
+ {"current_steps": 430, "total_steps": 670, "loss": 0.0388, "lr": 3.4252157444569478e-06, "epoch": 6.417910447761194, "percentage": 64.18, "elapsed_time": "0:19:56", "remaining_time": "0:11:07"}
52
+ {"current_steps": 440, "total_steps": 670, "loss": 0.0549, "lr": 3.1802251773762294e-06, "epoch": 6.567164179104478, "percentage": 65.67, "elapsed_time": "0:20:24", "remaining_time": "0:10:40"}
53
+ {"current_steps": 450, "total_steps": 670, "loss": 0.0474, "lr": 2.9401729906414385e-06, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:20:51", "remaining_time": "0:10:11"}
54
+ {"current_steps": 450, "total_steps": 670, "eval_loss": 2.131742238998413, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:20:52", "remaining_time": "0:10:12"}
55
+ {"current_steps": 460, "total_steps": 670, "loss": 0.0502, "lr": 2.7057106214448216e-06, "epoch": 6.865671641791045, "percentage": 68.66, "elapsed_time": "0:21:20", "remaining_time": "0:09:44"}
56
+ {"current_steps": 470, "total_steps": 670, "loss": 0.0228, "lr": 2.4774743377144265e-06, "epoch": 7.014925373134329, "percentage": 70.15, "elapsed_time": "0:21:47", "remaining_time": "0:09:16"}
57
+ {"current_steps": 480, "total_steps": 670, "loss": 0.0145, "lr": 2.256083511453747e-06, "epoch": 7.164179104477612, "percentage": 71.64, "elapsed_time": "0:22:15", "remaining_time": "0:08:48"}
58
+ {"current_steps": 490, "total_steps": 670, "loss": 0.019, "lr": 2.042138937932388e-06, "epoch": 7.313432835820896, "percentage": 73.13, "elapsed_time": "0:22:43", "remaining_time": "0:08:20"}
59
+ {"current_steps": 500, "total_steps": 670, "loss": 0.0286, "lr": 1.8362212052889827e-06, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:23:10", "remaining_time": "0:07:52"}
60
+ {"current_steps": 500, "total_steps": 670, "eval_loss": 2.2198660373687744, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:23:11", "remaining_time": "0:07:53"}
61
+ {"current_steps": 510, "total_steps": 670, "loss": 0.0177, "lr": 1.63888911897084e-06, "epoch": 7.611940298507463, "percentage": 76.12, "elapsed_time": "0:30:31", "remaining_time": "0:09:34"}
62
+ {"current_steps": 520, "total_steps": 670, "loss": 0.0247, "lr": 1.4506781852859836e-06, "epoch": 7.7611940298507465, "percentage": 77.61, "elapsed_time": "0:30:59", "remaining_time": "0:08:56"}
63
+ {"current_steps": 530, "total_steps": 670, "loss": 0.0129, "lr": 1.2720991581827852e-06, "epoch": 7.91044776119403, "percentage": 79.1, "elapsed_time": "0:31:27", "remaining_time": "0:08:18"}
64
+ {"current_steps": 540, "total_steps": 670, "loss": 0.0118, "lr": 1.1036366532008552e-06, "epoch": 8.059701492537313, "percentage": 80.6, "elapsed_time": "0:31:54", "remaining_time": "0:07:40"}
65
+ {"current_steps": 550, "total_steps": 670, "loss": 0.0091, "lr": 9.457478323545749e-07, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:32:22", "remaining_time": "0:07:03"}
66
+ {"current_steps": 550, "total_steps": 670, "eval_loss": 2.2086477279663086, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:32:23", "remaining_time": "0:07:03"}
67
+ {"current_steps": 560, "total_steps": 670, "loss": 0.0065, "lr": 7.988611635181099e-07, "epoch": 8.35820895522388, "percentage": 83.58, "elapsed_time": "0:32:50", "remaining_time": "0:06:27"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5522a26d0ea084dfea6541a46f36f957646e0f988a03b90dd8f90f36828d71e4
3
  size 7480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ed1b89716b504a52e9efea3dde4f165b265ad26a53808b448c673cf23ec81cb
3
  size 7480