Training in progress, step 500
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +67 -81
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4976698672
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80a75922bbe71cf04eeae60f8faa44e32811e7a0dbef5da46903c9c052a49fa9
|
3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4999802720
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ce1d7974b1ac53127e4393a19a887614a23d7ff63ae64b8cef4b45d0d5b9a48
|
3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4915916176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6b440d3810a01f165fa6acc0ba880bd85ac7d6c5dc570a254613a37cccb498e
|
3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1168138808
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f277fabaf661bf9352705206db6161334a7daae21382c22e995adcf8e766bcdc
|
3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
@@ -1,81 +1,67 @@
|
|
1 |
-
{"current_steps": 10, "total_steps": 670, "loss": 1.
|
2 |
-
{"current_steps": 20, "total_steps": 670, "loss": 1.
|
3 |
-
{"current_steps": 30, "total_steps": 670, "loss": 0.
|
4 |
-
{"current_steps": 40, "total_steps": 670, "loss": 1.
|
5 |
-
{"current_steps": 50, "total_steps": 670, "loss": 1.
|
6 |
-
{"current_steps": 50, "total_steps": 670, "eval_loss": 1.
|
7 |
-
{"current_steps": 60, "total_steps": 670, "loss": 1.
|
8 |
-
{"current_steps": 70, "total_steps": 670, "loss": 0.
|
9 |
-
{"current_steps": 80, "total_steps": 670, "loss": 0.
|
10 |
-
{"current_steps": 90, "total_steps": 670, "loss": 0.
|
11 |
-
{"current_steps": 100, "total_steps": 670, "loss": 0.
|
12 |
-
{"current_steps": 100, "total_steps": 670, "eval_loss": 1.
|
13 |
-
{"current_steps": 110, "total_steps": 670, "loss": 0.
|
14 |
-
{"current_steps": 120, "total_steps": 670, "loss": 0.
|
15 |
-
{"current_steps": 130, "total_steps": 670, "loss": 0.
|
16 |
-
{"current_steps": 140, "total_steps": 670, "loss": 0.
|
17 |
-
{"current_steps": 150, "total_steps": 670, "loss": 0.
|
18 |
-
{"current_steps": 150, "total_steps": 670, "eval_loss": 1.
|
19 |
-
{"current_steps": 160, "total_steps": 670, "loss": 0.
|
20 |
-
{"current_steps": 170, "total_steps": 670, "loss": 0.
|
21 |
-
{"current_steps": 180, "total_steps": 670, "loss": 0.
|
22 |
-
{"current_steps": 190, "total_steps": 670, "loss": 0.
|
23 |
-
{"current_steps": 200, "total_steps": 670, "loss": 0.
|
24 |
-
{"current_steps": 200, "total_steps": 670, "eval_loss": 1.
|
25 |
-
{"current_steps": 210, "total_steps": 670, "loss": 0.
|
26 |
-
{"current_steps": 220, "total_steps": 670, "loss": 0.
|
27 |
-
{"current_steps": 230, "total_steps": 670, "loss": 0.
|
28 |
-
{"current_steps": 240, "total_steps": 670, "loss": 0.
|
29 |
-
{"current_steps": 250, "total_steps": 670, "loss": 0.
|
30 |
-
{"current_steps": 250, "total_steps": 670, "eval_loss": 1.
|
31 |
-
{"current_steps": 260, "total_steps": 670, "loss": 0.
|
32 |
-
{"current_steps": 270, "total_steps": 670, "loss": 0.
|
33 |
-
{"current_steps": 280, "total_steps": 670, "loss": 0.
|
34 |
-
{"current_steps": 290, "total_steps": 670, "loss": 0.
|
35 |
-
{"current_steps": 300, "total_steps": 670, "loss": 0.
|
36 |
-
{"current_steps": 300, "total_steps": 670, "eval_loss": 1.
|
37 |
-
{"current_steps": 310, "total_steps": 670, "loss": 0.
|
38 |
-
{"current_steps": 320, "total_steps": 670, "loss": 0.
|
39 |
-
{"current_steps": 330, "total_steps": 670, "loss": 0.
|
40 |
-
{"current_steps": 340, "total_steps": 670, "loss": 0.
|
41 |
-
{"current_steps": 350, "total_steps": 670, "loss": 0.
|
42 |
-
{"current_steps": 350, "total_steps": 670, "eval_loss":
|
43 |
-
{"current_steps": 360, "total_steps": 670, "loss": 0.
|
44 |
-
{"current_steps": 370, "total_steps": 670, "loss": 0.
|
45 |
-
{"current_steps": 380, "total_steps": 670, "loss": 0.
|
46 |
-
{"current_steps": 390, "total_steps": 670, "loss": 0.
|
47 |
-
{"current_steps": 400, "total_steps": 670, "loss": 0.
|
48 |
-
{"current_steps": 400, "total_steps": 670, "eval_loss": 1.
|
49 |
-
{"current_steps": 410, "total_steps": 670, "loss": 0.
|
50 |
-
{"current_steps": 420, "total_steps": 670, "loss": 0.
|
51 |
-
{"current_steps": 430, "total_steps": 670, "loss": 0.
|
52 |
-
{"current_steps": 440, "total_steps": 670, "loss": 0.
|
53 |
-
{"current_steps": 450, "total_steps": 670, "loss": 0.
|
54 |
-
{"current_steps": 450, "total_steps": 670, "eval_loss": 2.
|
55 |
-
{"current_steps": 460, "total_steps": 670, "loss": 0.
|
56 |
-
{"current_steps": 470, "total_steps": 670, "loss": 0.
|
57 |
-
{"current_steps": 480, "total_steps": 670, "loss": 0.
|
58 |
-
{"current_steps": 490, "total_steps": 670, "loss": 0.
|
59 |
-
{"current_steps": 500, "total_steps": 670, "loss": 0.
|
60 |
-
{"current_steps": 500, "total_steps": 670, "eval_loss": 2.
|
61 |
-
{"current_steps": 510, "total_steps": 670, "loss": 0.
|
62 |
-
{"current_steps": 520, "total_steps": 670, "loss": 0.
|
63 |
-
{"current_steps": 530, "total_steps": 670, "loss": 0.
|
64 |
-
{"current_steps": 540, "total_steps": 670, "loss": 0.
|
65 |
-
{"current_steps": 550, "total_steps": 670, "loss": 0.
|
66 |
-
{"current_steps": 550, "total_steps": 670, "eval_loss": 2.
|
67 |
-
{"current_steps": 560, "total_steps": 670, "loss": 0.
|
68 |
-
{"current_steps": 570, "total_steps": 670, "loss": 0.012, "lr": 6.633752576786251e-07, "epoch": 8.507462686567164, "percentage": 85.07, "elapsed_time": "0:27:54", "remaining_time": "0:04:53"}
|
69 |
-
{"current_steps": 580, "total_steps": 670, "loss": 0.0071, "lr": 5.396577872130676e-07, "epoch": 8.656716417910447, "percentage": 86.57, "elapsed_time": "0:28:22", "remaining_time": "0:04:24"}
|
70 |
-
{"current_steps": 590, "total_steps": 670, "loss": 0.0036, "lr": 4.2804448812404754e-07, "epoch": 8.805970149253731, "percentage": 88.06, "elapsed_time": "0:28:50", "remaining_time": "0:03:54"}
|
71 |
-
{"current_steps": 600, "total_steps": 670, "loss": 0.0049, "lr": 3.288382489424502e-07, "epoch": 8.955223880597014, "percentage": 89.55, "elapsed_time": "0:29:18", "remaining_time": "0:03:25"}
|
72 |
-
{"current_steps": 600, "total_steps": 670, "eval_loss": 2.3008365631103516, "epoch": 8.955223880597014, "percentage": 89.55, "elapsed_time": "0:29:19", "remaining_time": "0:03:25"}
|
73 |
-
{"current_steps": 610, "total_steps": 670, "loss": 0.0051, "lr": 2.4230828876927293e-07, "epoch": 9.104477611940299, "percentage": 91.04, "elapsed_time": "0:29:47", "remaining_time": "0:02:55"}
|
74 |
-
{"current_steps": 620, "total_steps": 670, "loss": 0.0032, "lr": 1.6868942668726408e-07, "epoch": 9.253731343283581, "percentage": 92.54, "elapsed_time": "0:30:14", "remaining_time": "0:02:26"}
|
75 |
-
{"current_steps": 630, "total_steps": 670, "loss": 0.0063, "lr": 1.0818144452496293e-07, "epoch": 9.402985074626866, "percentage": 94.03, "elapsed_time": "0:30:41", "remaining_time": "0:01:56"}
|
76 |
-
{"current_steps": 640, "total_steps": 670, "loss": 0.0024, "lr": 6.094854470245326e-08, "epoch": 9.552238805970148, "percentage": 95.52, "elapsed_time": "0:31:08", "remaining_time": "0:01:27"}
|
77 |
-
{"current_steps": 650, "total_steps": 670, "loss": 0.0035, "lr": 2.711890463007405e-08, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:31:36", "remaining_time": "0:00:58"}
|
78 |
-
{"current_steps": 650, "total_steps": 670, "eval_loss": 2.3232414722442627, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:31:37", "remaining_time": "0:00:58"}
|
79 |
-
{"current_steps": 660, "total_steps": 670, "loss": 0.0035, "lr": 6.784328869339218e-09, "epoch": 9.850746268656717, "percentage": 98.51, "elapsed_time": "0:32:05", "remaining_time": "0:00:29"}
|
80 |
-
{"current_steps": 670, "total_steps": 670, "loss": 0.0021, "lr": 0.0, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:32:33", "remaining_time": "0:00:00"}
|
81 |
-
{"current_steps": 670, "total_steps": 670, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:35:28", "remaining_time": "0:00:00"}
|
|
|
1 |
+
{"current_steps": 10, "total_steps": 670, "loss": 1.9213, "lr": 1.4925373134328358e-06, "epoch": 0.14925373134328357, "percentage": 1.49, "elapsed_time": "0:00:29", "remaining_time": "0:32:49"}
|
2 |
+
{"current_steps": 20, "total_steps": 670, "loss": 1.151, "lr": 2.9850746268656716e-06, "epoch": 0.29850746268656714, "percentage": 2.99, "elapsed_time": "0:00:57", "remaining_time": "0:30:55"}
|
3 |
+
{"current_steps": 30, "total_steps": 670, "loss": 0.9953, "lr": 4.477611940298508e-06, "epoch": 0.44776119402985076, "percentage": 4.48, "elapsed_time": "0:01:24", "remaining_time": "0:30:09"}
|
4 |
+
{"current_steps": 40, "total_steps": 670, "loss": 1.0143, "lr": 5.970149253731343e-06, "epoch": 0.5970149253731343, "percentage": 5.97, "elapsed_time": "0:01:52", "remaining_time": "0:29:28"}
|
5 |
+
{"current_steps": 50, "total_steps": 670, "loss": 1.0534, "lr": 7.46268656716418e-06, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:20", "remaining_time": "0:28:57"}
|
6 |
+
{"current_steps": 50, "total_steps": 670, "eval_loss": 1.2635215520858765, "epoch": 0.746268656716418, "percentage": 7.46, "elapsed_time": "0:02:21", "remaining_time": "0:29:10"}
|
7 |
+
{"current_steps": 60, "total_steps": 670, "loss": 1.0096, "lr": 8.955223880597016e-06, "epoch": 0.8955223880597015, "percentage": 8.96, "elapsed_time": "0:02:48", "remaining_time": "0:28:33"}
|
8 |
+
{"current_steps": 70, "total_steps": 670, "loss": 0.9688, "lr": 9.999389284703265e-06, "epoch": 1.044776119402985, "percentage": 10.45, "elapsed_time": "0:03:16", "remaining_time": "0:28:00"}
|
9 |
+
{"current_steps": 80, "total_steps": 670, "loss": 0.711, "lr": 9.988536273658876e-06, "epoch": 1.1940298507462686, "percentage": 11.94, "elapsed_time": "0:03:43", "remaining_time": "0:27:28"}
|
10 |
+
{"current_steps": 90, "total_steps": 670, "loss": 0.7263, "lr": 9.964145714351633e-06, "epoch": 1.3432835820895521, "percentage": 13.43, "elapsed_time": "0:04:11", "remaining_time": "0:26:57"}
|
11 |
+
{"current_steps": 100, "total_steps": 670, "loss": 0.8118, "lr": 9.926283796211796e-06, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:04:38", "remaining_time": "0:26:28"}
|
12 |
+
{"current_steps": 100, "total_steps": 670, "eval_loss": 1.3804577589035034, "epoch": 1.4925373134328357, "percentage": 14.93, "elapsed_time": "0:04:39", "remaining_time": "0:26:34"}
|
13 |
+
{"current_steps": 110, "total_steps": 670, "loss": 0.8126, "lr": 9.87505326632108e-06, "epoch": 1.6417910447761193, "percentage": 16.42, "elapsed_time": "0:05:07", "remaining_time": "0:26:03"}
|
14 |
+
{"current_steps": 120, "total_steps": 670, "loss": 0.8099, "lr": 9.810593150584658e-06, "epoch": 1.7910447761194028, "percentage": 17.91, "elapsed_time": "0:05:34", "remaining_time": "0:25:34"}
|
15 |
+
{"current_steps": 130, "total_steps": 670, "loss": 0.804, "lr": 9.733078376452172e-06, "epoch": 1.9402985074626866, "percentage": 19.4, "elapsed_time": "0:06:02", "remaining_time": "0:25:03"}
|
16 |
+
{"current_steps": 140, "total_steps": 670, "loss": 0.4978, "lr": 9.642719298211602e-06, "epoch": 2.08955223880597, "percentage": 20.9, "elapsed_time": "0:06:30", "remaining_time": "0:24:38"}
|
17 |
+
{"current_steps": 150, "total_steps": 670, "loss": 0.3889, "lr": 9.539761126144193e-06, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:06:57", "remaining_time": "0:24:08"}
|
18 |
+
{"current_steps": 150, "total_steps": 670, "eval_loss": 1.6006965637207031, "epoch": 2.2388059701492535, "percentage": 22.39, "elapsed_time": "0:06:58", "remaining_time": "0:24:12"}
|
19 |
+
{"current_steps": 160, "total_steps": 670, "loss": 0.3846, "lr": 9.424483261089584e-06, "epoch": 2.388059701492537, "percentage": 23.88, "elapsed_time": "0:07:26", "remaining_time": "0:23:43"}
|
20 |
+
{"current_steps": 170, "total_steps": 670, "loss": 0.4125, "lr": 9.297198536226927e-06, "epoch": 2.5373134328358207, "percentage": 25.37, "elapsed_time": "0:07:54", "remaining_time": "0:23:14"}
|
21 |
+
{"current_steps": 180, "total_steps": 670, "loss": 0.4261, "lr": 9.158252368129628e-06, "epoch": 2.6865671641791042, "percentage": 26.87, "elapsed_time": "0:08:21", "remaining_time": "0:22:46"}
|
22 |
+
{"current_steps": 190, "total_steps": 670, "loss": 0.4488, "lr": 9.008021819397488e-06, "epoch": 2.835820895522388, "percentage": 28.36, "elapsed_time": "0:08:49", "remaining_time": "0:22:18"}
|
23 |
+
{"current_steps": 200, "total_steps": 670, "loss": 0.4361, "lr": 8.846914575410035e-06, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:09:17", "remaining_time": "0:21:50"}
|
24 |
+
{"current_steps": 200, "total_steps": 670, "eval_loss": 1.532719612121582, "epoch": 2.9850746268656714, "percentage": 29.85, "elapsed_time": "0:09:18", "remaining_time": "0:21:52"}
|
25 |
+
{"current_steps": 210, "total_steps": 670, "loss": 0.2399, "lr": 8.675367837977848e-06, "epoch": 3.1343283582089554, "percentage": 31.34, "elapsed_time": "0:09:46", "remaining_time": "0:21:24"}
|
26 |
+
{"current_steps": 220, "total_steps": 670, "loss": 0.2246, "lr": 8.49384713889421e-06, "epoch": 3.283582089552239, "percentage": 32.84, "elapsed_time": "0:10:14", "remaining_time": "0:20:56"}
|
27 |
+
{"current_steps": 230, "total_steps": 670, "loss": 0.2415, "lr": 8.302845076606786e-06, "epoch": 3.4328358208955225, "percentage": 34.33, "elapsed_time": "0:10:41", "remaining_time": "0:20:27"}
|
28 |
+
{"current_steps": 240, "total_steps": 670, "loss": 0.2615, "lr": 8.10287997943769e-06, "epoch": 3.582089552238806, "percentage": 35.82, "elapsed_time": "0:11:09", "remaining_time": "0:19:58"}
|
29 |
+
{"current_steps": 250, "total_steps": 670, "loss": 0.265, "lr": 7.894494498979558e-06, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:11:36", "remaining_time": "0:19:30"}
|
30 |
+
{"current_steps": 250, "total_steps": 670, "eval_loss": 1.6067496538162231, "epoch": 3.7313432835820897, "percentage": 37.31, "elapsed_time": "0:11:37", "remaining_time": "0:19:32"}
|
31 |
+
{"current_steps": 260, "total_steps": 670, "loss": 0.2371, "lr": 7.678254137484797e-06, "epoch": 3.8805970149253732, "percentage": 38.81, "elapsed_time": "0:12:05", "remaining_time": "0:19:03"}
|
32 |
+
{"current_steps": 270, "total_steps": 670, "loss": 0.2234, "lr": 7.4547457132442895e-06, "epoch": 4.029850746268656, "percentage": 40.3, "elapsed_time": "0:12:32", "remaining_time": "0:18:35"}
|
33 |
+
{"current_steps": 280, "total_steps": 670, "loss": 0.1273, "lr": 7.2245757681200835e-06, "epoch": 4.17910447761194, "percentage": 41.79, "elapsed_time": "0:13:00", "remaining_time": "0:18:07"}
|
34 |
+
{"current_steps": 290, "total_steps": 670, "loss": 0.1232, "lr": 6.988368921553601e-06, "epoch": 4.3283582089552235, "percentage": 43.28, "elapsed_time": "0:13:27", "remaining_time": "0:17:38"}
|
35 |
+
{"current_steps": 300, "total_steps": 670, "loss": 0.1347, "lr": 6.746766175516159e-06, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:13:55", "remaining_time": "0:17:10"}
|
36 |
+
{"current_steps": 300, "total_steps": 670, "eval_loss": 1.8177189826965332, "epoch": 4.477611940298507, "percentage": 44.78, "elapsed_time": "0:13:56", "remaining_time": "0:17:11"}
|
37 |
+
{"current_steps": 310, "total_steps": 670, "loss": 0.1326, "lr": 6.500423175001705e-06, "epoch": 4.6268656716417915, "percentage": 46.27, "elapsed_time": "0:14:23", "remaining_time": "0:16:43"}
|
38 |
+
{"current_steps": 320, "total_steps": 670, "loss": 0.1424, "lr": 6.2500084287822925e-06, "epoch": 4.776119402985074, "percentage": 47.76, "elapsed_time": "0:14:51", "remaining_time": "0:16:14"}
|
39 |
+
{"current_steps": 330, "total_steps": 670, "loss": 0.1489, "lr": 5.996201495254757e-06, "epoch": 4.925373134328359, "percentage": 49.25, "elapsed_time": "0:15:18", "remaining_time": "0:15:46"}
|
40 |
+
{"current_steps": 340, "total_steps": 670, "loss": 0.0922, "lr": 5.73969113830165e-06, "epoch": 5.074626865671641, "percentage": 50.75, "elapsed_time": "0:15:46", "remaining_time": "0:15:18"}
|
41 |
+
{"current_steps": 350, "total_steps": 670, "loss": 0.0857, "lr": 5.481173458170952e-06, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:16:14", "remaining_time": "0:14:50"}
|
42 |
+
{"current_steps": 350, "total_steps": 670, "eval_loss": 1.977053165435791, "epoch": 5.223880597014926, "percentage": 52.24, "elapsed_time": "0:16:15", "remaining_time": "0:14:51"}
|
43 |
+
{"current_steps": 360, "total_steps": 670, "loss": 0.0723, "lr": 5.221350002446882e-06, "epoch": 5.373134328358209, "percentage": 53.73, "elapsed_time": "0:16:42", "remaining_time": "0:14:23"}
|
44 |
+
{"current_steps": 370, "total_steps": 670, "loss": 0.0693, "lr": 4.96092586223808e-06, "epoch": 5.522388059701493, "percentage": 55.22, "elapsed_time": "0:17:10", "remaining_time": "0:13:55"}
|
45 |
+
{"current_steps": 380, "total_steps": 670, "loss": 0.0978, "lr": 4.700607758749626e-06, "epoch": 5.6716417910447765, "percentage": 56.72, "elapsed_time": "0:17:37", "remaining_time": "0:13:27"}
|
46 |
+
{"current_steps": 390, "total_steps": 670, "loss": 0.0792, "lr": 4.441102125431398e-06, "epoch": 5.82089552238806, "percentage": 58.21, "elapsed_time": "0:18:05", "remaining_time": "0:12:59"}
|
47 |
+
{"current_steps": 400, "total_steps": 670, "loss": 0.0709, "lr": 4.183113190907349e-06, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:18:32", "remaining_time": "0:12:31"}
|
48 |
+
{"current_steps": 400, "total_steps": 670, "eval_loss": 1.9007922410964966, "epoch": 5.970149253731344, "percentage": 59.7, "elapsed_time": "0:18:33", "remaining_time": "0:12:31"}
|
49 |
+
{"current_steps": 410, "total_steps": 670, "loss": 0.0309, "lr": 3.927341067888065e-06, "epoch": 6.119402985074627, "percentage": 61.19, "elapsed_time": "0:19:01", "remaining_time": "0:12:03"}
|
50 |
+
{"current_steps": 420, "total_steps": 670, "loss": 0.0318, "lr": 3.6744798532528137e-06, "epoch": 6.268656716417911, "percentage": 62.69, "elapsed_time": "0:19:29", "remaining_time": "0:11:35"}
|
51 |
+
{"current_steps": 430, "total_steps": 670, "loss": 0.0388, "lr": 3.4252157444569478e-06, "epoch": 6.417910447761194, "percentage": 64.18, "elapsed_time": "0:19:56", "remaining_time": "0:11:07"}
|
52 |
+
{"current_steps": 440, "total_steps": 670, "loss": 0.0549, "lr": 3.1802251773762294e-06, "epoch": 6.567164179104478, "percentage": 65.67, "elapsed_time": "0:20:24", "remaining_time": "0:10:40"}
|
53 |
+
{"current_steps": 450, "total_steps": 670, "loss": 0.0474, "lr": 2.9401729906414385e-06, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:20:51", "remaining_time": "0:10:11"}
|
54 |
+
{"current_steps": 450, "total_steps": 670, "eval_loss": 2.131742238998413, "epoch": 6.7164179104477615, "percentage": 67.16, "elapsed_time": "0:20:52", "remaining_time": "0:10:12"}
|
55 |
+
{"current_steps": 460, "total_steps": 670, "loss": 0.0502, "lr": 2.7057106214448216e-06, "epoch": 6.865671641791045, "percentage": 68.66, "elapsed_time": "0:21:20", "remaining_time": "0:09:44"}
|
56 |
+
{"current_steps": 470, "total_steps": 670, "loss": 0.0228, "lr": 2.4774743377144265e-06, "epoch": 7.014925373134329, "percentage": 70.15, "elapsed_time": "0:21:47", "remaining_time": "0:09:16"}
|
57 |
+
{"current_steps": 480, "total_steps": 670, "loss": 0.0145, "lr": 2.256083511453747e-06, "epoch": 7.164179104477612, "percentage": 71.64, "elapsed_time": "0:22:15", "remaining_time": "0:08:48"}
|
58 |
+
{"current_steps": 490, "total_steps": 670, "loss": 0.019, "lr": 2.042138937932388e-06, "epoch": 7.313432835820896, "percentage": 73.13, "elapsed_time": "0:22:43", "remaining_time": "0:08:20"}
|
59 |
+
{"current_steps": 500, "total_steps": 670, "loss": 0.0286, "lr": 1.8362212052889827e-06, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:23:10", "remaining_time": "0:07:52"}
|
60 |
+
{"current_steps": 500, "total_steps": 670, "eval_loss": 2.2198660373687744, "epoch": 7.462686567164179, "percentage": 74.63, "elapsed_time": "0:23:11", "remaining_time": "0:07:53"}
|
61 |
+
{"current_steps": 510, "total_steps": 670, "loss": 0.0177, "lr": 1.63888911897084e-06, "epoch": 7.611940298507463, "percentage": 76.12, "elapsed_time": "0:30:31", "remaining_time": "0:09:34"}
|
62 |
+
{"current_steps": 520, "total_steps": 670, "loss": 0.0247, "lr": 1.4506781852859836e-06, "epoch": 7.7611940298507465, "percentage": 77.61, "elapsed_time": "0:30:59", "remaining_time": "0:08:56"}
|
63 |
+
{"current_steps": 530, "total_steps": 670, "loss": 0.0129, "lr": 1.2720991581827852e-06, "epoch": 7.91044776119403, "percentage": 79.1, "elapsed_time": "0:31:27", "remaining_time": "0:08:18"}
|
64 |
+
{"current_steps": 540, "total_steps": 670, "loss": 0.0118, "lr": 1.1036366532008552e-06, "epoch": 8.059701492537313, "percentage": 80.6, "elapsed_time": "0:31:54", "remaining_time": "0:07:40"}
|
65 |
+
{"current_steps": 550, "total_steps": 670, "loss": 0.0091, "lr": 9.457478323545749e-07, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:32:22", "remaining_time": "0:07:03"}
|
66 |
+
{"current_steps": 550, "total_steps": 670, "eval_loss": 2.2086477279663086, "epoch": 8.208955223880597, "percentage": 82.09, "elapsed_time": "0:32:23", "remaining_time": "0:07:03"}
|
67 |
+
{"current_steps": 560, "total_steps": 670, "loss": 0.0065, "lr": 7.988611635181099e-07, "epoch": 8.35820895522388, "percentage": 83.58, "elapsed_time": "0:32:50", "remaining_time": "0:06:27"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7480
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ed1b89716b504a52e9efea3dde4f165b265ad26a53808b448c673cf23ec81cb
|
3 |
size 7480
|