Training in progress, step 400
Browse files- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- trainer_log.jsonl +68 -0
- training_args.bin +2 -2
adapter_config.json
CHANGED
@@ -20,12 +20,12 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
23 |
"q_proj",
|
|
|
|
|
24 |
"gate_proj",
|
25 |
-
"down_proj",
|
26 |
"k_proj",
|
27 |
-
"up_proj",
|
28 |
-
"o_proj",
|
29 |
"v_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"down_proj",
|
24 |
"q_proj",
|
25 |
+
"o_proj",
|
26 |
+
"up_proj",
|
27 |
"gate_proj",
|
|
|
28 |
"k_proj",
|
|
|
|
|
29 |
"v_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83945296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43ef35dda0f3a5a508eab117460ff6a331211e06b198dc6e1a315d6d8897b434
|
3 |
size 83945296
|
trainer_log.jsonl
CHANGED
@@ -38,3 +38,71 @@
|
|
38 |
{"current_steps": 190, "total_steps": 4421, "loss": 0.7579, "lr": 4.994649784450465e-05, "epoch": 0.04297022238974746, "percentage": 4.3, "elapsed_time": "0:16:31", "remaining_time": "6:07:54"}
|
39 |
{"current_steps": 195, "total_steps": 4421, "loss": 0.752, "lr": 4.994039045957182e-05, "epoch": 0.044101017715793445, "percentage": 4.41, "elapsed_time": "0:16:58", "remaining_time": "6:07:58"}
|
40 |
{"current_steps": 200, "total_steps": 4421, "loss": 0.7012, "lr": 4.993395348466544e-05, "epoch": 0.04523181304183943, "percentage": 4.52, "elapsed_time": "0:17:21", "remaining_time": "6:06:16"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
{"current_steps": 190, "total_steps": 4421, "loss": 0.7579, "lr": 4.994649784450465e-05, "epoch": 0.04297022238974746, "percentage": 4.3, "elapsed_time": "0:16:31", "remaining_time": "6:07:54"}
|
39 |
{"current_steps": 195, "total_steps": 4421, "loss": 0.752, "lr": 4.994039045957182e-05, "epoch": 0.044101017715793445, "percentage": 4.41, "elapsed_time": "0:16:58", "remaining_time": "6:07:58"}
|
40 |
{"current_steps": 200, "total_steps": 4421, "loss": 0.7012, "lr": 4.993395348466544e-05, "epoch": 0.04523181304183943, "percentage": 4.52, "elapsed_time": "0:17:21", "remaining_time": "6:06:16"}
|
41 |
+
{"current_steps": 205, "total_steps": 4421, "loss": 0.7246, "lr": 4.992718700485085e-05, "epoch": 0.046362608367885416, "percentage": 4.64, "elapsed_time": "0:18:12", "remaining_time": "6:14:24"}
|
42 |
+
{"current_steps": 210, "total_steps": 4421, "loss": 0.7249, "lr": 4.99200911095478e-05, "epoch": 0.047493403693931395, "percentage": 4.75, "elapsed_time": "0:18:41", "remaining_time": "6:14:50"}
|
43 |
+
{"current_steps": 215, "total_steps": 4421, "loss": 0.7001, "lr": 4.991266589252933e-05, "epoch": 0.04862419901997738, "percentage": 4.86, "elapsed_time": "0:19:10", "remaining_time": "6:15:03"}
|
44 |
+
{"current_steps": 220, "total_steps": 4421, "loss": 0.7714, "lr": 4.990491145192049e-05, "epoch": 0.049754994346023367, "percentage": 4.98, "elapsed_time": "0:19:35", "remaining_time": "6:14:05"}
|
45 |
+
{"current_steps": 225, "total_steps": 4421, "loss": 0.7339, "lr": 4.989682789019706e-05, "epoch": 0.05088578967206935, "percentage": 5.09, "elapsed_time": "0:20:02", "remaining_time": "6:13:45"}
|
46 |
+
{"current_steps": 230, "total_steps": 4421, "loss": 0.7191, "lr": 4.988841531418418e-05, "epoch": 0.05201658499811534, "percentage": 5.2, "elapsed_time": "0:20:30", "remaining_time": "6:13:35"}
|
47 |
+
{"current_steps": 235, "total_steps": 4421, "loss": 0.7314, "lr": 4.9879673835054955e-05, "epoch": 0.053147380324161324, "percentage": 5.32, "elapsed_time": "0:20:54", "remaining_time": "6:12:29"}
|
48 |
+
{"current_steps": 240, "total_steps": 4421, "loss": 0.7493, "lr": 4.9870603568328985e-05, "epoch": 0.05427817565020731, "percentage": 5.43, "elapsed_time": "0:21:20", "remaining_time": "6:11:40"}
|
49 |
+
{"current_steps": 245, "total_steps": 4421, "loss": 0.6636, "lr": 4.986120463387084e-05, "epoch": 0.055408970976253295, "percentage": 5.54, "elapsed_time": "0:21:47", "remaining_time": "6:11:24"}
|
50 |
+
{"current_steps": 250, "total_steps": 4421, "loss": 0.7574, "lr": 4.985147715588845e-05, "epoch": 0.05653976630229928, "percentage": 5.65, "elapsed_time": "0:22:15", "remaining_time": "6:11:25"}
|
51 |
+
{"current_steps": 255, "total_steps": 4421, "loss": 0.7548, "lr": 4.9841421262931506e-05, "epoch": 0.05767056162834527, "percentage": 5.77, "elapsed_time": "0:22:41", "remaining_time": "6:10:45"}
|
52 |
+
{"current_steps": 260, "total_steps": 4421, "loss": 0.7866, "lr": 4.983103708788972e-05, "epoch": 0.05880135695439125, "percentage": 5.88, "elapsed_time": "0:23:04", "remaining_time": "6:09:18"}
|
53 |
+
{"current_steps": 265, "total_steps": 4421, "loss": 0.8106, "lr": 4.98203247679911e-05, "epoch": 0.05993215228043724, "percentage": 5.99, "elapsed_time": "0:23:30", "remaining_time": "6:08:43"}
|
54 |
+
{"current_steps": 270, "total_steps": 4421, "loss": 0.773, "lr": 4.980928444480011e-05, "epoch": 0.061062947606483224, "percentage": 6.11, "elapsed_time": "0:23:55", "remaining_time": "6:07:50"}
|
55 |
+
{"current_steps": 275, "total_steps": 4421, "loss": 0.7594, "lr": 4.9797916264215824e-05, "epoch": 0.06219374293252921, "percentage": 6.22, "elapsed_time": "0:24:21", "remaining_time": "6:07:21"}
|
56 |
+
{"current_steps": 280, "total_steps": 4421, "loss": 0.7572, "lr": 4.978622037647e-05, "epoch": 0.0633245382585752, "percentage": 6.33, "elapsed_time": "0:24:49", "remaining_time": "6:07:02"}
|
57 |
+
{"current_steps": 285, "total_steps": 4421, "loss": 0.7991, "lr": 4.9774196936125056e-05, "epoch": 0.06445533358462119, "percentage": 6.45, "elapsed_time": "0:25:12", "remaining_time": "6:05:56"}
|
58 |
+
{"current_steps": 290, "total_steps": 4421, "loss": 0.7516, "lr": 4.9761846102072065e-05, "epoch": 0.06558612891066717, "percentage": 6.56, "elapsed_time": "0:25:38", "remaining_time": "6:05:11"}
|
59 |
+
{"current_steps": 295, "total_steps": 4421, "loss": 0.6836, "lr": 4.9749168037528635e-05, "epoch": 0.06671692423671316, "percentage": 6.67, "elapsed_time": "0:26:04", "remaining_time": "6:04:44"}
|
60 |
+
{"current_steps": 300, "total_steps": 4421, "loss": 0.7661, "lr": 4.9736162910036785e-05, "epoch": 0.06784771956275915, "percentage": 6.79, "elapsed_time": "0:26:27", "remaining_time": "6:03:21"}
|
61 |
+
{"current_steps": 305, "total_steps": 4421, "loss": 0.6895, "lr": 4.972283089146067e-05, "epoch": 0.06897851488880513, "percentage": 6.9, "elapsed_time": "0:26:52", "remaining_time": "6:02:42"}
|
62 |
+
{"current_steps": 310, "total_steps": 4421, "loss": 0.7341, "lr": 4.970917215798438e-05, "epoch": 0.07010931021485112, "percentage": 7.01, "elapsed_time": "0:27:17", "remaining_time": "6:02:00"}
|
63 |
+
{"current_steps": 315, "total_steps": 4421, "loss": 0.8324, "lr": 4.9695186890109567e-05, "epoch": 0.0712401055408971, "percentage": 7.13, "elapsed_time": "0:27:39", "remaining_time": "6:00:31"}
|
64 |
+
{"current_steps": 320, "total_steps": 4421, "loss": 0.711, "lr": 4.968087527265306e-05, "epoch": 0.07237090086694309, "percentage": 7.24, "elapsed_time": "0:28:06", "remaining_time": "6:00:17"}
|
65 |
+
{"current_steps": 325, "total_steps": 4421, "loss": 0.6997, "lr": 4.966623749474445e-05, "epoch": 0.07350169619298907, "percentage": 7.35, "elapsed_time": "0:28:42", "remaining_time": "6:01:47"}
|
66 |
+
{"current_steps": 330, "total_steps": 4421, "loss": 0.8236, "lr": 4.9651273749823546e-05, "epoch": 0.07463249151903506, "percentage": 7.46, "elapsed_time": "0:29:09", "remaining_time": "6:01:22"}
|
67 |
+
{"current_steps": 335, "total_steps": 4421, "loss": 0.7015, "lr": 4.963598423563788e-05, "epoch": 0.07576328684508105, "percentage": 7.58, "elapsed_time": "0:29:38", "remaining_time": "6:01:28"}
|
68 |
+
{"current_steps": 340, "total_steps": 4421, "loss": 0.7019, "lr": 4.962036915424004e-05, "epoch": 0.07689408217112703, "percentage": 7.69, "elapsed_time": "0:30:04", "remaining_time": "6:00:58"}
|
69 |
+
{"current_steps": 205, "total_steps": 4421, "loss": 0.7247, "lr": 4.992718700485085e-05, "epoch": 0.046362608367885416, "percentage": 4.64, "elapsed_time": "0:00:27", "remaining_time": "0:09:19"}
|
70 |
+
{"current_steps": 210, "total_steps": 4421, "loss": 0.7247, "lr": 4.99200911095478e-05, "epoch": 0.047493403693931395, "percentage": 4.75, "elapsed_time": "0:00:56", "remaining_time": "0:18:56"}
|
71 |
+
{"current_steps": 215, "total_steps": 4421, "loss": 0.7001, "lr": 4.991266589252933e-05, "epoch": 0.04862419901997738, "percentage": 4.86, "elapsed_time": "0:01:25", "remaining_time": "0:27:53"}
|
72 |
+
{"current_steps": 220, "total_steps": 4421, "loss": 0.7714, "lr": 4.990491145192049e-05, "epoch": 0.049754994346023367, "percentage": 4.98, "elapsed_time": "0:01:50", "remaining_time": "0:35:14"}
|
73 |
+
{"current_steps": 225, "total_steps": 4421, "loss": 0.7338, "lr": 4.989682789019706e-05, "epoch": 0.05088578967206935, "percentage": 5.09, "elapsed_time": "0:02:17", "remaining_time": "0:42:50"}
|
74 |
+
{"current_steps": 230, "total_steps": 4421, "loss": 0.719, "lr": 4.988841531418418e-05, "epoch": 0.05201658499811534, "percentage": 5.2, "elapsed_time": "0:02:45", "remaining_time": "0:50:17"}
|
75 |
+
{"current_steps": 235, "total_steps": 4421, "loss": 0.7315, "lr": 4.9879673835054955e-05, "epoch": 0.053147380324161324, "percentage": 5.32, "elapsed_time": "0:03:10", "remaining_time": "0:56:28"}
|
76 |
+
{"current_steps": 240, "total_steps": 4421, "loss": 0.7495, "lr": 4.9870603568328985e-05, "epoch": 0.05427817565020731, "percentage": 5.43, "elapsed_time": "0:03:35", "remaining_time": "1:02:37"}
|
77 |
+
{"current_steps": 245, "total_steps": 4421, "loss": 0.6637, "lr": 4.986120463387084e-05, "epoch": 0.055408970976253295, "percentage": 5.54, "elapsed_time": "0:04:03", "remaining_time": "1:09:04"}
|
78 |
+
{"current_steps": 250, "total_steps": 4421, "loss": 0.7571, "lr": 4.985147715588845e-05, "epoch": 0.05653976630229928, "percentage": 5.65, "elapsed_time": "0:04:31", "remaining_time": "1:15:30"}
|
79 |
+
{"current_steps": 255, "total_steps": 4421, "loss": 0.7551, "lr": 4.9841421262931506e-05, "epoch": 0.05767056162834527, "percentage": 5.77, "elapsed_time": "0:04:57", "remaining_time": "1:21:02"}
|
80 |
+
{"current_steps": 260, "total_steps": 4421, "loss": 0.7863, "lr": 4.983103708788972e-05, "epoch": 0.05880135695439125, "percentage": 5.88, "elapsed_time": "0:05:20", "remaining_time": "1:25:31"}
|
81 |
+
{"current_steps": 265, "total_steps": 4421, "loss": 0.8106, "lr": 4.98203247679911e-05, "epoch": 0.05993215228043724, "percentage": 5.99, "elapsed_time": "0:05:46", "remaining_time": "1:30:40"}
|
82 |
+
{"current_steps": 270, "total_steps": 4421, "loss": 0.7729, "lr": 4.980928444480011e-05, "epoch": 0.061062947606483224, "percentage": 6.11, "elapsed_time": "0:06:11", "remaining_time": "1:35:17"}
|
83 |
+
{"current_steps": 275, "total_steps": 4421, "loss": 0.7593, "lr": 4.9797916264215824e-05, "epoch": 0.06219374293252921, "percentage": 6.22, "elapsed_time": "0:06:38", "remaining_time": "1:40:05"}
|
84 |
+
{"current_steps": 280, "total_steps": 4421, "loss": 0.7574, "lr": 4.978622037647e-05, "epoch": 0.0633245382585752, "percentage": 6.33, "elapsed_time": "0:07:05", "remaining_time": "1:44:52"}
|
85 |
+
{"current_steps": 285, "total_steps": 4421, "loss": 0.799, "lr": 4.9774196936125056e-05, "epoch": 0.06445533358462119, "percentage": 6.45, "elapsed_time": "0:07:29", "remaining_time": "1:48:42"}
|
86 |
+
{"current_steps": 290, "total_steps": 4421, "loss": 0.7519, "lr": 4.9761846102072065e-05, "epoch": 0.06558612891066717, "percentage": 6.56, "elapsed_time": "0:07:54", "remaining_time": "1:52:42"}
|
87 |
+
{"current_steps": 295, "total_steps": 4421, "loss": 0.6837, "lr": 4.9749168037528635e-05, "epoch": 0.06671692423671316, "percentage": 6.67, "elapsed_time": "0:08:21", "remaining_time": "1:56:53"}
|
88 |
+
{"current_steps": 300, "total_steps": 4421, "loss": 0.7662, "lr": 4.9736162910036785e-05, "epoch": 0.06784771956275915, "percentage": 6.79, "elapsed_time": "0:08:43", "remaining_time": "1:59:56"}
|
89 |
+
{"current_steps": 305, "total_steps": 4421, "loss": 0.6897, "lr": 4.972283089146067e-05, "epoch": 0.06897851488880513, "percentage": 6.9, "elapsed_time": "0:09:09", "remaining_time": "2:03:35"}
|
90 |
+
{"current_steps": 310, "total_steps": 4421, "loss": 0.7344, "lr": 4.970917215798438e-05, "epoch": 0.07010931021485112, "percentage": 7.01, "elapsed_time": "0:09:34", "remaining_time": "2:07:01"}
|
91 |
+
{"current_steps": 315, "total_steps": 4421, "loss": 0.832, "lr": 4.9695186890109567e-05, "epoch": 0.0712401055408971, "percentage": 7.13, "elapsed_time": "0:09:56", "remaining_time": "2:09:34"}
|
92 |
+
{"current_steps": 320, "total_steps": 4421, "loss": 0.7113, "lr": 4.968087527265306e-05, "epoch": 0.07237090086694309, "percentage": 7.24, "elapsed_time": "0:10:23", "remaining_time": "2:13:13"}
|
93 |
+
{"current_steps": 325, "total_steps": 4421, "loss": 0.6996, "lr": 4.966623749474445e-05, "epoch": 0.07350169619298907, "percentage": 7.35, "elapsed_time": "0:10:53", "remaining_time": "2:17:11"}
|
94 |
+
{"current_steps": 330, "total_steps": 4421, "loss": 0.8236, "lr": 4.9651273749823546e-05, "epoch": 0.07463249151903506, "percentage": 7.46, "elapsed_time": "0:11:19", "remaining_time": "2:20:28"}
|
95 |
+
{"current_steps": 335, "total_steps": 4421, "loss": 0.7012, "lr": 4.963598423563788e-05, "epoch": 0.07576328684508105, "percentage": 7.58, "elapsed_time": "0:11:49", "remaining_time": "2:24:09"}
|
96 |
+
{"current_steps": 340, "total_steps": 4421, "loss": 0.7018, "lr": 4.962036915424004e-05, "epoch": 0.07689408217112703, "percentage": 7.69, "elapsed_time": "0:12:15", "remaining_time": "2:27:07"}
|
97 |
+
{"current_steps": 345, "total_steps": 4421, "loss": 0.7084, "lr": 4.960442871198503e-05, "epoch": 0.07802487749717302, "percentage": 7.8, "elapsed_time": "0:12:41", "remaining_time": "2:30:01"}
|
98 |
+
{"current_steps": 350, "total_steps": 4421, "loss": 0.7217, "lr": 4.958816311952752e-05, "epoch": 0.079155672823219, "percentage": 7.92, "elapsed_time": "0:13:05", "remaining_time": "2:32:11"}
|
99 |
+
{"current_steps": 355, "total_steps": 4421, "loss": 0.7616, "lr": 4.95715725918191e-05, "epoch": 0.08028646814926499, "percentage": 8.03, "elapsed_time": "0:13:32", "remaining_time": "2:35:05"}
|
100 |
+
{"current_steps": 360, "total_steps": 4421, "loss": 0.7061, "lr": 4.9554657348105385e-05, "epoch": 0.08141726347531097, "percentage": 8.14, "elapsed_time": "0:13:58", "remaining_time": "2:37:41"}
|
101 |
+
{"current_steps": 365, "total_steps": 4421, "loss": 0.7809, "lr": 4.953741761192317e-05, "epoch": 0.08254805880135696, "percentage": 8.26, "elapsed_time": "0:14:26", "remaining_time": "2:40:24"}
|
102 |
+
{"current_steps": 370, "total_steps": 4421, "loss": 0.7282, "lr": 4.9519853611097434e-05, "epoch": 0.08367885412740295, "percentage": 8.37, "elapsed_time": "0:14:53", "remaining_time": "2:43:05"}
|
103 |
+
{"current_steps": 375, "total_steps": 4421, "loss": 0.7262, "lr": 4.950196557773837e-05, "epoch": 0.08480964945344893, "percentage": 8.48, "elapsed_time": "0:15:16", "remaining_time": "2:44:45"}
|
104 |
+
{"current_steps": 380, "total_steps": 4421, "loss": 0.7346, "lr": 4.948375374823828e-05, "epoch": 0.08594044477949492, "percentage": 8.6, "elapsed_time": "0:15:42", "remaining_time": "2:46:57"}
|
105 |
+
{"current_steps": 385, "total_steps": 4421, "loss": 0.6768, "lr": 4.946521836326847e-05, "epoch": 0.0870712401055409, "percentage": 8.71, "elapsed_time": "0:16:09", "remaining_time": "2:49:20"}
|
106 |
+
{"current_steps": 390, "total_steps": 4421, "loss": 0.7277, "lr": 4.9446359667776065e-05, "epoch": 0.08820203543158689, "percentage": 8.82, "elapsed_time": "0:16:34", "remaining_time": "2:51:15"}
|
107 |
+
{"current_steps": 395, "total_steps": 4421, "loss": 0.7481, "lr": 4.9427177910980794e-05, "epoch": 0.08933283075763288, "percentage": 8.93, "elapsed_time": "0:17:00", "remaining_time": "2:53:17"}
|
108 |
+
{"current_steps": 400, "total_steps": 4421, "loss": 0.7077, "lr": 4.9407673346371644e-05, "epoch": 0.09046362608367886, "percentage": 9.05, "elapsed_time": "0:17:25", "remaining_time": "2:55:12"}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c29b6114cee3a1eb0c6657320d373e2561ec03a011bc688ec4cc2b0b164a6831
|
3 |
+
size 5816
|