Model save
Browse files- README.md +193 -0
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- training_args.bin +2 -2
README.md
CHANGED
@@ -19,6 +19,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
19 |
# TWON-Agent-OSN-Replies-de
|
20 |
|
21 |
This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the generator dataset.
|
|
|
|
|
22 |
|
23 |
## Model description
|
24 |
|
@@ -47,6 +49,197 @@ The following hyperparameters were used during training:
|
|
47 |
|
48 |
### Training results
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
|
52 |
### Framework versions
|
|
|
19 |
# TWON-Agent-OSN-Replies-de
|
20 |
|
21 |
This model is a fine-tuned version of [meta-llama/Llama-3.2-3B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) on the generator dataset.
|
22 |
+
It achieves the following results on the evaluation set:
|
23 |
+
- Loss: 0.5430
|
24 |
|
25 |
## Model description
|
26 |
|
|
|
49 |
|
50 |
### Training results
|
51 |
|
52 |
+
| Training Loss | Epoch | Step | Validation Loss |
|
53 |
+
|:-------------:|:------:|:-----:|:---------------:|
|
54 |
+
| 2.5602 | 0.0423 | 200 | 2.3015 |
|
55 |
+
| 2.1761 | 0.0845 | 400 | 2.0341 |
|
56 |
+
| 1.8998 | 0.1268 | 600 | 1.7808 |
|
57 |
+
| 1.6962 | 0.1691 | 800 | 1.6139 |
|
58 |
+
| 1.5494 | 0.2114 | 1000 | 1.4910 |
|
59 |
+
| 1.4478 | 0.2536 | 1200 | 1.4047 |
|
60 |
+
| 1.3865 | 0.2959 | 1400 | 1.3415 |
|
61 |
+
| 1.3407 | 0.3382 | 1600 | 1.2891 |
|
62 |
+
| 1.2808 | 0.3805 | 1800 | 1.2429 |
|
63 |
+
| 1.2365 | 0.4227 | 2000 | 1.2054 |
|
64 |
+
| 1.2058 | 0.4650 | 2200 | 1.1766 |
|
65 |
+
| 1.1799 | 0.5073 | 2400 | 1.1484 |
|
66 |
+
| 1.147 | 0.5496 | 2600 | 1.1282 |
|
67 |
+
| 1.1235 | 0.5918 | 2800 | 1.1087 |
|
68 |
+
| 1.1228 | 0.6341 | 3000 | 1.0923 |
|
69 |
+
| 1.0935 | 0.6764 | 3200 | 1.0765 |
|
70 |
+
| 1.0813 | 0.7187 | 3400 | 1.0629 |
|
71 |
+
| 1.0671 | 0.7609 | 3600 | 1.0504 |
|
72 |
+
| 1.0448 | 0.8032 | 3800 | 1.0390 |
|
73 |
+
| 1.041 | 0.8455 | 4000 | 1.0293 |
|
74 |
+
| 1.0167 | 0.8878 | 4200 | 1.0196 |
|
75 |
+
| 1.0293 | 0.9300 | 4400 | 1.0091 |
|
76 |
+
| 1.0254 | 0.9723 | 4600 | 1.0002 |
|
77 |
+
| 1.0053 | 1.0146 | 4800 | 0.9917 |
|
78 |
+
| 0.9957 | 1.0569 | 5000 | 0.9833 |
|
79 |
+
| 0.9976 | 1.0991 | 5200 | 0.9772 |
|
80 |
+
| 0.9891 | 1.1414 | 5400 | 0.9708 |
|
81 |
+
| 0.9648 | 1.1837 | 5600 | 0.9622 |
|
82 |
+
| 0.9711 | 1.2260 | 5800 | 0.9551 |
|
83 |
+
| 0.9569 | 1.2682 | 6000 | 0.9497 |
|
84 |
+
| 0.9526 | 1.3105 | 6200 | 0.9433 |
|
85 |
+
| 0.9571 | 1.3528 | 6400 | 0.9362 |
|
86 |
+
| 0.9265 | 1.3951 | 6600 | 0.9310 |
|
87 |
+
| 0.9274 | 1.4373 | 6800 | 0.9229 |
|
88 |
+
| 0.929 | 1.4796 | 7000 | 0.9190 |
|
89 |
+
| 0.9224 | 1.5219 | 7200 | 0.9128 |
|
90 |
+
| 0.9051 | 1.5642 | 7400 | 0.9071 |
|
91 |
+
| 0.9066 | 1.6064 | 7600 | 0.9014 |
|
92 |
+
| 0.9067 | 1.6487 | 7800 | 0.8962 |
|
93 |
+
| 0.8986 | 1.6910 | 8000 | 0.8905 |
|
94 |
+
| 0.8967 | 1.7332 | 8200 | 0.8847 |
|
95 |
+
| 0.8883 | 1.7755 | 8400 | 0.8806 |
|
96 |
+
| 0.8844 | 1.8178 | 8600 | 0.8745 |
|
97 |
+
| 0.8833 | 1.8601 | 8800 | 0.8710 |
|
98 |
+
| 0.8805 | 1.9023 | 9000 | 0.8649 |
|
99 |
+
| 0.8722 | 1.9446 | 9200 | 0.8600 |
|
100 |
+
| 0.868 | 1.9869 | 9400 | 0.8552 |
|
101 |
+
| 0.8597 | 2.0292 | 9600 | 0.8499 |
|
102 |
+
| 0.8527 | 2.0714 | 9800 | 0.8456 |
|
103 |
+
| 0.8444 | 2.1137 | 10000 | 0.8410 |
|
104 |
+
| 0.8472 | 2.1560 | 10200 | 0.8363 |
|
105 |
+
| 0.8416 | 2.1983 | 10400 | 0.8321 |
|
106 |
+
| 0.8395 | 2.2405 | 10600 | 0.8271 |
|
107 |
+
| 0.8295 | 2.2828 | 10800 | 0.8221 |
|
108 |
+
| 0.8307 | 2.3251 | 11000 | 0.8173 |
|
109 |
+
| 0.8221 | 2.3674 | 11200 | 0.8130 |
|
110 |
+
| 0.8144 | 2.4096 | 11400 | 0.8086 |
|
111 |
+
| 0.8197 | 2.4519 | 11600 | 0.8048 |
|
112 |
+
| 0.8121 | 2.4942 | 11800 | 0.8003 |
|
113 |
+
| 0.8038 | 2.5365 | 12000 | 0.7953 |
|
114 |
+
| 0.8007 | 2.5787 | 12200 | 0.7917 |
|
115 |
+
| 0.808 | 2.6210 | 12400 | 0.7878 |
|
116 |
+
| 0.8047 | 2.6633 | 12600 | 0.7827 |
|
117 |
+
| 0.7908 | 2.7056 | 12800 | 0.7795 |
|
118 |
+
| 0.7963 | 2.7478 | 13000 | 0.7755 |
|
119 |
+
| 0.782 | 2.7901 | 13200 | 0.7718 |
|
120 |
+
| 0.7899 | 2.8324 | 13400 | 0.7664 |
|
121 |
+
| 0.7712 | 2.8747 | 13600 | 0.7623 |
|
122 |
+
| 0.7737 | 2.9169 | 13800 | 0.7581 |
|
123 |
+
| 0.784 | 2.9592 | 14000 | 0.7553 |
|
124 |
+
| 0.7661 | 3.0015 | 14200 | 0.7515 |
|
125 |
+
| 0.759 | 3.0438 | 14400 | 0.7478 |
|
126 |
+
| 0.7534 | 3.0860 | 14600 | 0.7442 |
|
127 |
+
| 0.7526 | 3.1283 | 14800 | 0.7398 |
|
128 |
+
| 0.7526 | 3.1706 | 15000 | 0.7365 |
|
129 |
+
| 0.7413 | 3.2129 | 15200 | 0.7344 |
|
130 |
+
| 0.746 | 3.2551 | 15400 | 0.7296 |
|
131 |
+
| 0.7469 | 3.2974 | 15600 | 0.7264 |
|
132 |
+
| 0.7384 | 3.3397 | 15800 | 0.7221 |
|
133 |
+
| 0.7357 | 3.3819 | 16000 | 0.7191 |
|
134 |
+
| 0.7298 | 3.4242 | 16200 | 0.7173 |
|
135 |
+
| 0.7245 | 3.4665 | 16400 | 0.7122 |
|
136 |
+
| 0.7283 | 3.5088 | 16600 | 0.7087 |
|
137 |
+
| 0.7333 | 3.5510 | 16800 | 0.7062 |
|
138 |
+
| 0.7252 | 3.5933 | 17000 | 0.7040 |
|
139 |
+
| 0.7242 | 3.6356 | 17200 | 0.6987 |
|
140 |
+
| 0.7174 | 3.6779 | 17400 | 0.6956 |
|
141 |
+
| 0.7132 | 3.7201 | 17600 | 0.6931 |
|
142 |
+
| 0.7093 | 3.7624 | 17800 | 0.6898 |
|
143 |
+
| 0.7027 | 3.8047 | 18000 | 0.6869 |
|
144 |
+
| 0.7177 | 3.8470 | 18200 | 0.6838 |
|
145 |
+
| 0.707 | 3.8892 | 18400 | 0.6805 |
|
146 |
+
| 0.7091 | 3.9315 | 18600 | 0.6786 |
|
147 |
+
| 0.7031 | 3.9738 | 18800 | 0.6749 |
|
148 |
+
| 0.6913 | 4.0161 | 19000 | 0.6723 |
|
149 |
+
| 0.6895 | 4.0583 | 19200 | 0.6697 |
|
150 |
+
| 0.6858 | 4.1006 | 19400 | 0.6666 |
|
151 |
+
| 0.678 | 4.1429 | 19600 | 0.6645 |
|
152 |
+
| 0.6852 | 4.1852 | 19800 | 0.6622 |
|
153 |
+
| 0.6787 | 4.2274 | 20000 | 0.6586 |
|
154 |
+
| 0.6784 | 4.2697 | 20200 | 0.6568 |
|
155 |
+
| 0.6771 | 4.3120 | 20400 | 0.6528 |
|
156 |
+
| 0.6697 | 4.3543 | 20600 | 0.6509 |
|
157 |
+
| 0.6698 | 4.3965 | 20800 | 0.6481 |
|
158 |
+
| 0.6792 | 4.4388 | 21000 | 0.6455 |
|
159 |
+
| 0.6741 | 4.4811 | 21200 | 0.6436 |
|
160 |
+
| 0.6582 | 4.5234 | 21400 | 0.6402 |
|
161 |
+
| 0.6648 | 4.5656 | 21600 | 0.6380 |
|
162 |
+
| 0.6606 | 4.6079 | 21800 | 0.6363 |
|
163 |
+
| 0.6598 | 4.6502 | 22000 | 0.6341 |
|
164 |
+
| 0.6696 | 4.6925 | 22200 | 0.6312 |
|
165 |
+
| 0.6604 | 4.7347 | 22400 | 0.6298 |
|
166 |
+
| 0.6611 | 4.7770 | 22600 | 0.6274 |
|
167 |
+
| 0.6515 | 4.8193 | 22800 | 0.6260 |
|
168 |
+
| 0.6528 | 4.8616 | 23000 | 0.6228 |
|
169 |
+
| 0.6557 | 4.9038 | 23200 | 0.6201 |
|
170 |
+
| 0.6473 | 4.9461 | 23400 | 0.6184 |
|
171 |
+
| 0.6506 | 4.9884 | 23600 | 0.6168 |
|
172 |
+
| 0.6387 | 5.0306 | 23800 | 0.6146 |
|
173 |
+
| 0.638 | 5.0729 | 24000 | 0.6139 |
|
174 |
+
| 0.6389 | 5.1152 | 24200 | 0.6111 |
|
175 |
+
| 0.641 | 5.1575 | 24400 | 0.6103 |
|
176 |
+
| 0.6278 | 5.1997 | 24600 | 0.6080 |
|
177 |
+
| 0.6332 | 5.2420 | 24800 | 0.6068 |
|
178 |
+
| 0.6214 | 5.2843 | 25000 | 0.6047 |
|
179 |
+
| 0.6325 | 5.3266 | 25200 | 0.6020 |
|
180 |
+
| 0.6312 | 5.3688 | 25400 | 0.6000 |
|
181 |
+
| 0.6278 | 5.4111 | 25600 | 0.5979 |
|
182 |
+
| 0.6237 | 5.4534 | 25800 | 0.5962 |
|
183 |
+
| 0.6263 | 5.4957 | 26000 | 0.5942 |
|
184 |
+
| 0.6228 | 5.5379 | 26200 | 0.5938 |
|
185 |
+
| 0.625 | 5.5802 | 26400 | 0.5919 |
|
186 |
+
| 0.6271 | 5.6225 | 26600 | 0.5905 |
|
187 |
+
| 0.6206 | 5.6648 | 26800 | 0.5880 |
|
188 |
+
| 0.6204 | 5.7070 | 27000 | 0.5872 |
|
189 |
+
| 0.617 | 5.7493 | 27200 | 0.5857 |
|
190 |
+
| 0.6138 | 5.7916 | 27400 | 0.5844 |
|
191 |
+
| 0.6216 | 5.8339 | 27600 | 0.5817 |
|
192 |
+
| 0.6122 | 5.8761 | 27800 | 0.5805 |
|
193 |
+
| 0.6186 | 5.9184 | 28000 | 0.5793 |
|
194 |
+
| 0.6164 | 5.9607 | 28200 | 0.5783 |
|
195 |
+
| 0.6117 | 6.0030 | 28400 | 0.5769 |
|
196 |
+
| 0.6068 | 6.0452 | 28600 | 0.5749 |
|
197 |
+
| 0.6034 | 6.0875 | 28800 | 0.5740 |
|
198 |
+
| 0.6085 | 6.1298 | 29000 | 0.5720 |
|
199 |
+
| 0.6068 | 6.1721 | 29200 | 0.5710 |
|
200 |
+
| 0.603 | 6.2143 | 29400 | 0.5697 |
|
201 |
+
| 0.5979 | 6.2566 | 29600 | 0.5694 |
|
202 |
+
| 0.5941 | 6.2989 | 29800 | 0.5686 |
|
203 |
+
| 0.6022 | 6.3412 | 30000 | 0.5673 |
|
204 |
+
| 0.5999 | 6.3834 | 30200 | 0.5656 |
|
205 |
+
| 0.6064 | 6.4257 | 30400 | 0.5645 |
|
206 |
+
| 0.5982 | 6.4680 | 30600 | 0.5633 |
|
207 |
+
| 0.5863 | 6.5103 | 30800 | 0.5630 |
|
208 |
+
| 0.5942 | 6.5525 | 31000 | 0.5619 |
|
209 |
+
| 0.5969 | 6.5948 | 31200 | 0.5607 |
|
210 |
+
| 0.595 | 6.6371 | 31400 | 0.5600 |
|
211 |
+
| 0.6004 | 6.6793 | 31600 | 0.5583 |
|
212 |
+
| 0.593 | 6.7216 | 31800 | 0.5584 |
|
213 |
+
| 0.5903 | 6.7639 | 32000 | 0.5579 |
|
214 |
+
| 0.5917 | 6.8062 | 32200 | 0.5568 |
|
215 |
+
| 0.5946 | 6.8484 | 32400 | 0.5563 |
|
216 |
+
| 0.5876 | 6.8907 | 32600 | 0.5560 |
|
217 |
+
| 0.593 | 6.9330 | 32800 | 0.5543 |
|
218 |
+
| 0.5848 | 6.9753 | 33000 | 0.5527 |
|
219 |
+
| 0.5865 | 7.0175 | 33200 | 0.5523 |
|
220 |
+
| 0.5872 | 7.0598 | 33400 | 0.5524 |
|
221 |
+
| 0.583 | 7.1021 | 33600 | 0.5511 |
|
222 |
+
| 0.5838 | 7.1444 | 33800 | 0.5512 |
|
223 |
+
| 0.5819 | 7.1866 | 34000 | 0.5502 |
|
224 |
+
| 0.5833 | 7.2289 | 34200 | 0.5492 |
|
225 |
+
| 0.5826 | 7.2712 | 34400 | 0.5493 |
|
226 |
+
| 0.5813 | 7.3135 | 34600 | 0.5487 |
|
227 |
+
| 0.5815 | 7.3557 | 34800 | 0.5477 |
|
228 |
+
| 0.582 | 7.3980 | 35000 | 0.5472 |
|
229 |
+
| 0.5729 | 7.4403 | 35200 | 0.5466 |
|
230 |
+
| 0.5769 | 7.4826 | 35400 | 0.5457 |
|
231 |
+
| 0.5864 | 7.5248 | 35600 | 0.5457 |
|
232 |
+
| 0.5853 | 7.5671 | 35800 | 0.5453 |
|
233 |
+
| 0.5771 | 7.6094 | 36000 | 0.5449 |
|
234 |
+
| 0.5786 | 7.6517 | 36200 | 0.5447 |
|
235 |
+
| 0.5839 | 7.6939 | 36400 | 0.5443 |
|
236 |
+
| 0.5759 | 7.7362 | 36600 | 0.5435 |
|
237 |
+
| 0.5804 | 7.7785 | 36800 | 0.5436 |
|
238 |
+
| 0.5826 | 7.8208 | 37000 | 0.5437 |
|
239 |
+
| 0.5829 | 7.8630 | 37200 | 0.5434 |
|
240 |
+
| 0.574 | 7.9053 | 37400 | 0.5431 |
|
241 |
+
| 0.5756 | 7.9476 | 37600 | 0.5432 |
|
242 |
+
| 0.5722 | 7.9899 | 37800 | 0.5430 |
|
243 |
|
244 |
|
245 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
"layers_pattern": null,
|
11 |
"layers_to_transform": null,
|
12 |
"loftq_config": {},
|
13 |
-
"lora_alpha":
|
14 |
"lora_dropout": 0.1,
|
15 |
"megatron_config": null,
|
16 |
"megatron_core": "megatron.core",
|
@@ -20,8 +20,8 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
-
"
|
24 |
-
"
|
25 |
],
|
26 |
"task_type": "CAUSAL_LM",
|
27 |
"use_dora": false,
|
|
|
10 |
"layers_pattern": null,
|
11 |
"layers_to_transform": null,
|
12 |
"loftq_config": {},
|
13 |
+
"lora_alpha": 32,
|
14 |
"lora_dropout": 0.1,
|
15 |
"megatron_config": null,
|
16 |
"megatron_core": "megatron.core",
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"v_proj",
|
24 |
+
"q_proj"
|
25 |
],
|
26 |
"task_type": "CAUSAL_LM",
|
27 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9189792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7b532051819914e006c0459e40b1af408d42fd75e1e18f3ce793b3d46bd09d3
|
3 |
size 9189792
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a25bc2d24fc1f13f7da9040de2cf12f2693006afe4c21e26e33bc6aa179ee36
|
3 |
+
size 5688
|