Model save
Browse files- README.md +179 -0
- added_tokens.json +24 -0
- all_results.json +9 -0
- config.json +30 -0
- generation_config.json +7 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +27 -0
- tokenizer.json +0 -0
- tokenizer_config.json +196 -0
- train_results.json +9 -0
- trainer_state.json +0 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
|
4 |
+
tags:
|
5 |
+
- trl
|
6 |
+
- expo
|
7 |
+
- generated_from_trainer
|
8 |
+
model-index:
|
9 |
+
- name: qwen2.5-0.5b-expo-EXDPO-WEIGHT-BETA0.2
|
10 |
+
results: []
|
11 |
+
---
|
12 |
+
|
13 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
14 |
+
should probably proofread and complete it, then remove this comment. -->
|
15 |
+
|
16 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/3vfs9qoc)
|
17 |
+
# qwen2.5-0.5b-expo-EXDPO-WEIGHT-BETA0.2
|
18 |
+
|
19 |
+
This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
|
20 |
+
It achieves the following results on the evaluation set:
|
21 |
+
- Loss: 0.3418
|
22 |
+
- Logps: -93.8220
|
23 |
+
- Logits: -1.1885
|
24 |
+
- Objective: 0.3404
|
25 |
+
- Dpo Loss: 0.7083
|
26 |
+
- Regularize: 0.2696
|
27 |
+
- Ranking Simple: 0.5197
|
28 |
+
- Ranking Idealized: 0.5399
|
29 |
+
- Ranking Idealized Expo: 0.5243
|
30 |
+
|
31 |
+
## Model description
|
32 |
+
|
33 |
+
More information needed
|
34 |
+
|
35 |
+
## Intended uses & limitations
|
36 |
+
|
37 |
+
More information needed
|
38 |
+
|
39 |
+
## Training and evaluation data
|
40 |
+
|
41 |
+
More information needed
|
42 |
+
|
43 |
+
## Training procedure
|
44 |
+
|
45 |
+
### Training hyperparameters
|
46 |
+
|
47 |
+
The following hyperparameters were used during training:
|
48 |
+
- learning_rate: 5e-07
|
49 |
+
- train_batch_size: 4
|
50 |
+
- eval_batch_size: 4
|
51 |
+
- seed: 42
|
52 |
+
- distributed_type: multi-GPU
|
53 |
+
- num_devices: 3
|
54 |
+
- gradient_accumulation_steps: 8
|
55 |
+
- total_train_batch_size: 96
|
56 |
+
- total_eval_batch_size: 12
|
57 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
58 |
+
- lr_scheduler_type: cosine
|
59 |
+
- lr_scheduler_warmup_ratio: 0.1
|
60 |
+
- num_epochs: 10
|
61 |
+
- mixed_precision_training: Native AMP
|
62 |
+
|
63 |
+
### Training results
|
64 |
+
|
65 |
+
| Training Loss | Epoch | Step | Validation Loss | Logps | Logits | Objective | Dpo Loss | Regularize | Ranking Simple | Ranking Idealized | Ranking Idealized Expo |
|
66 |
+
|:-------------:|:------:|:----:|:---------------:|:--------:|:-------:|:---------:|:--------:|:----------:|:--------------:|:-----------------:|:----------------------:|
|
67 |
+
| 0.0798 | 0.0945 | 50 | 0.0807 | -98.5040 | -1.3072 | 0.0808 | 0.6932 | 0.0115 | 0.5238 | 0.5399 | 0.5243 |
|
68 |
+
| 0.081 | 0.1889 | 100 | 0.0819 | -98.4932 | -1.3084 | 0.0819 | 0.6934 | 0.0125 | 0.5238 | 0.5399 | 0.5243 |
|
69 |
+
| 0.0839 | 0.2834 | 150 | 0.0823 | -98.5417 | -1.3079 | 0.0823 | 0.6931 | 0.0130 | 0.5233 | 0.5399 | 0.5243 |
|
70 |
+
| 0.0891 | 0.3779 | 200 | 0.0840 | -98.6517 | -1.3063 | 0.0839 | 0.6927 | 0.0147 | 0.5238 | 0.5399 | 0.5243 |
|
71 |
+
| 0.1019 | 0.4724 | 250 | 0.0865 | -98.6753 | -1.3058 | 0.0864 | 0.6929 | 0.0171 | 0.5233 | 0.5399 | 0.5243 |
|
72 |
+
| 0.1094 | 0.5668 | 300 | 0.0928 | -98.3448 | -1.3087 | 0.0930 | 0.6926 | 0.0238 | 0.5238 | 0.5399 | 0.5243 |
|
73 |
+
| 0.1267 | 0.6613 | 350 | 0.0995 | -98.4803 | -1.3097 | 0.1004 | 0.6942 | 0.0310 | 0.5243 | 0.5399 | 0.5243 |
|
74 |
+
| 0.1414 | 0.7558 | 400 | 0.1020 | -98.6999 | -1.3138 | 0.1027 | 0.6920 | 0.0335 | 0.5248 | 0.5399 | 0.5243 |
|
75 |
+
| 0.156 | 0.8503 | 450 | 0.1102 | -98.6961 | -1.3001 | 0.1107 | 0.6917 | 0.0415 | 0.5228 | 0.5399 | 0.5243 |
|
76 |
+
| 0.1843 | 0.9447 | 500 | 0.1425 | -98.3685 | -1.2985 | 0.1416 | 0.6934 | 0.0723 | 0.5217 | 0.5399 | 0.5243 |
|
77 |
+
| 0.1954 | 1.0392 | 550 | 0.1388 | -98.2336 | -1.3154 | 0.1383 | 0.6946 | 0.0689 | 0.5228 | 0.5399 | 0.5243 |
|
78 |
+
| 0.2073 | 1.1337 | 600 | 0.1374 | -98.7215 | -1.3071 | 0.1369 | 0.6936 | 0.0676 | 0.5228 | 0.5399 | 0.5243 |
|
79 |
+
| 0.2165 | 1.2282 | 650 | 0.1478 | -97.9261 | -1.2926 | 0.1487 | 0.6916 | 0.0796 | 0.5233 | 0.5399 | 0.5243 |
|
80 |
+
| 0.2333 | 1.3226 | 700 | 0.1470 | -97.1071 | -1.2930 | 0.1450 | 0.6915 | 0.0758 | 0.5243 | 0.5399 | 0.5243 |
|
81 |
+
| 0.229 | 1.4171 | 750 | 0.1718 | -97.0923 | -1.2689 | 0.1725 | 0.6929 | 0.1032 | 0.5238 | 0.5399 | 0.5243 |
|
82 |
+
| 0.2565 | 1.5116 | 800 | 0.1817 | -97.2621 | -1.2540 | 0.1830 | 0.6944 | 0.1136 | 0.5243 | 0.5399 | 0.5243 |
|
83 |
+
| 0.2479 | 1.6060 | 850 | 0.1864 | -96.3423 | -1.2708 | 0.1853 | 0.6946 | 0.1159 | 0.5243 | 0.5399 | 0.5243 |
|
84 |
+
| 0.2586 | 1.7005 | 900 | 0.1839 | -97.2157 | -1.2623 | 0.1825 | 0.6944 | 0.1131 | 0.5223 | 0.5399 | 0.5243 |
|
85 |
+
| 0.2347 | 1.7950 | 950 | 0.1995 | -94.8402 | -1.2678 | 0.1989 | 0.6945 | 0.1295 | 0.5238 | 0.5399 | 0.5243 |
|
86 |
+
| 0.2414 | 1.8895 | 1000 | 0.1895 | -95.8793 | -1.2579 | 0.1901 | 0.6924 | 0.1209 | 0.5254 | 0.5399 | 0.5243 |
|
87 |
+
| 0.2433 | 1.9839 | 1050 | 0.2097 | -95.7970 | -1.2552 | 0.2068 | 0.6923 | 0.1376 | 0.5259 | 0.5399 | 0.5243 |
|
88 |
+
| 0.2393 | 2.0784 | 1100 | 0.2156 | -96.9313 | -1.2422 | 0.2149 | 0.6962 | 0.1452 | 0.5264 | 0.5399 | 0.5243 |
|
89 |
+
| 0.2476 | 2.1729 | 1150 | 0.2195 | -95.8618 | -1.2485 | 0.2191 | 0.6958 | 0.1495 | 0.5238 | 0.5399 | 0.5243 |
|
90 |
+
| 0.2443 | 2.2674 | 1200 | 0.2318 | -97.1362 | -1.2241 | 0.2317 | 0.6998 | 0.1617 | 0.5259 | 0.5399 | 0.5243 |
|
91 |
+
| 0.2337 | 2.3618 | 1250 | 0.2494 | -96.2629 | -1.2313 | 0.2515 | 0.6950 | 0.1820 | 0.5269 | 0.5399 | 0.5243 |
|
92 |
+
| 0.2264 | 2.4563 | 1300 | 0.2473 | -94.4504 | -1.2535 | 0.2456 | 0.6981 | 0.1758 | 0.5223 | 0.5399 | 0.5243 |
|
93 |
+
| 0.2398 | 2.5508 | 1350 | 0.2467 | -96.2065 | -1.2349 | 0.2462 | 0.7027 | 0.1760 | 0.5197 | 0.5399 | 0.5243 |
|
94 |
+
| 0.2346 | 2.6453 | 1400 | 0.2565 | -94.6591 | -1.2562 | 0.2567 | 0.7002 | 0.1867 | 0.5212 | 0.5399 | 0.5243 |
|
95 |
+
| 0.242 | 2.7397 | 1450 | 0.2640 | -94.6555 | -1.2141 | 0.2641 | 0.7015 | 0.1939 | 0.5243 | 0.5399 | 0.5243 |
|
96 |
+
| 0.2372 | 2.8342 | 1500 | 0.2747 | -94.9289 | -1.2472 | 0.2726 | 0.7027 | 0.2024 | 0.5202 | 0.5399 | 0.5243 |
|
97 |
+
| 0.2133 | 2.9287 | 1550 | 0.2529 | -95.1991 | -1.2345 | 0.2512 | 0.7006 | 0.1811 | 0.5243 | 0.5399 | 0.5243 |
|
98 |
+
| 0.2292 | 3.0231 | 1600 | 0.2840 | -93.6334 | -1.2437 | 0.2861 | 0.7038 | 0.2157 | 0.5197 | 0.5399 | 0.5243 |
|
99 |
+
| 0.2227 | 3.1176 | 1650 | 0.2854 | -93.4763 | -1.2332 | 0.2851 | 0.7025 | 0.2149 | 0.5217 | 0.5399 | 0.5243 |
|
100 |
+
| 0.2123 | 3.2121 | 1700 | 0.2752 | -95.6906 | -1.2311 | 0.2756 | 0.7008 | 0.2055 | 0.5233 | 0.5399 | 0.5243 |
|
101 |
+
| 0.218 | 3.3066 | 1750 | 0.2800 | -95.9042 | -1.2167 | 0.2783 | 0.7037 | 0.2079 | 0.5238 | 0.5399 | 0.5243 |
|
102 |
+
| 0.2086 | 3.4010 | 1800 | 0.2945 | -95.6983 | -1.2183 | 0.2932 | 0.7027 | 0.2230 | 0.5233 | 0.5399 | 0.5243 |
|
103 |
+
| 0.216 | 3.4955 | 1850 | 0.2895 | -93.0784 | -1.2235 | 0.2873 | 0.7028 | 0.2171 | 0.5212 | 0.5399 | 0.5243 |
|
104 |
+
| 0.2182 | 3.5900 | 1900 | 0.2973 | -95.2384 | -1.2138 | 0.2977 | 0.7019 | 0.2275 | 0.5207 | 0.5399 | 0.5243 |
|
105 |
+
| 0.2097 | 3.6845 | 1950 | 0.3023 | -93.4940 | -1.2111 | 0.3000 | 0.7046 | 0.2295 | 0.5217 | 0.5399 | 0.5243 |
|
106 |
+
| 0.2076 | 3.7789 | 2000 | 0.3084 | -93.0939 | -1.2337 | 0.3067 | 0.7034 | 0.2364 | 0.5243 | 0.5399 | 0.5243 |
|
107 |
+
| 0.2099 | 3.8734 | 2050 | 0.2962 | -93.1727 | -1.2280 | 0.2954 | 0.7044 | 0.2249 | 0.5212 | 0.5399 | 0.5243 |
|
108 |
+
| 0.2001 | 3.9679 | 2100 | 0.3139 | -93.9210 | -1.2079 | 0.3123 | 0.7063 | 0.2417 | 0.5186 | 0.5399 | 0.5243 |
|
109 |
+
| 0.2082 | 4.0624 | 2150 | 0.3119 | -93.6768 | -1.2148 | 0.3124 | 0.7037 | 0.2420 | 0.5217 | 0.5399 | 0.5243 |
|
110 |
+
| 0.1914 | 4.1568 | 2200 | 0.3139 | -94.5737 | -1.2179 | 0.3138 | 0.7032 | 0.2434 | 0.5197 | 0.5399 | 0.5243 |
|
111 |
+
| 0.2026 | 4.2513 | 2250 | 0.3179 | -93.2220 | -1.2044 | 0.3177 | 0.7035 | 0.2473 | 0.5202 | 0.5399 | 0.5243 |
|
112 |
+
| 0.1908 | 4.3458 | 2300 | 0.3067 | -94.3151 | -1.2117 | 0.3085 | 0.7022 | 0.2383 | 0.5233 | 0.5399 | 0.5243 |
|
113 |
+
| 0.1931 | 4.4402 | 2350 | 0.3241 | -93.4124 | -1.2066 | 0.3236 | 0.7058 | 0.2530 | 0.5223 | 0.5399 | 0.5243 |
|
114 |
+
| 0.195 | 4.5347 | 2400 | 0.3111 | -94.2419 | -1.2062 | 0.3113 | 0.7035 | 0.2410 | 0.5217 | 0.5399 | 0.5243 |
|
115 |
+
| 0.1947 | 4.6292 | 2450 | 0.3312 | -93.6715 | -1.1956 | 0.3317 | 0.7067 | 0.2610 | 0.5228 | 0.5399 | 0.5243 |
|
116 |
+
| 0.1837 | 4.7237 | 2500 | 0.3289 | -93.6179 | -1.2041 | 0.3304 | 0.7077 | 0.2596 | 0.5223 | 0.5399 | 0.5243 |
|
117 |
+
| 0.1751 | 4.8181 | 2550 | 0.3254 | -93.4709 | -1.1993 | 0.3247 | 0.7060 | 0.2541 | 0.5212 | 0.5399 | 0.5243 |
|
118 |
+
| 0.1717 | 4.9126 | 2600 | 0.3287 | -94.2886 | -1.2078 | 0.3292 | 0.7050 | 0.2587 | 0.5207 | 0.5399 | 0.5243 |
|
119 |
+
| 0.1761 | 5.0071 | 2650 | 0.3257 | -93.6210 | -1.2055 | 0.3239 | 0.7061 | 0.2533 | 0.5217 | 0.5399 | 0.5243 |
|
120 |
+
| 0.1692 | 5.1016 | 2700 | 0.3396 | -93.0109 | -1.2063 | 0.3378 | 0.7072 | 0.2670 | 0.5223 | 0.5399 | 0.5243 |
|
121 |
+
| 0.1676 | 5.1960 | 2750 | 0.3402 | -93.9591 | -1.1978 | 0.3384 | 0.7084 | 0.2675 | 0.5202 | 0.5399 | 0.5243 |
|
122 |
+
| 0.1743 | 5.2905 | 2800 | 0.3371 | -93.9100 | -1.1972 | 0.3351 | 0.7076 | 0.2643 | 0.5217 | 0.5399 | 0.5243 |
|
123 |
+
| 0.1715 | 5.3850 | 2850 | 0.3408 | -93.6808 | -1.1939 | 0.3405 | 0.7084 | 0.2696 | 0.5212 | 0.5399 | 0.5243 |
|
124 |
+
| 0.1643 | 5.4795 | 2900 | 0.3434 | -93.0381 | -1.1941 | 0.3434 | 0.7095 | 0.2724 | 0.5192 | 0.5399 | 0.5243 |
|
125 |
+
| 0.1569 | 5.5739 | 2950 | 0.3403 | -94.4489 | -1.1993 | 0.3406 | 0.7083 | 0.2698 | 0.5192 | 0.5399 | 0.5243 |
|
126 |
+
| 0.16 | 5.6684 | 3000 | 0.3337 | -94.1339 | -1.1952 | 0.3332 | 0.7068 | 0.2625 | 0.5233 | 0.5399 | 0.5243 |
|
127 |
+
| 0.1556 | 5.7629 | 3050 | 0.3379 | -93.7011 | -1.1943 | 0.3366 | 0.7075 | 0.2658 | 0.5197 | 0.5399 | 0.5243 |
|
128 |
+
| 0.1544 | 5.8573 | 3100 | 0.3407 | -93.8059 | -1.1896 | 0.3385 | 0.7082 | 0.2677 | 0.5212 | 0.5399 | 0.5243 |
|
129 |
+
| 0.1539 | 5.9518 | 3150 | 0.3377 | -93.3647 | -1.2013 | 0.3358 | 0.7079 | 0.2650 | 0.5207 | 0.5399 | 0.5243 |
|
130 |
+
| 0.1448 | 6.0463 | 3200 | 0.3418 | -93.0674 | -1.1912 | 0.3402 | 0.7086 | 0.2693 | 0.5181 | 0.5399 | 0.5243 |
|
131 |
+
| 0.1479 | 6.1408 | 3250 | 0.3437 | -93.1651 | -1.1883 | 0.3423 | 0.7079 | 0.2715 | 0.5217 | 0.5399 | 0.5243 |
|
132 |
+
| 0.1408 | 6.2352 | 3300 | 0.3427 | -93.4029 | -1.1821 | 0.3405 | 0.7074 | 0.2698 | 0.5197 | 0.5399 | 0.5243 |
|
133 |
+
| 0.1475 | 6.3297 | 3350 | 0.3401 | -93.6032 | -1.1856 | 0.3383 | 0.7078 | 0.2675 | 0.5192 | 0.5399 | 0.5243 |
|
134 |
+
| 0.1339 | 6.4242 | 3400 | 0.3415 | -93.5229 | -1.1891 | 0.3402 | 0.7082 | 0.2693 | 0.5212 | 0.5399 | 0.5243 |
|
135 |
+
| 0.1394 | 6.5187 | 3450 | 0.3398 | -94.0518 | -1.1959 | 0.3379 | 0.7083 | 0.2671 | 0.5186 | 0.5399 | 0.5243 |
|
136 |
+
| 0.1324 | 6.6131 | 3500 | 0.3401 | -93.9466 | -1.1836 | 0.3389 | 0.7075 | 0.2682 | 0.5192 | 0.5399 | 0.5243 |
|
137 |
+
| 0.1385 | 6.7076 | 3550 | 0.3449 | -93.6245 | -1.1866 | 0.3437 | 0.7080 | 0.2729 | 0.5202 | 0.5399 | 0.5243 |
|
138 |
+
| 0.1289 | 6.8021 | 3600 | 0.3433 | -93.8482 | -1.1858 | 0.3412 | 0.7088 | 0.2703 | 0.5192 | 0.5399 | 0.5243 |
|
139 |
+
| 0.1272 | 6.8966 | 3650 | 0.3431 | -93.9371 | -1.1979 | 0.3417 | 0.7080 | 0.2709 | 0.5202 | 0.5399 | 0.5243 |
|
140 |
+
| 0.125 | 6.9910 | 3700 | 0.3436 | -93.9666 | -1.1952 | 0.3425 | 0.7079 | 0.2717 | 0.5202 | 0.5399 | 0.5243 |
|
141 |
+
| 0.1227 | 7.0855 | 3750 | 0.3404 | -93.8781 | -1.2022 | 0.3382 | 0.7086 | 0.2674 | 0.5197 | 0.5399 | 0.5243 |
|
142 |
+
| 0.1142 | 7.1800 | 3800 | 0.3426 | -93.8234 | -1.1874 | 0.3420 | 0.7083 | 0.2712 | 0.5207 | 0.5399 | 0.5243 |
|
143 |
+
| 0.1142 | 7.2744 | 3850 | 0.3454 | -93.6895 | -1.1775 | 0.3442 | 0.7090 | 0.2733 | 0.5202 | 0.5399 | 0.5243 |
|
144 |
+
| 0.1128 | 7.3689 | 3900 | 0.3417 | -94.0521 | -1.1838 | 0.3406 | 0.7083 | 0.2698 | 0.5197 | 0.5399 | 0.5243 |
|
145 |
+
| 0.1158 | 7.4634 | 3950 | 0.3434 | -93.9208 | -1.1875 | 0.3423 | 0.7086 | 0.2714 | 0.5197 | 0.5399 | 0.5243 |
|
146 |
+
| 0.113 | 7.5579 | 4000 | 0.3428 | -93.6866 | -1.1850 | 0.3411 | 0.7087 | 0.2702 | 0.5197 | 0.5399 | 0.5243 |
|
147 |
+
| 0.1113 | 7.6523 | 4050 | 0.3434 | -93.6171 | -1.1837 | 0.3425 | 0.7087 | 0.2716 | 0.5202 | 0.5399 | 0.5243 |
|
148 |
+
| 0.1082 | 7.7468 | 4100 | 0.3411 | -94.0013 | -1.1852 | 0.3403 | 0.7081 | 0.2695 | 0.5192 | 0.5399 | 0.5243 |
|
149 |
+
| 0.1051 | 7.8413 | 4150 | 0.3425 | -93.8552 | -1.1848 | 0.3417 | 0.7083 | 0.2709 | 0.5197 | 0.5399 | 0.5243 |
|
150 |
+
| 0.1047 | 7.9358 | 4200 | 0.3422 | -93.6696 | -1.1872 | 0.3411 | 0.7085 | 0.2702 | 0.5197 | 0.5399 | 0.5243 |
|
151 |
+
| 0.0985 | 8.0302 | 4250 | 0.3416 | -93.6924 | -1.1844 | 0.3403 | 0.7083 | 0.2695 | 0.5197 | 0.5399 | 0.5243 |
|
152 |
+
| 0.0964 | 8.1247 | 4300 | 0.3422 | -93.5025 | -1.1871 | 0.3409 | 0.7082 | 0.2701 | 0.5202 | 0.5399 | 0.5243 |
|
153 |
+
| 0.0997 | 8.2192 | 4350 | 0.3423 | -93.8074 | -1.1866 | 0.3408 | 0.7081 | 0.2700 | 0.5186 | 0.5399 | 0.5243 |
|
154 |
+
| 0.0963 | 8.3137 | 4400 | 0.3434 | -93.6885 | -1.1861 | 0.3419 | 0.7084 | 0.2711 | 0.5202 | 0.5399 | 0.5243 |
|
155 |
+
| 0.0966 | 8.4081 | 4450 | 0.3434 | -93.7312 | -1.1875 | 0.3419 | 0.7084 | 0.2711 | 0.5186 | 0.5399 | 0.5243 |
|
156 |
+
| 0.0956 | 8.5026 | 4500 | 0.3431 | -93.8431 | -1.1866 | 0.3416 | 0.7081 | 0.2708 | 0.5186 | 0.5399 | 0.5243 |
|
157 |
+
| 0.0928 | 8.5971 | 4550 | 0.3428 | -93.8243 | -1.1859 | 0.3414 | 0.7084 | 0.2706 | 0.5186 | 0.5399 | 0.5243 |
|
158 |
+
| 0.0924 | 8.6915 | 4600 | 0.3418 | -93.7706 | -1.1871 | 0.3406 | 0.7082 | 0.2698 | 0.5186 | 0.5399 | 0.5243 |
|
159 |
+
| 0.0908 | 8.7860 | 4650 | 0.3415 | -93.7405 | -1.1872 | 0.3403 | 0.7079 | 0.2695 | 0.5202 | 0.5399 | 0.5243 |
|
160 |
+
| 0.0922 | 8.8805 | 4700 | 0.3419 | -93.7126 | -1.1888 | 0.3405 | 0.7078 | 0.2698 | 0.5202 | 0.5399 | 0.5243 |
|
161 |
+
| 0.0895 | 8.9750 | 4750 | 0.3417 | -93.7926 | -1.1886 | 0.3402 | 0.7080 | 0.2694 | 0.5202 | 0.5399 | 0.5243 |
|
162 |
+
| 0.0877 | 9.0694 | 4800 | 0.3425 | -93.7523 | -1.1891 | 0.3415 | 0.7083 | 0.2706 | 0.5197 | 0.5399 | 0.5243 |
|
163 |
+
| 0.0862 | 9.1639 | 4850 | 0.3423 | -93.8492 | -1.1894 | 0.3406 | 0.7082 | 0.2698 | 0.5207 | 0.5399 | 0.5243 |
|
164 |
+
| 0.0856 | 9.2584 | 4900 | 0.3417 | -93.8453 | -1.1883 | 0.3404 | 0.7081 | 0.2696 | 0.5197 | 0.5399 | 0.5243 |
|
165 |
+
| 0.0883 | 9.3529 | 4950 | 0.3414 | -93.8773 | -1.1886 | 0.3401 | 0.7080 | 0.2693 | 0.5202 | 0.5399 | 0.5243 |
|
166 |
+
| 0.0866 | 9.4473 | 5000 | 0.3414 | -93.8593 | -1.1880 | 0.3402 | 0.7081 | 0.2694 | 0.5197 | 0.5399 | 0.5243 |
|
167 |
+
| 0.0843 | 9.5418 | 5050 | 0.3417 | -93.8241 | -1.1880 | 0.3405 | 0.7081 | 0.2697 | 0.5207 | 0.5399 | 0.5243 |
|
168 |
+
| 0.0862 | 9.6363 | 5100 | 0.3419 | -93.8268 | -1.1884 | 0.3404 | 0.7081 | 0.2696 | 0.5197 | 0.5399 | 0.5243 |
|
169 |
+
| 0.0851 | 9.7308 | 5150 | 0.3418 | -93.8247 | -1.1881 | 0.3405 | 0.7082 | 0.2697 | 0.5192 | 0.5399 | 0.5243 |
|
170 |
+
| 0.0852 | 9.8252 | 5200 | 0.3415 | -93.8257 | -1.1886 | 0.3402 | 0.7081 | 0.2694 | 0.5197 | 0.5399 | 0.5243 |
|
171 |
+
| 0.0873 | 9.9197 | 5250 | 0.3418 | -93.8220 | -1.1885 | 0.3404 | 0.7083 | 0.2696 | 0.5197 | 0.5399 | 0.5243 |
|
172 |
+
|
173 |
+
|
174 |
+
### Framework versions
|
175 |
+
|
176 |
+
- Transformers 4.42.0
|
177 |
+
- Pytorch 2.3.0+cu121
|
178 |
+
- Datasets 2.19.1
|
179 |
+
- Tokenizers 0.19.1
|
added_tokens.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"</tool_call>": 151658,
|
3 |
+
"<tool_call>": 151657,
|
4 |
+
"<|box_end|>": 151649,
|
5 |
+
"<|box_start|>": 151648,
|
6 |
+
"<|endoftext|>": 151643,
|
7 |
+
"<|file_sep|>": 151664,
|
8 |
+
"<|fim_middle|>": 151660,
|
9 |
+
"<|fim_pad|>": 151662,
|
10 |
+
"<|fim_prefix|>": 151659,
|
11 |
+
"<|fim_suffix|>": 151661,
|
12 |
+
"<|im_end|>": 151645,
|
13 |
+
"<|im_start|>": 151644,
|
14 |
+
"<|image_pad|>": 151655,
|
15 |
+
"<|object_ref_end|>": 151647,
|
16 |
+
"<|object_ref_start|>": 151646,
|
17 |
+
"<|quad_end|>": 151651,
|
18 |
+
"<|quad_start|>": 151650,
|
19 |
+
"<|repo_name|>": 151663,
|
20 |
+
"<|video_pad|>": 151656,
|
21 |
+
"<|vision_end|>": 151653,
|
22 |
+
"<|vision_pad|>": 151654,
|
23 |
+
"<|vision_start|>": 151652
|
24 |
+
}
|
all_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 9.995276334435522,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.15836801591183153,
|
5 |
+
"train_runtime": 49865.2618,
|
6 |
+
"train_samples": 50802,
|
7 |
+
"train_samples_per_second": 10.188,
|
8 |
+
"train_steps_per_second": 0.106
|
9 |
+
}
|
config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "hZzy/qwen2.5-0.5b-sft-news-IFT",
|
3 |
+
"architectures": [
|
4 |
+
"Qwen2ForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151644,
|
8 |
+
"eos_token_id": 151645,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 896,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 4864,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"max_window_layers": 24,
|
15 |
+
"model_type": "qwen2",
|
16 |
+
"num_attention_heads": 14,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"num_key_value_heads": 2,
|
19 |
+
"pad_token_id": 151645,
|
20 |
+
"rms_norm_eps": 1e-06,
|
21 |
+
"rope_theta": 1000000.0,
|
22 |
+
"sliding_window": 32768,
|
23 |
+
"tie_word_embeddings": true,
|
24 |
+
"torch_dtype": "float16",
|
25 |
+
"transformers_version": "4.42.0",
|
26 |
+
"use_cache": false,
|
27 |
+
"use_mrope": false,
|
28 |
+
"use_sliding_window": false,
|
29 |
+
"vocab_size": 151665
|
30 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 151644,
|
3 |
+
"eos_token_id": 151645,
|
4 |
+
"max_new_tokens": 2048,
|
5 |
+
"pad_token_id": 151645,
|
6 |
+
"transformers_version": "4.42.0"
|
7 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:384313fb1360a2598e06964ad0ef2c2c174bd7e704195febc19430c83db50c2c
|
3 |
+
size 987611904
|
special_tokens_map.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|im_start|>",
|
4 |
+
"<|im_end|>"
|
5 |
+
],
|
6 |
+
"bos_token": {
|
7 |
+
"content": "<|im_start|>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false
|
12 |
+
},
|
13 |
+
"eos_token": {
|
14 |
+
"content": "<|im_end|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false
|
19 |
+
},
|
20 |
+
"pad_token": {
|
21 |
+
"content": "<|im_end|>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false
|
26 |
+
}
|
27 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"151643": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"151644": {
|
14 |
+
"content": "<|im_start|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"151645": {
|
22 |
+
"content": "<|im_end|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"151646": {
|
30 |
+
"content": "<|object_ref_start|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"151647": {
|
38 |
+
"content": "<|object_ref_end|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"151648": {
|
46 |
+
"content": "<|box_start|>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
},
|
53 |
+
"151649": {
|
54 |
+
"content": "<|box_end|>",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": false,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": true
|
60 |
+
},
|
61 |
+
"151650": {
|
62 |
+
"content": "<|quad_start|>",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": false,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": true
|
68 |
+
},
|
69 |
+
"151651": {
|
70 |
+
"content": "<|quad_end|>",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": false,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": true
|
76 |
+
},
|
77 |
+
"151652": {
|
78 |
+
"content": "<|vision_start|>",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": false,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": true
|
84 |
+
},
|
85 |
+
"151653": {
|
86 |
+
"content": "<|vision_end|>",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": false,
|
89 |
+
"rstrip": false,
|
90 |
+
"single_word": false,
|
91 |
+
"special": true
|
92 |
+
},
|
93 |
+
"151654": {
|
94 |
+
"content": "<|vision_pad|>",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": false,
|
97 |
+
"rstrip": false,
|
98 |
+
"single_word": false,
|
99 |
+
"special": true
|
100 |
+
},
|
101 |
+
"151655": {
|
102 |
+
"content": "<|image_pad|>",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": false,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false,
|
107 |
+
"special": true
|
108 |
+
},
|
109 |
+
"151656": {
|
110 |
+
"content": "<|video_pad|>",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": false,
|
113 |
+
"rstrip": false,
|
114 |
+
"single_word": false,
|
115 |
+
"special": true
|
116 |
+
},
|
117 |
+
"151657": {
|
118 |
+
"content": "<tool_call>",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": false,
|
121 |
+
"rstrip": false,
|
122 |
+
"single_word": false,
|
123 |
+
"special": false
|
124 |
+
},
|
125 |
+
"151658": {
|
126 |
+
"content": "</tool_call>",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": false,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": false
|
132 |
+
},
|
133 |
+
"151659": {
|
134 |
+
"content": "<|fim_prefix|>",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": false,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": false
|
140 |
+
},
|
141 |
+
"151660": {
|
142 |
+
"content": "<|fim_middle|>",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": false,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": false
|
148 |
+
},
|
149 |
+
"151661": {
|
150 |
+
"content": "<|fim_suffix|>",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": false,
|
153 |
+
"rstrip": false,
|
154 |
+
"single_word": false,
|
155 |
+
"special": false
|
156 |
+
},
|
157 |
+
"151662": {
|
158 |
+
"content": "<|fim_pad|>",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": false,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false,
|
163 |
+
"special": false
|
164 |
+
},
|
165 |
+
"151663": {
|
166 |
+
"content": "<|repo_name|>",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": false,
|
169 |
+
"rstrip": false,
|
170 |
+
"single_word": false,
|
171 |
+
"special": false
|
172 |
+
},
|
173 |
+
"151664": {
|
174 |
+
"content": "<|file_sep|>",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": false,
|
177 |
+
"rstrip": false,
|
178 |
+
"single_word": false,
|
179 |
+
"special": false
|
180 |
+
}
|
181 |
+
},
|
182 |
+
"additional_special_tokens": [
|
183 |
+
"<|im_start|>",
|
184 |
+
"<|im_end|>"
|
185 |
+
],
|
186 |
+
"bos_token": "<|im_start|>",
|
187 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
|
188 |
+
"clean_up_tokenization_spaces": false,
|
189 |
+
"eos_token": "<|im_end|>",
|
190 |
+
"errors": "replace",
|
191 |
+
"model_max_length": 2048,
|
192 |
+
"pad_token": "<|im_end|>",
|
193 |
+
"split_special_tokens": false,
|
194 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
195 |
+
"unk_token": null
|
196 |
+
}
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 9.995276334435522,
|
3 |
+
"total_flos": 0.0,
|
4 |
+
"train_loss": 0.15836801591183153,
|
5 |
+
"train_runtime": 49865.2618,
|
6 |
+
"train_samples": 50802,
|
7 |
+
"train_samples_per_second": 10.188,
|
8 |
+
"train_steps_per_second": 0.106
|
9 |
+
}
|
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10621b82969f8423c7f1786671fff7586249d2e90d14a5c932545238f115038f
|
3 |
+
size 8120
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|