YYYYYYibo committed
Commit 7623a78
1 Parent(s): 15bb16c

Model save
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
-base_model: model/approx_nash_1_iter_1
+license: apache-2.0
+base_model: YYYYYYibo/full_vanilla_dpo_iter_1
 tags:
+- trl
+- dpo
 - alignment-handbook
 - generated_from_trainer
-datasets:
-- updated
-- original
 model-index:
 - name: approx_nash_1_iter_2
   results: []
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # approx_nash_1_iter_2
 
-This model is a fine-tuned version of [model/approx_nash_1_iter_1](https://huggingface.co/model/approx_nash_1_iter_1) on the updated and the original datasets.
+This model is a fine-tuned version of [YYYYYYibo/full_vanilla_dpo_iter_1](https://huggingface.co/YYYYYYibo/full_vanilla_dpo_iter_1) on the None dataset.
 
 ## Model description
 
@@ -55,7 +55,7 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
-- Transformers 4.36.2
-- Pytorch 2.1.2+cu121
+- Transformers 4.38.2
+- Pytorch 2.3.1+cu121
 - Datasets 2.14.6
 - Tokenizers 0.15.2
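Note: the updated model card does not yet include a usage snippet. Below is a minimal sketch for loading this DPO fine-tune with the Transformers version listed above; the repo id "YYYYYYibo/approx_nash_1_iter_2" is an assumption that combines the commit author with the model name in the card, so adjust it to the actual Hub path if it differs.

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "YYYYYYibo/approx_nash_1_iter_2"  # assumed Hub path; adjust if the repo lives elsewhere
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")

prompt = "Explain DPO in one sentence."
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))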
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 0.97,
-    "train_loss": 0.6927885015805563,
-    "train_runtime": 4174.3985,
-    "train_samples": 2000,
-    "train_samples_per_second": 0.479,
+    "epoch": 0.99,
+    "train_loss": 0.6871888306405809,
+    "train_runtime": 39835.0539,
+    "train_samples": 20000,
+    "train_samples_per_second": 0.502,
     "train_steps_per_second": 0.004
 }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.36.2"
+  "transformers_version": "4.38.2"
 }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80e1eb31051511db24a3cb1083af1b2b86d1cdbad2030985ed9a141351cf38ec
+oid sha256:cae70a4dee48f8d1868b52b16a326401013ee4761521f2eff7f8d3172b2dcedd
 size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0edf5713b6717da7492edd0496093c4b38f041b9ca9be76a0833b29992e0ed3f
+oid sha256:2620dbe89f0b5620681856b78fc69560faa7a44de50d7502c68ac5ffa788e7f7
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:102d00429a5fc79d61b8cbd22799ff459103bf8a9baf9e8f99ea8ea87822c47b
+oid sha256:3a0c19638750ceba97b0acb52e992407c09d6263561efa16a131f62965b944e4
 size 4540516344
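Note: the three entries above are Git LFS pointer files, not the weights themselves; each records the SHA-256 digest and byte size of the real shard. A small sketch for checking a downloaded shard against its pointer, assuming the shard has already been fetched locally (e.g. via huggingface_hub) under the same filename:

import hashlib
import os

def verify_shard(path, expected_oid, expected_size):
    """Compare a downloaded safetensors shard against its Git LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid

# Values taken from the new pointer for model-00001-of-00003.safetensors above.
ok = verify_shard(
    "model-00001-of-00003.safetensors",
    "cae70a4dee48f8d1868b52b16a326401013ee4761521f2eff7f8d3172b2dcedd",
    4943162336,
)
print("shard ok:", ok)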
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "epoch": 0.97,
-    "train_loss": 0.6927885015805563,
-    "train_runtime": 4174.3985,
-    "train_samples": 2000,
-    "train_samples_per_second": 0.479,
+    "epoch": 0.99,
+    "train_loss": 0.6871888306405809,
+    "train_runtime": 39835.0539,
+    "train_samples": 20000,
+    "train_samples_per_second": 0.502,
     "train_steps_per_second": 0.004
 }
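Note: the new aggregates are mutually consistent: 20000 train_samples over a 39835.0539 s train_runtime gives 20000 / 39835.0539 ≈ 0.502 train_samples_per_second, and the 153 optimizer steps reported in trainer_state.json below give 153 / 39835.0539 ≈ 0.0038, which rounds to the logged 0.004 train_steps_per_second. A one-line check using only values from this diff:

runtime, samples, steps = 39835.0539, 20000, 153  # values from the diff above
print(round(samples / runtime, 3), round(steps / runtime, 3))  # -> 0.502 0.004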
trainer_state.json CHANGED
@@ -1,20 +1,21 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.975,
+  "epoch": 0.9945,
   "eval_steps": 500,
-  "global_step": 15,
+  "global_step": 153,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.07,
-      "learning_rate": 2.5e-07,
-      "logits/chosen": -3.029622793197632,
-      "logits/rejected": -2.873795986175537,
-      "logps/chosen": -140.72406005859375,
-      "logps/rejected": -159.6417694091797,
+      "epoch": 0.01,
+      "grad_norm": 13.312925409718954,
+      "learning_rate": 3.125e-08,
+      "logits/chosen": -2.1492395401000977,
+      "logits/rejected": -2.139173746109009,
+      "logps/chosen": -189.41439819335938,
+      "logps/rejected": -184.15049743652344,
       "loss": 0.6931,
       "rewards/accuracies": 0.0,
       "rewards/chosen": 0.0,
@@ -23,31 +24,242 @@
       "step": 1
     },
     {
-      "epoch": 0.65,
-      "learning_rate": 1.6134877823936607e-07,
-      "logits/chosen": -2.9111006259918213,
-      "logits/rejected": -2.8640284538269043,
-      "logps/chosen": -127.64539337158203,
-      "logps/rejected": -126.8525619506836,
-      "loss": 0.6925,
-      "rewards/accuracies": 0.4572649598121643,
-      "rewards/chosen": 0.02028985135257244,
-      "rewards/margins": 0.0014097096864134073,
-      "rewards/rejected": 0.01888013817369938,
+      "epoch": 0.07,
+      "grad_norm": 15.630000847331686,
+      "learning_rate": 3.1249999999999997e-07,
+      "logits/chosen": -2.3999834060668945,
+      "logits/rejected": -2.346851348876953,
+      "logps/chosen": -178.99545288085938,
+      "logps/rejected": -177.0459747314453,
+      "loss": 0.6934,
+      "rewards/accuracies": 0.42307692766189575,
+      "rewards/chosen": -0.0037847168277949095,
+      "rewards/margins": 0.0002747862017713487,
+      "rewards/rejected": -0.004059503320604563,
       "step": 10
     },
+    {
+      "epoch": 0.13,
+      "grad_norm": 14.549393173612225,
+      "learning_rate": 4.989490450759331e-07,
+      "logits/chosen": -2.4151172637939453,
+      "logits/rejected": -2.356534004211426,
+      "logps/chosen": -179.75003051757812,
+      "logps/rejected": -179.4581756591797,
+      "loss": 0.6909,
+      "rewards/accuracies": 0.5461538434028625,
+      "rewards/chosen": -0.055207282304763794,
+      "rewards/margins": 0.006152572110295296,
+      "rewards/rejected": -0.06135985627770424,
+      "step": 20
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 22.390116207007786,
+      "learning_rate": 4.872270441827174e-07,
+      "logits/chosen": -2.312279224395752,
+      "logits/rejected": -2.211397886276245,
+      "logps/chosen": -206.32656860351562,
+      "logps/rejected": -211.81321716308594,
+      "loss": 0.6929,
+      "rewards/accuracies": 0.557692289352417,
+      "rewards/chosen": -0.3904457688331604,
+      "rewards/margins": 0.03509727492928505,
+      "rewards/rejected": -0.42554304003715515,
+      "step": 30
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 15.933088854619298,
+      "learning_rate": 4.6308512113530063e-07,
+      "logits/chosen": -2.2958626747131348,
+      "logits/rejected": -2.3168814182281494,
+      "logps/chosen": -236.7042999267578,
+      "logps/rejected": -244.78851318359375,
+      "loss": 0.6981,
+      "rewards/accuracies": 0.5461538434028625,
+      "rewards/chosen": -0.6312862038612366,
+      "rewards/margins": 0.015706488862633705,
+      "rewards/rejected": -0.6469926238059998,
+      "step": 40
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 14.014878007482002,
+      "learning_rate": 4.277872161641681e-07,
+      "logits/chosen": -2.368952512741089,
+      "logits/rejected": -2.4042294025421143,
+      "logps/chosen": -214.369384765625,
+      "logps/rejected": -220.7718505859375,
+      "loss": 0.6913,
+      "rewards/accuracies": 0.550000011920929,
+      "rewards/chosen": -0.2600650191307068,
+      "rewards/margins": 0.018586795777082443,
+      "rewards/rejected": -0.2786518335342407,
+      "step": 50
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 14.767139513110513,
+      "learning_rate": 3.8318133624280046e-07,
+      "logits/chosen": -2.4127275943756104,
+      "logits/rejected": -2.434305191040039,
+      "logps/chosen": -217.94210815429688,
+      "logps/rejected": -227.94302368164062,
+      "loss": 0.6919,
+      "rewards/accuracies": 0.5384615659713745,
+      "rewards/chosen": -0.2718888223171234,
+      "rewards/margins": 0.015998326241970062,
+      "rewards/rejected": -0.2878871560096741,
+      "step": 60
+    },
+    {
+      "epoch": 0.46,
+      "grad_norm": 15.91144067203442,
+      "learning_rate": 3.316028034595861e-07,
+      "logits/chosen": -2.264232635498047,
+      "logits/rejected": -2.299992322921753,
+      "logps/chosen": -194.38172912597656,
+      "logps/rejected": -205.9635009765625,
+      "loss": 0.6833,
+      "rewards/accuracies": 0.5884615182876587,
+      "rewards/chosen": -0.17818714678287506,
+      "rewards/margins": 0.02437894232571125,
+      "rewards/rejected": -0.20256608724594116,
+      "step": 70
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 18.074689046967872,
+      "learning_rate": 2.7575199021178855e-07,
+      "logits/chosen": -2.299180746078491,
+      "logits/rejected": -2.182999610900879,
+      "logps/chosen": -231.85098266601562,
+      "logps/rejected": -236.9989776611328,
+      "loss": 0.6842,
+      "rewards/accuracies": 0.5730769038200378,
+      "rewards/chosen": -0.3959502577781677,
+      "rewards/margins": 0.03195538371801376,
+      "rewards/rejected": -0.4279056191444397,
+      "step": 80
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 19.634321191048826,
+      "learning_rate": 2.1855294234408068e-07,
+      "logits/chosen": -2.232875347137451,
+      "logits/rejected": -2.2362263202667236,
+      "logps/chosen": -208.51087951660156,
+      "logps/rejected": -207.45663452148438,
+      "loss": 0.689,
+      "rewards/accuracies": 0.5461538434028625,
+      "rewards/chosen": -0.22500069439411163,
+      "rewards/margins": 0.003552414011210203,
+      "rewards/rejected": -0.22855311632156372,
+      "step": 90
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 17.473494481507956,
+      "learning_rate": 1.6300029195778453e-07,
+      "logits/chosen": -2.236097812652588,
+      "logits/rejected": -2.0412774085998535,
+      "logps/chosen": -213.67514038085938,
+      "logps/rejected": -206.89111328125,
+      "loss": 0.6881,
+      "rewards/accuracies": 0.5038461685180664,
+      "rewards/chosen": -0.2356816679239273,
+      "rewards/margins": 0.003031224012374878,
+      "rewards/rejected": -0.23871289193630219,
+      "step": 100
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 15.544936822002546,
+      "learning_rate": 1.1200247470632392e-07,
+      "logits/chosen": -2.103285789489746,
+      "logits/rejected": -2.1786677837371826,
+      "logps/chosen": -224.00047302246094,
+      "logps/rejected": -220.13726806640625,
+      "loss": 0.6848,
+      "rewards/accuracies": 0.5615384578704834,
+      "rewards/chosen": -0.3865113854408264,
+      "rewards/margins": 0.03568296507000923,
+      "rewards/rejected": -0.42219436168670654,
+      "step": 110
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 17.169881927493602,
+      "learning_rate": 6.822945986946385e-08,
+      "logits/chosen": -1.9218517541885376,
+      "logits/rejected": -2.109549045562744,
+      "logps/chosen": -220.54318237304688,
+      "logps/rejected": -231.7896270751953,
+      "loss": 0.6813,
+      "rewards/accuracies": 0.5769230723381042,
+      "rewards/chosen": -0.4736253619194031,
+      "rewards/margins": 0.03084597922861576,
+      "rewards/rejected": -0.5044713020324707,
+      "step": 120
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 17.60589291870986,
+      "learning_rate": 3.397296523427806e-08,
+      "logits/chosen": -2.146359920501709,
+      "logits/rejected": -2.1425552368164062,
+      "logps/chosen": -221.13165283203125,
+      "logps/rejected": -225.94419860839844,
+      "loss": 0.6816,
+      "rewards/accuracies": 0.5615384578704834,
+      "rewards/chosen": -0.4886237382888794,
+      "rewards/margins": 0.03550608828663826,
+      "rewards/rejected": -0.5241298675537109,
+      "step": 130
+    },
+    {
+      "epoch": 0.91,
+      "grad_norm": 18.707751355883822,
+      "learning_rate": 1.1026475173977978e-08,
+      "logits/chosen": -2.1278481483459473,
+      "logits/rejected": -2.0320982933044434,
+      "logps/chosen": -220.7178192138672,
+      "logps/rejected": -217.0054931640625,
+      "loss": 0.6837,
+      "rewards/accuracies": 0.5923076868057251,
+      "rewards/chosen": -0.3798917829990387,
+      "rewards/margins": 0.05050484091043472,
+      "rewards/rejected": -0.4303966164588928,
+      "step": 140
+    },
     {
       "epoch": 0.97,
-      "step": 15,
+      "grad_norm": 18.697426009812567,
+      "learning_rate": 5.913435276374834e-10,
+      "logits/chosen": -2.186318874359131,
+      "logits/rejected": -2.1368911266326904,
+      "logps/chosen": -221.08029174804688,
+      "logps/rejected": -230.6654052734375,
+      "loss": 0.6744,
+      "rewards/accuracies": 0.5961538553237915,
+      "rewards/chosen": -0.38067081570625305,
+      "rewards/margins": 0.07161368429660797,
+      "rewards/rejected": -0.4522845447063446,
+      "step": 150
+    },
+    {
+      "epoch": 0.99,
+      "step": 153,
       "total_flos": 0.0,
-      "train_loss": 0.6927885015805563,
-      "train_runtime": 4174.3985,
-      "train_samples_per_second": 0.479,
+      "train_loss": 0.6871888306405809,
+      "train_runtime": 39835.0539,
+      "train_samples_per_second": 0.502,
       "train_steps_per_second": 0.004
     }
   ],
   "logging_steps": 10,
-  "max_steps": 15,
+  "max_steps": 153,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,