hZzy committed
Commit cf2f99d · verified · 1 Parent(s): edf2771

Model save

README.md ADDED
@@ -0,0 +1,84 @@
+ ---
+ license: apache-2.0
+ base_model: hZzy/qwen2.5-0.5b-sft-news-IFT
+ tags:
+ - trl
+ - expo
+ - generated_from_trainer
+ model-index:
+ - name: qwen2.5-0.5b-expo-DPO-noES5-1
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/zhiyuzha-university-of-florida/huggingface/runs/ka5w2jn7)
+ # qwen2.5-0.5b-expo-DPO-noES5-1
+
+ This model is a fine-tuned version of [hZzy/qwen2.5-0.5b-sft-news-IFT](https://huggingface.co/hZzy/qwen2.5-0.5b-sft-news-IFT) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 1.9399
+ - Logps: -81.1684
+ - Logits: -0.8509
+ - Objective: 1.8787
+ - Dpo Loss: 1.8787
+ - Ranking Simple: 0.5347
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-06
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 3
+ - gradient_accumulation_steps: 12
+ - total_train_batch_size: 144
+ - total_eval_batch_size: 12
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 2
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Logps | Logits | Objective | Dpo Loss | Ranking Simple |
+ |:-------------:|:------:|:----:|:---------------:|:--------:|:-------:|:---------:|:--------:|:--------------:|
+ | 1.1219 | 0.1417 | 50 | 1.1407 | -91.1504 | -1.3894 | 1.1280 | 1.1280 | 0.5274 |
+ | 1.3821 | 0.2834 | 100 | 1.5304 | -81.2234 | -1.3774 | 1.4947 | 1.4947 | 0.5290 |
+ | 1.4062 | 0.4251 | 150 | 1.8818 | -79.7787 | -1.1641 | 1.8192 | 1.8192 | 0.5430 |
+ | 1.2275 | 0.5668 | 200 | 2.0358 | -77.9854 | -1.1289 | 1.9717 | 1.9717 | 0.5347 |
+ | 1.1914 | 0.7085 | 250 | 2.0084 | -78.3385 | -1.0883 | 1.9461 | 1.9461 | 0.5347 |
+ | 1.0378 | 0.8503 | 300 | 2.0918 | -83.4707 | -0.9324 | 2.0357 | 2.0357 | 0.5352 |
+ | 0.8334 | 0.9920 | 350 | 2.1143 | -81.1740 | -0.8755 | 1.9975 | 1.9975 | 0.5388 |
+ | 0.4251 | 1.1337 | 400 | 2.0641 | -81.1689 | -0.8003 | 2.0241 | 2.0241 | 0.5435 |
+ | 0.3886 | 1.2754 | 450 | 2.0085 | -79.8813 | -0.8999 | 1.9598 | 1.9598 | 0.5388 |
+ | 0.4352 | 1.4171 | 500 | 2.0449 | -80.7357 | -0.8634 | 1.9819 | 1.9819 | 0.5367 |
+ | 0.3103 | 1.5588 | 550 | 1.9784 | -80.8827 | -0.8672 | 1.9073 | 1.9073 | 0.5373 |
+ | 0.2489 | 1.7005 | 600 | 1.9488 | -81.0833 | -0.8421 | 1.8851 | 1.8851 | 0.5367 |
+ | 0.3631 | 1.8422 | 650 | 1.9417 | -81.1721 | -0.8529 | 1.8805 | 1.8805 | 0.5347 |
+ | 0.3009 | 1.9839 | 700 | 1.9399 | -81.1684 | -0.8509 | 1.8787 | 1.8787 | 0.5347 |
+
+
+ ### Framework versions
+
+ - Transformers 4.42.0
+ - Pytorch 2.3.0+cu121
+ - Datasets 3.2.0
+ - Tokenizers 0.19.1
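
The commit contains no training script, so the sketch below is only a rough illustration of how the hyperparameters listed in the model card above could be wired into TRL's `DPOConfig`/`DPOTrainer`. The preference dataset, the DPO `beta`, and the exact trainer arguments are assumptions, not facts from this repository (the `expo` tag refers to a setup not shown here).

```python
# Hypothetical reconstruction of the training setup implied by the model card.
# Hyperparameter values are copied from the "Training hyperparameters" list;
# the dataset, beta, and column layout are placeholder assumptions.
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

base = "hZzy/qwen2.5-0.5b-sft-news-IFT"
model = AutoModelForCausalLM.from_pretrained(base)
tokenizer = AutoTokenizer.from_pretrained(base)

# Placeholder preference dataset with "prompt" / "chosen" / "rejected" columns.
train_dataset = load_dataset("your_preference_dataset", split="train")

args = DPOConfig(
    output_dir="qwen2.5-0.5b-expo-DPO-noES5-1",
    learning_rate=5e-6,
    per_device_train_batch_size=4,   # 4 per device x 3 GPUs x 12 accumulation = 144 effective
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=12,
    num_train_epochs=2,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    seed=42,
    beta=0.1,                        # assumption: TRL's default DPO beta, not stated in the card
)

trainer = DPOTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,             # newer TRL releases take processing_class= instead
)
trainer.train()
```
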
added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.995276334435522,
+   "total_flos": 0.0,
+   "train_loss": 0.7561936149881645,
+   "train_runtime": 36938.8609,
+   "train_samples": 50802,
+   "train_samples_per_second": 2.751,
+   "train_steps_per_second": 0.019
+ }
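
As a quick cross-check (not part of the repository), the aggregates above are consistent with the hyperparameters in the model card: 50,802 samples at an effective batch size of 144 give 352 optimizer steps per epoch, i.e. 704 steps over two epochs, matching `global_step` in trainer_state.json below.

```python
# Cross-checking the reported aggregates against the model card hyperparameters.
train_samples = 50802
effective_batch = 4 * 3 * 12            # per-device batch x 3 GPUs x 12 accumulation steps = 144
steps_per_epoch = train_samples // effective_batch

print(steps_per_epoch * 2)              # 704 optimizer steps, as in trainer_state.json

train_runtime = 36938.8609              # seconds
print(round(train_samples * 2 / train_runtime, 3))     # ~2.751 samples per second
print(round(steps_per_epoch * 2 / train_runtime, 3))   # ~0.019 steps per second
```
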
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_name_or_path": "hZzy/qwen2.5-0.5b-sft-news-IFT",
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151644,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 896,
+   "initializer_range": 0.02,
+   "intermediate_size": 4864,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 24,
+   "model_type": "qwen2",
+   "num_attention_heads": 14,
+   "num_hidden_layers": 24,
+   "num_key_value_heads": 2,
+   "pad_token_id": 151645,
+   "rms_norm_eps": 1e-06,
+   "rope_theta": 1000000.0,
+   "sliding_window": 32768,
+   "tie_word_embeddings": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.42.0",
+   "use_cache": false,
+   "use_mrope": false,
+   "use_sliding_window": false,
+   "vocab_size": 151665
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "bos_token_id": 151644,
+   "eos_token_id": 151645,
+   "max_new_tokens": 2048,
+   "pad_token_id": 151645,
+   "transformers_version": "4.42.0"
+ }
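
For completeness, a minimal inference sketch (not part of the commit) that loads the checkpoint and uses the defaults from config.json and generation_config.json above. The repository id is assumed from the model card's name and the committer's namespace; the prompt and generation length are arbitrary.

```python
# Minimal inference sketch; assumes the checkpoint is published as
# hZzy/qwen2.5-0.5b-expo-DPO-noES5-1 (assumption, inferred from the model card).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "hZzy/qwen2.5-0.5b-expo-DPO-noES5-1"
tokenizer = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.float32)  # weights are stored in float32

inputs = tokenizer("Write a one-sentence news summary about renewable energy.", return_tensors="pt")
output_ids = model.generate(**inputs, max_new_tokens=128)  # generation_config.json defaults to 2048
print(tokenizer.decode(output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```
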
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a6927a8e3a0aa891c25b491010fd4c0e7b8ba06c89a3a2661df8f80b2921567
+ size 1975192208
special_tokens_map.json ADDED
@@ -0,0 +1,27 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>"
+   ],
+   "bos_token": {
+     "content": "<|im_start|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|im_end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,196 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>"
185
+ ],
186
+ "bos_token": "<|im_start|>",
187
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
188
+ "clean_up_tokenization_spaces": false,
189
+ "eos_token": "<|im_end|>",
190
+ "errors": "replace",
191
+ "model_max_length": 2048,
192
+ "pad_token": "<|im_end|>",
193
+ "split_special_tokens": false,
194
+ "tokenizer_class": "Qwen2Tokenizer",
195
+ "unk_token": null
196
+ }
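
Note that the `chat_template` in tokenizer_config.json builds prompts from `<|user|>` / `<|system|>` / `<|assistant|>` role headers terminated by the eos token, rather than the usual Qwen ChatML markers, so prompts are best built through the tokenizer instead of by hand. A minimal sketch (the repository id is assumed from the model card; the messages are placeholders):

```python
# Building a prompt with the template stored in tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("hZzy/qwen2.5-0.5b-expo-DPO-noES5-1")  # assumed repo id

messages = [
    {"role": "system", "content": "You are a helpful news-writing assistant."},
    {"role": "user", "content": "Summarize today's top technology story in two sentences."},
]

# add_generation_prompt=True appends the trailing '<|assistant|>' header defined by the template.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```
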
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.995276334435522,
+   "total_flos": 0.0,
+   "train_loss": 0.7561936149881645,
+   "train_runtime": 36938.8609,
+   "train_samples": 50802,
+   "train_samples_per_second": 2.751,
+   "train_steps_per_second": 0.019
+ }
trainer_state.json ADDED
@@ -0,0 +1,1916 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.995276334435522,
5
+ "eval_steps": 50,
6
+ "global_step": 704,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "dpo_loss": 0.6931471824645996,
13
+ "epoch": 0.002834199338686821,
14
+ "grad_norm": 183.97025063743072,
15
+ "learning_rate": 7.042253521126761e-08,
16
+ "logits": -1.2867579460144043,
17
+ "logps": -84.34933471679688,
18
+ "loss": 0.6931,
19
+ "objective": 0.6931471824645996,
20
+ "ranking_simple": 0.5833333134651184,
21
+ "step": 1
22
+ },
23
+ {
24
+ "dpo_loss": 0.6891745924949646,
25
+ "epoch": 0.014170996693434105,
26
+ "grad_norm": 175.97134717956817,
27
+ "learning_rate": 3.521126760563381e-07,
28
+ "logits": -1.430897831916809,
29
+ "logps": -83.63750457763672,
30
+ "loss": 0.6935,
31
+ "objective": 0.6891745924949646,
32
+ "ranking_simple": 0.4895833432674408,
33
+ "step": 5
34
+ },
35
+ {
36
+ "dpo_loss": 0.6994127631187439,
37
+ "epoch": 0.02834199338686821,
38
+ "grad_norm": 210.24216843481642,
39
+ "learning_rate": 7.042253521126762e-07,
40
+ "logits": -1.4024795293807983,
41
+ "logps": -84.05953216552734,
42
+ "loss": 0.7084,
43
+ "objective": 0.6994127631187439,
44
+ "ranking_simple": 0.5791666507720947,
45
+ "step": 10
46
+ },
47
+ {
48
+ "dpo_loss": 0.7201023101806641,
49
+ "epoch": 0.042512990080302314,
50
+ "grad_norm": 217.21520494236623,
51
+ "learning_rate": 1.0563380281690142e-06,
52
+ "logits": -1.5396863222122192,
53
+ "logps": -83.98353576660156,
54
+ "loss": 0.6992,
55
+ "objective": 0.7201023101806641,
56
+ "ranking_simple": 0.5708333253860474,
57
+ "step": 15
58
+ },
59
+ {
60
+ "dpo_loss": 0.6394654512405396,
61
+ "epoch": 0.05668398677373642,
62
+ "grad_norm": 178.00062840147226,
63
+ "learning_rate": 1.4084507042253523e-06,
64
+ "logits": -1.3805947303771973,
65
+ "logps": -82.83494567871094,
66
+ "loss": 0.6982,
67
+ "objective": 0.6394654512405396,
68
+ "ranking_simple": 0.4833333194255829,
69
+ "step": 20
70
+ },
71
+ {
72
+ "dpo_loss": 0.7543553113937378,
73
+ "epoch": 0.07085498346717052,
74
+ "grad_norm": 232.13126995990035,
75
+ "learning_rate": 1.7605633802816902e-06,
76
+ "logits": -1.406501054763794,
77
+ "logps": -83.180908203125,
78
+ "loss": 0.7413,
79
+ "objective": 0.7543553113937378,
80
+ "ranking_simple": 0.5833333134651184,
81
+ "step": 25
82
+ },
83
+ {
84
+ "dpo_loss": 0.8550678491592407,
85
+ "epoch": 0.08502598016060463,
86
+ "grad_norm": 178.51107981018083,
87
+ "learning_rate": 2.1126760563380285e-06,
88
+ "logits": -1.450936198234558,
89
+ "logps": -83.1689682006836,
90
+ "loss": 0.8038,
91
+ "objective": 0.8550678491592407,
92
+ "ranking_simple": 0.4958333373069763,
93
+ "step": 30
94
+ },
95
+ {
96
+ "dpo_loss": 0.8976810574531555,
97
+ "epoch": 0.09919697685403873,
98
+ "grad_norm": 159.63407046559615,
99
+ "learning_rate": 2.4647887323943666e-06,
100
+ "logits": -1.4125274419784546,
101
+ "logps": -83.51966857910156,
102
+ "loss": 0.8446,
103
+ "objective": 0.8976810574531555,
104
+ "ranking_simple": 0.550000011920929,
105
+ "step": 35
106
+ },
107
+ {
108
+ "dpo_loss": 0.9427976608276367,
109
+ "epoch": 0.11336797354747284,
110
+ "grad_norm": 204.8220203673265,
111
+ "learning_rate": 2.8169014084507046e-06,
112
+ "logits": -1.370052456855774,
113
+ "logps": -85.57767486572266,
114
+ "loss": 0.8988,
115
+ "objective": 0.9427976608276367,
116
+ "ranking_simple": 0.5,
117
+ "step": 40
118
+ },
119
+ {
120
+ "dpo_loss": 0.9266307950019836,
121
+ "epoch": 0.12753897024090693,
122
+ "grad_norm": 166.47204979724842,
123
+ "learning_rate": 3.1690140845070427e-06,
124
+ "logits": -1.3562687635421753,
125
+ "logps": -82.7292251586914,
126
+ "loss": 0.9677,
127
+ "objective": 0.9266307950019836,
128
+ "ranking_simple": 0.5041666626930237,
129
+ "step": 45
130
+ },
131
+ {
132
+ "dpo_loss": 1.1937092542648315,
133
+ "epoch": 0.14170996693434104,
134
+ "grad_norm": 162.3910297296546,
135
+ "learning_rate": 3.5211267605633804e-06,
136
+ "logits": -1.3756095170974731,
137
+ "logps": -83.8311538696289,
138
+ "loss": 1.1219,
139
+ "objective": 1.1937092542648315,
140
+ "ranking_simple": 0.5249999761581421,
141
+ "step": 50
142
+ },
143
+ {
144
+ "epoch": 0.14170996693434104,
145
+ "eval_dpo_loss": 1.127982258796692,
146
+ "eval_logits": -1.3893742561340332,
147
+ "eval_logps": -91.15036010742188,
148
+ "eval_loss": 1.1406893730163574,
149
+ "eval_objective": 1.127982258796692,
150
+ "eval_ranking_simple": 0.5274327397346497,
151
+ "eval_runtime": 605.2256,
152
+ "eval_samples_per_second": 9.567,
153
+ "eval_steps_per_second": 0.798,
154
+ "step": 50
155
+ },
156
+ {
157
+ "dpo_loss": 1.2814911603927612,
158
+ "epoch": 0.15588096362777515,
159
+ "grad_norm": 163.57940740751664,
160
+ "learning_rate": 3.873239436619718e-06,
161
+ "logits": -1.3777908086776733,
162
+ "logps": -84.10151672363281,
163
+ "loss": 1.1332,
164
+ "objective": 1.2814911603927612,
165
+ "ranking_simple": 0.5041666626930237,
166
+ "step": 55
167
+ },
168
+ {
169
+ "dpo_loss": 1.0759367942810059,
170
+ "epoch": 0.17005196032120926,
171
+ "grad_norm": 161.38779700951656,
172
+ "learning_rate": 4.225352112676057e-06,
173
+ "logits": -1.3657281398773193,
174
+ "logps": -80.9452133178711,
175
+ "loss": 1.1561,
176
+ "objective": 1.0759367942810059,
177
+ "ranking_simple": 0.5541666746139526,
178
+ "step": 60
179
+ },
180
+ {
181
+ "dpo_loss": 1.2120552062988281,
182
+ "epoch": 0.18422295701464336,
183
+ "grad_norm": 160.82431949475915,
184
+ "learning_rate": 4.577464788732395e-06,
185
+ "logits": -1.3958569765090942,
186
+ "logps": -77.98729705810547,
187
+ "loss": 1.1297,
188
+ "objective": 1.2120552062988281,
189
+ "ranking_simple": 0.574999988079071,
190
+ "step": 65
191
+ },
192
+ {
193
+ "dpo_loss": 1.182289958000183,
194
+ "epoch": 0.19839395370807747,
195
+ "grad_norm": 153.19784131936885,
196
+ "learning_rate": 4.929577464788733e-06,
197
+ "logits": -1.3969266414642334,
198
+ "logps": -81.52046203613281,
199
+ "loss": 1.1094,
200
+ "objective": 1.182289958000183,
201
+ "ranking_simple": 0.5791666507720947,
202
+ "step": 70
203
+ },
204
+ {
205
+ "dpo_loss": 1.1960618495941162,
206
+ "epoch": 0.21256495040151158,
207
+ "grad_norm": 146.00340475417016,
208
+ "learning_rate": 4.999507384516835e-06,
209
+ "logits": -1.3441658020019531,
210
+ "logps": -80.9335708618164,
211
+ "loss": 1.2518,
212
+ "objective": 1.1960618495941162,
213
+ "ranking_simple": 0.5833333134651184,
214
+ "step": 75
215
+ },
216
+ {
217
+ "dpo_loss": 1.1959224939346313,
218
+ "epoch": 0.22673594709494568,
219
+ "grad_norm": 164.14111542380238,
220
+ "learning_rate": 4.997506466835171e-06,
221
+ "logits": -1.4696060419082642,
222
+ "logps": -78.75235748291016,
223
+ "loss": 1.2283,
224
+ "objective": 1.1959224939346313,
225
+ "ranking_simple": 0.612500011920929,
226
+ "step": 80
227
+ },
228
+ {
229
+ "dpo_loss": 1.3274264335632324,
230
+ "epoch": 0.2409069437883798,
231
+ "grad_norm": 116.34306089867205,
232
+ "learning_rate": 4.9939676896203576e-06,
233
+ "logits": -1.3812180757522583,
234
+ "logps": -77.95438385009766,
235
+ "loss": 1.2276,
236
+ "objective": 1.3274264335632324,
237
+ "ranking_simple": 0.5833333134651184,
238
+ "step": 85
239
+ },
240
+ {
241
+ "dpo_loss": 1.0321040153503418,
242
+ "epoch": 0.25507794048181387,
243
+ "grad_norm": 130.12375600026587,
244
+ "learning_rate": 4.9888932319026994e-06,
245
+ "logits": -1.406469702720642,
246
+ "logps": -78.24427795410156,
247
+ "loss": 1.2313,
248
+ "objective": 1.0321040153503418,
249
+ "ranking_simple": 0.5916666388511658,
250
+ "step": 90
251
+ },
252
+ {
253
+ "dpo_loss": 1.0937633514404297,
254
+ "epoch": 0.269248937175248,
255
+ "grad_norm": 138.54409786316114,
256
+ "learning_rate": 4.982286218320023e-06,
257
+ "logits": -1.3431053161621094,
258
+ "logps": -78.20173645019531,
259
+ "loss": 1.0961,
260
+ "objective": 1.0937633514404297,
261
+ "ranking_simple": 0.5333333611488342,
262
+ "step": 95
263
+ },
264
+ {
265
+ "dpo_loss": 1.2458388805389404,
266
+ "epoch": 0.2834199338686821,
267
+ "grad_norm": 135.72867701135627,
268
+ "learning_rate": 4.974150717193654e-06,
269
+ "logits": -1.2822611331939697,
270
+ "logps": -75.05729675292969,
271
+ "loss": 1.3821,
272
+ "objective": 1.2458388805389404,
273
+ "ranking_simple": 0.5666666626930237,
274
+ "step": 100
275
+ },
276
+ {
277
+ "epoch": 0.2834199338686821,
278
+ "eval_dpo_loss": 1.4947212934494019,
279
+ "eval_logits": -1.3773702383041382,
280
+ "eval_logps": -81.2234115600586,
281
+ "eval_loss": 1.530389428138733,
282
+ "eval_objective": 1.4947212934494019,
283
+ "eval_ranking_simple": 0.5289855003356934,
284
+ "eval_runtime": 599.6473,
285
+ "eval_samples_per_second": 9.656,
286
+ "eval_steps_per_second": 0.805,
287
+ "step": 100
288
+ },
289
+ {
290
+ "dpo_loss": 1.673114538192749,
291
+ "epoch": 0.2975909305621162,
292
+ "grad_norm": 165.43372686768916,
293
+ "learning_rate": 4.964491738023321e-06,
294
+ "logits": -1.4061511754989624,
295
+ "logps": -76.94784545898438,
296
+ "loss": 1.4977,
297
+ "objective": 1.673114538192749,
298
+ "ranking_simple": 0.5916666388511658,
299
+ "step": 105
300
+ },
301
+ {
302
+ "dpo_loss": 1.381745457649231,
303
+ "epoch": 0.3117619272555503,
304
+ "grad_norm": 134.94435406621707,
305
+ "learning_rate": 4.953315228402512e-06,
306
+ "logits": -1.2966018915176392,
307
+ "logps": -75.49461364746094,
308
+ "loss": 1.4307,
309
+ "objective": 1.381745457649231,
310
+ "ranking_simple": 0.5541666746139526,
311
+ "step": 110
312
+ },
313
+ {
314
+ "dpo_loss": 1.525147795677185,
315
+ "epoch": 0.32593292394898443,
316
+ "grad_norm": 146.42714865280993,
317
+ "learning_rate": 4.9406280703561944e-06,
318
+ "logits": -1.203226923942566,
319
+ "logps": -73.4133529663086,
320
+ "loss": 1.4613,
321
+ "objective": 1.525147795677185,
322
+ "ranking_simple": 0.5708333253860474,
323
+ "step": 115
324
+ },
325
+ {
326
+ "dpo_loss": 1.2190510034561157,
327
+ "epoch": 0.3401039206424185,
328
+ "grad_norm": 145.8596102648603,
329
+ "learning_rate": 4.926438076103162e-06,
330
+ "logits": -1.192163109779358,
331
+ "logps": -72.815673828125,
332
+ "loss": 1.3059,
333
+ "objective": 1.2190510034561157,
334
+ "ranking_simple": 0.5708333253860474,
335
+ "step": 120
336
+ },
337
+ {
338
+ "dpo_loss": 1.5624465942382812,
339
+ "epoch": 0.35427491733585265,
340
+ "grad_norm": 131.5829174213295,
341
+ "learning_rate": 4.910753983245589e-06,
342
+ "logits": -1.190179467201233,
343
+ "logps": -73.50247192382812,
344
+ "loss": 1.4015,
345
+ "objective": 1.5624465942382812,
346
+ "ranking_simple": 0.5541666746139526,
347
+ "step": 125
348
+ },
349
+ {
350
+ "dpo_loss": 1.3345781564712524,
351
+ "epoch": 0.3684459140292867,
352
+ "grad_norm": 121.07722918376957,
353
+ "learning_rate": 4.893585449388786e-06,
354
+ "logits": -1.1411052942276,
355
+ "logps": -70.5329818725586,
356
+ "loss": 1.4099,
357
+ "objective": 1.3345781564712524,
358
+ "ranking_simple": 0.6416666507720947,
359
+ "step": 130
360
+ },
361
+ {
362
+ "dpo_loss": 1.0789151191711426,
363
+ "epoch": 0.3826169107227208,
364
+ "grad_norm": 118.85794111154456,
365
+ "learning_rate": 4.8749430461944536e-06,
366
+ "logits": -1.088476300239563,
367
+ "logps": -71.05619812011719,
368
+ "loss": 1.2611,
369
+ "objective": 1.0789151191711426,
370
+ "ranking_simple": 0.5874999761581421,
371
+ "step": 135
372
+ },
373
+ {
374
+ "dpo_loss": 1.8989228010177612,
375
+ "epoch": 0.39678790741615494,
376
+ "grad_norm": 131.90266764687357,
377
+ "learning_rate": 4.854838252871097e-06,
378
+ "logits": -1.0452989339828491,
379
+ "logps": -71.00385284423828,
380
+ "loss": 1.5343,
381
+ "objective": 1.8989228010177612,
382
+ "ranking_simple": 0.574999988079071,
383
+ "step": 140
384
+ },
385
+ {
386
+ "dpo_loss": 1.576548457145691,
387
+ "epoch": 0.410958904109589,
388
+ "grad_norm": 140.01764826482653,
389
+ "learning_rate": 4.833283449105609e-06,
390
+ "logits": -1.1004178524017334,
391
+ "logps": -73.08991241455078,
392
+ "loss": 1.6065,
393
+ "objective": 1.576548457145691,
394
+ "ranking_simple": 0.625,
395
+ "step": 145
396
+ },
397
+ {
398
+ "dpo_loss": 1.3776977062225342,
399
+ "epoch": 0.42512990080302315,
400
+ "grad_norm": 126.62570019000385,
401
+ "learning_rate": 4.810291907440382e-06,
402
+ "logits": -1.0991231203079224,
403
+ "logps": -74.44639587402344,
404
+ "loss": 1.4062,
405
+ "objective": 1.3776977062225342,
406
+ "ranking_simple": 0.574999988079071,
407
+ "step": 150
408
+ },
409
+ {
410
+ "epoch": 0.42512990080302315,
411
+ "eval_dpo_loss": 1.8192129135131836,
412
+ "eval_logits": -1.1641029119491577,
413
+ "eval_logps": -79.7786636352539,
414
+ "eval_loss": 1.8817615509033203,
415
+ "eval_objective": 1.8192129135131836,
416
+ "eval_ranking_simple": 0.5429606437683105,
417
+ "eval_runtime": 600.2453,
418
+ "eval_samples_per_second": 9.646,
419
+ "eval_steps_per_second": 0.805,
420
+ "step": 150
421
+ },
422
+ {
423
+ "dpo_loss": 1.2696728706359863,
424
+ "epoch": 0.43930089749645723,
425
+ "grad_norm": 122.22748192353309,
426
+ "learning_rate": 4.785877785100633e-06,
427
+ "logits": -1.187027096748352,
428
+ "logps": -75.60051727294922,
429
+ "loss": 1.3364,
430
+ "objective": 1.2696728706359863,
431
+ "ranking_simple": 0.5708333253860474,
432
+ "step": 155
433
+ },
434
+ {
435
+ "dpo_loss": 1.7951911687850952,
436
+ "epoch": 0.45347189418989137,
437
+ "grad_norm": 125.17292960366125,
438
+ "learning_rate": 4.7600561152769795e-06,
439
+ "logits": -1.1456806659698486,
440
+ "logps": -74.9968032836914,
441
+ "loss": 1.5773,
442
+ "objective": 1.7951911687850952,
443
+ "ranking_simple": 0.550000011920929,
444
+ "step": 160
445
+ },
446
+ {
447
+ "dpo_loss": 1.2346725463867188,
448
+ "epoch": 0.46764289088332545,
449
+ "grad_norm": 108.82881778945708,
450
+ "learning_rate": 4.732842797868631e-06,
451
+ "logits": -1.2308270931243896,
452
+ "logps": -75.75040435791016,
453
+ "loss": 1.3947,
454
+ "objective": 1.2346725463867188,
455
+ "ranking_simple": 0.6083333492279053,
456
+ "step": 165
457
+ },
458
+ {
459
+ "dpo_loss": 1.3481464385986328,
460
+ "epoch": 0.4818138875767596,
461
+ "grad_norm": 108.10724884444645,
462
+ "learning_rate": 4.704254589692903e-06,
463
+ "logits": -1.2144527435302734,
464
+ "logps": -75.97994232177734,
465
+ "loss": 1.305,
466
+ "objective": 1.3481464385986328,
467
+ "ranking_simple": 0.5583333373069763,
468
+ "step": 170
469
+ },
470
+ {
471
+ "dpo_loss": 1.4393560886383057,
472
+ "epoch": 0.49598488427019366,
473
+ "grad_norm": 104.8074907192406,
474
+ "learning_rate": 4.6743090941670675e-06,
475
+ "logits": -1.147178292274475,
476
+ "logps": -76.960205078125,
477
+ "loss": 1.3896,
478
+ "objective": 1.4393560886383057,
479
+ "ranking_simple": 0.5583333373069763,
480
+ "step": 175
481
+ },
482
+ {
483
+ "dpo_loss": 1.6493242979049683,
484
+ "epoch": 0.5101558809636277,
485
+ "grad_norm": 107.1323631395759,
486
+ "learning_rate": 4.643024750468913e-06,
487
+ "logits": -1.1330630779266357,
488
+ "logps": -75.9026870727539,
489
+ "loss": 1.3334,
490
+ "objective": 1.6493242979049683,
491
+ "ranking_simple": 0.5291666388511658,
492
+ "step": 180
493
+ },
494
+ {
495
+ "dpo_loss": 1.1641370058059692,
496
+ "epoch": 0.5243268776570619,
497
+ "grad_norm": 113.77830856113329,
498
+ "learning_rate": 4.610420822182671e-06,
499
+ "logits": -1.1537328958511353,
500
+ "logps": -74.29253387451172,
501
+ "loss": 1.1361,
502
+ "objective": 1.1641370058059692,
503
+ "ranking_simple": 0.5916666388511658,
504
+ "step": 185
505
+ },
506
+ {
507
+ "dpo_loss": 1.2755895853042603,
508
+ "epoch": 0.538497874350496,
509
+ "grad_norm": 109.44630427090762,
510
+ "learning_rate": 4.576517385437315e-06,
511
+ "logits": -1.1797126531600952,
512
+ "logps": -74.75035858154297,
513
+ "loss": 1.2583,
514
+ "objective": 1.2755895853042603,
515
+ "ranking_simple": 0.637499988079071,
516
+ "step": 190
517
+ },
518
+ {
519
+ "dpo_loss": 1.073716402053833,
520
+ "epoch": 0.5526688710439301,
521
+ "grad_norm": 98.88901766370496,
522
+ "learning_rate": 4.541335316544514e-06,
523
+ "logits": -1.1745376586914062,
524
+ "logps": -74.90966796875,
525
+ "loss": 1.2475,
526
+ "objective": 1.073716402053833,
527
+ "ranking_simple": 0.637499988079071,
528
+ "step": 195
529
+ },
530
+ {
531
+ "dpo_loss": 1.185225248336792,
532
+ "epoch": 0.5668398677373642,
533
+ "grad_norm": 94.03051977106067,
534
+ "learning_rate": 4.5048962791438885e-06,
535
+ "logits": -1.1321061849594116,
536
+ "logps": -74.69174194335938,
537
+ "loss": 1.2275,
538
+ "objective": 1.185225248336792,
539
+ "ranking_simple": 0.5791666507720947,
540
+ "step": 200
541
+ },
542
+ {
543
+ "epoch": 0.5668398677373642,
544
+ "eval_dpo_loss": 1.9716603755950928,
545
+ "eval_logits": -1.1289294958114624,
546
+ "eval_logps": -77.9853515625,
547
+ "eval_loss": 2.0358171463012695,
548
+ "eval_objective": 1.9716603755950928,
549
+ "eval_ranking_simple": 0.534679114818573,
550
+ "eval_runtime": 601.7719,
551
+ "eval_samples_per_second": 9.622,
552
+ "eval_steps_per_second": 0.803,
553
+ "step": 200
554
+ },
555
+ {
556
+ "dpo_loss": 1.2547378540039062,
557
+ "epoch": 0.5810108644307983,
558
+ "grad_norm": 109.12199093644038,
559
+ "learning_rate": 4.467222710863444e-06,
560
+ "logits": -0.9467021226882935,
561
+ "logps": -74.9859390258789,
562
+ "loss": 1.2613,
563
+ "objective": 1.2547378540039062,
564
+ "ranking_simple": 0.5874999761581421,
565
+ "step": 205
566
+ },
567
+ {
568
+ "dpo_loss": 0.9887279868125916,
569
+ "epoch": 0.5951818611242324,
570
+ "grad_norm": 110.59494347158504,
571
+ "learning_rate": 4.428337809503425e-06,
572
+ "logits": -0.9453111290931702,
573
+ "logps": -75.48457336425781,
574
+ "loss": 1.2147,
575
+ "objective": 0.9887279868125916,
576
+ "ranking_simple": 0.5958333611488342,
577
+ "step": 210
578
+ },
579
+ {
580
+ "dpo_loss": 1.297851800918579,
581
+ "epoch": 0.6093528578176665,
582
+ "grad_norm": 95.93031839243162,
583
+ "learning_rate": 4.388265518752085e-06,
584
+ "logits": -0.9826800227165222,
585
+ "logps": -75.59996032714844,
586
+ "loss": 1.1658,
587
+ "objective": 1.297851800918579,
588
+ "ranking_simple": 0.574999988079071,
589
+ "step": 215
590
+ },
591
+ {
592
+ "dpo_loss": 1.0760146379470825,
593
+ "epoch": 0.6235238545111006,
594
+ "grad_norm": 93.17621109620359,
595
+ "learning_rate": 4.347030513442168e-06,
596
+ "logits": -1.0486137866973877,
597
+ "logps": -75.58748626708984,
598
+ "loss": 1.0822,
599
+ "objective": 1.0760146379470825,
600
+ "ranking_simple": 0.5333333611488342,
601
+ "step": 220
602
+ },
603
+ {
604
+ "dpo_loss": 1.1693772077560425,
605
+ "epoch": 0.6376948512045347,
606
+ "grad_norm": 97.88558873864356,
607
+ "learning_rate": 4.304658184357186e-06,
608
+ "logits": -1.0480105876922607,
609
+ "logps": -75.87112426757812,
610
+ "loss": 1.2411,
611
+ "objective": 1.1693772077560425,
612
+ "ranking_simple": 0.5791666507720947,
613
+ "step": 225
614
+ },
615
+ {
616
+ "dpo_loss": 1.0544065237045288,
617
+ "epoch": 0.6518658478979689,
618
+ "grad_norm": 96.24074077951633,
619
+ "learning_rate": 4.261174622596835e-06,
620
+ "logits": -0.9322254657745361,
621
+ "logps": -74.73289489746094,
622
+ "loss": 1.2067,
623
+ "objective": 1.0544065237045288,
624
+ "ranking_simple": 0.6499999761581421,
625
+ "step": 230
626
+ },
627
+ {
628
+ "dpo_loss": 1.3109701871871948,
629
+ "epoch": 0.6660368445914029,
630
+ "grad_norm": 99.32952244494642,
631
+ "learning_rate": 4.216606603511202e-06,
632
+ "logits": -0.8552966713905334,
633
+ "logps": -76.02928161621094,
634
+ "loss": 1.1741,
635
+ "objective": 1.3109701871871948,
636
+ "ranking_simple": 0.5916666388511658,
637
+ "step": 235
638
+ },
639
+ {
640
+ "dpo_loss": 1.1290283203125,
641
+ "epoch": 0.680207841284837,
642
+ "grad_norm": 90.5291171770579,
643
+ "learning_rate": 4.170981570213621e-06,
644
+ "logits": -1.0387074947357178,
645
+ "logps": -74.89482116699219,
646
+ "loss": 1.1403,
647
+ "objective": 1.1290283203125,
648
+ "ranking_simple": 0.5916666388511658,
649
+ "step": 240
650
+ },
651
+ {
652
+ "dpo_loss": 1.3167603015899658,
653
+ "epoch": 0.6943788379782712,
654
+ "grad_norm": 111.7529832893929,
655
+ "learning_rate": 4.124327616682362e-06,
656
+ "logits": -1.05294930934906,
657
+ "logps": -73.70958709716797,
658
+ "loss": 1.1308,
659
+ "objective": 1.3167603015899658,
660
+ "ranking_simple": 0.612500011920929,
661
+ "step": 245
662
+ },
663
+ {
664
+ "dpo_loss": 1.0333999395370483,
665
+ "epoch": 0.7085498346717053,
666
+ "grad_norm": 103.1933415885334,
667
+ "learning_rate": 4.076673470461538e-06,
668
+ "logits": -0.8845551609992981,
669
+ "logps": -73.25338745117188,
670
+ "loss": 1.1914,
671
+ "objective": 1.0333999395370483,
672
+ "ranking_simple": 0.6208333373069763,
673
+ "step": 250
674
+ },
675
+ {
676
+ "epoch": 0.7085498346717053,
677
+ "eval_dpo_loss": 1.9460629224777222,
678
+ "eval_logits": -1.0883058309555054,
679
+ "eval_logps": -78.33851623535156,
680
+ "eval_loss": 2.0083987712860107,
681
+ "eval_objective": 1.9460629224777222,
682
+ "eval_ranking_simple": 0.534679114818573,
683
+ "eval_runtime": 607.1642,
684
+ "eval_samples_per_second": 9.536,
685
+ "eval_steps_per_second": 0.796,
686
+ "step": 250
687
+ },
688
+ {
689
+ "dpo_loss": 1.0360175371170044,
690
+ "epoch": 0.7227208313651393,
691
+ "grad_norm": 86.03208381847595,
692
+ "learning_rate": 4.028048474971889e-06,
693
+ "logits": -1.0679165124893188,
694
+ "logps": -73.73800659179688,
695
+ "loss": 1.0854,
696
+ "objective": 1.0360175371170044,
697
+ "ranking_simple": 0.5458333492279053,
698
+ "step": 255
699
+ },
700
+ {
701
+ "dpo_loss": 1.0874184370040894,
702
+ "epoch": 0.7368918280585735,
703
+ "grad_norm": 103.07844444269477,
704
+ "learning_rate": 3.978482571442339e-06,
705
+ "logits": -1.0024687051773071,
706
+ "logps": -77.03482818603516,
707
+ "loss": 1.12,
708
+ "objective": 1.0874184370040894,
709
+ "ranking_simple": 0.612500011920929,
710
+ "step": 260
711
+ },
712
+ {
713
+ "dpo_loss": 0.8361913561820984,
714
+ "epoch": 0.7510628247520076,
715
+ "grad_norm": 99.23229150586383,
716
+ "learning_rate": 3.928006280473445e-06,
717
+ "logits": -0.9436743259429932,
718
+ "logps": -76.39049530029297,
719
+ "loss": 1.0349,
720
+ "objective": 0.8361913561820984,
721
+ "ranking_simple": 0.6708333492279053,
722
+ "step": 265
723
+ },
724
+ {
725
+ "dpo_loss": 1.0280269384384155,
726
+ "epoch": 0.7652338214454416,
727
+ "grad_norm": 95.81187840710191,
728
+ "learning_rate": 3.876650683244093e-06,
729
+ "logits": -0.9881510138511658,
730
+ "logps": -77.2256851196289,
731
+ "loss": 1.0768,
732
+ "objective": 1.0280269384384155,
733
+ "ranking_simple": 0.5458333492279053,
734
+ "step": 270
735
+ },
736
+ {
737
+ "dpo_loss": 0.9129766821861267,
738
+ "epoch": 0.7794048181388757,
739
+ "grad_norm": 89.52663856813324,
740
+ "learning_rate": 3.8244474023730155e-06,
741
+ "logits": -0.8726529479026794,
742
+ "logps": -76.8287124633789,
743
+ "loss": 1.0978,
744
+ "objective": 0.9129766821861267,
745
+ "ranking_simple": 0.5874999761581421,
746
+ "step": 275
747
+ },
748
+ {
749
+ "dpo_loss": 0.8938360810279846,
750
+ "epoch": 0.7935758148323099,
751
+ "grad_norm": 88.2587417355247,
752
+ "learning_rate": 3.771428582446908e-06,
753
+ "logits": -0.9136671423912048,
754
+ "logps": -76.3465347290039,
755
+ "loss": 0.9242,
756
+ "objective": 0.8938360810279846,
757
+ "ranking_simple": 0.6083333492279053,
758
+ "step": 280
759
+ },
760
+ {
761
+ "dpo_loss": 1.147481083869934,
762
+ "epoch": 0.807746811525744,
763
+ "grad_norm": 106.04454752282797,
764
+ "learning_rate": 3.7176268702271468e-06,
765
+ "logits": -0.8765575885772705,
766
+ "logps": -77.12137603759766,
767
+ "loss": 0.9999,
768
+ "objective": 1.147481083869934,
769
+ "ranking_simple": 0.5583333373069763,
770
+ "step": 285
771
+ },
772
+ {
773
+ "dpo_loss": 0.9309877157211304,
774
+ "epoch": 0.821917808219178,
775
+ "grad_norm": 97.07071302996712,
776
+ "learning_rate": 3.6630753945472854e-06,
777
+ "logits": -0.8515520095825195,
778
+ "logps": -76.22865295410156,
779
+ "loss": 0.8822,
780
+ "objective": 0.9309877157211304,
781
+ "ranking_simple": 0.5916666388511658,
782
+ "step": 290
783
+ },
784
+ {
785
+ "dpo_loss": 0.8610218167304993,
786
+ "epoch": 0.8360888049126122,
787
+ "grad_norm": 95.1741444029312,
788
+ "learning_rate": 3.6078077459137097e-06,
789
+ "logits": -0.9089216589927673,
790
+ "logps": -76.60446166992188,
791
+ "loss": 1.093,
792
+ "objective": 0.8610218167304993,
793
+ "ranking_simple": 0.550000011920929,
794
+ "step": 295
795
+ },
796
+ {
797
+ "dpo_loss": 0.7812207341194153,
798
+ "epoch": 0.8502598016060463,
799
+ "grad_norm": 86.74577338149517,
800
+ "learning_rate": 3.5518579558220144e-06,
801
+ "logits": -0.885455846786499,
802
+ "logps": -76.61499786376953,
803
+ "loss": 1.0378,
804
+ "objective": 0.7812207341194153,
805
+ "ranking_simple": 0.6499999761581421,
806
+ "step": 300
807
+ },
808
+ {
809
+ "epoch": 0.8502598016060463,
810
+ "eval_dpo_loss": 2.0357348918914795,
811
+ "eval_logits": -0.9324438571929932,
812
+ "eval_logps": -83.47068786621094,
813
+ "eval_loss": 2.091830015182495,
814
+ "eval_objective": 2.0357348918914795,
815
+ "eval_ranking_simple": 0.5351966619491577,
816
+ "eval_runtime": 611.0424,
817
+ "eval_samples_per_second": 9.476,
818
+ "eval_steps_per_second": 0.79,
819
+ "step": 300
820
+ },
821
+ {
822
+ "dpo_loss": 1.2579916715621948,
823
+ "epoch": 0.8644307982994804,
824
+ "grad_norm": 85.57616018620313,
825
+ "learning_rate": 3.495260475801841e-06,
826
+ "logits": -0.8382306694984436,
827
+ "logps": -79.23892974853516,
828
+ "loss": 0.9276,
829
+ "objective": 1.2579916715621948,
830
+ "ranking_simple": 0.637499988079071,
831
+ "step": 305
832
+ },
833
+ {
834
+ "dpo_loss": 0.6779700517654419,
835
+ "epoch": 0.8786017949929145,
836
+ "grad_norm": 99.75714946886596,
837
+ "learning_rate": 3.4380501562030704e-06,
838
+ "logits": -0.7922627329826355,
839
+ "logps": -78.47576141357422,
840
+ "loss": 0.8132,
841
+ "objective": 0.6779700517654419,
842
+ "ranking_simple": 0.6166666746139526,
843
+ "step": 310
844
+ },
845
+ {
846
+ "dpo_loss": 0.8982964754104614,
847
+ "epoch": 0.8927727916863486,
848
+ "grad_norm": 77.85557552766669,
849
+ "learning_rate": 3.3802622247364446e-06,
850
+ "logits": -0.7617653608322144,
851
+ "logps": -78.67091369628906,
852
+ "loss": 0.9992,
853
+ "objective": 0.8982964754104614,
854
+ "ranking_simple": 0.5874999761581421,
855
+ "step": 315
856
+ },
857
+ {
858
+ "dpo_loss": 0.8807361125946045,
859
+ "epoch": 0.9069437883797827,
860
+ "grad_norm": 97.29848811322167,
861
+ "learning_rate": 3.321932264781822e-06,
862
+ "logits": -0.7270026803016663,
863
+ "logps": -80.3343734741211,
864
+ "loss": 0.9675,
865
+ "objective": 0.8807361125946045,
866
+ "ranking_simple": 0.6208333373069763,
867
+ "step": 320
868
+ },
869
+ {
870
+ "dpo_loss": 0.7385057806968689,
871
+ "epoch": 0.9211147850732169,
872
+ "grad_norm": 80.05333271753152,
873
+ "learning_rate": 3.2630961934774265e-06,
874
+ "logits": -0.8284817934036255,
875
+ "logps": -80.14983367919922,
876
+ "loss": 0.8071,
877
+ "objective": 0.7385057806968689,
878
+ "ranking_simple": 0.6041666865348816,
879
+ "step": 325
880
+ },
881
+ {
882
+ "dpo_loss": 0.8387654423713684,
883
+ "epoch": 0.9352857817666509,
884
+ "grad_norm": 85.76862598705932,
885
+ "learning_rate": 3.203790239603583e-06,
886
+ "logits": -0.7765657305717468,
887
+ "logps": -79.01445007324219,
888
+ "loss": 0.9652,
889
+ "objective": 0.8387654423713684,
890
+ "ranking_simple": 0.5958333611488342,
891
+ "step": 330
892
+ },
893
+ {
894
+ "dpo_loss": 0.838505208492279,
895
+ "epoch": 0.949456778460085,
896
+ "grad_norm": 88.47139227896243,
897
+ "learning_rate": 3.1440509212745584e-06,
898
+ "logits": -0.7588701844215393,
899
+ "logps": -78.80551147460938,
900
+ "loss": 0.8953,
901
+ "objective": 0.838505208492279,
902
+ "ranking_simple": 0.6166666746139526,
903
+ "step": 335
904
+ },
905
+ {
906
+ "dpo_loss": 1.108252763748169,
907
+ "epoch": 0.9636277751535192,
908
+ "grad_norm": 69.45012344359534,
909
+ "learning_rate": 3.0839150234522404e-06,
910
+ "logits": -0.8102169036865234,
911
+ "logps": -77.67143249511719,
912
+ "loss": 0.9601,
913
+ "objective": 1.108252763748169,
914
+ "ranking_simple": 0.5874999761581421,
915
+ "step": 340
916
+ },
917
+ {
918
+ "dpo_loss": 0.9010140299797058,
919
+ "epoch": 0.9777987718469532,
920
+ "grad_norm": 82.40549141111858,
921
+ "learning_rate": 3.0234195752955032e-06,
922
+ "logits": -0.8673346638679504,
923
+ "logps": -75.89323425292969,
924
+ "loss": 0.969,
925
+ "objective": 0.9010140299797058,
926
+ "ranking_simple": 0.6541666388511658,
927
+ "step": 345
928
+ },
929
+ {
930
+ "dpo_loss": 0.7752221822738647,
931
+ "epoch": 0.9919697685403873,
932
+ "grad_norm": 80.80020112901609,
933
+ "learning_rate": 2.962601827359208e-06,
934
+ "logits": -0.7917401790618896,
935
+ "logps": -77.88353729248047,
936
+ "loss": 0.8334,
937
+ "objective": 0.7752221822738647,
938
+ "ranking_simple": 0.6875,
939
+ "step": 350
940
+ },
941
+ {
942
+ "epoch": 0.9919697685403873,
943
+ "eval_dpo_loss": 1.9975330829620361,
944
+ "eval_logits": -0.8755112886428833,
945
+ "eval_logps": -81.17398071289062,
946
+ "eval_loss": 2.1142992973327637,
947
+ "eval_objective": 1.9975330829620361,
948
+ "eval_ranking_simple": 0.5388198494911194,
949
+ "eval_runtime": 610.9883,
950
+ "eval_samples_per_second": 9.476,
951
+ "eval_steps_per_second": 0.791,
952
+ "step": 350
953
+ },
954
+ {
955
+ "dpo_loss": 0.602699875831604,
956
+ "epoch": 1.0061407652338215,
957
+ "grad_norm": 47.23395645047123,
958
+ "learning_rate": 2.9014992286568773e-06,
959
+ "logits": -0.8253915309906006,
960
+ "logps": -76.2964096069336,
961
+ "loss": 0.6347,
962
+ "objective": 0.602699875831604,
963
+ "ranking_simple": 0.6708333492279053,
964
+ "step": 355
965
+ },
966
+ {
967
+ "dpo_loss": 0.6031454205513,
968
+ "epoch": 1.0203117619272555,
969
+ "grad_norm": 58.81365077909613,
970
+ "learning_rate": 2.840149403601166e-06,
971
+ "logits": -0.7789632081985474,
972
+ "logps": -77.5028305053711,
973
+ "loss": 0.4974,
974
+ "objective": 0.6031454205513,
975
+ "ranking_simple": 0.6499999761581421,
976
+ "step": 360
977
+ },
978
+ {
979
+ "dpo_loss": 0.45107781887054443,
980
+ "epoch": 1.0344827586206897,
981
+ "grad_norm": 53.96463042562765,
982
+ "learning_rate": 2.7785901288363253e-06,
983
+ "logits": -0.7713039517402649,
984
+ "logps": -77.14903259277344,
985
+ "loss": 0.4946,
986
+ "objective": 0.45107781887054443,
987
+ "ranking_simple": 0.625,
988
+ "step": 365
989
+ },
990
+ {
991
+ "dpo_loss": 0.4809951186180115,
992
+ "epoch": 1.0486537553141237,
993
+ "grad_norm": 64.07795252956707,
994
+ "learning_rate": 2.7168593099769414e-06,
995
+ "logits": -0.7404767870903015,
996
+ "logps": -74.05150604248047,
997
+ "loss": 0.4291,
998
+ "objective": 0.4809951186180115,
999
+ "ranking_simple": 0.6208333373069763,
1000
+ "step": 370
1001
+ },
1002
+ {
1003
+ "dpo_loss": 0.4239761233329773,
1004
+ "epoch": 1.0628247520075578,
1005
+ "grad_norm": 55.70979139140137,
1006
+ "learning_rate": 2.654994958267241e-06,
1007
+ "logits": -0.8401414752006531,
1008
+ "logps": -75.16132354736328,
1009
+ "loss": 0.4015,
1010
+ "objective": 0.4239761233329773,
1011
+ "ranking_simple": 0.6416666507720947,
1012
+ "step": 375
1013
+ },
1014
+ {
1015
+ "dpo_loss": 0.4185657799243927,
1016
+ "epoch": 1.076995748700992,
1017
+ "grad_norm": 57.189539552967226,
1018
+ "learning_rate": 2.5930351671753707e-06,
1019
+ "logits": -0.8457431793212891,
1020
+ "logps": -76.84170532226562,
1021
+ "loss": 0.4414,
1022
+ "objective": 0.4185657799243927,
1023
+ "ranking_simple": 0.6875,
1024
+ "step": 380
1025
+ },
1026
+ {
1027
+ "dpo_loss": 0.49603796005249023,
1028
+ "epoch": 1.091166745394426,
1029
+ "grad_norm": 47.834305883433586,
1030
+ "learning_rate": 2.5310180889370374e-06,
1031
+ "logits": -0.778562605381012,
1032
+ "logps": -75.62349700927734,
1033
+ "loss": 0.4482,
1034
+ "objective": 0.49603796005249023,
1035
+ "ranking_simple": 0.6333333253860474,
1036
+ "step": 385
1037
+ },
1038
+ {
1039
+ "dpo_loss": 0.42004406452178955,
1040
+ "epoch": 1.10533774208786,
1041
+ "grad_norm": 48.511623077068286,
1042
+ "learning_rate": 2.468981911062964e-06,
1043
+ "logits": -0.7501899600028992,
1044
+ "logps": -77.29354095458984,
1045
+ "loss": 0.3694,
1046
+ "objective": 0.42004406452178955,
1047
+ "ranking_simple": 0.6916666626930237,
1048
+ "step": 390
1049
+ },
1050
+ {
1051
+ "dpo_loss": 0.4247871935367584,
1052
+ "epoch": 1.1195087387812943,
1053
+ "grad_norm": 57.912950107999485,
1054
+ "learning_rate": 2.4069648328246305e-06,
1055
+ "logits": -0.7564467787742615,
1056
+ "logps": -76.87413024902344,
1057
+ "loss": 0.5005,
1058
+ "objective": 0.4247871935367584,
1059
+ "ranking_simple": 0.637499988079071,
1060
+ "step": 395
1061
+ },
1062
+ {
1063
+ "dpo_loss": 0.3325573801994324,
1064
+ "epoch": 1.1336797354747283,
1065
+ "grad_norm": 58.38348871817592,
1066
+ "learning_rate": 2.3450050417327593e-06,
1067
+ "logits": -0.7802504301071167,
1068
+ "logps": -75.82648468017578,
1069
+ "loss": 0.4251,
1070
+ "objective": 0.3325573801994324,
1071
+ "ranking_simple": 0.6583333611488342,
1072
+ "step": 400
1073
+ },
1074
+ {
1075
+ "epoch": 1.1336797354747283,
1076
+ "eval_dpo_loss": 2.0240566730499268,
1077
+ "eval_logits": -0.8002918362617493,
1078
+ "eval_logps": -81.16890716552734,
1079
+ "eval_loss": 2.06406831741333,
1080
+ "eval_objective": 2.0240566730499268,
1081
+ "eval_ranking_simple": 0.54347825050354,
1082
+ "eval_runtime": 604.5313,
1083
+ "eval_samples_per_second": 9.578,
1084
+ "eval_steps_per_second": 0.799,
1085
+ "step": 400
1086
+ },
1087
+ {
1088
+ "dpo_loss": 0.6402778029441833,
1089
+ "epoch": 1.1478507321681626,
1090
+ "grad_norm": 57.999816664931764,
1091
+ "learning_rate": 2.2831406900230586e-06,
1092
+ "logits": -0.7289463877677917,
1093
+ "logps": -77.95418548583984,
1094
+ "loss": 0.4798,
1095
+ "objective": 0.6402778029441833,
1096
+ "ranking_simple": 0.6166666746139526,
1097
+ "step": 405
1098
+ },
1099
+ {
1100
+ "dpo_loss": 0.5912865400314331,
1101
+ "epoch": 1.1620217288615966,
1102
+ "grad_norm": 50.45480604272446,
1103
+ "learning_rate": 2.221409871163675e-06,
1104
+ "logits": -0.747395932674408,
1105
+ "logps": -77.24584197998047,
1106
+ "loss": 0.4554,
1107
+ "objective": 0.5912865400314331,
1108
+ "ranking_simple": 0.6541666388511658,
1109
+ "step": 410
1110
+ },
1111
+ {
1112
+ "dpo_loss": 0.3779014050960541,
1113
+ "epoch": 1.1761927255550306,
1114
+ "grad_norm": 64.88698660251025,
1115
+ "learning_rate": 2.1598505963988354e-06,
1116
+ "logits": -0.6771641969680786,
1117
+ "logps": -75.90592956542969,
1118
+ "loss": 0.4899,
1119
+ "objective": 0.3779014050960541,
1120
+ "ranking_simple": 0.6291666626930237,
1121
+ "step": 415
1122
+ },
1123
+ {
1124
+ "dpo_loss": 0.35035714507102966,
1125
+ "epoch": 1.1903637222484649,
1126
+ "grad_norm": 57.50741792623655,
1127
+ "learning_rate": 2.098500771343124e-06,
1128
+ "logits": -0.6913995742797852,
1129
+ "logps": -76.21831512451172,
1130
+ "loss": 0.4117,
1131
+ "objective": 0.35035714507102966,
1132
+ "ranking_simple": 0.6291666626930237,
1133
+ "step": 420
1134
+ },
1135
+ {
1136
+ "dpo_loss": 0.48492512106895447,
1137
+ "epoch": 1.204534718941899,
1138
+ "grad_norm": 59.717637187759834,
1139
+ "learning_rate": 2.037398172640793e-06,
1140
+ "logits": -0.7741472125053406,
1141
+ "logps": -74.0752944946289,
1142
+ "loss": 0.4692,
1143
+ "objective": 0.48492512106895447,
1144
+ "ranking_simple": 0.6541666388511658,
1145
+ "step": 425
1146
+ },
1147
+ {
1148
+ "dpo_loss": 0.613711953163147,
1149
+ "epoch": 1.2187057156353331,
1150
+ "grad_norm": 58.710504416860545,
1151
+ "learning_rate": 1.976580424704498e-06,
1152
+ "logits": -0.7667457461357117,
1153
+ "logps": -76.66746520996094,
1154
+ "loss": 0.3957,
1155
+ "objective": 0.613711953163147,
1156
+ "ranking_simple": 0.6291666626930237,
1157
+ "step": 430
1158
+ },
1159
+ {
1160
+ "dpo_loss": 0.28608372807502747,
1161
+ "epoch": 1.2328767123287672,
1162
+ "grad_norm": 88.90691891983907,
1163
+ "learning_rate": 1.9160849765477604e-06,
1164
+ "logits": -0.6787567734718323,
1165
+ "logps": -76.04117584228516,
1166
+ "loss": 0.4339,
1167
+ "objective": 0.28608372807502747,
1168
+ "ranking_simple": 0.699999988079071,
1169
+ "step": 435
1170
+ },
1171
+ {
1172
+ "dpo_loss": 0.4388173222541809,
1173
+ "epoch": 1.2470477090222012,
1174
+ "grad_norm": 63.01131335623436,
1175
+ "learning_rate": 1.8559490787254423e-06,
1176
+ "logits": -0.7236205339431763,
1177
+ "logps": -76.31954956054688,
1178
+ "loss": 0.4457,
1179
+ "objective": 0.4388173222541809,
1180
+ "ranking_simple": 0.7083333134651184,
1181
+ "step": 440
1182
+ },
1183
+ {
1184
+ "dpo_loss": 0.5651527643203735,
1185
+ "epoch": 1.2612187057156352,
1186
+ "grad_norm": 65.93704260731457,
1187
+ "learning_rate": 1.7962097603964177e-06,
1188
+ "logits": -0.7464514374732971,
1189
+ "logps": -76.93856048583984,
1190
+ "loss": 0.4889,
1191
+ "objective": 0.5651527643203735,
1192
+ "ranking_simple": 0.699999988079071,
1193
+ "step": 445
1194
+ },
1195
+ {
1196
+ "dpo_loss": 0.4833088517189026,
1197
+ "epoch": 1.2753897024090695,
1198
+ "grad_norm": 56.25706142129644,
1199
+ "learning_rate": 1.7369038065225743e-06,
1200
+ "logits": -0.8744809627532959,
1201
+ "logps": -76.16983795166016,
1202
+ "loss": 0.3886,
1203
+ "objective": 0.4833088517189026,
1204
+ "ranking_simple": 0.6416666507720947,
1205
+ "step": 450
1206
+ },
1207
+ {
1208
+ "epoch": 1.2753897024090695,
1209
+ "eval_dpo_loss": 1.9598380327224731,
1210
+ "eval_logits": -0.8998873233795166,
1211
+ "eval_logps": -79.8813247680664,
1212
+ "eval_loss": 2.0085067749023438,
1213
+ "eval_objective": 1.9598380327224731,
1214
+ "eval_ranking_simple": 0.5388198494911194,
1215
+ "eval_runtime": 617.7574,
1216
+ "eval_samples_per_second": 9.373,
1217
+ "eval_steps_per_second": 0.782,
1218
+ "step": 450
1219
+ },
1220
+ {
1221
+ "dpo_loss": 0.5061320662498474,
1222
+ "epoch": 1.2895606991025035,
1223
+ "grad_norm": 54.696067906167414,
1224
+ "learning_rate": 1.6780677352181781e-06,
1225
+ "logits": -0.7727634310722351,
1226
+ "logps": -76.43891906738281,
1227
+ "loss": 0.4525,
1228
+ "objective": 0.5061320662498474,
1229
+ "ranking_simple": 0.5874999761581421,
1230
+ "step": 455
1231
+ },
1232
+ {
1233
+ "dpo_loss": 0.45641031861305237,
1234
+ "epoch": 1.3037316957959377,
1235
+ "grad_norm": 57.70332958198004,
1236
+ "learning_rate": 1.6197377752635563e-06,
1237
+ "logits": -0.8627158999443054,
1238
+ "logps": -75.46236419677734,
1239
+ "loss": 0.3956,
1240
+ "objective": 0.45641031861305237,
1241
+ "ranking_simple": 0.6333333253860474,
1242
+ "step": 460
1243
+ },
1244
+ {
1245
+ "dpo_loss": 0.35006803274154663,
1246
+ "epoch": 1.3179026924893718,
1247
+ "grad_norm": 60.819739611222175,
1248
+ "learning_rate": 1.5619498437969302e-06,
1249
+ "logits": -0.8722900748252869,
1250
+ "logps": -73.59480285644531,
1251
+ "loss": 0.4224,
1252
+ "objective": 0.35006803274154663,
1253
+ "ranking_simple": 0.637499988079071,
1254
+ "step": 465
1255
+ },
1256
+ {
1257
+ "dpo_loss": 0.4906507432460785,
1258
+ "epoch": 1.3320736891828058,
1259
+ "grad_norm": 49.64412942112177,
1260
+ "learning_rate": 1.5047395241981606e-06,
1261
+ "logits": -0.9396683573722839,
1262
+ "logps": -73.02540588378906,
1263
+ "loss": 0.5003,
1264
+ "objective": 0.4906507432460785,
1265
+ "ranking_simple": 0.6499999761581421,
1266
+ "step": 470
1267
+ },
1268
+ {
1269
+ "dpo_loss": 0.30290400981903076,
1270
+ "epoch": 1.34624468587624,
1271
+ "grad_norm": 79.3006324136739,
1272
+ "learning_rate": 1.4481420441779862e-06,
1273
+ "logits": -0.8443289995193481,
1274
+ "logps": -75.5423583984375,
1275
+ "loss": 0.3808,
1276
+ "objective": 0.30290400981903076,
1277
+ "ranking_simple": 0.6958333253860474,
1278
+ "step": 475
1279
+ },
1280
+ {
1281
+ "dpo_loss": 0.5529125928878784,
1282
+ "epoch": 1.360415682569674,
1283
+ "grad_norm": 70.13856035677702,
1284
+ "learning_rate": 1.3921922540862907e-06,
1285
+ "logits": -0.8475839495658875,
1286
+ "logps": -76.10958862304688,
1287
+ "loss": 0.476,
1288
+ "objective": 0.5529125928878784,
1289
+ "ranking_simple": 0.637499988079071,
1290
+ "step": 480
1291
+ },
1292
+ {
1293
+ "dpo_loss": 0.4004639685153961,
1294
+ "epoch": 1.3745866792631083,
1295
+ "grad_norm": 78.28600119390228,
1296
+ "learning_rate": 1.3369246054527152e-06,
1297
+ "logits": -0.8136020302772522,
1298
+ "logps": -74.83834838867188,
1299
+ "loss": 0.4712,
1300
+ "objective": 0.4004639685153961,
1301
+ "ranking_simple": 0.625,
1302
+ "step": 485
1303
+ },
1304
+ {
1305
+ "dpo_loss": 0.54802006483078,
1306
+ "epoch": 1.3887576759565423,
1307
+ "grad_norm": 61.45793050447296,
1308
+ "learning_rate": 1.2823731297728536e-06,
1309
+ "logits": -0.7907751798629761,
1310
+ "logps": -77.5755844116211,
1311
+ "loss": 0.3945,
1312
+ "objective": 0.54802006483078,
1313
+ "ranking_simple": 0.625,
1314
+ "step": 490
1315
+ },
1316
+ {
1317
+ "dpo_loss": 0.2221187800168991,
1318
+ "epoch": 1.4029286726499763,
1319
+ "grad_norm": 66.41905989355433,
1320
+ "learning_rate": 1.2285714175530936e-06,
1321
+ "logits": -0.805623471736908,
1322
+ "logps": -77.69929504394531,
1323
+ "loss": 0.3891,
1324
+ "objective": 0.2221187800168991,
1325
+ "ranking_simple": 0.625,
1326
+ "step": 495
1327
+ },
1328
+ {
1329
+ "dpo_loss": 0.42266911268234253,
1330
+ "epoch": 1.4170996693434104,
1331
+ "grad_norm": 66.25308663743711,
1332
+ "learning_rate": 1.1755525976269851e-06,
1333
+ "logits": -0.750647783279419,
1334
+ "logps": -76.04014587402344,
1335
+ "loss": 0.4352,
1336
+ "objective": 0.42266911268234253,
1337
+ "ranking_simple": 0.6583333611488342,
1338
+ "step": 500
1339
+ },
1340
+ {
1341
+ "epoch": 1.4170996693434104,
1342
+ "eval_dpo_loss": 1.9818733930587769,
1343
+ "eval_logits": -0.8633737564086914,
1344
+ "eval_logps": -80.73566436767578,
1345
+ "eval_loss": 2.0448944568634033,
1346
+ "eval_objective": 1.9818733930587769,
1347
+ "eval_ranking_simple": 0.5367494821548462,
1348
+ "eval_runtime": 608.89,
1349
+ "eval_samples_per_second": 9.509,
1350
+ "eval_steps_per_second": 0.793,
1351
+ "step": 500
1352
+ },
1353
+ {
1354
+ "dpo_loss": 0.31959182024002075,
1355
+ "epoch": 1.4312706660368446,
1356
+ "grad_norm": 52.73552059138801,
1357
+ "learning_rate": 1.1233493167559065e-06,
1358
+ "logits": -0.7827561497688293,
1359
+ "logps": -76.80120086669922,
1360
+ "loss": 0.3302,
1361
+ "objective": 0.31959182024002075,
1362
+ "ranking_simple": 0.6458333134651184,
1363
+ "step": 505
1364
+ },
1365
+ {
1366
+ "dpo_loss": 0.3922279179096222,
1367
+ "epoch": 1.4454416627302786,
1368
+ "grad_norm": 61.50180104681149,
1369
+ "learning_rate": 1.0719937195265555e-06,
1370
+ "logits": -0.8109835386276245,
1371
+ "logps": -76.9507827758789,
1372
+ "loss": 0.3945,
1373
+ "objective": 0.3922279179096222,
1374
+ "ranking_simple": 0.6625000238418579,
1375
+ "step": 510
1376
+ },
1377
+ {
1378
+ "dpo_loss": 0.3581198453903198,
1379
+ "epoch": 1.4596126594237129,
1380
+ "grad_norm": 44.945879303706135,
1381
+ "learning_rate": 1.0215174285576615e-06,
1382
+ "logits": -0.8377180695533752,
1383
+ "logps": -78.14236450195312,
1384
+ "loss": 0.4044,
1385
+ "objective": 0.3581198453903198,
1386
+ "ranking_simple": 0.6083333492279053,
1387
+ "step": 515
1388
+ },
1389
+ {
1390
+ "dpo_loss": 0.3747193217277527,
1391
+ "epoch": 1.473783656117147,
1392
+ "grad_norm": 52.10326588262369,
1393
+ "learning_rate": 9.719515250281122e-07,
1394
+ "logits": -0.79498291015625,
1395
+ "logps": -75.8031234741211,
1396
+ "loss": 0.3132,
1397
+ "objective": 0.3747193217277527,
1398
+ "ranking_simple": 0.675000011920929,
1399
+ "step": 520
1400
+ },
1401
+ {
1402
+ "dpo_loss": 0.3443489670753479,
1403
+ "epoch": 1.487954652810581,
1404
+ "grad_norm": 51.25471401996847,
1405
+ "learning_rate": 9.233265295384624e-07,
1406
+ "logits": -0.6887346506118774,
1407
+ "logps": -75.2328872680664,
1408
+ "loss": 0.3972,
1409
+ "objective": 0.3443489670753479,
1410
+ "ranking_simple": 0.6541666388511658,
1411
+ "step": 525
1412
+ },
1413
+ {
1414
+ "dpo_loss": 0.3036381006240845,
1415
+ "epoch": 1.5021256495040152,
1416
+ "grad_norm": 58.55859781711994,
1417
+ "learning_rate": 8.756723833176376e-07,
1418
+ "logits": -0.8320663571357727,
1419
+ "logps": -77.42900085449219,
1420
+ "loss": 0.3414,
1421
+ "objective": 0.3036381006240845,
1422
+ "ranking_simple": 0.637499988079071,
1423
+ "step": 530
1424
+ },
1425
+ {
1426
+ "dpo_loss": 0.2670062184333801,
1427
+ "epoch": 1.5162966461974492,
1428
+ "grad_norm": 47.62625405878976,
1429
+ "learning_rate": 8.290184297863793e-07,
1430
+ "logits": -0.8374890685081482,
1431
+ "logps": -76.14564514160156,
1432
+ "loss": 0.3198,
1433
+ "objective": 0.2670062184333801,
1434
+ "ranking_simple": 0.6499999761581421,
1435
+ "step": 535
1436
+ },
1437
+ {
1438
+ "dpo_loss": 0.49031582474708557,
1439
+ "epoch": 1.5304676428908834,
1440
+ "grad_norm": 62.51314897054348,
1441
+ "learning_rate": 7.833933964887985e-07,
1442
+ "logits": -0.7449190020561218,
1443
+ "logps": -76.09394073486328,
1444
+ "loss": 0.3338,
1445
+ "objective": 0.49031582474708557,
1446
+ "ranking_simple": 0.699999988079071,
1447
+ "step": 540
1448
+ },
1449
+ {
1450
+ "dpo_loss": 0.4148392975330353,
1451
+ "epoch": 1.5446386395843175,
1452
+ "grad_norm": 52.636990202390535,
1453
+ "learning_rate": 7.388253774031659e-07,
1454
+ "logits": -0.8080311417579651,
1455
+ "logps": -76.78816986083984,
1456
+ "loss": 0.3883,
1457
+ "objective": 0.4148392975330353,
1458
+ "ranking_simple": 0.7250000238418579,
1459
+ "step": 545
1460
+ },
1461
+ {
1462
+ "dpo_loss": 0.22362883388996124,
1463
+ "epoch": 1.5588096362777515,
1464
+ "grad_norm": 46.69497634920009,
1465
+ "learning_rate": 6.953418156428152e-07,
1466
+ "logits": -0.8403748869895935,
1467
+ "logps": -76.63642883300781,
1468
+ "loss": 0.3103,
1469
+ "objective": 0.22362883388996124,
1470
+ "ranking_simple": 0.6499999761581421,
1471
+ "step": 550
1472
+ },
1473
+ {
1474
+ "epoch": 1.5588096362777515,
1475
+ "eval_dpo_loss": 1.9073150157928467,
1476
+ "eval_logits": -0.8671907186508179,
1477
+ "eval_logps": -80.88265228271484,
1478
+ "eval_loss": 1.9783891439437866,
1479
+ "eval_objective": 1.9073150157928467,
1480
+ "eval_ranking_simple": 0.5372670888900757,
1481
+ "eval_runtime": 612.8866,
1482
+ "eval_samples_per_second": 9.447,
1483
+ "eval_steps_per_second": 0.788,
1484
+ "step": 550
1485
+ },
1486
+ {
1487
+ "dpo_loss": 0.41253456473350525,
1488
+ "epoch": 1.5729806329711855,
1489
+ "grad_norm": 50.727318491856984,
1490
+ "learning_rate": 6.529694865578318e-07,
1491
+ "logits": -0.6723602414131165,
1492
+ "logps": -77.01154327392578,
1493
+ "loss": 0.3626,
1494
+ "objective": 0.41253456473350525,
1495
+ "ranking_simple": 0.6291666626930237,
1496
+ "step": 555
1497
+ },
1498
+ {
1499
+ "dpo_loss": 0.3846481144428253,
1500
+ "epoch": 1.5871516296646198,
1501
+ "grad_norm": 69.96953957426896,
1502
+ "learning_rate": 6.117344812479154e-07,
1503
+ "logits": -0.7329820990562439,
1504
+ "logps": -75.9783935546875,
1505
+ "loss": 0.3651,
1506
+ "objective": 0.3846481144428253,
1507
+ "ranking_simple": 0.6791666746139526,
1508
+ "step": 560
1509
+ },
1510
+ {
1511
+ "dpo_loss": 0.3101910650730133,
1512
+ "epoch": 1.601322626358054,
1513
+ "grad_norm": 66.15790086947261,
1514
+ "learning_rate": 5.71662190496575e-07,
1515
+ "logits": -0.7286636233329773,
1516
+ "logps": -76.75426483154297,
1517
+ "loss": 0.349,
1518
+ "objective": 0.3101910650730133,
1519
+ "ranking_simple": 0.612500011920929,
1520
+ "step": 565
1521
+ },
1522
+ {
1523
+ "dpo_loss": 0.2726050615310669,
1524
+ "epoch": 1.615493623051488,
1525
+ "grad_norm": 56.0158605948958,
1526
+ "learning_rate": 5.327772891365565e-07,
1527
+ "logits": -0.8405836224555969,
1528
+ "logps": -77.85334014892578,
1529
+ "loss": 0.2979,
1530
+ "objective": 0.2726050615310669,
1531
+ "ranking_simple": 0.6541666388511658,
1532
+ "step": 570
1533
+ },
1534
+ {
1535
+ "dpo_loss": 0.4396015405654907,
1536
+ "epoch": 1.629664619744922,
1537
+ "grad_norm": 67.51231381478947,
1538
+ "learning_rate": 4.951037208561116e-07,
1539
+ "logits": -0.8009175658226013,
1540
+ "logps": -75.7006607055664,
1541
+ "loss": 0.3867,
1542
+ "objective": 0.4396015405654907,
1543
+ "ranking_simple": 0.6833333373069763,
1544
+ "step": 575
1545
+ },
1546
+ {
1547
+ "dpo_loss": 0.3268950283527374,
1548
+ "epoch": 1.643835616438356,
1549
+ "grad_norm": 55.29442626424949,
1550
+ "learning_rate": 4.586646834554864e-07,
1551
+ "logits": -0.8186151385307312,
1552
+ "logps": -75.92921447753906,
1553
+ "loss": 0.3658,
1554
+ "objective": 0.3268950283527374,
1555
+ "ranking_simple": 0.6625000238418579,
1556
+ "step": 580
1557
+ },
1558
+ {
1559
+ "dpo_loss": 0.2519194185733795,
1560
+ "epoch": 1.6580066131317903,
1561
+ "grad_norm": 53.89781546053762,
1562
+ "learning_rate": 4.234826145626855e-07,
1563
+ "logits": -0.7085616588592529,
1564
+ "logps": -75.17672729492188,
1565
+ "loss": 0.2996,
1566
+ "objective": 0.2519194185733795,
1567
+ "ranking_simple": 0.5874999761581421,
1568
+ "step": 585
1569
+ },
1570
+ {
1571
+ "dpo_loss": 0.31272074580192566,
1572
+ "epoch": 1.6721776098252243,
1573
+ "grad_norm": 59.39748892759291,
1574
+ "learning_rate": 3.8957917781732883e-07,
1575
+ "logits": -0.7312564253807068,
1576
+ "logps": -76.37480163574219,
1577
+ "loss": 0.3241,
1578
+ "objective": 0.31272074580192566,
1579
+ "ranking_simple": 0.675000011920929,
1580
+ "step": 590
1581
+ },
1582
+ {
1583
+ "dpo_loss": 0.27419552206993103,
1584
+ "epoch": 1.6863486065186586,
1585
+ "grad_norm": 53.45471180782799,
1586
+ "learning_rate": 3.569752495310877e-07,
1587
+ "logits": -0.7288424372673035,
1588
+ "logps": -76.92625427246094,
1589
+ "loss": 0.2888,
1590
+ "objective": 0.27419552206993103,
1591
+ "ranking_simple": 0.6458333134651184,
1592
+ "step": 595
1593
+ },
1594
+ {
1595
+ "dpo_loss": 0.1944553703069687,
1596
+ "epoch": 1.7005196032120926,
1597
+ "grad_norm": 50.57431920897434,
1598
+ "learning_rate": 3.2569090583293356e-07,
1599
+ "logits": -0.7772687673568726,
1600
+ "logps": -77.40592193603516,
1601
+ "loss": 0.2489,
1602
+ "objective": 0.1944553703069687,
1603
+ "ranking_simple": 0.699999988079071,
1604
+ "step": 600
1605
+ },
1606
+ {
1607
+ "epoch": 1.7005196032120926,
1608
+ "eval_dpo_loss": 1.8850661516189575,
1609
+ "eval_logits": -0.8421351313591003,
1610
+ "eval_logps": -81.0832748413086,
1611
+ "eval_loss": 1.9488178491592407,
1612
+ "eval_objective": 1.8850661516189575,
1613
+ "eval_ranking_simple": 0.5367494821548462,
1614
+ "eval_runtime": 603.5072,
1615
+ "eval_samples_per_second": 9.594,
1616
+ "eval_steps_per_second": 0.8,
1617
+ "step": 600
1618
+ },
1619
+ {
1620
+ "dpo_loss": 0.35472404956817627,
1621
+ "epoch": 1.7146905999055266,
1622
+ "grad_norm": 65.83086255827398,
1623
+ "learning_rate": 2.957454103070978e-07,
1624
+ "logits": -0.7188993096351624,
1625
+ "logps": -78.14205169677734,
1626
+ "loss": 0.3777,
1627
+ "objective": 0.35472404956817627,
1628
+ "ranking_simple": 0.6499999761581421,
1629
+ "step": 605
1630
+ },
1631
+ {
1632
+ "dpo_loss": 0.286739319562912,
1633
+ "epoch": 1.7288615965989607,
1634
+ "grad_norm": 69.02122554997335,
1635
+ "learning_rate": 2.6715720213136955e-07,
1636
+ "logits": -0.7877374887466431,
1637
+ "logps": -76.47672271728516,
1638
+ "loss": 0.3096,
1639
+ "objective": 0.286739319562912,
1640
+ "ranking_simple": 0.6791666746139526,
1641
+ "step": 610
1642
+ },
1643
+ {
1644
+ "dpo_loss": 0.35429847240448,
1645
+ "epoch": 1.743032593292395,
1646
+ "grad_norm": 58.94519405559986,
1647
+ "learning_rate": 2.399438847230212e-07,
1648
+ "logits": -0.8013178110122681,
1649
+ "logps": -76.39897155761719,
1650
+ "loss": 0.3637,
1651
+ "objective": 0.35429847240448,
1652
+ "ranking_simple": 0.6166666746139526,
1653
+ "step": 615
1654
+ },
1655
+ {
1656
+ "dpo_loss": 0.2548845112323761,
1657
+ "epoch": 1.7572035899858292,
1658
+ "grad_norm": 53.576700184407876,
1659
+ "learning_rate": 2.1412221489936796e-07,
1660
+ "logits": -0.8240512609481812,
1661
+ "logps": -76.49982452392578,
1662
+ "loss": 0.3083,
1663
+ "objective": 0.2548845112323761,
1664
+ "ranking_simple": 0.6875,
1665
+ "step": 620
1666
+ },
1667
+ {
1668
+ "dpo_loss": 0.2307281196117401,
1669
+ "epoch": 1.7713745866792632,
1670
+ "grad_norm": 64.55540353999235,
1671
+ "learning_rate": 1.897080925596187e-07,
1672
+ "logits": -0.7509959936141968,
1673
+ "logps": -75.8747329711914,
1674
+ "loss": 0.321,
1675
+ "objective": 0.2307281196117401,
1676
+ "ranking_simple": 0.6416666507720947,
1677
+ "step": 625
1678
+ },
1679
+ {
1680
+ "dpo_loss": 0.5376826524734497,
1681
+ "epoch": 1.7855455833726972,
1682
+ "grad_norm": 61.55828638335153,
1683
+ "learning_rate": 1.6671655089439186e-07,
1684
+ "logits": -0.7904254794120789,
1685
+ "logps": -77.4958267211914,
1686
+ "loss": 0.4286,
1687
+ "objective": 0.5376826524734497,
1688
+ "ranking_simple": 0.6166666746139526,
1689
+ "step": 630
1690
+ },
1691
+ {
1692
+ "dpo_loss": 0.32618287205696106,
1693
+ "epoch": 1.7997165800661312,
1694
+ "grad_norm": 67.7723818410267,
1695
+ "learning_rate": 1.4516174712890406e-07,
1696
+ "logits": -0.8496752381324768,
1697
+ "logps": -77.06031036376953,
1698
+ "loss": 0.3605,
1699
+ "objective": 0.32618287205696106,
1700
+ "ranking_simple": 0.7041666507720947,
1701
+ "step": 635
1702
+ },
1703
+ {
1704
+ "dpo_loss": 0.264717161655426,
1705
+ "epoch": 1.8138875767595655,
1706
+ "grad_norm": 49.987295020521714,
1707
+ "learning_rate": 1.2505695380554712e-07,
1708
+ "logits": -0.8106148838996887,
1709
+ "logps": -77.50927734375,
1710
+ "loss": 0.2895,
1711
+ "objective": 0.264717161655426,
1712
+ "ranking_simple": 0.6499999761581421,
1713
+ "step": 640
1714
+ },
1715
+ {
1716
+ "dpo_loss": 0.3132435977458954,
1717
+ "epoch": 1.8280585734529995,
1718
+ "grad_norm": 57.443846637348,
1719
+ "learning_rate": 1.0641455061121519e-07,
1720
+ "logits": -0.8152769804000854,
1721
+ "logps": -78.20152282714844,
1722
+ "loss": 0.4079,
1723
+ "objective": 0.3132435977458954,
1724
+ "ranking_simple": 0.6958333253860474,
1725
+ "step": 645
1726
+ },
1727
+ {
1728
+ "dpo_loss": 0.3075469732284546,
1729
+ "epoch": 1.8422295701464337,
1730
+ "grad_norm": 63.674814735474534,
1731
+ "learning_rate": 8.924601675441207e-08,
1732
+ "logits": -0.8521133065223694,
1733
+ "logps": -78.01354217529297,
1734
+ "loss": 0.3631,
1735
+ "objective": 0.3075469732284546,
1736
+ "ranking_simple": 0.6708333492279053,
1737
+ "step": 650
1738
+ },
1739
+ {
1740
+ "epoch": 1.8422295701464337,
1741
+ "eval_dpo_loss": 1.8805330991744995,
1742
+ "eval_logits": -0.8528652787208557,
1743
+ "eval_logps": -81.17214965820312,
1744
+ "eval_loss": 1.9417303800582886,
1745
+ "eval_objective": 1.8805330991744995,
1746
+ "eval_ranking_simple": 0.534679114818573,
1747
+ "eval_runtime": 608.284,
1748
+ "eval_samples_per_second": 9.519,
1749
+ "eval_steps_per_second": 0.794,
1750
+ "step": 650
1751
+ },
1752
+ {
1753
+ "dpo_loss": 0.2915767729282379,
1754
+ "epoch": 1.8564005668398678,
1755
+ "grad_norm": 51.326241873653245,
1756
+ "learning_rate": 7.356192389683825e-08,
1757
+ "logits": -0.7731481790542603,
1758
+ "logps": -75.27713012695312,
1759
+ "loss": 0.3407,
1760
+ "objective": 0.2915767729282379,
1761
+ "ranking_simple": 0.6166666746139526,
1762
+ "step": 655
1763
+ },
1764
+ {
1765
+ "dpo_loss": 0.40089327096939087,
1766
+ "epoch": 1.8705715635333018,
1767
+ "grad_norm": 51.09671637748102,
1768
+ "learning_rate": 5.937192964380556e-08,
1769
+ "logits": -0.7996050715446472,
1770
+ "logps": -78.20768737792969,
1771
+ "loss": 0.3594,
1772
+ "objective": 0.40089327096939087,
1773
+ "ranking_simple": 0.6583333611488342,
1774
+ "step": 660
1775
+ },
1776
+ {
1777
+ "dpo_loss": 0.32784855365753174,
1778
+ "epoch": 1.8847425602267358,
1779
+ "grad_norm": 59.65383844014325,
1780
+ "learning_rate": 4.668477159748858e-08,
1781
+ "logits": -0.824116587638855,
1782
+ "logps": -76.31925201416016,
1783
+ "loss": 0.3228,
1784
+ "objective": 0.32784855365753174,
1785
+ "ranking_simple": 0.7041666507720947,
1786
+ "step": 665
1787
+ },
1788
+ {
1789
+ "dpo_loss": 0.4064708948135376,
1790
+ "epoch": 1.89891355692017,
1791
+ "grad_norm": 52.69018895294125,
1792
+ "learning_rate": 3.5508261976678894e-08,
1793
+ "logits": -0.7589130997657776,
1794
+ "logps": -75.46687316894531,
1795
+ "loss": 0.366,
1796
+ "objective": 0.4064708948135376,
1797
+ "ranking_simple": 0.625,
1798
+ "step": 670
1799
+ },
1800
+ {
1801
+ "dpo_loss": 0.3538086712360382,
1802
+ "epoch": 1.9130845536136043,
1803
+ "grad_norm": 57.27565478185342,
1804
+ "learning_rate": 2.5849282806345855e-08,
1805
+ "logits": -0.7437042593955994,
1806
+ "logps": -76.25951385498047,
1807
+ "loss": 0.3801,
1808
+ "objective": 0.3538086712360382,
1809
+ "ranking_simple": 0.675000011920929,
1810
+ "step": 675
1811
+ },
1812
+ {
1813
+ "dpo_loss": 0.2823149859905243,
1814
+ "epoch": 1.9272555503070383,
1815
+ "grad_norm": 55.3247140739336,
1816
+ "learning_rate": 1.771378167997745e-08,
1817
+ "logits": -0.788720428943634,
1818
+ "logps": -78.25997161865234,
1819
+ "loss": 0.389,
1820
+ "objective": 0.2823149859905243,
1821
+ "ranking_simple": 0.6875,
1822
+ "step": 680
1823
+ },
1824
+ {
1825
+ "dpo_loss": 0.30807915329933167,
1826
+ "epoch": 1.9414265470004723,
1827
+ "grad_norm": 42.581364917436595,
1828
+ "learning_rate": 1.1106768097300657e-08,
1829
+ "logits": -0.769125759601593,
1830
+ "logps": -75.96378326416016,
1831
+ "loss": 0.3183,
1832
+ "objective": 0.30807915329933167,
1833
+ "ranking_simple": 0.6083333492279053,
1834
+ "step": 685
1835
+ },
1836
+ {
1837
+ "dpo_loss": 0.21873274445533752,
1838
+ "epoch": 1.9555975436939064,
1839
+ "grad_norm": 37.39828456518223,
1840
+ "learning_rate": 6.032310379642803e-09,
1841
+ "logits": -0.7863041758537292,
1842
+ "logps": -76.51734924316406,
1843
+ "loss": 0.3202,
1844
+ "objective": 0.21873274445533752,
1845
+ "ranking_simple": 0.6708333492279053,
1846
+ "step": 690
1847
+ },
1848
+ {
1849
+ "dpo_loss": 0.24407407641410828,
1850
+ "epoch": 1.9697685403873406,
1851
+ "grad_norm": 75.42620047856866,
1852
+ "learning_rate": 2.4935331648298644e-09,
1853
+ "logits": -0.7426345944404602,
1854
+ "logps": -77.34226989746094,
1855
+ "loss": 0.3361,
1856
+ "objective": 0.24407407641410828,
1857
+ "ranking_simple": 0.625,
1858
+ "step": 695
1859
+ },
1860
+ {
1861
+ "dpo_loss": 0.2809065580368042,
1862
+ "epoch": 1.9839395370807746,
1863
+ "grad_norm": 63.61763495221277,
1864
+ "learning_rate": 4.926154831655372e-10,
1865
+ "logits": -0.6787734627723694,
1866
+ "logps": -78.1474609375,
1867
+ "loss": 0.3009,
1868
+ "objective": 0.2809065580368042,
1869
+ "ranking_simple": 0.6541666388511658,
1870
+ "step": 700
1871
+ },
1872
+ {
1873
+ "epoch": 1.9839395370807746,
1874
+ "eval_dpo_loss": 1.8786609172821045,
1875
+ "eval_logits": -0.850897490978241,
1876
+ "eval_logps": -81.16839599609375,
1877
+ "eval_loss": 1.9399291276931763,
1878
+ "eval_objective": 1.8786609172821045,
1879
+ "eval_ranking_simple": 0.534679114818573,
1880
+ "eval_runtime": 605.8989,
1881
+ "eval_samples_per_second": 9.556,
1882
+ "eval_steps_per_second": 0.797,
1883
+ "step": 700
1884
+ },
1885
+ {
1886
+ "epoch": 1.995276334435522,
1887
+ "step": 704,
1888
+ "total_flos": 0.0,
1889
+ "train_loss": 0.7561936149881645,
1890
+ "train_runtime": 36938.8609,
1891
+ "train_samples_per_second": 2.751,
1892
+ "train_steps_per_second": 0.019
1893
+ }
1894
+ ],
1895
+ "logging_steps": 5,
1896
+ "max_steps": 704,
1897
+ "num_input_tokens_seen": 0,
1898
+ "num_train_epochs": 2,
1899
+ "save_steps": 500,
1900
+ "stateful_callbacks": {
1901
+ "TrainerControl": {
1902
+ "args": {
1903
+ "should_epoch_stop": false,
1904
+ "should_evaluate": false,
1905
+ "should_log": false,
1906
+ "should_save": false,
1907
+ "should_training_stop": false
1908
+ },
1909
+ "attributes": {}
1910
+ }
1911
+ },
1912
+ "total_flos": 0.0,
1913
+ "train_batch_size": 4,
1914
+ "trial_name": null,
1915
+ "trial_params": null
1916
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bba08646559c28cfce6c7b3860c6f96553c410db9f55571ebac85cbdab56839
3
+ size 8120
vocab.json ADDED
The diff for this file is too large to render. See raw diff