hushell committed on
Commit
fac6a8d
1 Parent(s): 73940eb

Model save

Browse files
README.md ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: alignment-handbook/zephyr-7b-sft-full
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: zephyr-7b-dpo-full
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # zephyr-7b-dpo-full
17
+
18
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unspecified dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - Loss: 0.4893
21
+ - Rewards/chosen: -1.9379
22
+ - Rewards/rejected: -3.0213
23
+ - Rewards/accuracies: 0.7718
24
+ - Rewards/margins: 1.0835
25
+ - Logps/rejected: -563.9073
26
+ - Logps/chosen: -477.8896
27
+ - Logits/rejected: 0.6827
28
+ - Logits/chosen: -0.4606
29
+
30
+ ## Model description
31
+
32
+ More information needed
33
+
34
+ ## Intended uses & limitations
35
+
36
+ More information needed
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 5e-07
48
+ - train_batch_size: 8
49
+ - eval_batch_size: 8
50
+ - seed: 42
51
+ - distributed_type: multi-GPU
52
+ - num_devices: 4
53
+ - gradient_accumulation_steps: 2
54
+ - total_train_batch_size: 64
55
+ - total_eval_batch_size: 32
56
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
57
+ - lr_scheduler_type: cosine
58
+ - lr_scheduler_warmup_ratio: 0.1
59
+ - num_epochs: 1
60
+
61
+ ### Training results
62
+
63
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
64
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
65
+ | 0.6338 | 0.1 | 100 | 0.6333 | -0.4184 | -0.6017 | 0.6865 | 0.1833 | -321.9407 | -325.9421 | -2.4857 | -2.5392 |
66
+ | 0.5643 | 0.21 | 200 | 0.5547 | -1.1977 | -1.8547 | 0.7480 | 0.6570 | -447.2422 | -403.8748 | 0.1190 | -0.4672 |
67
+ | 0.5066 | 0.31 | 300 | 0.5214 | -0.9561 | -1.7858 | 0.7778 | 0.8297 | -440.3582 | -379.7161 | -0.7390 | -1.4155 |
68
+ | 0.4941 | 0.42 | 400 | 0.5082 | -1.2581 | -2.1325 | 0.7599 | 0.8744 | -475.0238 | -409.9142 | 0.1688 | -0.7662 |
69
+ | 0.506 | 0.52 | 500 | 0.5090 | -1.1067 | -2.0712 | 0.7639 | 0.9645 | -468.8966 | -394.7739 | 1.3983 | 0.0857 |
70
+ | 0.4893 | 0.63 | 600 | 0.4953 | -1.4696 | -2.4963 | 0.7579 | 1.0267 | -511.4048 | -431.0652 | 0.9613 | -0.4181 |
71
+ | 0.4558 | 0.73 | 700 | 0.4937 | -1.8124 | -2.8894 | 0.7698 | 1.0770 | -550.7128 | -465.3409 | 0.6946 | -0.4445 |
72
+ | 0.4781 | 0.84 | 800 | 0.4898 | -1.9968 | -3.0983 | 0.7698 | 1.1015 | -571.6086 | -483.7863 | 0.7311 | -0.4503 |
73
+ | 0.495 | 0.94 | 900 | 0.4894 | -1.9365 | -3.0176 | 0.7698 | 1.0812 | -563.5378 | -477.7505 | 0.6757 | -0.4642 |
74
+
75
+
76
+ ### Framework versions
77
+
78
+ - Transformers 4.36.2
79
+ - Pytorch 2.1.2+cu118
80
+ - Datasets 2.14.6
81
+ - Tokenizers 0.15.0
all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -0.4606437087059021,
4
+ "eval_logits/rejected": 0.6826901435852051,
5
+ "eval_logps/chosen": -477.88958740234375,
6
+ "eval_logps/rejected": -563.9073486328125,
7
+ "eval_loss": 0.48932957649230957,
8
+ "eval_rewards/accuracies": 0.77182537317276,
9
+ "eval_rewards/chosen": -1.9378634691238403,
10
+ "eval_rewards/margins": 1.0834674835205078,
11
+ "eval_rewards/rejected": -3.0213310718536377,
12
+ "eval_runtime": 279.233,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.162,
15
+ "eval_steps_per_second": 0.226,
16
+ "train_loss": 0.5274335609056563,
17
+ "train_runtime": 22383.3635,
18
+ "train_samples": 61135,
19
+ "train_samples_per_second": 2.731,
20
+ "train_steps_per_second": 0.043
21
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_logits/chosen": -0.4606437087059021,
4
+ "eval_logits/rejected": 0.6826901435852051,
5
+ "eval_logps/chosen": -477.88958740234375,
6
+ "eval_logps/rejected": -563.9073486328125,
7
+ "eval_loss": 0.48932957649230957,
8
+ "eval_rewards/accuracies": 0.77182537317276,
9
+ "eval_rewards/chosen": -1.9378634691238403,
10
+ "eval_rewards/margins": 1.0834674835205078,
11
+ "eval_rewards/rejected": -3.0213310718536377,
12
+ "eval_runtime": 279.233,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 7.162,
15
+ "eval_steps_per_second": 0.226
16
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.36.2"
6
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d396669aecb89be161259b09d255abbb9173ac204f7b4d1f336b2b724753966f
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:293e46fc28924d5258cc69ce175999f4fc4b8b6a406d4b30ecd45dabb999073b
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a15dfe4081e25656dcacc88ecef82d9756c61c2245dfa875e37ebf77b3d1294
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
runs/Jan12_11-39-02_main1/events.out.tfevents.1705059886.main1.9143.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:add430ad40a428f682c7db431ab804dd0664b086d8ad0e5c0222f3f09480a482
3
- size 68726
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60ec38f7ce3691c8b95ff48e8a2c30d96e6ad2675688376a3e12de66871f20c5
3
+ size 72250
runs/Jan12_11-39-02_main1/events.out.tfevents.1705082549.main1.9143.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3097dfbeb1b99fbf99dc573f017edae70ced0d93cb40080aac127bfe287566
3
+ size 828
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.5274335609056563,
4
+ "train_runtime": 22383.3635,
5
+ "train_samples": 61135,
6
+ "train_samples_per_second": 2.731,
7
+ "train_steps_per_second": 0.043
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1518 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9994767137624281,
5
+ "eval_steps": 100,
6
+ "global_step": 955,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 5.208333333333333e-09,
14
+ "logits/chosen": -2.721794605255127,
15
+ "logits/rejected": -2.6868748664855957,
16
+ "logps/chosen": -229.75424194335938,
17
+ "logps/rejected": -214.74229431152344,
18
+ "loss": 0.6931,
19
+ "rewards/accuracies": 0.0,
20
+ "rewards/chosen": 0.0,
21
+ "rewards/margins": 0.0,
22
+ "rewards/rejected": 0.0,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 5.208333333333333e-08,
28
+ "logits/chosen": -2.6621577739715576,
29
+ "logits/rejected": -2.5832412242889404,
30
+ "logps/chosen": -295.84637451171875,
31
+ "logps/rejected": -250.62530517578125,
32
+ "loss": 0.6931,
33
+ "rewards/accuracies": 0.4305555522441864,
34
+ "rewards/chosen": -3.170721174683422e-05,
35
+ "rewards/margins": 0.00016743924061302096,
36
+ "rewards/rejected": -0.00019914642325602472,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.02,
41
+ "learning_rate": 1.0416666666666667e-07,
42
+ "logits/chosen": -2.5875017642974854,
43
+ "logits/rejected": -2.5754282474517822,
44
+ "logps/chosen": -271.33502197265625,
45
+ "logps/rejected": -246.907470703125,
46
+ "loss": 0.693,
47
+ "rewards/accuracies": 0.4625000059604645,
48
+ "rewards/chosen": -0.0004090077127330005,
49
+ "rewards/margins": -0.00012708954454865307,
50
+ "rewards/rejected": -0.0002819181536324322,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.03,
55
+ "learning_rate": 1.5624999999999999e-07,
56
+ "logits/chosen": -2.6605145931243896,
57
+ "logits/rejected": -2.5901198387145996,
58
+ "logps/chosen": -278.30572509765625,
59
+ "logps/rejected": -254.51986694335938,
60
+ "loss": 0.6924,
61
+ "rewards/accuracies": 0.6312500238418579,
62
+ "rewards/chosen": 0.001268151798285544,
63
+ "rewards/margins": 0.001779665588401258,
64
+ "rewards/rejected": -0.0005115137319080532,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.04,
69
+ "learning_rate": 2.0833333333333333e-07,
70
+ "logits/chosen": -2.631272792816162,
71
+ "logits/rejected": -2.5783164501190186,
72
+ "logps/chosen": -273.85552978515625,
73
+ "logps/rejected": -237.7501220703125,
74
+ "loss": 0.6907,
75
+ "rewards/accuracies": 0.706250011920929,
76
+ "rewards/chosen": 0.003829254535958171,
77
+ "rewards/margins": 0.006563174072653055,
78
+ "rewards/rejected": -0.002733920468017459,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.05,
83
+ "learning_rate": 2.604166666666667e-07,
84
+ "logits/chosen": -2.634207248687744,
85
+ "logits/rejected": -2.5876431465148926,
86
+ "logps/chosen": -296.025146484375,
87
+ "logps/rejected": -274.70440673828125,
88
+ "loss": 0.6865,
89
+ "rewards/accuracies": 0.6937500238418579,
90
+ "rewards/chosen": 0.009410797618329525,
91
+ "rewards/margins": 0.016297370195388794,
92
+ "rewards/rejected": -0.006886570248752832,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.06,
97
+ "learning_rate": 3.1249999999999997e-07,
98
+ "logits/chosen": -2.598982334136963,
99
+ "logits/rejected": -2.5420570373535156,
100
+ "logps/chosen": -285.24249267578125,
101
+ "logps/rejected": -274.27044677734375,
102
+ "loss": 0.6802,
103
+ "rewards/accuracies": 0.668749988079071,
104
+ "rewards/chosen": 0.025370482355356216,
105
+ "rewards/margins": 0.02863493002951145,
106
+ "rewards/rejected": -0.0032644483726471663,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.07,
111
+ "learning_rate": 3.645833333333333e-07,
112
+ "logits/chosen": -2.6306140422821045,
113
+ "logits/rejected": -2.5784096717834473,
114
+ "logps/chosen": -310.9300537109375,
115
+ "logps/rejected": -290.3727111816406,
116
+ "loss": 0.6673,
117
+ "rewards/accuracies": 0.6875,
118
+ "rewards/chosen": 0.03530525788664818,
119
+ "rewards/margins": 0.053881775587797165,
120
+ "rewards/rejected": -0.018576517701148987,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.08,
125
+ "learning_rate": 4.1666666666666667e-07,
126
+ "logits/chosen": -2.4795422554016113,
127
+ "logits/rejected": -2.400930881500244,
128
+ "logps/chosen": -304.25933837890625,
129
+ "logps/rejected": -281.20159912109375,
130
+ "loss": 0.644,
131
+ "rewards/accuracies": 0.75,
132
+ "rewards/chosen": -0.03674982860684395,
133
+ "rewards/margins": 0.11624778807163239,
134
+ "rewards/rejected": -0.15299761295318604,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.09,
139
+ "learning_rate": 4.6874999999999996e-07,
140
+ "logits/chosen": -2.5025558471679688,
141
+ "logits/rejected": -2.459231376647949,
142
+ "logps/chosen": -291.841064453125,
143
+ "logps/rejected": -283.4093322753906,
144
+ "loss": 0.6346,
145
+ "rewards/accuracies": 0.706250011920929,
146
+ "rewards/chosen": 0.000455733563285321,
147
+ "rewards/margins": 0.17087821662425995,
148
+ "rewards/rejected": -0.1704224795103073,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.1,
153
+ "learning_rate": 4.999732492681437e-07,
154
+ "logits/chosen": -2.4846038818359375,
155
+ "logits/rejected": -2.455655336380005,
156
+ "logps/chosen": -335.92242431640625,
157
+ "logps/rejected": -340.43475341796875,
158
+ "loss": 0.6338,
159
+ "rewards/accuracies": 0.706250011920929,
160
+ "rewards/chosen": -0.30497807264328003,
161
+ "rewards/margins": 0.1776745617389679,
162
+ "rewards/rejected": -0.4826526641845703,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.1,
167
+ "eval_logits/chosen": -2.5391652584075928,
168
+ "eval_logits/rejected": -2.4857265949249268,
169
+ "eval_logps/chosen": -325.942138671875,
170
+ "eval_logps/rejected": -321.94073486328125,
171
+ "eval_loss": 0.6332610249519348,
172
+ "eval_rewards/accuracies": 0.6865079402923584,
173
+ "eval_rewards/chosen": -0.4183884263038635,
174
+ "eval_rewards/margins": 0.1832766830921173,
175
+ "eval_rewards/rejected": -0.6016651391983032,
176
+ "eval_runtime": 280.8039,
177
+ "eval_samples_per_second": 7.122,
178
+ "eval_steps_per_second": 0.224,
179
+ "step": 100
180
+ },
181
+ {
182
+ "epoch": 0.12,
183
+ "learning_rate": 4.996723692767926e-07,
184
+ "logits/chosen": -2.335371255874634,
185
+ "logits/rejected": -2.2089285850524902,
186
+ "logps/chosen": -307.1370544433594,
187
+ "logps/rejected": -291.34033203125,
188
+ "loss": 0.618,
189
+ "rewards/accuracies": 0.668749988079071,
190
+ "rewards/chosen": -0.46787112951278687,
191
+ "rewards/margins": 0.27501681447029114,
192
+ "rewards/rejected": -0.7428879737854004,
193
+ "step": 110
194
+ },
195
+ {
196
+ "epoch": 0.13,
197
+ "learning_rate": 4.990375746213598e-07,
198
+ "logits/chosen": -1.776285171508789,
199
+ "logits/rejected": -1.5675570964813232,
200
+ "logps/chosen": -336.9173889160156,
201
+ "logps/rejected": -321.92755126953125,
202
+ "loss": 0.6025,
203
+ "rewards/accuracies": 0.71875,
204
+ "rewards/chosen": -0.31301265954971313,
205
+ "rewards/margins": 0.37262943387031555,
206
+ "rewards/rejected": -0.6856420636177063,
207
+ "step": 120
208
+ },
209
+ {
210
+ "epoch": 0.14,
211
+ "learning_rate": 4.980697142834314e-07,
212
+ "logits/chosen": -1.6279542446136475,
213
+ "logits/rejected": -1.286072015762329,
214
+ "logps/chosen": -341.5347900390625,
215
+ "logps/rejected": -326.0841369628906,
216
+ "loss": 0.5902,
217
+ "rewards/accuracies": 0.7437499761581421,
218
+ "rewards/chosen": -0.21236875653266907,
219
+ "rewards/margins": 0.35374048352241516,
220
+ "rewards/rejected": -0.566109299659729,
221
+ "step": 130
222
+ },
223
+ {
224
+ "epoch": 0.15,
225
+ "learning_rate": 4.967700826904229e-07,
226
+ "logits/chosen": -1.0164812803268433,
227
+ "logits/rejected": -0.7005228996276855,
228
+ "logps/chosen": -264.51629638671875,
229
+ "logps/rejected": -297.2378234863281,
230
+ "loss": 0.5579,
231
+ "rewards/accuracies": 0.737500011920929,
232
+ "rewards/chosen": -0.24701330065727234,
233
+ "rewards/margins": 0.4705452024936676,
234
+ "rewards/rejected": -0.7175585031509399,
235
+ "step": 140
236
+ },
237
+ {
238
+ "epoch": 0.16,
239
+ "learning_rate": 4.951404179843962e-07,
240
+ "logits/chosen": -0.418118953704834,
241
+ "logits/rejected": -0.1309668868780136,
242
+ "logps/chosen": -362.30755615234375,
243
+ "logps/rejected": -408.3031311035156,
244
+ "loss": 0.6149,
245
+ "rewards/accuracies": 0.6000000238418579,
246
+ "rewards/chosen": -0.8524826765060425,
247
+ "rewards/margins": 0.3839341700077057,
248
+ "rewards/rejected": -1.2364168167114258,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.17,
253
+ "learning_rate": 4.931828996974498e-07,
254
+ "logits/chosen": -1.7524774074554443,
255
+ "logits/rejected": -1.3019535541534424,
256
+ "logps/chosen": -368.46270751953125,
257
+ "logps/rejected": -398.49981689453125,
258
+ "loss": 0.5535,
259
+ "rewards/accuracies": 0.6937500238418579,
260
+ "rewards/chosen": -0.7256969809532166,
261
+ "rewards/margins": 0.35368454456329346,
262
+ "rewards/rejected": -1.0793814659118652,
263
+ "step": 160
264
+ },
265
+ {
266
+ "epoch": 0.18,
267
+ "learning_rate": 4.909001458367866e-07,
268
+ "logits/chosen": -1.5669044256210327,
269
+ "logits/rejected": -1.0334669351577759,
270
+ "logps/chosen": -347.9214782714844,
271
+ "logps/rejected": -377.63446044921875,
272
+ "loss": 0.5472,
273
+ "rewards/accuracies": 0.7124999761581421,
274
+ "rewards/chosen": -0.7482506632804871,
275
+ "rewards/margins": 0.5563966631889343,
276
+ "rewards/rejected": -1.3046473264694214,
277
+ "step": 170
278
+ },
279
+ {
280
+ "epoch": 0.19,
281
+ "learning_rate": 4.882952093833627e-07,
282
+ "logits/chosen": -0.7614485025405884,
283
+ "logits/rejected": 0.021318623796105385,
284
+ "logps/chosen": -343.43798828125,
285
+ "logps/rejected": -394.9954528808594,
286
+ "loss": 0.5239,
287
+ "rewards/accuracies": 0.75,
288
+ "rewards/chosen": -0.9424430131912231,
289
+ "rewards/margins": 0.6877145767211914,
290
+ "rewards/rejected": -1.630157470703125,
291
+ "step": 180
292
+ },
293
+ {
294
+ "epoch": 0.2,
295
+ "learning_rate": 4.853715742087946e-07,
296
+ "logits/chosen": -1.3107954263687134,
297
+ "logits/rejected": -0.5814211368560791,
298
+ "logps/chosen": -347.8511047363281,
299
+ "logps/rejected": -398.18890380859375,
300
+ "loss": 0.5298,
301
+ "rewards/accuracies": 0.762499988079071,
302
+ "rewards/chosen": -0.6900913715362549,
303
+ "rewards/margins": 0.7306533455848694,
304
+ "rewards/rejected": -1.4207446575164795,
305
+ "step": 190
306
+ },
307
+ {
308
+ "epoch": 0.21,
309
+ "learning_rate": 4.821331504159906e-07,
310
+ "logits/chosen": -0.4228581488132477,
311
+ "logits/rejected": 0.10827291011810303,
312
+ "logps/chosen": -387.1797180175781,
313
+ "logps/rejected": -437.99200439453125,
314
+ "loss": 0.5643,
315
+ "rewards/accuracies": 0.737500011920929,
316
+ "rewards/chosen": -1.005066990852356,
317
+ "rewards/margins": 0.6199973821640015,
318
+ "rewards/rejected": -1.6250642538070679,
319
+ "step": 200
320
+ },
321
+ {
322
+ "epoch": 0.21,
323
+ "eval_logits/chosen": -0.4671556353569031,
324
+ "eval_logits/rejected": 0.11897020787000656,
325
+ "eval_logps/chosen": -403.8748474121094,
326
+ "eval_logps/rejected": -447.24224853515625,
327
+ "eval_loss": 0.5547109246253967,
328
+ "eval_rewards/accuracies": 0.7480158805847168,
329
+ "eval_rewards/chosen": -1.1977157592773438,
330
+ "eval_rewards/margins": 0.6569646000862122,
331
+ "eval_rewards/rejected": -1.8546805381774902,
332
+ "eval_runtime": 279.4406,
333
+ "eval_samples_per_second": 7.157,
334
+ "eval_steps_per_second": 0.225,
335
+ "step": 200
336
+ },
337
+ {
338
+ "epoch": 0.22,
339
+ "learning_rate": 4.785842691097342e-07,
340
+ "logits/chosen": -0.3859473466873169,
341
+ "logits/rejected": 0.4077689051628113,
342
+ "logps/chosen": -418.86883544921875,
343
+ "logps/rejected": -427.7865295410156,
344
+ "loss": 0.5491,
345
+ "rewards/accuracies": 0.7437499761581421,
346
+ "rewards/chosen": -1.1351739168167114,
347
+ "rewards/margins": 0.6033421754837036,
348
+ "rewards/rejected": -1.738515853881836,
349
+ "step": 210
350
+ },
351
+ {
352
+ "epoch": 0.23,
353
+ "learning_rate": 4.7472967660421603e-07,
354
+ "logits/chosen": -1.136400580406189,
355
+ "logits/rejected": -0.6079188585281372,
356
+ "logps/chosen": -363.44915771484375,
357
+ "logps/rejected": -404.9462585449219,
358
+ "loss": 0.55,
359
+ "rewards/accuracies": 0.7562500238418579,
360
+ "rewards/chosen": -0.7393938302993774,
361
+ "rewards/margins": 0.5468804836273193,
362
+ "rewards/rejected": -1.2862741947174072,
363
+ "step": 220
364
+ },
365
+ {
366
+ "epoch": 0.24,
367
+ "learning_rate": 4.705745280752585e-07,
368
+ "logits/chosen": -0.7287824153900146,
369
+ "logits/rejected": 0.3289051949977875,
370
+ "logps/chosen": -380.1632080078125,
371
+ "logps/rejected": -415.7613220214844,
372
+ "loss": 0.5434,
373
+ "rewards/accuracies": 0.737500011920929,
374
+ "rewards/chosen": -0.8638499975204468,
375
+ "rewards/margins": 0.8072270154953003,
376
+ "rewards/rejected": -1.671076774597168,
377
+ "step": 230
378
+ },
379
+ {
380
+ "epoch": 0.25,
381
+ "learning_rate": 4.6612438066572555e-07,
382
+ "logits/chosen": -1.221251130104065,
383
+ "logits/rejected": -0.22343508899211884,
384
+ "logps/chosen": -349.05084228515625,
385
+ "logps/rejected": -355.9934997558594,
386
+ "loss": 0.5308,
387
+ "rewards/accuracies": 0.6875,
388
+ "rewards/chosen": -0.7182748913764954,
389
+ "rewards/margins": 0.5810624957084656,
390
+ "rewards/rejected": -1.2993375062942505,
391
+ "step": 240
392
+ },
393
+ {
394
+ "epoch": 0.26,
395
+ "learning_rate": 4.6138518605333664e-07,
396
+ "logits/chosen": -0.4631493091583252,
397
+ "logits/rejected": 0.10230980068445206,
398
+ "logps/chosen": -364.1564025878906,
399
+ "logps/rejected": -439.31463623046875,
400
+ "loss": 0.5407,
401
+ "rewards/accuracies": 0.706250011920929,
402
+ "rewards/chosen": -0.9659140706062317,
403
+ "rewards/margins": 0.6127485036849976,
404
+ "rewards/rejected": -1.5786627531051636,
405
+ "step": 250
406
+ },
407
+ {
408
+ "epoch": 0.27,
409
+ "learning_rate": 4.5636328249082514e-07,
410
+ "logits/chosen": -0.5042780637741089,
411
+ "logits/rejected": 0.3967960774898529,
412
+ "logps/chosen": -377.323974609375,
413
+ "logps/rejected": -418.09661865234375,
414
+ "loss": 0.5323,
415
+ "rewards/accuracies": 0.71875,
416
+ "rewards/chosen": -0.9326316714286804,
417
+ "rewards/margins": 0.6699790358543396,
418
+ "rewards/rejected": -1.6026105880737305,
419
+ "step": 260
420
+ },
421
+ {
422
+ "epoch": 0.28,
423
+ "learning_rate": 4.510653863290871e-07,
424
+ "logits/chosen": -0.23900368809700012,
425
+ "logits/rejected": 1.1343748569488525,
426
+ "logps/chosen": -397.4126892089844,
427
+ "logps/rejected": -434.02203369140625,
428
+ "loss": 0.5028,
429
+ "rewards/accuracies": 0.75,
430
+ "rewards/chosen": -0.9560322761535645,
431
+ "rewards/margins": 0.8822371363639832,
432
+ "rewards/rejected": -1.8382694721221924,
433
+ "step": 270
434
+ },
435
+ {
436
+ "epoch": 0.29,
437
+ "learning_rate": 4.4549858303465737e-07,
438
+ "logits/chosen": -0.15336796641349792,
439
+ "logits/rejected": 0.8832536935806274,
440
+ "logps/chosen": -387.30743408203125,
441
+ "logps/rejected": -461.30084228515625,
442
+ "loss": 0.5195,
443
+ "rewards/accuracies": 0.78125,
444
+ "rewards/chosen": -0.9608807563781738,
445
+ "rewards/margins": 0.8189412355422974,
446
+ "rewards/rejected": -1.779821753501892,
447
+ "step": 280
448
+ },
449
+ {
450
+ "epoch": 0.3,
451
+ "learning_rate": 4.396703177135261e-07,
452
+ "logits/chosen": -1.3611189126968384,
453
+ "logits/rejected": -0.8717398643493652,
454
+ "logps/chosen": -381.35321044921875,
455
+ "logps/rejected": -414.11279296875,
456
+ "loss": 0.527,
457
+ "rewards/accuracies": 0.75,
458
+ "rewards/chosen": -0.9636392593383789,
459
+ "rewards/margins": 0.6538799405097961,
460
+ "rewards/rejected": -1.6175191402435303,
461
+ "step": 290
462
+ },
463
+ {
464
+ "epoch": 0.31,
465
+ "learning_rate": 4.335883851539693e-07,
466
+ "logits/chosen": -1.2623040676116943,
467
+ "logits/rejected": -0.05791844055056572,
468
+ "logps/chosen": -380.7442626953125,
469
+ "logps/rejected": -440.4820251464844,
470
+ "loss": 0.5066,
471
+ "rewards/accuracies": 0.768750011920929,
472
+ "rewards/chosen": -0.9736257791519165,
473
+ "rewards/margins": 0.9214972257614136,
474
+ "rewards/rejected": -1.8951228857040405,
475
+ "step": 300
476
+ },
477
+ {
478
+ "epoch": 0.31,
479
+ "eval_logits/chosen": -1.4155113697052002,
480
+ "eval_logits/rejected": -0.7390369772911072,
481
+ "eval_logps/chosen": -379.7160949707031,
482
+ "eval_logps/rejected": -440.3581848144531,
483
+ "eval_loss": 0.5213961005210876,
484
+ "eval_rewards/accuracies": 0.7777777910232544,
485
+ "eval_rewards/chosen": -0.9561280608177185,
486
+ "eval_rewards/margins": 0.8297119140625,
487
+ "eval_rewards/rejected": -1.7858400344848633,
488
+ "eval_runtime": 277.1592,
489
+ "eval_samples_per_second": 7.216,
490
+ "eval_steps_per_second": 0.227,
491
+ "step": 300
492
+ },
493
+ {
494
+ "epoch": 0.32,
495
+ "learning_rate": 4.272609194017105e-07,
496
+ "logits/chosen": -1.365898847579956,
497
+ "logits/rejected": -0.6500005722045898,
498
+ "logps/chosen": -368.51959228515625,
499
+ "logps/rejected": -451.77130126953125,
500
+ "loss": 0.4823,
501
+ "rewards/accuracies": 0.7250000238418579,
502
+ "rewards/chosen": -0.8627009391784668,
503
+ "rewards/margins": 0.9636882543563843,
504
+ "rewards/rejected": -1.8263890743255615,
505
+ "step": 310
506
+ },
507
+ {
508
+ "epoch": 0.33,
509
+ "learning_rate": 4.2069638288135547e-07,
510
+ "logits/chosen": -1.1361403465270996,
511
+ "logits/rejected": -0.4094177186489105,
512
+ "logps/chosen": -378.14337158203125,
513
+ "logps/rejected": -442.03179931640625,
514
+ "loss": 0.5231,
515
+ "rewards/accuracies": 0.699999988079071,
516
+ "rewards/chosen": -0.9646435976028442,
517
+ "rewards/margins": 0.7620329856872559,
518
+ "rewards/rejected": -1.7266767024993896,
519
+ "step": 320
520
+ },
521
+ {
522
+ "epoch": 0.35,
523
+ "learning_rate": 4.139035550786494e-07,
524
+ "logits/chosen": -1.151816487312317,
525
+ "logits/rejected": -0.6147797703742981,
526
+ "logps/chosen": -371.8530578613281,
527
+ "logps/rejected": -387.37786865234375,
528
+ "loss": 0.557,
529
+ "rewards/accuracies": 0.7124999761581421,
530
+ "rewards/chosen": -0.9230759739875793,
531
+ "rewards/margins": 0.6179936528205872,
532
+ "rewards/rejected": -1.541069507598877,
533
+ "step": 330
534
+ },
535
+ {
536
+ "epoch": 0.36,
537
+ "learning_rate": 4.0689152079869306e-07,
538
+ "logits/chosen": -1.3167518377304077,
539
+ "logits/rejected": -0.7884875535964966,
540
+ "logps/chosen": -342.40643310546875,
541
+ "logps/rejected": -372.789794921875,
542
+ "loss": 0.5621,
543
+ "rewards/accuracies": 0.668749988079071,
544
+ "rewards/chosen": -0.966681957244873,
545
+ "rewards/margins": 0.5185996890068054,
546
+ "rewards/rejected": -1.4852817058563232,
547
+ "step": 340
548
+ },
549
+ {
550
+ "epoch": 0.37,
551
+ "learning_rate": 3.99669658015821e-07,
552
+ "logits/chosen": -0.2449747771024704,
553
+ "logits/rejected": 0.10235898196697235,
554
+ "logps/chosen": -384.5868225097656,
555
+ "logps/rejected": -475.15606689453125,
556
+ "loss": 0.5192,
557
+ "rewards/accuracies": 0.737500011920929,
558
+ "rewards/chosen": -1.1246081590652466,
559
+ "rewards/margins": 0.8372184038162231,
560
+ "rewards/rejected": -1.9618265628814697,
561
+ "step": 350
562
+ },
563
+ {
564
+ "epoch": 0.38,
565
+ "learning_rate": 3.92247625331392e-07,
566
+ "logits/chosen": -0.19500190019607544,
567
+ "logits/rejected": 0.6360437273979187,
568
+ "logps/chosen": -383.85845947265625,
569
+ "logps/rejected": -425.1438903808594,
570
+ "loss": 0.5123,
571
+ "rewards/accuracies": 0.7562500238418579,
572
+ "rewards/chosen": -1.011595606803894,
573
+ "rewards/margins": 0.8127263188362122,
574
+ "rewards/rejected": -1.8243221044540405,
575
+ "step": 360
576
+ },
577
+ {
578
+ "epoch": 0.39,
579
+ "learning_rate": 3.846353490562664e-07,
580
+ "logits/chosen": -0.6418613791465759,
581
+ "logits/rejected": 0.004262035712599754,
582
+ "logps/chosen": -332.0767517089844,
583
+ "logps/rejected": -436.4134826660156,
584
+ "loss": 0.4874,
585
+ "rewards/accuracies": 0.768750011920929,
586
+ "rewards/chosen": -0.919625461101532,
587
+ "rewards/margins": 0.9047123789787292,
588
+ "rewards/rejected": -1.8243379592895508,
589
+ "step": 370
590
+ },
591
+ {
592
+ "epoch": 0.4,
593
+ "learning_rate": 3.768430099352445e-07,
594
+ "logits/chosen": -1.2811148166656494,
595
+ "logits/rejected": -0.3615255355834961,
596
+ "logps/chosen": -408.3636474609375,
597
+ "logps/rejected": -467.22601318359375,
598
+ "loss": 0.4952,
599
+ "rewards/accuracies": 0.762499988079071,
600
+ "rewards/chosen": -1.1174873113632202,
601
+ "rewards/margins": 0.9306632280349731,
602
+ "rewards/rejected": -2.0481505393981934,
603
+ "step": 380
604
+ },
605
+ {
606
+ "epoch": 0.41,
607
+ "learning_rate": 3.6888102953122304e-07,
608
+ "logits/chosen": -0.6116107702255249,
609
+ "logits/rejected": 0.6004475355148315,
610
+ "logps/chosen": -408.0143737792969,
611
+ "logps/rejected": -468.11602783203125,
612
+ "loss": 0.5044,
613
+ "rewards/accuracies": 0.7749999761581421,
614
+ "rewards/chosen": -1.3232171535491943,
615
+ "rewards/margins": 1.0072014331817627,
616
+ "rewards/rejected": -2.330418348312378,
617
+ "step": 390
618
+ },
619
+ {
620
+ "epoch": 0.42,
621
+ "learning_rate": 3.607600562872785e-07,
622
+ "logits/chosen": -0.746349036693573,
623
+ "logits/rejected": 0.21193762123584747,
624
+ "logps/chosen": -457.70703125,
625
+ "logps/rejected": -493.6814880371094,
626
+ "loss": 0.4941,
627
+ "rewards/accuracies": 0.71875,
628
+ "rewards/chosen": -1.5325865745544434,
629
+ "rewards/margins": 0.7493036985397339,
630
+ "rewards/rejected": -2.281890392303467,
631
+ "step": 400
632
+ },
633
+ {
634
+ "epoch": 0.42,
635
+ "eval_logits/chosen": -0.766248881816864,
636
+ "eval_logits/rejected": 0.168849915266037,
637
+ "eval_logps/chosen": -409.9141540527344,
638
+ "eval_logps/rejected": -475.0238037109375,
639
+ "eval_loss": 0.5082493424415588,
640
+ "eval_rewards/accuracies": 0.7599206566810608,
641
+ "eval_rewards/chosen": -1.2581086158752441,
642
+ "eval_rewards/margins": 0.8743875622749329,
643
+ "eval_rewards/rejected": -2.1324961185455322,
644
+ "eval_runtime": 277.9817,
645
+ "eval_samples_per_second": 7.195,
646
+ "eval_steps_per_second": 0.227,
647
+ "step": 400
648
+ },
649
+ {
650
+ "epoch": 0.43,
651
+ "learning_rate": 3.5249095128531856e-07,
652
+ "logits/chosen": -1.081780195236206,
653
+ "logits/rejected": -0.14936906099319458,
654
+ "logps/chosen": -433.837890625,
655
+ "logps/rejected": -488.9054260253906,
656
+ "loss": 0.5236,
657
+ "rewards/accuracies": 0.7124999761581421,
658
+ "rewards/chosen": -1.1999945640563965,
659
+ "rewards/margins": 0.7789229154586792,
660
+ "rewards/rejected": -1.9789173603057861,
661
+ "step": 410
662
+ },
663
+ {
664
+ "epoch": 0.44,
665
+ "learning_rate": 3.4408477372034736e-07,
666
+ "logits/chosen": -0.9525474309921265,
667
+ "logits/rejected": -0.22369150817394257,
668
+ "logps/chosen": -385.2050476074219,
669
+ "logps/rejected": -414.0570373535156,
670
+ "loss": 0.5265,
671
+ "rewards/accuracies": 0.6937500238418579,
672
+ "rewards/chosen": -1.2109800577163696,
673
+ "rewards/margins": 0.6298609972000122,
674
+ "rewards/rejected": -1.8408409357070923,
675
+ "step": 420
676
+ },
677
+ {
678
+ "epoch": 0.45,
679
+ "learning_rate": 3.3555276610977276e-07,
680
+ "logits/chosen": -1.1188991069793701,
681
+ "logits/rejected": -0.6098810434341431,
682
+ "logps/chosen": -366.67327880859375,
683
+ "logps/rejected": -413.8002014160156,
684
+ "loss": 0.5316,
685
+ "rewards/accuracies": 0.762499988079071,
686
+ "rewards/chosen": -1.0450365543365479,
687
+ "rewards/margins": 0.6988474726676941,
688
+ "rewards/rejected": -1.7438838481903076,
689
+ "step": 430
690
+ },
691
+ {
692
+ "epoch": 0.46,
693
+ "learning_rate": 3.269063392575352e-07,
694
+ "logits/chosen": -0.8952062726020813,
695
+ "logits/rejected": -0.29640626907348633,
696
+ "logps/chosen": -369.5331115722656,
697
+ "logps/rejected": -437.5091857910156,
698
+ "loss": 0.5019,
699
+ "rewards/accuracies": 0.731249988079071,
700
+ "rewards/chosen": -1.1057814359664917,
701
+ "rewards/margins": 0.7806274890899658,
702
+ "rewards/rejected": -1.886409044265747,
703
+ "step": 440
704
+ },
705
+ {
706
+ "epoch": 0.47,
707
+ "learning_rate": 3.1815705699316964e-07,
708
+ "logits/chosen": -0.5310872793197632,
709
+ "logits/rejected": 0.35491126775741577,
710
+ "logps/chosen": -383.77337646484375,
711
+ "logps/rejected": -466.27252197265625,
712
+ "loss": 0.5111,
713
+ "rewards/accuracies": 0.768750011920929,
714
+ "rewards/chosen": -1.1957778930664062,
715
+ "rewards/margins": 1.0301498174667358,
716
+ "rewards/rejected": -2.2259278297424316,
717
+ "step": 450
718
+ },
719
+ {
720
+ "epoch": 0.48,
721
+ "learning_rate": 3.0931662070620794e-07,
722
+ "logits/chosen": -0.14580872654914856,
723
+ "logits/rejected": 1.0018532276153564,
724
+ "logps/chosen": -406.70404052734375,
725
+ "logps/rejected": -492.42333984375,
726
+ "loss": 0.5107,
727
+ "rewards/accuracies": 0.737500011920929,
728
+ "rewards/chosen": -1.4167234897613525,
729
+ "rewards/margins": 0.9401258230209351,
730
+ "rewards/rejected": -2.3568496704101562,
731
+ "step": 460
732
+ },
733
+ {
734
+ "epoch": 0.49,
735
+ "learning_rate": 3.003968536966078e-07,
736
+ "logits/chosen": -0.4022013545036316,
737
+ "logits/rejected": 0.9017314910888672,
738
+ "logps/chosen": -438.36492919921875,
739
+ "logps/rejected": -496.6222229003906,
740
+ "loss": 0.506,
741
+ "rewards/accuracies": 0.8062499761581421,
742
+ "rewards/chosen": -1.4015130996704102,
743
+ "rewards/margins": 0.9599510431289673,
744
+ "rewards/rejected": -2.361464023590088,
745
+ "step": 470
746
+ },
747
+ {
748
+ "epoch": 0.5,
749
+ "learning_rate": 2.9140968536213693e-07,
750
+ "logits/chosen": -0.5105709433555603,
751
+ "logits/rejected": 0.3333401083946228,
752
+ "logps/chosen": -348.86138916015625,
753
+ "logps/rejected": -420.00909423828125,
754
+ "loss": 0.5345,
755
+ "rewards/accuracies": 0.7250000238418579,
756
+ "rewards/chosen": -1.1617661714553833,
757
+ "rewards/margins": 0.749540388584137,
758
+ "rewards/rejected": -1.911306619644165,
759
+ "step": 480
760
+ },
761
+ {
762
+ "epoch": 0.51,
763
+ "learning_rate": 2.823671352438608e-07,
764
+ "logits/chosen": -0.5324286818504333,
765
+ "logits/rejected": 0.6832448244094849,
766
+ "logps/chosen": -373.8572692871094,
767
+ "logps/rejected": -425.37188720703125,
768
+ "loss": 0.5055,
769
+ "rewards/accuracies": 0.7437499761581421,
770
+ "rewards/chosen": -0.9815096855163574,
771
+ "rewards/margins": 0.7788732051849365,
772
+ "rewards/rejected": -1.760382890701294,
773
+ "step": 490
774
+ },
775
+ {
776
+ "epoch": 0.52,
777
+ "learning_rate": 2.73281296951072e-07,
778
+ "logits/chosen": 0.2629791498184204,
779
+ "logits/rejected": 1.6376205682754517,
780
+ "logps/chosen": -394.38934326171875,
781
+ "logps/rejected": -471.0135803222656,
782
+ "loss": 0.506,
783
+ "rewards/accuracies": 0.7437499761581421,
784
+ "rewards/chosen": -1.2273036241531372,
785
+ "rewards/margins": 0.9877279996871948,
786
+ "rewards/rejected": -2.215031623840332,
787
+ "step": 500
788
+ },
789
+ {
790
+ "epoch": 0.52,
791
+ "eval_logits/chosen": 0.08570393919944763,
792
+ "eval_logits/rejected": 1.3982725143432617,
793
+ "eval_logps/chosen": -394.77392578125,
794
+ "eval_logps/rejected": -468.8965759277344,
795
+ "eval_loss": 0.5090134739875793,
796
+ "eval_rewards/accuracies": 0.7638888955116272,
797
+ "eval_rewards/chosen": -1.1067068576812744,
798
+ "eval_rewards/margins": 0.9645172357559204,
799
+ "eval_rewards/rejected": -2.0712242126464844,
800
+ "eval_runtime": 278.9033,
801
+ "eval_samples_per_second": 7.171,
802
+ "eval_steps_per_second": 0.226,
803
+ "step": 500
804
+ },
805
+ {
806
+ "epoch": 0.53,
807
+ "learning_rate": 2.641643219871597e-07,
808
+ "logits/chosen": 0.26743632555007935,
809
+ "logits/rejected": 1.9989964962005615,
810
+ "logps/chosen": -418.77044677734375,
811
+ "logps/rejected": -468.5576171875,
812
+ "loss": 0.4874,
813
+ "rewards/accuracies": 0.762499988079071,
814
+ "rewards/chosen": -1.1739251613616943,
815
+ "rewards/margins": 0.9960535168647766,
816
+ "rewards/rejected": -2.1699788570404053,
817
+ "step": 510
818
+ },
819
+ {
820
+ "epoch": 0.54,
821
+ "learning_rate": 2.550284034980507e-07,
822
+ "logits/chosen": 0.5876134634017944,
823
+ "logits/rejected": 1.631603479385376,
824
+ "logps/chosen": -406.91351318359375,
825
+ "logps/rejected": -484.844482421875,
826
+ "loss": 0.5042,
827
+ "rewards/accuracies": 0.762499988079071,
828
+ "rewards/chosen": -1.4775892496109009,
829
+ "rewards/margins": 0.853603184223175,
830
+ "rewards/rejected": -2.3311924934387207,
831
+ "step": 520
832
+ },
833
+ {
834
+ "epoch": 0.55,
835
+ "learning_rate": 2.4588575996495794e-07,
836
+ "logits/chosen": 0.36718782782554626,
837
+ "logits/rejected": 1.523099660873413,
838
+ "logps/chosen": -433.95208740234375,
839
+ "logps/rejected": -500.431396484375,
840
+ "loss": 0.4987,
841
+ "rewards/accuracies": 0.7250000238418579,
842
+ "rewards/chosen": -1.4977753162384033,
843
+ "rewards/margins": 0.9368091821670532,
844
+ "rewards/rejected": -2.434584379196167,
845
+ "step": 530
846
+ },
847
+ {
848
+ "epoch": 0.57,
849
+ "learning_rate": 2.367486188632446e-07,
850
+ "logits/chosen": -0.19433559477329254,
851
+ "logits/rejected": 1.0568033456802368,
852
+ "logps/chosen": -429.4280700683594,
853
+ "logps/rejected": -540.71337890625,
854
+ "loss": 0.5031,
855
+ "rewards/accuracies": 0.7875000238418579,
856
+ "rewards/chosen": -1.3522653579711914,
857
+ "rewards/margins": 1.0526258945465088,
858
+ "rewards/rejected": -2.4048912525177,
859
+ "step": 540
860
+ },
861
+ {
862
+ "epoch": 0.58,
863
+ "learning_rate": 2.276292003092593e-07,
864
+ "logits/chosen": -0.5677329897880554,
865
+ "logits/rejected": 0.8163228034973145,
866
+ "logps/chosen": -383.25537109375,
867
+ "logps/rejected": -460.65643310546875,
868
+ "loss": 0.519,
869
+ "rewards/accuracies": 0.75,
870
+ "rewards/chosen": -1.1353278160095215,
871
+ "rewards/margins": 0.9918322563171387,
872
+ "rewards/rejected": -2.127159833908081,
873
+ "step": 550
874
+ },
875
+ {
876
+ "epoch": 0.59,
877
+ "learning_rate": 2.185397007170141e-07,
878
+ "logits/chosen": -0.5527084469795227,
879
+ "logits/rejected": 0.5254577398300171,
880
+ "logps/chosen": -373.94189453125,
881
+ "logps/rejected": -429.65283203125,
882
+ "loss": 0.5028,
883
+ "rewards/accuracies": 0.75,
884
+ "rewards/chosen": -1.071791410446167,
885
+ "rewards/margins": 0.8905409574508667,
886
+ "rewards/rejected": -1.962332010269165,
887
+ "step": 560
888
+ },
889
+ {
890
+ "epoch": 0.6,
891
+ "learning_rate": 2.094922764865619e-07,
892
+ "logits/chosen": -0.08112873882055283,
893
+ "logits/rejected": 1.072463035583496,
894
+ "logps/chosen": -403.42254638671875,
895
+ "logps/rejected": -467.67449951171875,
896
+ "loss": 0.5061,
897
+ "rewards/accuracies": 0.7250000238418579,
898
+ "rewards/chosen": -1.2727501392364502,
899
+ "rewards/margins": 0.8145562410354614,
900
+ "rewards/rejected": -2.087306499481201,
901
+ "step": 570
902
+ },
903
+ {
904
+ "epoch": 0.61,
905
+ "learning_rate": 2.0049902774588797e-07,
906
+ "logits/chosen": 0.4094156324863434,
907
+ "logits/rejected": 1.793280005455017,
908
+ "logps/chosen": -428.2859802246094,
909
+ "logps/rejected": -489.5137634277344,
910
+ "loss": 0.5007,
911
+ "rewards/accuracies": 0.762499988079071,
912
+ "rewards/chosen": -1.5437203645706177,
913
+ "rewards/margins": 0.9729151725769043,
914
+ "rewards/rejected": -2.5166354179382324,
915
+ "step": 580
916
+ },
917
+ {
918
+ "epoch": 0.62,
919
+ "learning_rate": 1.9157198216806238e-07,
920
+ "logits/chosen": -0.1933516561985016,
921
+ "logits/rejected": 0.8015575408935547,
922
+ "logps/chosen": -403.41021728515625,
923
+ "logps/rejected": -485.6842346191406,
924
+ "loss": 0.496,
925
+ "rewards/accuracies": 0.6812499761581421,
926
+ "rewards/chosen": -1.3032294511795044,
927
+ "rewards/margins": 0.7608039379119873,
928
+ "rewards/rejected": -2.0640335083007812,
929
+ "step": 590
930
+ },
931
+ {
932
+ "epoch": 0.63,
933
+ "learning_rate": 1.8272307888529274e-07,
934
+ "logits/chosen": -0.43585944175720215,
935
+ "logits/rejected": 0.9283556938171387,
936
+ "logps/chosen": -461.6996154785156,
937
+ "logps/rejected": -543.3409423828125,
938
+ "loss": 0.4893,
939
+ "rewards/accuracies": 0.762499988079071,
940
+ "rewards/chosen": -1.3578338623046875,
941
+ "rewards/margins": 0.9542685747146606,
942
+ "rewards/rejected": -2.3121023178100586,
943
+ "step": 600
944
+ },
945
+ {
946
+ "epoch": 0.63,
947
+ "eval_logits/chosen": -0.41807228326797485,
948
+ "eval_logits/rejected": 0.961281418800354,
949
+ "eval_logps/chosen": -431.0652160644531,
950
+ "eval_logps/rejected": -511.40478515625,
951
+ "eval_loss": 0.49531620740890503,
952
+ "eval_rewards/accuracies": 0.7579365372657776,
953
+ "eval_rewards/chosen": -1.469619870185852,
954
+ "eval_rewards/margins": 1.0266858339309692,
955
+ "eval_rewards/rejected": -2.4963057041168213,
956
+ "eval_runtime": 279.0041,
957
+ "eval_samples_per_second": 7.168,
958
+ "eval_steps_per_second": 0.226,
959
+ "step": 600
960
+ },
961
+ {
962
+ "epoch": 0.64,
963
+ "learning_rate": 1.7396415252139288e-07,
964
+ "logits/chosen": -0.44998255372047424,
965
+ "logits/rejected": 0.763337254524231,
966
+ "logps/chosen": -419.0155334472656,
967
+ "logps/rejected": -462.7840270996094,
968
+ "loss": 0.4842,
969
+ "rewards/accuracies": 0.7562500238418579,
970
+ "rewards/chosen": -1.4401321411132812,
971
+ "rewards/margins": 0.9432790875434875,
972
+ "rewards/rejected": -2.383411407470703,
973
+ "step": 610
974
+ },
975
+ {
976
+ "epoch": 0.65,
977
+ "learning_rate": 1.6530691736402316e-07,
978
+ "logits/chosen": -0.37868770956993103,
979
+ "logits/rejected": 1.2032816410064697,
980
+ "logps/chosen": -442.1136169433594,
981
+ "logps/rejected": -499.60504150390625,
982
+ "loss": 0.5035,
983
+ "rewards/accuracies": 0.75,
984
+ "rewards/chosen": -1.7007478475570679,
985
+ "rewards/margins": 0.9936334490776062,
986
+ "rewards/rejected": -2.6943812370300293,
987
+ "step": 620
988
+ },
989
+ {
990
+ "epoch": 0.66,
991
+ "learning_rate": 1.5676295169786864e-07,
992
+ "logits/chosen": -0.10520428419113159,
993
+ "logits/rejected": 1.3110377788543701,
994
+ "logps/chosen": -421.3946838378906,
995
+ "logps/rejected": -537.7481689453125,
996
+ "loss": 0.4811,
997
+ "rewards/accuracies": 0.762499988079071,
998
+ "rewards/chosen": -1.5981671810150146,
999
+ "rewards/margins": 1.225342869758606,
1000
+ "rewards/rejected": -2.823509931564331,
1001
+ "step": 630
1002
+ },
1003
+ {
1004
+ "epoch": 0.67,
1005
+ "learning_rate": 1.483436823197092e-07,
1006
+ "logits/chosen": -0.4932429790496826,
1007
+ "logits/rejected": 0.5939461588859558,
1008
+ "logps/chosen": -437.61572265625,
1009
+ "logps/rejected": -519.6408081054688,
1010
+ "loss": 0.496,
1011
+ "rewards/accuracies": 0.800000011920929,
1012
+ "rewards/chosen": -1.5301989316940308,
1013
+ "rewards/margins": 1.0939643383026123,
1014
+ "rewards/rejected": -2.6241631507873535,
1015
+ "step": 640
1016
+ },
1017
+ {
1018
+ "epoch": 0.68,
1019
+ "learning_rate": 1.4006036925609243e-07,
1020
+ "logits/chosen": -0.7950553894042969,
1021
+ "logits/rejected": 0.526989221572876,
1022
+ "logps/chosen": -480.47161865234375,
1023
+ "logps/rejected": -521.6663208007812,
1024
+ "loss": 0.5013,
1025
+ "rewards/accuracies": 0.78125,
1026
+ "rewards/chosen": -1.7415778636932373,
1027
+ "rewards/margins": 0.8471624255180359,
1028
+ "rewards/rejected": -2.588740587234497,
1029
+ "step": 650
1030
+ },
1031
+ {
1032
+ "epoch": 0.69,
1033
+ "learning_rate": 1.319240907040458e-07,
1034
+ "logits/chosen": -0.6145257353782654,
1035
+ "logits/rejected": 0.26546335220336914,
1036
+ "logps/chosen": -462.09527587890625,
1037
+ "logps/rejected": -522.7366943359375,
1038
+ "loss": 0.503,
1039
+ "rewards/accuracies": 0.7437499761581421,
1040
+ "rewards/chosen": -1.6060371398925781,
1041
+ "rewards/margins": 0.9507848024368286,
1042
+ "rewards/rejected": -2.556821823120117,
1043
+ "step": 660
1044
+ },
1045
+ {
1046
+ "epoch": 0.7,
1047
+ "learning_rate": 1.239457282149695e-07,
1048
+ "logits/chosen": -0.6238754391670227,
1049
+ "logits/rejected": 0.2821710407733917,
1050
+ "logps/chosen": -436.31097412109375,
1051
+ "logps/rejected": -520.7191772460938,
1052
+ "loss": 0.4934,
1053
+ "rewards/accuracies": 0.737500011920929,
1054
+ "rewards/chosen": -1.5321730375289917,
1055
+ "rewards/margins": 0.9870231747627258,
1056
+ "rewards/rejected": -2.5191962718963623,
1057
+ "step": 670
1058
+ },
1059
+ {
1060
+ "epoch": 0.71,
1061
+ "learning_rate": 1.1613595214152711e-07,
1062
+ "logits/chosen": -0.5683806538581848,
1063
+ "logits/rejected": 0.05843483284115791,
1064
+ "logps/chosen": -399.19952392578125,
1065
+ "logps/rejected": -444.88604736328125,
1066
+ "loss": 0.5183,
1067
+ "rewards/accuracies": 0.668749988079071,
1068
+ "rewards/chosen": -1.3775449991226196,
1069
+ "rewards/margins": 0.6896665692329407,
1070
+ "rewards/rejected": -2.067211627960205,
1071
+ "step": 680
1072
+ },
1073
+ {
1074
+ "epoch": 0.72,
1075
+ "learning_rate": 1.0850520736699362e-07,
1076
+ "logits/chosen": -0.8241173028945923,
1077
+ "logits/rejected": 0.25952425599098206,
1078
+ "logps/chosen": -385.21063232421875,
1079
+ "logps/rejected": -466.60809326171875,
1080
+ "loss": 0.4836,
1081
+ "rewards/accuracies": 0.762499988079071,
1082
+ "rewards/chosen": -1.237121343612671,
1083
+ "rewards/margins": 1.0103009939193726,
1084
+ "rewards/rejected": -2.247422456741333,
1085
+ "step": 690
1086
+ },
1087
+ {
1088
+ "epoch": 0.73,
1089
+ "learning_rate": 1.0106369933615042e-07,
1090
+ "logits/chosen": -0.5171055793762207,
1091
+ "logits/rejected": 0.7539600133895874,
1092
+ "logps/chosen": -430.15069580078125,
1093
+ "logps/rejected": -530.7283935546875,
1094
+ "loss": 0.4558,
1095
+ "rewards/accuracies": 0.75,
1096
+ "rewards/chosen": -1.7596960067749023,
1097
+ "rewards/margins": 1.09982168674469,
1098
+ "rewards/rejected": -2.8595175743103027,
1099
+ "step": 700
1100
+ },
1101
+ {
1102
+ "epoch": 0.73,
1103
+ "eval_logits/chosen": -0.44453164935112,
1104
+ "eval_logits/rejected": 0.6945505738258362,
1105
+ "eval_logps/chosen": -465.3408508300781,
1106
+ "eval_logps/rejected": -550.7127685546875,
1107
+ "eval_loss": 0.49365857243537903,
1108
+ "eval_rewards/accuracies": 0.7698412537574768,
1109
+ "eval_rewards/chosen": -1.8123756647109985,
1110
+ "eval_rewards/margins": 1.0770103931427002,
1111
+ "eval_rewards/rejected": -2.889385938644409,
1112
+ "eval_runtime": 278.4646,
1113
+ "eval_samples_per_second": 7.182,
1114
+ "eval_steps_per_second": 0.226,
1115
+ "step": 700
1116
+ },
1117
+ {
1118
+ "epoch": 0.74,
1119
+ "learning_rate": 9.382138040640714e-08,
1120
+ "logits/chosen": -0.43636417388916016,
1121
+ "logits/rejected": 0.8631266355514526,
1122
+ "logps/chosen": -496.98138427734375,
1123
+ "logps/rejected": -531.872314453125,
1124
+ "loss": 0.4688,
1125
+ "rewards/accuracies": 0.7124999761581421,
1126
+ "rewards/chosen": -1.92108154296875,
1127
+ "rewards/margins": 0.9923149943351746,
1128
+ "rewards/rejected": -2.9133965969085693,
1129
+ "step": 710
1130
+ },
1131
+ {
1132
+ "epoch": 0.75,
1133
+ "learning_rate": 8.678793653740632e-08,
1134
+ "logits/chosen": -0.10378441959619522,
1135
+ "logits/rejected": 1.2110047340393066,
1136
+ "logps/chosen": -494.89990234375,
1137
+ "logps/rejected": -584.9482421875,
1138
+ "loss": 0.4768,
1139
+ "rewards/accuracies": 0.8187500238418579,
1140
+ "rewards/chosen": -2.034623146057129,
1141
+ "rewards/margins": 1.1489760875701904,
1142
+ "rewards/rejected": -3.1835992336273193,
1143
+ "step": 720
1144
+ },
1145
+ {
1146
+ "epoch": 0.76,
1147
+ "learning_rate": 7.997277433690983e-08,
1148
+ "logits/chosen": -0.11848801374435425,
1149
+ "logits/rejected": 1.194339632987976,
1150
+ "logps/chosen": -507.22760009765625,
1151
+ "logps/rejected": -562.0817260742188,
1152
+ "loss": 0.4949,
1153
+ "rewards/accuracies": 0.7749999761581421,
1154
+ "rewards/chosen": -2.0333375930786133,
1155
+ "rewards/margins": 1.0677392482757568,
1156
+ "rewards/rejected": -3.101076602935791,
1157
+ "step": 730
1158
+ },
1159
+ {
1160
+ "epoch": 0.77,
1161
+ "learning_rate": 7.338500848029602e-08,
1162
+ "logits/chosen": 0.04791594296693802,
1163
+ "logits/rejected": 1.755448579788208,
1164
+ "logps/chosen": -520.539794921875,
1165
+ "logps/rejected": -599.8433837890625,
1166
+ "loss": 0.4849,
1167
+ "rewards/accuracies": 0.8187500238418579,
1168
+ "rewards/chosen": -2.0619003772735596,
1169
+ "rewards/margins": 1.345897912979126,
1170
+ "rewards/rejected": -3.4077982902526855,
1171
+ "step": 740
1172
+ },
1173
+ {
1174
+ "epoch": 0.78,
1175
+ "learning_rate": 6.70334495204884e-08,
1176
+ "logits/chosen": -0.3863813579082489,
1177
+ "logits/rejected": 0.8495087623596191,
1178
+ "logps/chosen": -478.74835205078125,
1179
+ "logps/rejected": -582.2133178710938,
1180
+ "loss": 0.4878,
1181
+ "rewards/accuracies": 0.737500011920929,
1182
+ "rewards/chosen": -1.953743577003479,
1183
+ "rewards/margins": 1.0831085443496704,
1184
+ "rewards/rejected": -3.0368518829345703,
1185
+ "step": 750
1186
+ },
1187
+ {
1188
+ "epoch": 0.8,
1189
+ "learning_rate": 6.092659210462231e-08,
1190
+ "logits/chosen": -0.1735175997018814,
1191
+ "logits/rejected": 0.8358144760131836,
1192
+ "logps/chosen": -487.7015075683594,
1193
+ "logps/rejected": -554.705810546875,
1194
+ "loss": 0.4576,
1195
+ "rewards/accuracies": 0.731249988079071,
1196
+ "rewards/chosen": -2.09541392326355,
1197
+ "rewards/margins": 0.945289134979248,
1198
+ "rewards/rejected": -3.040703058242798,
1199
+ "step": 760
1200
+ },
1201
+ {
1202
+ "epoch": 0.81,
1203
+ "learning_rate": 5.507260361320737e-08,
1204
+ "logits/chosen": -0.21100804209709167,
1205
+ "logits/rejected": 0.5948140621185303,
1206
+ "logps/chosen": -511.5699157714844,
1207
+ "logps/rejected": -609.0560302734375,
1208
+ "loss": 0.5073,
1209
+ "rewards/accuracies": 0.75,
1210
+ "rewards/chosen": -2.175572156906128,
1211
+ "rewards/margins": 0.9202457666397095,
1212
+ "rewards/rejected": -3.095817804336548,
1213
+ "step": 770
1214
+ },
1215
+ {
1216
+ "epoch": 0.82,
1217
+ "learning_rate": 4.947931323697982e-08,
1218
+ "logits/chosen": -0.36330968141555786,
1219
+ "logits/rejected": 0.8800643682479858,
1220
+ "logps/chosen": -465.767333984375,
1221
+ "logps/rejected": -530.8380737304688,
1222
+ "loss": 0.5019,
1223
+ "rewards/accuracies": 0.706250011920929,
1224
+ "rewards/chosen": -2.095672130584717,
1225
+ "rewards/margins": 0.8483352661132812,
1226
+ "rewards/rejected": -2.944007158279419,
1227
+ "step": 780
1228
+ },
1229
+ {
1230
+ "epoch": 0.83,
1231
+ "learning_rate": 4.415420150605398e-08,
1232
+ "logits/chosen": -0.35998591780662537,
1233
+ "logits/rejected": 0.594068706035614,
1234
+ "logps/chosen": -519.5208740234375,
1235
+ "logps/rejected": -622.3743896484375,
1236
+ "loss": 0.4857,
1237
+ "rewards/accuracies": 0.762499988079071,
1238
+ "rewards/chosen": -2.191953182220459,
1239
+ "rewards/margins": 1.1867144107818604,
1240
+ "rewards/rejected": -3.3786678314208984,
1241
+ "step": 790
1242
+ },
1243
+ {
1244
+ "epoch": 0.84,
1245
+ "learning_rate": 3.9104390285376374e-08,
1246
+ "logits/chosen": -0.4554923474788666,
1247
+ "logits/rejected": 0.7812598943710327,
1248
+ "logps/chosen": -540.5608520507812,
1249
+ "logps/rejected": -596.4019775390625,
1250
+ "loss": 0.4781,
1251
+ "rewards/accuracies": 0.762499988079071,
1252
+ "rewards/chosen": -1.9934720993041992,
1253
+ "rewards/margins": 1.1518696546554565,
1254
+ "rewards/rejected": -3.1453421115875244,
1255
+ "step": 800
1256
+ },
1257
+ {
1258
+ "epoch": 0.84,
1259
+ "eval_logits/chosen": -0.45034220814704895,
1260
+ "eval_logits/rejected": 0.7310919761657715,
1261
+ "eval_logps/chosen": -483.78631591796875,
1262
+ "eval_logps/rejected": -571.6085815429688,
1263
+ "eval_loss": 0.4898362457752228,
1264
+ "eval_rewards/accuracies": 0.7698412537574768,
1265
+ "eval_rewards/chosen": -1.9968301057815552,
1266
+ "eval_rewards/margins": 1.1015136241912842,
1267
+ "eval_rewards/rejected": -3.09834361076355,
1268
+ "eval_runtime": 275.7757,
1269
+ "eval_samples_per_second": 7.252,
1270
+ "eval_steps_per_second": 0.228,
1271
+ "step": 800
1272
+ },
1273
+ {
1274
+ "epoch": 0.85,
1275
+ "learning_rate": 3.433663324986208e-08,
1276
+ "logits/chosen": -0.3436442017555237,
1277
+ "logits/rejected": 1.0277094841003418,
1278
+ "logps/chosen": -494.46343994140625,
1279
+ "logps/rejected": -540.2312622070312,
1280
+ "loss": 0.4921,
1281
+ "rewards/accuracies": 0.7437499761581421,
1282
+ "rewards/chosen": -2.1496834754943848,
1283
+ "rewards/margins": 0.8621308207511902,
1284
+ "rewards/rejected": -3.0118138790130615,
1285
+ "step": 810
1286
+ },
1287
+ {
1288
+ "epoch": 0.86,
1289
+ "learning_rate": 2.9857306851953897e-08,
1290
+ "logits/chosen": -0.10989487171173096,
1291
+ "logits/rejected": 0.818514347076416,
1292
+ "logps/chosen": -449.99346923828125,
1293
+ "logps/rejected": -534.0044555664062,
1294
+ "loss": 0.5095,
1295
+ "rewards/accuracies": 0.7749999761581421,
1296
+ "rewards/chosen": -1.9733314514160156,
1297
+ "rewards/margins": 1.014764428138733,
1298
+ "rewards/rejected": -2.988095760345459,
1299
+ "step": 820
1300
+ },
1301
+ {
1302
+ "epoch": 0.87,
1303
+ "learning_rate": 2.567240179368185e-08,
1304
+ "logits/chosen": -0.16947147250175476,
1305
+ "logits/rejected": 0.5960814952850342,
1306
+ "logps/chosen": -450.3282165527344,
1307
+ "logps/rejected": -539.936767578125,
1308
+ "loss": 0.4804,
1309
+ "rewards/accuracies": 0.75,
1310
+ "rewards/chosen": -2.0146961212158203,
1311
+ "rewards/margins": 0.9625784754753113,
1312
+ "rewards/rejected": -2.9772744178771973,
1313
+ "step": 830
1314
+ },
1315
+ {
1316
+ "epoch": 0.88,
1317
+ "learning_rate": 2.1787515014630357e-08,
1318
+ "logits/chosen": -0.3402346968650818,
1319
+ "logits/rejected": 0.5662250518798828,
1320
+ "logps/chosen": -507.84747314453125,
1321
+ "logps/rejected": -563.3038940429688,
1322
+ "loss": 0.5113,
1323
+ "rewards/accuracies": 0.7250000238418579,
1324
+ "rewards/chosen": -1.9471012353897095,
1325
+ "rewards/margins": 0.8456957936286926,
1326
+ "rewards/rejected": -2.792797327041626,
1327
+ "step": 840
1328
+ },
1329
+ {
1330
+ "epoch": 0.89,
1331
+ "learning_rate": 1.820784220652766e-08,
1332
+ "logits/chosen": -0.5917896032333374,
1333
+ "logits/rejected": 0.3662889003753662,
1334
+ "logps/chosen": -483.69134521484375,
1335
+ "logps/rejected": -513.99658203125,
1336
+ "loss": 0.494,
1337
+ "rewards/accuracies": 0.762499988079071,
1338
+ "rewards/chosen": -1.8341186046600342,
1339
+ "rewards/margins": 0.8557528257369995,
1340
+ "rewards/rejected": -2.689871311187744,
1341
+ "step": 850
1342
+ },
1343
+ {
1344
+ "epoch": 0.9,
1345
+ "learning_rate": 1.4938170864468636e-08,
1346
+ "logits/chosen": -0.317844957113266,
1347
+ "logits/rejected": 0.6835850477218628,
1348
+ "logps/chosen": -478.897705078125,
1349
+ "logps/rejected": -561.5941772460938,
1350
+ "loss": 0.4905,
1351
+ "rewards/accuracies": 0.768750011920929,
1352
+ "rewards/chosen": -2.0050323009490967,
1353
+ "rewards/margins": 1.1022390127182007,
1354
+ "rewards/rejected": -3.107271432876587,
1355
+ "step": 860
1356
+ },
1357
+ {
1358
+ "epoch": 0.91,
1359
+ "learning_rate": 1.1982873884064465e-08,
1360
+ "logits/chosen": -0.48837724328041077,
1361
+ "logits/rejected": 0.6178911924362183,
1362
+ "logps/chosen": -414.78631591796875,
1363
+ "logps/rejected": -539.1580200195312,
1364
+ "loss": 0.4814,
1365
+ "rewards/accuracies": 0.8187500238418579,
1366
+ "rewards/chosen": -1.678101897239685,
1367
+ "rewards/margins": 1.251063346862793,
1368
+ "rewards/rejected": -2.9291653633117676,
1369
+ "step": 870
1370
+ },
1371
+ {
1372
+ "epoch": 0.92,
1373
+ "learning_rate": 9.345903713082304e-09,
1374
+ "logits/chosen": -0.3579421043395996,
1375
+ "logits/rejected": 0.8588876724243164,
1376
+ "logps/chosen": -478.02728271484375,
1377
+ "logps/rejected": -552.3193359375,
1378
+ "loss": 0.4827,
1379
+ "rewards/accuracies": 0.731249988079071,
1380
+ "rewards/chosen": -2.0388994216918945,
1381
+ "rewards/margins": 0.9511061906814575,
1382
+ "rewards/rejected": -2.9900057315826416,
1383
+ "step": 880
1384
+ },
1385
+ {
1386
+ "epoch": 0.93,
1387
+ "learning_rate": 7.030787065396865e-09,
1388
+ "logits/chosen": -0.30088967084884644,
1389
+ "logits/rejected": 0.4045659601688385,
1390
+ "logps/chosen": -469.99920654296875,
1391
+ "logps/rejected": -559.8849487304688,
1392
+ "loss": 0.509,
1393
+ "rewards/accuracies": 0.7124999761581421,
1394
+ "rewards/chosen": -2.034852981567383,
1395
+ "rewards/margins": 0.909083366394043,
1396
+ "rewards/rejected": -2.943936586380005,
1397
+ "step": 890
1398
+ },
1399
+ {
1400
+ "epoch": 0.94,
1401
+ "learning_rate": 5.04062020432286e-09,
1402
+ "logits/chosen": -0.4078744351863861,
1403
+ "logits/rejected": 0.6653724908828735,
1404
+ "logps/chosen": -496.38848876953125,
1405
+ "logps/rejected": -572.7337646484375,
1406
+ "loss": 0.495,
1407
+ "rewards/accuracies": 0.7124999761581421,
1408
+ "rewards/chosen": -1.979608178138733,
1409
+ "rewards/margins": 0.9050843119621277,
1410
+ "rewards/rejected": -2.8846921920776367,
1411
+ "step": 900
1412
+ },
1413
+ {
1414
+ "epoch": 0.94,
1415
+ "eval_logits/chosen": -0.46421733498573303,
1416
+ "eval_logits/rejected": 0.6757184267044067,
1417
+ "eval_logps/chosen": -477.7505187988281,
1418
+ "eval_logps/rejected": -563.537841796875,
1419
+ "eval_loss": 0.48935839533805847,
1420
+ "eval_rewards/accuracies": 0.7698412537574768,
1421
+ "eval_rewards/chosen": -1.9364722967147827,
1422
+ "eval_rewards/margins": 1.0811636447906494,
1423
+ "eval_rewards/rejected": -3.0176358222961426,
1424
+ "eval_runtime": 278.8433,
1425
+ "eval_samples_per_second": 7.172,
1426
+ "eval_steps_per_second": 0.226,
1427
+ "step": 900
1428
+ },
1429
+ {
1430
+ "epoch": 0.95,
1431
+ "learning_rate": 3.3780648016376866e-09,
1432
+ "logits/chosen": -0.34233179688453674,
1433
+ "logits/rejected": 0.7931967973709106,
1434
+ "logps/chosen": -453.32513427734375,
1435
+ "logps/rejected": -545.7423095703125,
1436
+ "loss": 0.4928,
1437
+ "rewards/accuracies": 0.7749999761581421,
1438
+ "rewards/chosen": -2.057847023010254,
1439
+ "rewards/margins": 0.984235942363739,
1440
+ "rewards/rejected": -3.0420830249786377,
1441
+ "step": 910
1442
+ },
1443
+ {
1444
+ "epoch": 0.96,
1445
+ "learning_rate": 2.0453443778310766e-09,
1446
+ "logits/chosen": -0.42071524262428284,
1447
+ "logits/rejected": 0.7710467576980591,
1448
+ "logps/chosen": -491.179931640625,
1449
+ "logps/rejected": -565.1658935546875,
1450
+ "loss": 0.4639,
1451
+ "rewards/accuracies": 0.800000011920929,
1452
+ "rewards/chosen": -1.9547284841537476,
1453
+ "rewards/margins": 1.1154911518096924,
1454
+ "rewards/rejected": -3.0702195167541504,
1455
+ "step": 920
1456
+ },
1457
+ {
1458
+ "epoch": 0.97,
1459
+ "learning_rate": 1.0442413283435758e-09,
1460
+ "logits/chosen": -0.39107799530029297,
1461
+ "logits/rejected": 0.5957974195480347,
1462
+ "logps/chosen": -469.1175231933594,
1463
+ "logps/rejected": -551.2074584960938,
1464
+ "loss": 0.4664,
1465
+ "rewards/accuracies": 0.8187500238418579,
1466
+ "rewards/chosen": -1.9638084173202515,
1467
+ "rewards/margins": 1.1078821420669556,
1468
+ "rewards/rejected": -3.071690797805786,
1469
+ "step": 930
1470
+ },
1471
+ {
1472
+ "epoch": 0.98,
1473
+ "learning_rate": 3.760945397705828e-10,
1474
+ "logits/chosen": -0.2911260724067688,
1475
+ "logits/rejected": 0.8524629473686218,
1476
+ "logps/chosen": -528.3253784179688,
1477
+ "logps/rejected": -595.6929931640625,
1478
+ "loss": 0.4725,
1479
+ "rewards/accuracies": 0.768750011920929,
1480
+ "rewards/chosen": -1.9858026504516602,
1481
+ "rewards/margins": 1.0581741333007812,
1482
+ "rewards/rejected": -3.0439765453338623,
1483
+ "step": 940
1484
+ },
1485
+ {
1486
+ "epoch": 0.99,
1487
+ "learning_rate": 4.17975992204056e-11,
1488
+ "logits/chosen": -0.4992304742336273,
1489
+ "logits/rejected": 0.760162353515625,
1490
+ "logps/chosen": -487.5731506347656,
1491
+ "logps/rejected": -578.0675048828125,
1492
+ "loss": 0.5097,
1493
+ "rewards/accuracies": 0.7749999761581421,
1494
+ "rewards/chosen": -2.031038761138916,
1495
+ "rewards/margins": 1.11741042137146,
1496
+ "rewards/rejected": -3.148449182510376,
1497
+ "step": 950
1498
+ },
1499
+ {
1500
+ "epoch": 1.0,
1501
+ "step": 955,
1502
+ "total_flos": 0.0,
1503
+ "train_loss": 0.5274335609056563,
1504
+ "train_runtime": 22383.3635,
1505
+ "train_samples_per_second": 2.731,
1506
+ "train_steps_per_second": 0.043
1507
+ }
1508
+ ],
1509
+ "logging_steps": 10,
1510
+ "max_steps": 955,
1511
+ "num_input_tokens_seen": 0,
1512
+ "num_train_epochs": 1,
1513
+ "save_steps": 100,
1514
+ "total_flos": 0.0,
1515
+ "train_batch_size": 8,
1516
+ "trial_name": null,
1517
+ "trial_params": null
1518
+ }