AmberYifan commited on
Commit
3434bec
·
verified ·
1 Parent(s): 77b30eb

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/global_step915/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c3fa2b36b1415c355598292f0e58d07fc38b746a3783a94ffaf12d9d9c69f8
3
+ size 17075366956
last-checkpoint/global_step915/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f61d688d5ffeb48ab98c9c28b0f7f08915d0829cc2fd1579e58f320ca9466c
3
+ size 17075366956
last-checkpoint/global_step915/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ed2a5bfebd11a10b97360df8c3cb76d2d53e184381ff46ce90781b793aa594
3
+ size 17075366956
last-checkpoint/global_step915/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5c491fc8eea51decda17837070dfe2fde7596e0a04f542030437e928d6cd9e9
3
+ size 17075366956
last-checkpoint/global_step915/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a7fd50be01694e49f9de3f8f1272257c8c03f4ad98ad21333c50f043c9f4e2
3
+ size 131892
last-checkpoint/global_step915/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc85537b53b2e8316afc05428c47945418ef5624b9e5a3e7fb0883dd92beafa
3
+ size 131828
last-checkpoint/global_step915/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a36623a59edfd11a4af114aed7acad913054b7c916187ceb13f66f370b5df031
3
+ size 131828
last-checkpoint/global_step915/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a971b41276ea7e04ae399c49141ed5b590cd146d7caaaf0c73534af09b8a01c1
3
+ size 131828
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step610
 
1
+ global_step915
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a12c862d2ef96968d2f0f5e4972cea773cd7db53ef6ee1042bcd403afd8b6cd6
3
  size 4995496656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46ee7aa453e70d10c2d280c1ef35378db840e022726ccec385cbea893281f8f4
3
  size 4995496656
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c672d54f9e6ba1468c38e46dd54bb25368ce833977ea89a029fa0ff0830a3a7e
3
  size 4982953168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32040c4cc65ef27258a1d1054a03cecd7e32cf87cbfb2f7e1ec1d98dfe5455ce
3
  size 4982953168
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9b6b3c0e7322426b4abba14239ff48c798c2c9ee84e366d2619ac8eab0580c6
3
  size 4982953200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf9cd027b02fef79cbb1cd7640a0015d6b62b09c3b174cd3ff9f25758d133911
3
  size 4982953200
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7b16af01a2e16e531a32351ccf6ff3b7ad91f13d7efe31f409eaa7f8f3a62d6
3
  size 2113988336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e5dbff8fcd53770be3cfcdf0dfa0103de5978baec282205d3a2fd64c6fb474
3
  size 2113988336
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2307c03867cef25b5028feb9a23f80e784b9af9a615de13ddca560a6a90fb593
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7cf0d34d60dfda516cf9661904550e2e294e723edd07c25c738f05e8ba92d1
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50218cfaecdd818354e567b7167c13899e3b42297e7d8f58bd7e732cfa547800
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b183324e8227a51a9556d86b2ad893a8c4c52205ed4a737356c6611dac7353
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9814a66b49861f5495b06dae3be12ddf7185b88e2cae1fb808ca9efd99d5807f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac69e994090f4818cb1fa6f6cefa363178552c3c731c6507ff195bcb07fd5bef
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7be93040a65e0a29975f6c70b94418e1fdf88423a50c58aa572141d3c92fbfc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68e61b63402f8afb1f69c960f7944965655dac11e3ccf29919c282f23931f86
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23cddb35265adde8fcd130442bb80eda4e7e58a5537171741839d315438de751
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89cc83db19eba05de12a39437c27831b7029da4dd2494fc1e52d31a689e731bb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 610,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -969,6 +969,472 @@
969
  "eval_samples_per_second": 12.919,
970
  "eval_steps_per_second": 0.461,
971
  "step": 610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
972
  }
973
  ],
974
  "logging_steps": 10,
@@ -983,7 +1449,7 @@
983
  "should_evaluate": false,
984
  "should_log": false,
985
  "should_save": true,
986
- "should_training_stop": false
987
  },
988
  "attributes": {}
989
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 915,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
969
  "eval_samples_per_second": 12.919,
970
  "eval_steps_per_second": 0.461,
971
  "step": 610
972
+ },
973
+ {
974
+ "epoch": 2.0327868852459017,
975
+ "grad_norm": 1.7713226584924222e-05,
976
+ "learning_rate": 1.792223572296476e-07,
977
+ "logits/chosen": 276.0,
978
+ "logits/rejected": 282.0,
979
+ "logps/chosen": -620.0,
980
+ "logps/rejected": -1240.0,
981
+ "loss": 0.0,
982
+ "rewards/accuracies": 1.0,
983
+ "rewards/chosen": 16.5,
984
+ "rewards/margins": 53.75,
985
+ "rewards/rejected": -37.25,
986
+ "step": 620
987
+ },
988
+ {
989
+ "epoch": 2.0655737704918034,
990
+ "grad_norm": 1.0249551855925774e-06,
991
+ "learning_rate": 1.7314702308626974e-07,
992
+ "logits/chosen": 276.0,
993
+ "logits/rejected": 282.0,
994
+ "logps/chosen": -616.0,
995
+ "logps/rejected": -1176.0,
996
+ "loss": 0.0,
997
+ "rewards/accuracies": 1.0,
998
+ "rewards/chosen": 16.25,
999
+ "rewards/margins": 52.25,
1000
+ "rewards/rejected": -36.0,
1001
+ "step": 630
1002
+ },
1003
+ {
1004
+ "epoch": 2.098360655737705,
1005
+ "grad_norm": 4.2707376166338526e-05,
1006
+ "learning_rate": 1.6707168894289186e-07,
1007
+ "logits/chosen": 276.0,
1008
+ "logits/rejected": 282.0,
1009
+ "logps/chosen": -656.0,
1010
+ "logps/rejected": -1192.0,
1011
+ "loss": 0.0,
1012
+ "rewards/accuracies": 1.0,
1013
+ "rewards/chosen": 16.625,
1014
+ "rewards/margins": 53.0,
1015
+ "rewards/rejected": -36.5,
1016
+ "step": 640
1017
+ },
1018
+ {
1019
+ "epoch": 2.1311475409836067,
1020
+ "grad_norm": 0.00034207508221282683,
1021
+ "learning_rate": 1.6099635479951396e-07,
1022
+ "logits/chosen": 276.0,
1023
+ "logits/rejected": 282.0,
1024
+ "logps/chosen": -544.0,
1025
+ "logps/rejected": -1240.0,
1026
+ "loss": 0.0,
1027
+ "rewards/accuracies": 1.0,
1028
+ "rewards/chosen": 13.8125,
1029
+ "rewards/margins": 51.5,
1030
+ "rewards/rejected": -37.75,
1031
+ "step": 650
1032
+ },
1033
+ {
1034
+ "epoch": 2.1639344262295084,
1035
+ "grad_norm": 5.419595774676902e-09,
1036
+ "learning_rate": 1.5492102065613608e-07,
1037
+ "logits/chosen": 276.0,
1038
+ "logits/rejected": 282.0,
1039
+ "logps/chosen": -482.0,
1040
+ "logps/rejected": -1200.0,
1041
+ "loss": 0.0,
1042
+ "rewards/accuracies": 1.0,
1043
+ "rewards/chosen": 12.1875,
1044
+ "rewards/margins": 49.0,
1045
+ "rewards/rejected": -37.0,
1046
+ "step": 660
1047
+ },
1048
+ {
1049
+ "epoch": 2.19672131147541,
1050
+ "grad_norm": 4.851068352220634e-11,
1051
+ "learning_rate": 1.488456865127582e-07,
1052
+ "logits/chosen": 276.0,
1053
+ "logits/rejected": 282.0,
1054
+ "logps/chosen": -752.0,
1055
+ "logps/rejected": -1288.0,
1056
+ "loss": 0.0,
1057
+ "rewards/accuracies": 1.0,
1058
+ "rewards/chosen": 19.0,
1059
+ "rewards/margins": 58.5,
1060
+ "rewards/rejected": -39.25,
1061
+ "step": 670
1062
+ },
1063
+ {
1064
+ "epoch": 2.2295081967213113,
1065
+ "grad_norm": 1.7612628984927503e-10,
1066
+ "learning_rate": 1.427703523693803e-07,
1067
+ "logits/chosen": 276.0,
1068
+ "logits/rejected": 282.0,
1069
+ "logps/chosen": -576.0,
1070
+ "logps/rejected": -1192.0,
1071
+ "loss": 0.0,
1072
+ "rewards/accuracies": 1.0,
1073
+ "rewards/chosen": 15.5625,
1074
+ "rewards/margins": 53.0,
1075
+ "rewards/rejected": -37.25,
1076
+ "step": 680
1077
+ },
1078
+ {
1079
+ "epoch": 2.262295081967213,
1080
+ "grad_norm": 3.275622407668139e-08,
1081
+ "learning_rate": 1.3669501822600243e-07,
1082
+ "logits/chosen": 276.0,
1083
+ "logits/rejected": 282.0,
1084
+ "logps/chosen": -612.0,
1085
+ "logps/rejected": -1264.0,
1086
+ "loss": 0.0,
1087
+ "rewards/accuracies": 1.0,
1088
+ "rewards/chosen": 16.125,
1089
+ "rewards/margins": 54.75,
1090
+ "rewards/rejected": -38.5,
1091
+ "step": 690
1092
+ },
1093
+ {
1094
+ "epoch": 2.2950819672131146,
1095
+ "grad_norm": 0.0013215912863667734,
1096
+ "learning_rate": 1.3061968408262452e-07,
1097
+ "logits/chosen": 276.0,
1098
+ "logits/rejected": 282.0,
1099
+ "logps/chosen": -580.0,
1100
+ "logps/rejected": -1232.0,
1101
+ "loss": 0.0,
1102
+ "rewards/accuracies": 1.0,
1103
+ "rewards/chosen": 14.25,
1104
+ "rewards/margins": 51.0,
1105
+ "rewards/rejected": -36.75,
1106
+ "step": 700
1107
+ },
1108
+ {
1109
+ "epoch": 2.3278688524590163,
1110
+ "grad_norm": 0.00041899600497659085,
1111
+ "learning_rate": 1.2454434993924665e-07,
1112
+ "logits/chosen": 276.0,
1113
+ "logits/rejected": 282.0,
1114
+ "logps/chosen": -516.0,
1115
+ "logps/rejected": -1256.0,
1116
+ "loss": 0.0,
1117
+ "rewards/accuracies": 1.0,
1118
+ "rewards/chosen": 13.625,
1119
+ "rewards/margins": 52.5,
1120
+ "rewards/rejected": -38.75,
1121
+ "step": 710
1122
+ },
1123
+ {
1124
+ "epoch": 2.360655737704918,
1125
+ "grad_norm": 7.351246552401219e-09,
1126
+ "learning_rate": 1.1846901579586877e-07,
1127
+ "logits/chosen": 276.0,
1128
+ "logits/rejected": 282.0,
1129
+ "logps/chosen": -584.0,
1130
+ "logps/rejected": -1208.0,
1131
+ "loss": 0.0,
1132
+ "rewards/accuracies": 1.0,
1133
+ "rewards/chosen": 14.5,
1134
+ "rewards/margins": 51.5,
1135
+ "rewards/rejected": -37.0,
1136
+ "step": 720
1137
+ },
1138
+ {
1139
+ "epoch": 2.3934426229508197,
1140
+ "grad_norm": 1.2823458114586286e-12,
1141
+ "learning_rate": 1.1239368165249088e-07,
1142
+ "logits/chosen": 276.0,
1143
+ "logits/rejected": 282.0,
1144
+ "logps/chosen": -628.0,
1145
+ "logps/rejected": -1256.0,
1146
+ "loss": 0.0,
1147
+ "rewards/accuracies": 1.0,
1148
+ "rewards/chosen": 14.8125,
1149
+ "rewards/margins": 53.5,
1150
+ "rewards/rejected": -38.75,
1151
+ "step": 730
1152
+ },
1153
+ {
1154
+ "epoch": 2.4262295081967213,
1155
+ "grad_norm": 2.2183065144506676e-05,
1156
+ "learning_rate": 1.0631834750911299e-07,
1157
+ "logits/chosen": 276.0,
1158
+ "logits/rejected": 282.0,
1159
+ "logps/chosen": -672.0,
1160
+ "logps/rejected": -1272.0,
1161
+ "loss": 0.0,
1162
+ "rewards/accuracies": 1.0,
1163
+ "rewards/chosen": 15.25,
1164
+ "rewards/margins": 54.5,
1165
+ "rewards/rejected": -39.0,
1166
+ "step": 740
1167
+ },
1168
+ {
1169
+ "epoch": 2.459016393442623,
1170
+ "grad_norm": 9.496741561479611e-10,
1171
+ "learning_rate": 1.0024301336573512e-07,
1172
+ "logits/chosen": 276.0,
1173
+ "logits/rejected": 282.0,
1174
+ "logps/chosen": -540.0,
1175
+ "logps/rejected": -1216.0,
1176
+ "loss": 0.0,
1177
+ "rewards/accuracies": 1.0,
1178
+ "rewards/chosen": 13.4375,
1179
+ "rewards/margins": 52.0,
1180
+ "rewards/rejected": -38.5,
1181
+ "step": 750
1182
+ },
1183
+ {
1184
+ "epoch": 2.4918032786885247,
1185
+ "grad_norm": 6.428681857091136e-08,
1186
+ "learning_rate": 9.416767922235723e-08,
1187
+ "logits/chosen": 276.0,
1188
+ "logits/rejected": 282.0,
1189
+ "logps/chosen": -676.0,
1190
+ "logps/rejected": -1232.0,
1191
+ "loss": 0.0,
1192
+ "rewards/accuracies": 1.0,
1193
+ "rewards/chosen": 17.0,
1194
+ "rewards/margins": 54.5,
1195
+ "rewards/rejected": -37.75,
1196
+ "step": 760
1197
+ },
1198
+ {
1199
+ "epoch": 2.5245901639344264,
1200
+ "grad_norm": 2.0982264237925454e-08,
1201
+ "learning_rate": 8.809234507897934e-08,
1202
+ "logits/chosen": 276.0,
1203
+ "logits/rejected": 282.0,
1204
+ "logps/chosen": -672.0,
1205
+ "logps/rejected": -1184.0,
1206
+ "loss": 0.0,
1207
+ "rewards/accuracies": 1.0,
1208
+ "rewards/chosen": 16.25,
1209
+ "rewards/margins": 53.25,
1210
+ "rewards/rejected": -37.0,
1211
+ "step": 770
1212
+ },
1213
+ {
1214
+ "epoch": 2.557377049180328,
1215
+ "grad_norm": 9.460545250549385e-16,
1216
+ "learning_rate": 8.201701093560146e-08,
1217
+ "logits/chosen": 276.0,
1218
+ "logits/rejected": 282.0,
1219
+ "logps/chosen": -676.0,
1220
+ "logps/rejected": -1208.0,
1221
+ "loss": 0.0,
1222
+ "rewards/accuracies": 1.0,
1223
+ "rewards/chosen": 14.9375,
1224
+ "rewards/margins": 52.5,
1225
+ "rewards/rejected": -37.75,
1226
+ "step": 780
1227
+ },
1228
+ {
1229
+ "epoch": 2.5901639344262293,
1230
+ "grad_norm": 4.793838989333602e-09,
1231
+ "learning_rate": 7.594167679222357e-08,
1232
+ "logits/chosen": 276.0,
1233
+ "logits/rejected": 282.0,
1234
+ "logps/chosen": -556.0,
1235
+ "logps/rejected": -1272.0,
1236
+ "loss": 0.0,
1237
+ "rewards/accuracies": 1.0,
1238
+ "rewards/chosen": 14.25,
1239
+ "rewards/margins": 53.75,
1240
+ "rewards/rejected": -39.5,
1241
+ "step": 790
1242
+ },
1243
+ {
1244
+ "epoch": 2.6229508196721314,
1245
+ "grad_norm": 6.169679361669359e-11,
1246
+ "learning_rate": 6.986634264884568e-08,
1247
+ "logits/chosen": 276.0,
1248
+ "logits/rejected": 282.0,
1249
+ "logps/chosen": -612.0,
1250
+ "logps/rejected": -1200.0,
1251
+ "loss": 0.0,
1252
+ "rewards/accuracies": 1.0,
1253
+ "rewards/chosen": 15.3125,
1254
+ "rewards/margins": 53.0,
1255
+ "rewards/rejected": -37.75,
1256
+ "step": 800
1257
+ },
1258
+ {
1259
+ "epoch": 2.6557377049180326,
1260
+ "grad_norm": 8.064196022250346e-08,
1261
+ "learning_rate": 6.37910085054678e-08,
1262
+ "logits/chosen": 276.0,
1263
+ "logits/rejected": 282.0,
1264
+ "logps/chosen": -656.0,
1265
+ "logps/rejected": -1216.0,
1266
+ "loss": 0.0,
1267
+ "rewards/accuracies": 1.0,
1268
+ "rewards/chosen": 16.875,
1269
+ "rewards/margins": 54.75,
1270
+ "rewards/rejected": -37.75,
1271
+ "step": 810
1272
+ },
1273
+ {
1274
+ "epoch": 2.6885245901639343,
1275
+ "grad_norm": 1.1139956025424588e-07,
1276
+ "learning_rate": 5.771567436208991e-08,
1277
+ "logits/chosen": 276.0,
1278
+ "logits/rejected": 282.0,
1279
+ "logps/chosen": -580.0,
1280
+ "logps/rejected": -1224.0,
1281
+ "loss": 0.0,
1282
+ "rewards/accuracies": 1.0,
1283
+ "rewards/chosen": 15.3125,
1284
+ "rewards/margins": 52.75,
1285
+ "rewards/rejected": -37.5,
1286
+ "step": 820
1287
+ },
1288
+ {
1289
+ "epoch": 2.721311475409836,
1290
+ "grad_norm": 1.8351283154166078e-11,
1291
+ "learning_rate": 5.164034021871203e-08,
1292
+ "logits/chosen": 276.0,
1293
+ "logits/rejected": 282.0,
1294
+ "logps/chosen": -648.0,
1295
+ "logps/rejected": -1216.0,
1296
+ "loss": 0.0,
1297
+ "rewards/accuracies": 1.0,
1298
+ "rewards/chosen": 15.25,
1299
+ "rewards/margins": 52.5,
1300
+ "rewards/rejected": -37.25,
1301
+ "step": 830
1302
+ },
1303
+ {
1304
+ "epoch": 2.7540983606557377,
1305
+ "grad_norm": 5.336889215608692e-08,
1306
+ "learning_rate": 4.5565006075334144e-08,
1307
+ "logits/chosen": 276.0,
1308
+ "logits/rejected": 282.0,
1309
+ "logps/chosen": -664.0,
1310
+ "logps/rejected": -1240.0,
1311
+ "loss": 0.0,
1312
+ "rewards/accuracies": 1.0,
1313
+ "rewards/chosen": 16.25,
1314
+ "rewards/margins": 54.75,
1315
+ "rewards/rejected": -38.5,
1316
+ "step": 840
1317
+ },
1318
+ {
1319
+ "epoch": 2.7868852459016393,
1320
+ "grad_norm": 1.3681049107441818e-10,
1321
+ "learning_rate": 3.9489671931956255e-08,
1322
+ "logits/chosen": 276.0,
1323
+ "logits/rejected": 282.0,
1324
+ "logps/chosen": -680.0,
1325
+ "logps/rejected": -1240.0,
1326
+ "loss": 0.0,
1327
+ "rewards/accuracies": 1.0,
1328
+ "rewards/chosen": 16.125,
1329
+ "rewards/margins": 55.25,
1330
+ "rewards/rejected": -39.25,
1331
+ "step": 850
1332
+ },
1333
+ {
1334
+ "epoch": 2.819672131147541,
1335
+ "grad_norm": 2.7039086242151e-05,
1336
+ "learning_rate": 3.341433778857837e-08,
1337
+ "logits/chosen": 274.0,
1338
+ "logits/rejected": 282.0,
1339
+ "logps/chosen": -512.0,
1340
+ "logps/rejected": -1240.0,
1341
+ "loss": 0.0,
1342
+ "rewards/accuracies": 1.0,
1343
+ "rewards/chosen": 13.0625,
1344
+ "rewards/margins": 51.5,
1345
+ "rewards/rejected": -38.5,
1346
+ "step": 860
1347
+ },
1348
+ {
1349
+ "epoch": 2.8524590163934427,
1350
+ "grad_norm": 4.1661882432785415e-06,
1351
+ "learning_rate": 2.7339003645200486e-08,
1352
+ "logits/chosen": 276.0,
1353
+ "logits/rejected": 280.0,
1354
+ "logps/chosen": -588.0,
1355
+ "logps/rejected": -1240.0,
1356
+ "loss": 0.0,
1357
+ "rewards/accuracies": 1.0,
1358
+ "rewards/chosen": 13.5,
1359
+ "rewards/margins": 52.0,
1360
+ "rewards/rejected": -38.5,
1361
+ "step": 870
1362
+ },
1363
+ {
1364
+ "epoch": 2.8852459016393444,
1365
+ "grad_norm": 4.794762374205389e-10,
1366
+ "learning_rate": 2.12636695018226e-08,
1367
+ "logits/chosen": 274.0,
1368
+ "logits/rejected": 282.0,
1369
+ "logps/chosen": -608.0,
1370
+ "logps/rejected": -1208.0,
1371
+ "loss": 0.0,
1372
+ "rewards/accuracies": 1.0,
1373
+ "rewards/chosen": 15.3125,
1374
+ "rewards/margins": 53.0,
1375
+ "rewards/rejected": -37.75,
1376
+ "step": 880
1377
+ },
1378
+ {
1379
+ "epoch": 2.918032786885246,
1380
+ "grad_norm": 1.9558841209760312e-05,
1381
+ "learning_rate": 1.5188335358444714e-08,
1382
+ "logits/chosen": 276.0,
1383
+ "logits/rejected": 282.0,
1384
+ "logps/chosen": -560.0,
1385
+ "logps/rejected": -1280.0,
1386
+ "loss": 0.0,
1387
+ "rewards/accuracies": 1.0,
1388
+ "rewards/chosen": 14.0,
1389
+ "rewards/margins": 54.0,
1390
+ "rewards/rejected": -40.0,
1391
+ "step": 890
1392
+ },
1393
+ {
1394
+ "epoch": 2.9508196721311473,
1395
+ "grad_norm": 0.00011639924338885354,
1396
+ "learning_rate": 9.113001215066828e-09,
1397
+ "logits/chosen": 274.0,
1398
+ "logits/rejected": 282.0,
1399
+ "logps/chosen": -492.0,
1400
+ "logps/rejected": -1232.0,
1401
+ "loss": 0.0,
1402
+ "rewards/accuracies": 1.0,
1403
+ "rewards/chosen": 13.375,
1404
+ "rewards/margins": 52.0,
1405
+ "rewards/rejected": -38.5,
1406
+ "step": 900
1407
+ },
1408
+ {
1409
+ "epoch": 2.9836065573770494,
1410
+ "grad_norm": 9.409851507715962e-11,
1411
+ "learning_rate": 3.0376670716889426e-09,
1412
+ "logits/chosen": 276.0,
1413
+ "logits/rejected": 282.0,
1414
+ "logps/chosen": -536.0,
1415
+ "logps/rejected": -1296.0,
1416
+ "loss": 0.0,
1417
+ "rewards/accuracies": 1.0,
1418
+ "rewards/chosen": 13.375,
1419
+ "rewards/margins": 54.0,
1420
+ "rewards/rejected": -40.75,
1421
+ "step": 910
1422
+ },
1423
+ {
1424
+ "epoch": 3.0,
1425
+ "eval_logits/chosen": 276.0,
1426
+ "eval_logits/rejected": 282.0,
1427
+ "eval_logps/chosen": -676.0,
1428
+ "eval_logps/rejected": -1264.0,
1429
+ "eval_loss": 1.2766672341513186e-07,
1430
+ "eval_rewards/accuracies": 1.0,
1431
+ "eval_rewards/chosen": 17.5,
1432
+ "eval_rewards/margins": 56.5,
1433
+ "eval_rewards/rejected": -39.0,
1434
+ "eval_runtime": 17.7894,
1435
+ "eval_samples_per_second": 11.018,
1436
+ "eval_steps_per_second": 0.393,
1437
+ "step": 915
1438
  }
1439
  ],
1440
  "logging_steps": 10,
 
1449
  "should_evaluate": false,
1450
  "should_log": false,
1451
  "should_save": true,
1452
+ "should_training_stop": true
1453
  },
1454
  "attributes": {}
1455
  }