{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-08,
      "logits/chosen": -1.995415210723877,
      "logits/rejected": -2.0361223220825195,
      "logps/chosen": -475.42401123046875,
      "logps/pi_response": -236.53262329101562,
      "logps/ref_response": -236.53262329101562,
      "logps/rejected": -571.512451171875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.0094504356384277,
      "logits/rejected": -1.9493383169174194,
      "logps/chosen": -398.70062255859375,
      "logps/pi_response": -160.86041259765625,
      "logps/ref_response": -161.05593872070312,
      "logps/rejected": -436.8709411621094,
      "loss": 0.6906,
      "rewards/accuracies": 0.4513888955116272,
      "rewards/chosen": -0.010171439498662949,
      "rewards/margins": 0.0004877760075032711,
      "rewards/rejected": -0.010659217834472656,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -1.9780817031860352,
      "logits/rejected": -1.9359722137451172,
      "logps/chosen": -396.9286193847656,
      "logps/pi_response": -165.99008178710938,
      "logps/ref_response": -170.16830444335938,
      "logps/rejected": -542.5230712890625,
      "loss": 0.6473,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.28707781434059143,
      "rewards/margins": 0.1905716061592102,
      "rewards/rejected": -0.4776495099067688,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -1.929377794265747,
      "logits/rejected": -1.9014297723770142,
      "logps/chosen": -477.72247314453125,
      "logps/pi_response": -175.51304626464844,
      "logps/ref_response": -176.73623657226562,
      "logps/rejected": -558.6651000976562,
      "loss": 0.6794,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.7204787135124207,
      "rewards/margins": 0.3288179636001587,
      "rewards/rejected": -1.0492966175079346,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -1.9873077869415283,
      "logits/rejected": -1.9341981410980225,
      "logps/chosen": -473.9336853027344,
      "logps/pi_response": -187.4553680419922,
      "logps/ref_response": -175.1200408935547,
      "logps/rejected": -595.444091796875,
      "loss": 0.6401,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.7007579803466797,
      "rewards/margins": 0.272124320268631,
      "rewards/rejected": -0.9728823900222778,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -1.8471710681915283,
      "logits/rejected": -1.641099214553833,
      "logps/chosen": -511.49871826171875,
      "logps/pi_response": -183.08493041992188,
      "logps/ref_response": -184.63198852539062,
      "logps/rejected": -632.9375,
      "loss": 0.6017,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.8219456672668457,
      "rewards/margins": 0.4130307137966156,
      "rewards/rejected": -1.2349765300750732,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -0.9633913040161133,
      "logits/rejected": -0.7572700381278992,
      "logps/chosen": -479.69775390625,
      "logps/pi_response": -182.07357788085938,
      "logps/ref_response": -175.98574829101562,
      "logps/rejected": -607.6907958984375,
      "loss": 0.5892,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.6976214051246643,
      "rewards/margins": 0.5892900228500366,
      "rewards/rejected": -1.2869113683700562,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -0.3013351559638977,
      "logits/rejected": 0.12189887464046478,
      "logps/chosen": -462.14190673828125,
      "logps/pi_response": -178.45272827148438,
      "logps/ref_response": -161.18075561523438,
      "logps/rejected": -615.686279296875,
      "loss": 0.5997,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.698739767074585,
      "rewards/margins": 0.6993860006332397,
      "rewards/rejected": -1.3981258869171143,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -0.2273164540529251,
      "logits/rejected": 0.14264746010303497,
      "logps/chosen": -505.594970703125,
      "logps/pi_response": -187.5181121826172,
      "logps/ref_response": -175.85252380371094,
      "logps/rejected": -639.3907470703125,
      "loss": 0.5449,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.9091317057609558,
      "rewards/margins": 0.6041504144668579,
      "rewards/rejected": -1.5132819414138794,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": 0.14409923553466797,
      "logits/rejected": 0.62093585729599,
      "logps/chosen": -543.806640625,
      "logps/pi_response": -203.8004608154297,
      "logps/ref_response": -184.4833526611328,
      "logps/rejected": -670.08984375,
      "loss": 0.5898,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.1904714107513428,
      "rewards/margins": 0.5562640428543091,
      "rewards/rejected": -1.7467355728149414,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": 0.1400291472673416,
      "logits/rejected": 0.6031023263931274,
      "logps/chosen": -544.8568115234375,
      "logps/pi_response": -222.86630249023438,
      "logps/ref_response": -191.95008850097656,
      "logps/rejected": -661.2197265625,
      "loss": 0.566,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.1847233772277832,
      "rewards/margins": 0.5735085010528564,
      "rewards/rejected": -1.7582318782806396,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": 0.13480663299560547,
      "logits/rejected": 0.6124667525291443,
      "logps/chosen": -551.712158203125,
      "logps/pi_response": -201.6997833251953,
      "logps/ref_response": -172.76661682128906,
      "logps/rejected": -627.7642211914062,
      "loss": 0.5799,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.042403221130371,
      "rewards/margins": 0.4938434660434723,
      "rewards/rejected": -1.536246657371521,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": 0.29530060291290283,
      "logits/rejected": 0.561973512172699,
      "logps/chosen": -507.9276428222656,
      "logps/pi_response": -198.93344116210938,
      "logps/ref_response": -166.47335815429688,
      "logps/rejected": -658.5081787109375,
      "loss": 0.5442,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.0466843843460083,
      "rewards/margins": 0.6271711587905884,
      "rewards/rejected": -1.6738557815551758,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": 0.4397956430912018,
      "logits/rejected": 0.7937263250350952,
      "logps/chosen": -523.13525390625,
      "logps/pi_response": -213.9718017578125,
      "logps/ref_response": -178.47569274902344,
      "logps/rejected": -653.7128295898438,
      "loss": 0.5582,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.0310872793197632,
      "rewards/margins": 0.6268772482872009,
      "rewards/rejected": -1.6579644680023193,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": 0.6194955706596375,
      "logits/rejected": 1.0747044086456299,
      "logps/chosen": -514.37744140625,
      "logps/pi_response": -208.5031280517578,
      "logps/ref_response": -176.0931396484375,
      "logps/rejected": -671.9991455078125,
      "loss": 0.5577,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.111130952835083,
      "rewards/margins": 0.6132029294967651,
      "rewards/rejected": -1.7243340015411377,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": 0.08109824359416962,
      "logits/rejected": 0.7494246959686279,
      "logps/chosen": -524.765625,
      "logps/pi_response": -212.7974090576172,
      "logps/ref_response": -180.5318145751953,
      "logps/rejected": -664.6768798828125,
      "loss": 0.5556,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.9752191305160522,
      "rewards/margins": 0.7606547474861145,
      "rewards/rejected": -1.735873818397522,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.5931253643155848,
      "train_runtime": 4224.9045,
      "train_samples_per_second": 4.823,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}