|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9765925925925925, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 1899.5159912109375, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": -1.0597587823867798, |
|
"log_odds_ratio": -11.004037857055664, |
|
"logps/chosen": -22.70572280883789, |
|
"logps/rejected": -21.646194458007812, |
|
"loss": 526.7425, |
|
"nll_loss": 9.840391159057617, |
|
"po_loss": 6.620314121246338, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -4.541144847869873, |
|
"rewards/margins": -0.21190576255321503, |
|
"rewards/rejected": -4.3292388916015625, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 558.7034912109375, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": -3.1177897453308105, |
|
"log_odds_ratio": -11.049609184265137, |
|
"logps/chosen": -22.010894775390625, |
|
"logps/rejected": -18.89345359802246, |
|
"loss": 510.6521, |
|
"nll_loss": 8.767520904541016, |
|
"po_loss": 7.1903533935546875, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -4.402178764343262, |
|
"rewards/margins": -0.6234878897666931, |
|
"rewards/rejected": -3.778690814971924, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 728.9421997070312, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": -1.2930371761322021, |
|
"log_odds_ratio": -11.378229141235352, |
|
"logps/chosen": -22.08417320251465, |
|
"logps/rejected": -20.78976821899414, |
|
"loss": 515.0328, |
|
"nll_loss": 8.051271438598633, |
|
"po_loss": 8.043503761291504, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": -4.416834831237793, |
|
"rewards/margins": -0.25888124108314514, |
|
"rewards/rejected": -4.157953262329102, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 9066.4345703125, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": -4.506316184997559, |
|
"log_odds_ratio": -9.819967269897461, |
|
"logps/chosen": -17.74567222595215, |
|
"logps/rejected": -13.241083145141602, |
|
"loss": 425.3085, |
|
"nll_loss": 6.465292453765869, |
|
"po_loss": 6.825601100921631, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -3.5491347312927246, |
|
"rewards/margins": -0.9009180068969727, |
|
"rewards/rejected": -2.648216724395752, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 602.6565551757812, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": -0.16372856497764587, |
|
"log_odds_ratio": -2.0679056644439697, |
|
"logps/chosen": -4.469531536102295, |
|
"logps/rejected": -4.290602684020996, |
|
"loss": 123.0824, |
|
"nll_loss": 2.8301875591278076, |
|
"po_loss": 1.0161364078521729, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.8939064145088196, |
|
"rewards/margins": -0.03578581288456917, |
|
"rewards/rejected": -0.8581206202507019, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 375.20941162109375, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.037730950862169266, |
|
"log_odds_ratio": -0.9092292785644531, |
|
"logps/chosen": -1.8757789134979248, |
|
"logps/rejected": -1.9118578433990479, |
|
"loss": 66.581, |
|
"nll_loss": 2.074733018875122, |
|
"po_loss": 0.00592245627194643, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.3751557767391205, |
|
"rewards/margins": 0.007215849123895168, |
|
"rewards/rejected": -0.38237160444259644, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 288.1863098144531, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": 0.2057056725025177, |
|
"log_odds_ratio": -0.8370769619941711, |
|
"logps/chosen": -1.7295191287994385, |
|
"logps/rejected": -1.9166914224624634, |
|
"loss": 61.7867, |
|
"nll_loss": 1.9286344051361084, |
|
"po_loss": 0.002200313610956073, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": -0.34590381383895874, |
|
"rewards/margins": 0.03743448108434677, |
|
"rewards/rejected": -0.3833382725715637, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 85.25788116455078, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.2474113404750824, |
|
"log_odds_ratio": -0.7440091967582703, |
|
"logps/chosen": -1.5026391744613647, |
|
"logps/rejected": -1.7087732553482056, |
|
"loss": 57.7528, |
|
"nll_loss": 1.8044459819793701, |
|
"po_loss": 0.0003297007642686367, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.3005278706550598, |
|
"rewards/margins": 0.04122680425643921, |
|
"rewards/rejected": -0.34175464510917664, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 101.17169189453125, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 0.20201453566551208, |
|
"log_odds_ratio": -0.7342582941055298, |
|
"logps/chosen": -1.3815350532531738, |
|
"logps/rejected": -1.5576066970825195, |
|
"loss": 53.7768, |
|
"nll_loss": 1.680175542831421, |
|
"po_loss": 0.00034963697544299066, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -0.27630701661109924, |
|
"rewards/margins": 0.03521432727575302, |
|
"rewards/rejected": -0.31152135133743286, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 69.27424621582031, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.24245789647102356, |
|
"log_odds_ratio": -0.700467586517334, |
|
"logps/chosen": -1.2549350261688232, |
|
"logps/rejected": -1.4581347703933716, |
|
"loss": 49.7115, |
|
"nll_loss": 1.5532619953155518, |
|
"po_loss": 0.00022162115783430636, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.2509870231151581, |
|
"rewards/margins": 0.040639933198690414, |
|
"rewards/rejected": -0.2916269600391388, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 59.69867706298828, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 0.2079828977584839, |
|
"log_odds_ratio": -0.728143036365509, |
|
"logps/chosen": -1.2705906629562378, |
|
"logps/rejected": -1.4351913928985596, |
|
"loss": 50.3722, |
|
"nll_loss": 1.5738521814346313, |
|
"po_loss": 0.0002790199941955507, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.2541181445121765, |
|
"rewards/margins": 0.03292013332247734, |
|
"rewards/rejected": -0.28703826665878296, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 164.81796264648438, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.1613532155752182, |
|
"log_odds_ratio": -0.7294767498970032, |
|
"logps/chosen": -1.2667067050933838, |
|
"logps/rejected": -1.40398108959198, |
|
"loss": 50.8378, |
|
"nll_loss": 1.5884637832641602, |
|
"po_loss": 0.00021803542040288448, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.25334134697914124, |
|
"rewards/margins": 0.027454886585474014, |
|
"rewards/rejected": -0.28079622983932495, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 178.92111206054688, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.21860842406749725, |
|
"log_odds_ratio": -0.7070624232292175, |
|
"logps/chosen": -1.2478413581848145, |
|
"logps/rejected": -1.4164764881134033, |
|
"loss": 48.7673, |
|
"nll_loss": 1.5237150192260742, |
|
"po_loss": 0.00026398617774248123, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.24956829845905304, |
|
"rewards/margins": 0.033726997673511505, |
|
"rewards/rejected": -0.28329527378082275, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 69.5670166015625, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.31238844990730286, |
|
"log_odds_ratio": -0.6625837087631226, |
|
"logps/chosen": -1.1966934204101562, |
|
"logps/rejected": -1.4487974643707275, |
|
"loss": 48.3884, |
|
"nll_loss": 1.5121018886566162, |
|
"po_loss": 3.584091246011667e-05, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.239338681101799, |
|
"rewards/margins": 0.050420790910720825, |
|
"rewards/rejected": -0.28975948691368103, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 129.68734741210938, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 0.262991726398468, |
|
"log_odds_ratio": -0.6946216225624084, |
|
"logps/chosen": -1.1699544191360474, |
|
"logps/rejected": -1.3726341724395752, |
|
"loss": 47.6136, |
|
"nll_loss": 1.487747311592102, |
|
"po_loss": 0.0001775487035047263, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.23399090766906738, |
|
"rewards/margins": 0.04053593799471855, |
|
"rewards/rejected": -0.27452683448791504, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 111.74764251708984, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 0.17025665938854218, |
|
"log_odds_ratio": -0.7084556818008423, |
|
"logps/chosen": -1.1360405683517456, |
|
"logps/rejected": -1.2656519412994385, |
|
"loss": 45.6908, |
|
"nll_loss": 1.4278182983398438, |
|
"po_loss": 1.782720755727496e-05, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.22720813751220703, |
|
"rewards/margins": 0.0259222574532032, |
|
"rewards/rejected": -0.25313037633895874, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 79.55937957763672, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 0.1675080806016922, |
|
"log_odds_ratio": -0.6960164904594421, |
|
"logps/chosen": -1.117205262184143, |
|
"logps/rejected": -1.2469722032546997, |
|
"loss": 46.5408, |
|
"nll_loss": 1.454362154006958, |
|
"po_loss": 3.7405981856863946e-05, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.22344104945659637, |
|
"rewards/margins": 0.025953417643904686, |
|
"rewards/rejected": -0.2493944615125656, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 62.937400817871094, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.28352928161621094, |
|
"log_odds_ratio": -0.655947744846344, |
|
"logps/chosen": -1.0310251712799072, |
|
"logps/rejected": -1.2295501232147217, |
|
"loss": 44.1915, |
|
"nll_loss": 1.3809893131256104, |
|
"po_loss": -6.17957175563788e-06, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.20620505511760712, |
|
"rewards/margins": 0.03970498591661453, |
|
"rewards/rejected": -0.24591004848480225, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 206.94097900390625, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.1304076611995697, |
|
"log_odds_ratio": -0.7256454229354858, |
|
"logps/chosen": -1.1198256015777588, |
|
"logps/rejected": -1.2201364040374756, |
|
"loss": 46.0187, |
|
"nll_loss": 1.4380216598510742, |
|
"po_loss": 6.208533159224316e-05, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.223965123295784, |
|
"rewards/margins": 0.020062167197465897, |
|
"rewards/rejected": -0.2440272867679596, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 120.00648498535156, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.20910552144050598, |
|
"log_odds_ratio": -0.6791337132453918, |
|
"logps/chosen": -1.0920485258102417, |
|
"logps/rejected": -1.2438485622406006, |
|
"loss": 44.6104, |
|
"nll_loss": 1.3940393924713135, |
|
"po_loss": 3.593241126509383e-05, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.2184097319841385, |
|
"rewards/margins": 0.030360007658600807, |
|
"rewards/rejected": -0.24876973032951355, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 68.66105651855469, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.2536667287349701, |
|
"log_odds_ratio": -0.6884052157402039, |
|
"logps/chosen": -1.0762526988983154, |
|
"logps/rejected": -1.289530873298645, |
|
"loss": 45.6209, |
|
"nll_loss": 1.4256471395492554, |
|
"po_loss": 6.704734005325008e-06, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -0.21525056660175323, |
|
"rewards/margins": 0.042655594646930695, |
|
"rewards/rejected": -0.25790613889694214, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.037925925925926, |
|
"grad_norm": 43.638755798339844, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.4813327491283417, |
|
"log_odds_ratio": -0.571753203868866, |
|
"logps/chosen": -0.9467726349830627, |
|
"logps/rejected": -1.2524032592773438, |
|
"loss": 34.5558, |
|
"nll_loss": 1.2083364725112915, |
|
"po_loss": -9.182094072457403e-05, |
|
"rewards/accuracies": 0.6958041787147522, |
|
"rewards/chosen": -0.1893545240163803, |
|
"rewards/margins": 0.06112610921263695, |
|
"rewards/rejected": -0.25048065185546875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0853333333333333, |
|
"grad_norm": 66.49714660644531, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.6769129037857056, |
|
"log_odds_ratio": -0.5279095768928528, |
|
"logps/chosen": -0.8538272976875305, |
|
"logps/rejected": -1.2733014822006226, |
|
"loss": 36.8982, |
|
"nll_loss": 1.1533045768737793, |
|
"po_loss": -0.00023453465837519616, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1707654595375061, |
|
"rewards/margins": 0.08389485627412796, |
|
"rewards/rejected": -0.25466030836105347, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1327407407407408, |
|
"grad_norm": 63.25371551513672, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 0.5953308343887329, |
|
"log_odds_ratio": -0.5498973727226257, |
|
"logps/chosen": -0.8752245903015137, |
|
"logps/rejected": -1.2550878524780273, |
|
"loss": 37.9873, |
|
"nll_loss": 1.1874502897262573, |
|
"po_loss": -0.000346492015523836, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.17504490911960602, |
|
"rewards/margins": 0.07597266137599945, |
|
"rewards/rejected": -0.25101757049560547, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1801481481481482, |
|
"grad_norm": 55.93903732299805, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 0.576469361782074, |
|
"log_odds_ratio": -0.5385292768478394, |
|
"logps/chosen": -0.9115549921989441, |
|
"logps/rejected": -1.2783465385437012, |
|
"loss": 37.1419, |
|
"nll_loss": 1.161022424697876, |
|
"po_loss": -0.00033691470162011683, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.18231101334095, |
|
"rewards/margins": 0.07335831224918365, |
|
"rewards/rejected": -0.2556692957878113, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2275555555555555, |
|
"grad_norm": 48.382877349853516, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.6674849390983582, |
|
"log_odds_ratio": -0.5325134992599487, |
|
"logps/chosen": -0.8701179623603821, |
|
"logps/rejected": -1.2853254079818726, |
|
"loss": 37.2066, |
|
"nll_loss": 1.1643040180206299, |
|
"po_loss": -0.0015986410435289145, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.1740236133337021, |
|
"rewards/margins": 0.0830414891242981, |
|
"rewards/rejected": -0.2570651173591614, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.274962962962963, |
|
"grad_norm": 52.023677825927734, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 0.6190892457962036, |
|
"log_odds_ratio": -0.5239226222038269, |
|
"logps/chosen": -0.8530964851379395, |
|
"logps/rejected": -1.225818395614624, |
|
"loss": 36.3779, |
|
"nll_loss": 1.1369173526763916, |
|
"po_loss": -0.0001083976385416463, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.17061929404735565, |
|
"rewards/margins": 0.07454435527324677, |
|
"rewards/rejected": -0.24516364932060242, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3223703703703704, |
|
"grad_norm": 66.0684585571289, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 0.6582817435264587, |
|
"log_odds_ratio": -0.535703718662262, |
|
"logps/chosen": -0.884809672832489, |
|
"logps/rejected": -1.3024781942367554, |
|
"loss": 37.8405, |
|
"nll_loss": 1.1836440563201904, |
|
"po_loss": -0.0011288595851510763, |
|
"rewards/accuracies": 0.7406250238418579, |
|
"rewards/chosen": -0.17696192860603333, |
|
"rewards/margins": 0.08353371918201447, |
|
"rewards/rejected": -0.2604956328868866, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3697777777777778, |
|
"grad_norm": 94.56107330322266, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 0.5688942670822144, |
|
"log_odds_ratio": -0.5570982694625854, |
|
"logps/chosen": -0.8914452791213989, |
|
"logps/rejected": -1.2537453174591064, |
|
"loss": 38.2471, |
|
"nll_loss": 1.1957530975341797, |
|
"po_loss": -0.0005301563069224358, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.17828908562660217, |
|
"rewards/margins": 0.07246001064777374, |
|
"rewards/rejected": -0.25074905157089233, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.417185185185185, |
|
"grad_norm": 58.81022262573242, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 0.6168826222419739, |
|
"log_odds_ratio": -0.534443736076355, |
|
"logps/chosen": -0.8365335464477539, |
|
"logps/rejected": -1.2264480590820312, |
|
"loss": 35.7093, |
|
"nll_loss": 1.1160484552383423, |
|
"po_loss": -0.00013402634067460895, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.16730670630931854, |
|
"rewards/margins": 0.07798293977975845, |
|
"rewards/rejected": -0.245289608836174, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4645925925925927, |
|
"grad_norm": 46.942298889160156, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 0.6257598400115967, |
|
"log_odds_ratio": -0.5429662466049194, |
|
"logps/chosen": -0.8669716119766235, |
|
"logps/rejected": -1.270719289779663, |
|
"loss": 36.8711, |
|
"nll_loss": 1.152681589126587, |
|
"po_loss": -0.0004589153977576643, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1733943372964859, |
|
"rewards/margins": 0.0807495266199112, |
|
"rewards/rejected": -0.2541438639163971, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 48.96784591674805, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 0.564893364906311, |
|
"log_odds_ratio": -0.5655848383903503, |
|
"logps/chosen": -0.8240112066268921, |
|
"logps/rejected": -1.1826220750808716, |
|
"loss": 35.9479, |
|
"nll_loss": 1.1237273216247559, |
|
"po_loss": -0.0003545045619830489, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.16480223834514618, |
|
"rewards/margins": 0.07172217220067978, |
|
"rewards/rejected": -0.23652443289756775, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5594074074074074, |
|
"grad_norm": 43.50642013549805, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.6325685977935791, |
|
"log_odds_ratio": -0.5345262885093689, |
|
"logps/chosen": -0.8210509419441223, |
|
"logps/rejected": -1.206971526145935, |
|
"loss": 36.8652, |
|
"nll_loss": 1.1524779796600342, |
|
"po_loss": -0.00043968428508378565, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.16421017050743103, |
|
"rewards/margins": 0.07718412578105927, |
|
"rewards/rejected": -0.2413943111896515, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6068148148148147, |
|
"grad_norm": 51.71367645263672, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 0.6004430055618286, |
|
"log_odds_ratio": -0.535349428653717, |
|
"logps/chosen": -0.8440070152282715, |
|
"logps/rejected": -1.230797290802002, |
|
"loss": 36.4736, |
|
"nll_loss": 1.1400222778320312, |
|
"po_loss": -0.0002236413274658844, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.16880139708518982, |
|
"rewards/margins": 0.07735804468393326, |
|
"rewards/rejected": -0.24615943431854248, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6542222222222223, |
|
"grad_norm": 48.140621185302734, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.6727933287620544, |
|
"log_odds_ratio": -0.5192316174507141, |
|
"logps/chosen": -0.83983314037323, |
|
"logps/rejected": -1.2668081521987915, |
|
"loss": 35.4802, |
|
"nll_loss": 1.1090071201324463, |
|
"po_loss": -0.00025099312188103795, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.16796663403511047, |
|
"rewards/margins": 0.08539502322673798, |
|
"rewards/rejected": -0.25336164236068726, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7016296296296296, |
|
"grad_norm": 42.03474044799805, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 0.5798546671867371, |
|
"log_odds_ratio": -0.5439001321792603, |
|
"logps/chosen": -0.8370186686515808, |
|
"logps/rejected": -1.1941139698028564, |
|
"loss": 36.0393, |
|
"nll_loss": 1.1264227628707886, |
|
"po_loss": -0.000195543936570175, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.16740374267101288, |
|
"rewards/margins": 0.07141904532909393, |
|
"rewards/rejected": -0.2388227880001068, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.749037037037037, |
|
"grad_norm": 38.88856887817383, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 0.49664992094039917, |
|
"log_odds_ratio": -0.5907926559448242, |
|
"logps/chosen": -0.8930248022079468, |
|
"logps/rejected": -1.2256276607513428, |
|
"loss": 36.1086, |
|
"nll_loss": 1.1285018920898438, |
|
"po_loss": -0.00010755965195130557, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.17860497534275055, |
|
"rewards/margins": 0.0665205791592598, |
|
"rewards/rejected": -0.24512553215026855, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.7964444444444445, |
|
"grad_norm": 46.19615936279297, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 0.6304856538772583, |
|
"log_odds_ratio": -0.5355208516120911, |
|
"logps/chosen": -0.8472241163253784, |
|
"logps/rejected": -1.229255199432373, |
|
"loss": 36.4206, |
|
"nll_loss": 1.1382454633712769, |
|
"po_loss": -0.00010128335270565003, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.16944481432437897, |
|
"rewards/margins": 0.07640622556209564, |
|
"rewards/rejected": -0.2458510398864746, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8438518518518519, |
|
"grad_norm": 73.05342102050781, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 0.6382489204406738, |
|
"log_odds_ratio": -0.5185028910636902, |
|
"logps/chosen": -0.8499706983566284, |
|
"logps/rejected": -1.2512637376785278, |
|
"loss": 35.7376, |
|
"nll_loss": 1.1169432401657104, |
|
"po_loss": -0.00014365585229825228, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.16999416053295135, |
|
"rewards/margins": 0.08025859296321869, |
|
"rewards/rejected": -0.25025275349617004, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8912592592592592, |
|
"grad_norm": 47.978912353515625, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 0.6197646260261536, |
|
"log_odds_ratio": -0.5345317721366882, |
|
"logps/chosen": -0.8643702268600464, |
|
"logps/rejected": -1.2556793689727783, |
|
"loss": 36.9782, |
|
"nll_loss": 1.1558464765548706, |
|
"po_loss": -0.0002769582497421652, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.1728740632534027, |
|
"rewards/margins": 0.0782618373632431, |
|
"rewards/rejected": -0.2511358857154846, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9386666666666668, |
|
"grad_norm": 48.29682159423828, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.6153634190559387, |
|
"log_odds_ratio": -0.5462952852249146, |
|
"logps/chosen": -0.8348654508590698, |
|
"logps/rejected": -1.204040765762329, |
|
"loss": 35.4368, |
|
"nll_loss": 1.107660174369812, |
|
"po_loss": -0.0002589077048469335, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.16697311401367188, |
|
"rewards/margins": 0.07383506745100021, |
|
"rewards/rejected": -0.2408081740140915, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.986074074074074, |
|
"grad_norm": 37.39937973022461, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 0.46801671385765076, |
|
"log_odds_ratio": -0.5808194875717163, |
|
"logps/chosen": -0.8600652813911438, |
|
"logps/rejected": -1.1320184469223022, |
|
"loss": 37.1, |
|
"nll_loss": 1.1594690084457397, |
|
"po_loss": -9.401176066603512e-05, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1720130741596222, |
|
"rewards/margins": 0.05439060926437378, |
|
"rewards/rejected": -0.22640366852283478, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0284444444444443, |
|
"grad_norm": 60.70783233642578, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 0.8869153261184692, |
|
"log_odds_ratio": -0.44785308837890625, |
|
"logps/chosen": -0.6780868768692017, |
|
"logps/rejected": -1.1526471376419067, |
|
"loss": 26.1816, |
|
"nll_loss": 0.9157966375350952, |
|
"po_loss": -0.00035746488720178604, |
|
"rewards/accuracies": 0.7902097702026367, |
|
"rewards/chosen": -0.13561737537384033, |
|
"rewards/margins": 0.09491205215454102, |
|
"rewards/rejected": -0.23052944242954254, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.075851851851852, |
|
"grad_norm": 63.7738037109375, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 1.119178056716919, |
|
"log_odds_ratio": -0.4020889699459076, |
|
"logps/chosen": -0.6872093677520752, |
|
"logps/rejected": -1.3119704723358154, |
|
"loss": 28.6045, |
|
"nll_loss": 0.8954778909683228, |
|
"po_loss": -0.0015872030053287745, |
|
"rewards/accuracies": 0.840624988079071, |
|
"rewards/chosen": -0.13744190335273743, |
|
"rewards/margins": 0.12495221942663193, |
|
"rewards/rejected": -0.26239413022994995, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1232592592592594, |
|
"grad_norm": 51.26667022705078, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 1.191803216934204, |
|
"log_odds_ratio": -0.3746866285800934, |
|
"logps/chosen": -0.6712603569030762, |
|
"logps/rejected": -1.3162505626678467, |
|
"loss": 27.9395, |
|
"nll_loss": 0.873916506767273, |
|
"po_loss": -0.0008082756539806724, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.13425207138061523, |
|
"rewards/margins": 0.1289980709552765, |
|
"rewards/rejected": -0.26325011253356934, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1706666666666665, |
|
"grad_norm": 50.845035552978516, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 1.267938494682312, |
|
"log_odds_ratio": -0.37167197465896606, |
|
"logps/chosen": -0.646305501461029, |
|
"logps/rejected": -1.347239375114441, |
|
"loss": 27.6319, |
|
"nll_loss": 0.8649285435676575, |
|
"po_loss": -0.0014302517520263791, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1292611062526703, |
|
"rewards/margins": 0.14018678665161133, |
|
"rewards/rejected": -0.2694478929042816, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.218074074074074, |
|
"grad_norm": 46.15715789794922, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 1.2005326747894287, |
|
"log_odds_ratio": -0.37836360931396484, |
|
"logps/chosen": -0.63874351978302, |
|
"logps/rejected": -1.281273365020752, |
|
"loss": 28.6655, |
|
"nll_loss": 0.8964711427688599, |
|
"po_loss": -0.0006741798715665936, |
|
"rewards/accuracies": 0.871874988079071, |
|
"rewards/chosen": -0.12774871289730072, |
|
"rewards/margins": 0.12850596010684967, |
|
"rewards/rejected": -0.2562546730041504, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2654814814814817, |
|
"grad_norm": 41.55079650878906, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 1.3142430782318115, |
|
"log_odds_ratio": -0.351533979177475, |
|
"logps/chosen": -0.6246355772018433, |
|
"logps/rejected": -1.360852599143982, |
|
"loss": 26.9269, |
|
"nll_loss": 0.8446337580680847, |
|
"po_loss": -0.00316769746132195, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.12492714077234268, |
|
"rewards/margins": 0.14724338054656982, |
|
"rewards/rejected": -0.2721705138683319, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.3128888888888888, |
|
"grad_norm": 41.21688461303711, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 1.2483962774276733, |
|
"log_odds_ratio": -0.35364586114883423, |
|
"logps/chosen": -0.6280895471572876, |
|
"logps/rejected": -1.3165141344070435, |
|
"loss": 27.6397, |
|
"nll_loss": 0.8649471402168274, |
|
"po_loss": -0.0012058170977979898, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.12561790645122528, |
|
"rewards/margins": 0.1376849114894867, |
|
"rewards/rejected": -0.26330283284187317, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3602962962962963, |
|
"grad_norm": 45.04114532470703, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 1.2950201034545898, |
|
"log_odds_ratio": -0.34702420234680176, |
|
"logps/chosen": -0.5998716354370117, |
|
"logps/rejected": -1.2858121395111084, |
|
"loss": 27.8242, |
|
"nll_loss": 0.8703107833862305, |
|
"po_loss": -0.0008051077020354569, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.11997435241937637, |
|
"rewards/margins": 0.13718809187412262, |
|
"rewards/rejected": -0.2571624219417572, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.407703703703704, |
|
"grad_norm": 42.57611083984375, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 1.4084466695785522, |
|
"log_odds_ratio": -0.34137439727783203, |
|
"logps/chosen": -0.6155047416687012, |
|
"logps/rejected": -1.416010856628418, |
|
"loss": 27.1897, |
|
"nll_loss": 0.8556439280509949, |
|
"po_loss": -0.005965453572571278, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.12310095131397247, |
|
"rewards/margins": 0.16010123491287231, |
|
"rewards/rejected": -0.2832021713256836, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.455111111111111, |
|
"grad_norm": 37.19707107543945, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 1.2491247653961182, |
|
"log_odds_ratio": -0.35060206055641174, |
|
"logps/chosen": -0.6111522912979126, |
|
"logps/rejected": -1.2990696430206299, |
|
"loss": 27.3026, |
|
"nll_loss": 0.8542858958244324, |
|
"po_loss": -0.001079150359146297, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.12223044782876968, |
|
"rewards/margins": 0.13758344948291779, |
|
"rewards/rejected": -0.25981390476226807, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5025185185185186, |
|
"grad_norm": 46.44743347167969, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 1.178978681564331, |
|
"log_odds_ratio": -0.3793484568595886, |
|
"logps/chosen": -0.6664374470710754, |
|
"logps/rejected": -1.3323941230773926, |
|
"loss": 27.6656, |
|
"nll_loss": 0.8664777874946594, |
|
"po_loss": -0.0019290826749056578, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.1332874894142151, |
|
"rewards/margins": 0.1331913322210312, |
|
"rewards/rejected": -0.26647883653640747, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.549925925925926, |
|
"grad_norm": 44.04457473754883, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 1.312464952468872, |
|
"log_odds_ratio": -0.3540407717227936, |
|
"logps/chosen": -0.626907467842102, |
|
"logps/rejected": -1.3609790802001953, |
|
"loss": 26.6912, |
|
"nll_loss": 0.8371696472167969, |
|
"po_loss": -0.003068637801334262, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.1253814995288849, |
|
"rewards/margins": 0.14681431651115417, |
|
"rewards/rejected": -0.2721957862377167, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.5973333333333333, |
|
"grad_norm": 48.056907653808594, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 1.2681269645690918, |
|
"log_odds_ratio": -0.35387295484542847, |
|
"logps/chosen": -0.6462396383285522, |
|
"logps/rejected": -1.3675159215927124, |
|
"loss": 26.9575, |
|
"nll_loss": 0.8445149660110474, |
|
"po_loss": -0.0020923474803566933, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.12924793362617493, |
|
"rewards/margins": 0.14425526559352875, |
|
"rewards/rejected": -0.27350321412086487, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.644740740740741, |
|
"grad_norm": 44.935523986816406, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 1.1880494356155396, |
|
"log_odds_ratio": -0.3788948953151703, |
|
"logps/chosen": -0.626964271068573, |
|
"logps/rejected": -1.2733209133148193, |
|
"loss": 27.3931, |
|
"nll_loss": 0.8574727177619934, |
|
"po_loss": -0.0014386550756171346, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.1253928691148758, |
|
"rewards/margins": 0.12927132844924927, |
|
"rewards/rejected": -0.25466424226760864, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.6921481481481484, |
|
"grad_norm": 54.157283782958984, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 1.1967148780822754, |
|
"log_odds_ratio": -0.3934488296508789, |
|
"logps/chosen": -0.6908615231513977, |
|
"logps/rejected": -1.3779170513153076, |
|
"loss": 28.7151, |
|
"nll_loss": 0.9004608392715454, |
|
"po_loss": -0.003114379709586501, |
|
"rewards/accuracies": 0.8343750238418579, |
|
"rewards/chosen": -0.13817231357097626, |
|
"rewards/margins": 0.13741108775138855, |
|
"rewards/rejected": -0.2755833864212036, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.7395555555555555, |
|
"grad_norm": 40.38901901245117, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 1.2478935718536377, |
|
"log_odds_ratio": -0.3714280128479004, |
|
"logps/chosen": -0.6672550439834595, |
|
"logps/rejected": -1.3506200313568115, |
|
"loss": 27.6346, |
|
"nll_loss": 0.8646379709243774, |
|
"po_loss": -0.0010563342366367579, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.13345098495483398, |
|
"rewards/margins": 0.1366730034351349, |
|
"rewards/rejected": -0.27012401819229126, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.786962962962963, |
|
"grad_norm": 40.278404235839844, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 1.1103378534317017, |
|
"log_odds_ratio": -0.39963197708129883, |
|
"logps/chosen": -0.6754161715507507, |
|
"logps/rejected": -1.2891783714294434, |
|
"loss": 28.7924, |
|
"nll_loss": 0.9006980061531067, |
|
"po_loss": -0.0009352812776342034, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.13508322834968567, |
|
"rewards/margins": 0.12275244295597076, |
|
"rewards/rejected": -0.2578356862068176, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.83437037037037, |
|
"grad_norm": 43.64297866821289, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 1.26936936378479, |
|
"log_odds_ratio": -0.37458792328834534, |
|
"logps/chosen": -0.625272274017334, |
|
"logps/rejected": -1.3025916814804077, |
|
"loss": 27.2818, |
|
"nll_loss": 0.8537141680717468, |
|
"po_loss": -0.0011581950820982456, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.12505444884300232, |
|
"rewards/margins": 0.1354638934135437, |
|
"rewards/rejected": -0.2605183720588684, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.8817777777777778, |
|
"grad_norm": 49.83470153808594, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 1.3354085683822632, |
|
"log_odds_ratio": -0.3548244833946228, |
|
"logps/chosen": -0.6030293703079224, |
|
"logps/rejected": -1.3301148414611816, |
|
"loss": 26.8635, |
|
"nll_loss": 0.8420251607894897, |
|
"po_loss": -0.002539848443120718, |
|
"rewards/accuracies": 0.8656250238418579, |
|
"rewards/chosen": -0.12060587108135223, |
|
"rewards/margins": 0.14541709423065186, |
|
"rewards/rejected": -0.2660229504108429, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9291851851851853, |
|
"grad_norm": 41.83040237426758, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 1.4439231157302856, |
|
"log_odds_ratio": -0.3062170743942261, |
|
"logps/chosen": -0.5867719650268555, |
|
"logps/rejected": -1.3927805423736572, |
|
"loss": 27.0062, |
|
"nll_loss": 0.845768928527832, |
|
"po_loss": -0.001825585961341858, |
|
"rewards/accuracies": 0.909375011920929, |
|
"rewards/chosen": -0.1173543930053711, |
|
"rewards/margins": 0.16120170056819916, |
|
"rewards/rejected": -0.27855610847473145, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"grad_norm": 56.19439697265625, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 1.2505788803100586, |
|
"log_odds_ratio": -0.35724979639053345, |
|
"logps/chosen": -0.5848164558410645, |
|
"logps/rejected": -1.2180840969085693, |
|
"loss": 25.9054, |
|
"nll_loss": 0.8101914525032043, |
|
"po_loss": -0.0006477964925579727, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.11696330457925797, |
|
"rewards/margins": 0.1266535222530365, |
|
"rewards/rejected": -0.24361681938171387, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 67.51294788178943, |
|
"train_runtime": 9693.4577, |
|
"train_samples_per_second": 2.089, |
|
"train_steps_per_second": 0.032 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|