{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.16410929679166325,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0016410929679166325,
"grad_norm": 0.1563968062400818,
"learning_rate": 9.999992612842675e-06,
"loss": 0.6605,
"step": 5
},
{
"epoch": 0.003282185935833265,
"grad_norm": 0.15270280838012695,
"learning_rate": 9.999970451392527e-06,
"loss": 0.6491,
"step": 10
},
{
"epoch": 0.0049232789037498975,
"grad_norm": 0.17153096199035645,
"learning_rate": 9.999933515715042e-06,
"loss": 0.6383,
"step": 15
},
{
"epoch": 0.00656437187166653,
"grad_norm": 0.172921285033226,
"learning_rate": 9.999881805919356e-06,
"loss": 0.6506,
"step": 20
},
{
"epoch": 0.008205464839583163,
"grad_norm": 0.16361959278583527,
"learning_rate": 9.999815322158266e-06,
"loss": 0.6067,
"step": 25
},
{
"epoch": 0.009846557807499795,
"grad_norm": 0.12810567021369934,
"learning_rate": 9.999734064628224e-06,
"loss": 0.6157,
"step": 30
},
{
"epoch": 0.011487650775416428,
"grad_norm": 0.13882791996002197,
"learning_rate": 9.999638033569334e-06,
"loss": 0.6163,
"step": 35
},
{
"epoch": 0.01312874374333306,
"grad_norm": 0.08581311255693436,
"learning_rate": 9.999527229265353e-06,
"loss": 0.5795,
"step": 40
},
{
"epoch": 0.014769836711249693,
"grad_norm": 0.09464729577302933,
"learning_rate": 9.999401652043697e-06,
"loss": 0.5908,
"step": 45
},
{
"epoch": 0.016410929679166325,
"grad_norm": 0.08246736973524094,
"learning_rate": 9.999261302275424e-06,
"loss": 0.5681,
"step": 50
},
{
"epoch": 0.018052022647082958,
"grad_norm": 0.07881084084510803,
"learning_rate": 9.999106180375251e-06,
"loss": 0.5566,
"step": 55
},
{
"epoch": 0.01969311561499959,
"grad_norm": 0.07179544121026993,
"learning_rate": 9.998936286801541e-06,
"loss": 0.5806,
"step": 60
},
{
"epoch": 0.021334208582916223,
"grad_norm": 0.11372455954551697,
"learning_rate": 9.99875162205631e-06,
"loss": 0.541,
"step": 65
},
{
"epoch": 0.022975301550832855,
"grad_norm": 0.07621748745441437,
"learning_rate": 9.998552186685211e-06,
"loss": 0.5421,
"step": 70
},
{
"epoch": 0.024616394518749488,
"grad_norm": 0.07502977550029755,
"learning_rate": 9.998337981277552e-06,
"loss": 0.5634,
"step": 75
},
{
"epoch": 0.02625748748666612,
"grad_norm": 0.06710907071828842,
"learning_rate": 9.998109006466281e-06,
"loss": 0.5322,
"step": 80
},
{
"epoch": 0.027898580454582753,
"grad_norm": 0.066213458776474,
"learning_rate": 9.997865262927984e-06,
"loss": 0.5474,
"step": 85
},
{
"epoch": 0.029539673422499385,
"grad_norm": 0.0674639567732811,
"learning_rate": 9.997606751382894e-06,
"loss": 0.5525,
"step": 90
},
{
"epoch": 0.031180766390416018,
"grad_norm": 0.07476690411567688,
"learning_rate": 9.997333472594872e-06,
"loss": 0.526,
"step": 95
},
{
"epoch": 0.03282185935833265,
"grad_norm": 0.05499599874019623,
"learning_rate": 9.997045427371423e-06,
"loss": 0.5262,
"step": 100
},
{
"epoch": 0.03446295232624928,
"grad_norm": 0.06396327167749405,
"learning_rate": 9.996742616563682e-06,
"loss": 0.5141,
"step": 105
},
{
"epoch": 0.036104045294165915,
"grad_norm": 0.06143304333090782,
"learning_rate": 9.99642504106641e-06,
"loss": 0.5107,
"step": 110
},
{
"epoch": 0.03774513826208255,
"grad_norm": 0.0630095973610878,
"learning_rate": 9.996092701818004e-06,
"loss": 0.5177,
"step": 115
},
{
"epoch": 0.03938623122999918,
"grad_norm": 0.059215761721134186,
"learning_rate": 9.995745599800476e-06,
"loss": 0.5098,
"step": 120
},
{
"epoch": 0.04102732419791581,
"grad_norm": 0.05862729996442795,
"learning_rate": 9.995383736039465e-06,
"loss": 0.5027,
"step": 125
},
{
"epoch": 0.042668417165832445,
"grad_norm": 0.05443592369556427,
"learning_rate": 9.995007111604232e-06,
"loss": 0.4688,
"step": 130
},
{
"epoch": 0.04430951013374908,
"grad_norm": 0.05443759262561798,
"learning_rate": 9.994615727607648e-06,
"loss": 0.4988,
"step": 135
},
{
"epoch": 0.04595060310166571,
"grad_norm": 0.05553797259926796,
"learning_rate": 9.994209585206201e-06,
"loss": 0.4864,
"step": 140
},
{
"epoch": 0.04759169606958234,
"grad_norm": 0.055965058505535126,
"learning_rate": 9.993788685599985e-06,
"loss": 0.4768,
"step": 145
},
{
"epoch": 0.049232789037498975,
"grad_norm": 0.06474044173955917,
"learning_rate": 9.993353030032701e-06,
"loss": 0.4942,
"step": 150
},
{
"epoch": 0.05087388200541561,
"grad_norm": 0.0566246323287487,
"learning_rate": 9.992902619791652e-06,
"loss": 0.4888,
"step": 155
},
{
"epoch": 0.05251497497333224,
"grad_norm": 0.0555800199508667,
"learning_rate": 9.992437456207738e-06,
"loss": 0.4687,
"step": 160
},
{
"epoch": 0.05415606794124887,
"grad_norm": 0.0563640259206295,
"learning_rate": 9.991957540655453e-06,
"loss": 0.4923,
"step": 165
},
{
"epoch": 0.055797160909165505,
"grad_norm": 0.05194167420268059,
"learning_rate": 9.991462874552882e-06,
"loss": 0.4811,
"step": 170
},
{
"epoch": 0.05743825387708214,
"grad_norm": 0.1205214262008667,
"learning_rate": 9.990953459361696e-06,
"loss": 0.4696,
"step": 175
},
{
"epoch": 0.05907934684499877,
"grad_norm": 0.0542314276099205,
"learning_rate": 9.990429296587148e-06,
"loss": 0.4547,
"step": 180
},
{
"epoch": 0.0607204398129154,
"grad_norm": 0.06125911697745323,
"learning_rate": 9.989890387778065e-06,
"loss": 0.4817,
"step": 185
},
{
"epoch": 0.062361532780832035,
"grad_norm": 0.06071058660745621,
"learning_rate": 9.98933673452685e-06,
"loss": 0.4553,
"step": 190
},
{
"epoch": 0.06400262574874867,
"grad_norm": 0.05769752338528633,
"learning_rate": 9.98876833846947e-06,
"loss": 0.4632,
"step": 195
},
{
"epoch": 0.0656437187166653,
"grad_norm": 0.05761849135160446,
"learning_rate": 9.988185201285461e-06,
"loss": 0.4439,
"step": 200
},
{
"epoch": 0.06728481168458193,
"grad_norm": 0.05703369528055191,
"learning_rate": 9.987587324697912e-06,
"loss": 0.4506,
"step": 205
},
{
"epoch": 0.06892590465249857,
"grad_norm": 0.05599252134561539,
"learning_rate": 9.986974710473467e-06,
"loss": 0.4573,
"step": 210
},
{
"epoch": 0.0705669976204152,
"grad_norm": 0.05374148488044739,
"learning_rate": 9.986347360422316e-06,
"loss": 0.4555,
"step": 215
},
{
"epoch": 0.07220809058833183,
"grad_norm": 0.054009810090065,
"learning_rate": 9.985705276398193e-06,
"loss": 0.4436,
"step": 220
},
{
"epoch": 0.07384918355624846,
"grad_norm": 0.07604236155748367,
"learning_rate": 9.985048460298367e-06,
"loss": 0.4583,
"step": 225
},
{
"epoch": 0.0754902765241651,
"grad_norm": 0.052760086953639984,
"learning_rate": 9.984376914063643e-06,
"loss": 0.4409,
"step": 230
},
{
"epoch": 0.07713136949208173,
"grad_norm": 0.06065182387828827,
"learning_rate": 9.983690639678343e-06,
"loss": 0.4637,
"step": 235
},
{
"epoch": 0.07877246245999836,
"grad_norm": 0.06539740413427353,
"learning_rate": 9.982989639170319e-06,
"loss": 0.4636,
"step": 240
},
{
"epoch": 0.08041355542791499,
"grad_norm": 0.06656944006681442,
"learning_rate": 9.982273914610927e-06,
"loss": 0.4487,
"step": 245
},
{
"epoch": 0.08205464839583163,
"grad_norm": 0.05745495483279228,
"learning_rate": 9.981543468115039e-06,
"loss": 0.4393,
"step": 250
},
{
"epoch": 0.08369574136374826,
"grad_norm": 0.06525252759456635,
"learning_rate": 9.98079830184102e-06,
"loss": 0.4713,
"step": 255
},
{
"epoch": 0.08533683433166489,
"grad_norm": 0.0555146224796772,
"learning_rate": 9.980038417990736e-06,
"loss": 0.4278,
"step": 260
},
{
"epoch": 0.08697792729958152,
"grad_norm": 0.0773826315999031,
"learning_rate": 9.979263818809542e-06,
"loss": 0.4527,
"step": 265
},
{
"epoch": 0.08861902026749816,
"grad_norm": 0.06476614624261856,
"learning_rate": 9.978474506586269e-06,
"loss": 0.4404,
"step": 270
},
{
"epoch": 0.09026011323541479,
"grad_norm": 0.06019666790962219,
"learning_rate": 9.977670483653228e-06,
"loss": 0.4414,
"step": 275
},
{
"epoch": 0.09190120620333142,
"grad_norm": 0.06592460721731186,
"learning_rate": 9.976851752386196e-06,
"loss": 0.4313,
"step": 280
},
{
"epoch": 0.09354229917124805,
"grad_norm": 0.06817147135734558,
"learning_rate": 9.976018315204412e-06,
"loss": 0.4357,
"step": 285
},
{
"epoch": 0.09518339213916469,
"grad_norm": 0.06738044321537018,
"learning_rate": 9.97517017457057e-06,
"loss": 0.4511,
"step": 290
},
{
"epoch": 0.09682448510708132,
"grad_norm": 0.07049284130334854,
"learning_rate": 9.974307332990806e-06,
"loss": 0.447,
"step": 295
},
{
"epoch": 0.09846557807499795,
"grad_norm": 0.06609766185283661,
"learning_rate": 9.973429793014703e-06,
"loss": 0.4576,
"step": 300
},
{
"epoch": 0.10010667104291458,
"grad_norm": 0.06007273495197296,
"learning_rate": 9.972537557235267e-06,
"loss": 0.4397,
"step": 305
},
{
"epoch": 0.10174776401083122,
"grad_norm": 0.0659220889210701,
"learning_rate": 9.971630628288935e-06,
"loss": 0.4538,
"step": 310
},
{
"epoch": 0.10338885697874785,
"grad_norm": 0.08680638670921326,
"learning_rate": 9.970709008855557e-06,
"loss": 0.442,
"step": 315
},
{
"epoch": 0.10502994994666448,
"grad_norm": 0.06552395224571228,
"learning_rate": 9.969772701658393e-06,
"loss": 0.4195,
"step": 320
},
{
"epoch": 0.10667104291458111,
"grad_norm": 0.06147119030356407,
"learning_rate": 9.968821709464101e-06,
"loss": 0.4419,
"step": 325
},
{
"epoch": 0.10831213588249775,
"grad_norm": 0.06841282546520233,
"learning_rate": 9.967856035082732e-06,
"loss": 0.4313,
"step": 330
},
{
"epoch": 0.10995322885041438,
"grad_norm": 0.07704174518585205,
"learning_rate": 9.966875681367724e-06,
"loss": 0.4373,
"step": 335
},
{
"epoch": 0.11159432181833101,
"grad_norm": 0.06586287170648575,
"learning_rate": 9.965880651215885e-06,
"loss": 0.4475,
"step": 340
},
{
"epoch": 0.11323541478624764,
"grad_norm": 0.13131344318389893,
"learning_rate": 9.964870947567396e-06,
"loss": 0.42,
"step": 345
},
{
"epoch": 0.11487650775416428,
"grad_norm": 0.06351525336503983,
"learning_rate": 9.963846573405791e-06,
"loss": 0.4247,
"step": 350
},
{
"epoch": 0.11651760072208091,
"grad_norm": 0.06860467791557312,
"learning_rate": 9.962807531757955e-06,
"loss": 0.432,
"step": 355
},
{
"epoch": 0.11815869368999754,
"grad_norm": 0.06938762962818146,
"learning_rate": 9.961753825694112e-06,
"loss": 0.4324,
"step": 360
},
{
"epoch": 0.11979978665791417,
"grad_norm": 0.06990928202867508,
"learning_rate": 9.960685458327824e-06,
"loss": 0.4136,
"step": 365
},
{
"epoch": 0.1214408796258308,
"grad_norm": 0.06884902715682983,
"learning_rate": 9.959602432815964e-06,
"loss": 0.4301,
"step": 370
},
{
"epoch": 0.12308197259374744,
"grad_norm": 0.06610533595085144,
"learning_rate": 9.958504752358729e-06,
"loss": 0.418,
"step": 375
},
{
"epoch": 0.12472306556166407,
"grad_norm": 0.07162132859230042,
"learning_rate": 9.957392420199612e-06,
"loss": 0.4335,
"step": 380
},
{
"epoch": 0.1263641585295807,
"grad_norm": 0.07637803256511688,
"learning_rate": 9.956265439625401e-06,
"loss": 0.4508,
"step": 385
},
{
"epoch": 0.12800525149749734,
"grad_norm": 0.06776853650808334,
"learning_rate": 9.955123813966172e-06,
"loss": 0.4235,
"step": 390
},
{
"epoch": 0.12964634446541395,
"grad_norm": 0.06781169027090073,
"learning_rate": 9.953967546595272e-06,
"loss": 0.4296,
"step": 395
},
{
"epoch": 0.1312874374333306,
"grad_norm": 0.06995800137519836,
"learning_rate": 9.952796640929309e-06,
"loss": 0.4328,
"step": 400
},
{
"epoch": 0.13292853040124722,
"grad_norm": 0.06544926762580872,
"learning_rate": 9.951611100428151e-06,
"loss": 0.4235,
"step": 405
},
{
"epoch": 0.13456962336916387,
"grad_norm": 0.07403396815061569,
"learning_rate": 9.95041092859491e-06,
"loss": 0.4362,
"step": 410
},
{
"epoch": 0.13621071633708048,
"grad_norm": 0.06964828819036484,
"learning_rate": 9.949196128975925e-06,
"loss": 0.4134,
"step": 415
},
{
"epoch": 0.13785180930499713,
"grad_norm": 0.07269076257944107,
"learning_rate": 9.947966705160765e-06,
"loss": 0.4288,
"step": 420
},
{
"epoch": 0.13949290227291375,
"grad_norm": 0.0716971680521965,
"learning_rate": 9.946722660782209e-06,
"loss": 0.4113,
"step": 425
},
{
"epoch": 0.1411339952408304,
"grad_norm": 0.06757480651140213,
"learning_rate": 9.945463999516236e-06,
"loss": 0.4311,
"step": 430
},
{
"epoch": 0.14277508820874701,
"grad_norm": 0.07381222397089005,
"learning_rate": 9.944190725082019e-06,
"loss": 0.4313,
"step": 435
},
{
"epoch": 0.14441618117666366,
"grad_norm": 0.07273319363594055,
"learning_rate": 9.94290284124191e-06,
"loss": 0.4195,
"step": 440
},
{
"epoch": 0.14605727414458028,
"grad_norm": 0.07356058806180954,
"learning_rate": 9.941600351801426e-06,
"loss": 0.425,
"step": 445
},
{
"epoch": 0.14769836711249693,
"grad_norm": 0.07552187144756317,
"learning_rate": 9.940283260609248e-06,
"loss": 0.4295,
"step": 450
},
{
"epoch": 0.14933946008041354,
"grad_norm": 0.0747319757938385,
"learning_rate": 9.938951571557198e-06,
"loss": 0.4426,
"step": 455
},
{
"epoch": 0.1509805530483302,
"grad_norm": 0.06968298554420471,
"learning_rate": 9.937605288580237e-06,
"loss": 0.4244,
"step": 460
},
{
"epoch": 0.1526216460162468,
"grad_norm": 0.07281242311000824,
"learning_rate": 9.936244415656443e-06,
"loss": 0.4263,
"step": 465
},
{
"epoch": 0.15426273898416346,
"grad_norm": 0.07637212425470352,
"learning_rate": 9.934868956807012e-06,
"loss": 0.4217,
"step": 470
},
{
"epoch": 0.15590383195208007,
"grad_norm": 0.07728656381368637,
"learning_rate": 9.933478916096235e-06,
"loss": 0.4214,
"step": 475
},
{
"epoch": 0.15754492491999672,
"grad_norm": 0.09080182015895844,
"learning_rate": 9.932074297631494e-06,
"loss": 0.4143,
"step": 480
},
{
"epoch": 0.15918601788791334,
"grad_norm": 0.07812851667404175,
"learning_rate": 9.930655105563241e-06,
"loss": 0.4307,
"step": 485
},
{
"epoch": 0.16082711085582999,
"grad_norm": 0.0779787003993988,
"learning_rate": 9.929221344084994e-06,
"loss": 0.433,
"step": 490
},
{
"epoch": 0.1624682038237466,
"grad_norm": 0.0747016966342926,
"learning_rate": 9.927773017433325e-06,
"loss": 0.4026,
"step": 495
},
{
"epoch": 0.16410929679166325,
"grad_norm": 0.07794748246669769,
"learning_rate": 9.926310129887836e-06,
"loss": 0.4331,
"step": 500
}
],
"logging_steps": 5,
"max_steps": 9138,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.57326968815616e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}