Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61d55372f353d725c22373c90baeb7a63ee0efc2de62bf2ebdc7f0af3d498c85
 size 191968

 version https://git-lfs.github.com/spec/v1
+oid sha256:713bf37431609b04bbc7821c49c04bd4f4ae34526c86f5de36d062c73d164742
 size 191968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2f2e6beee4d6cdc4df0a48946e66d76e01c49b9afe9253ab81bae133eb3abc4
 size 400714

 version https://git-lfs.github.com/spec/v1
+oid sha256:c902fa478634fe66c2a7c6c7f83e102a18816842e783f536e6f2be6144c1b0a4
 size 400714

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab7a19129cab0b25700902124c164993162070b32adc9dc0b99b8841da288083
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:bae6cb4c215e360dca24bb2226025d8070d1f7e7fdb15b57bc707cf1e85ba41c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d90116c540b4ff0066495fbccc9c914a568905fb44c6564f227952cc4231b00
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:03ad66011cfc1fc727a51190602a41adc332b48eeef62a5ee87c2ca9f9b90b2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.007046719751955465,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 370.457,
       "eval_steps_per_second": 92.614,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -387,7 +745,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1408327090176.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.01409343950391093,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 370.457,
       "eval_steps_per_second": 92.614,
       "step": 50
+    },
+    {
+      "epoch": 0.007187654146994574,
+      "grad_norm": 1.4676305055618286,
+      "learning_rate": 2.3816778784387097e-05,
+      "loss": 10.3208,
+      "step": 51
+    },
+    {
+      "epoch": 0.007328588542033683,
+      "grad_norm": 1.3910757303237915,
+      "learning_rate": 2.3263454721781537e-05,
+      "loss": 10.3338,
+      "step": 52
+    },
+    {
+      "epoch": 0.007469522937072793,
+      "grad_norm": 1.3545656204223633,
+      "learning_rate": 2.2693489161088592e-05,
+      "loss": 10.328,
+      "step": 53
+    },
+    {
+      "epoch": 0.0076104573321119015,
+      "grad_norm": 1.3630988597869873,
+      "learning_rate": 2.210802993709498e-05,
+      "loss": 10.3152,
+      "step": 54
+    },
+    {
+      "epoch": 0.007751391727151011,
+      "grad_norm": 1.4494277238845825,
+      "learning_rate": 2.1508256086763372e-05,
+      "loss": 10.3248,
+      "step": 55
+    },
+    {
+      "epoch": 0.00789232612219012,
+      "grad_norm": 1.41647207736969,
+      "learning_rate": 2.0895375474808857e-05,
+      "loss": 10.3232,
+      "step": 56
+    },
+    {
+      "epoch": 0.00803326051722923,
+      "grad_norm": 1.3655009269714355,
+      "learning_rate": 2.0270622361220143e-05,
+      "loss": 10.3206,
+      "step": 57
+    },
+    {
+      "epoch": 0.00817419491226834,
+      "grad_norm": 1.4189130067825317,
+      "learning_rate": 1.963525491562421e-05,
+      "loss": 10.3256,
+      "step": 58
+    },
+    {
+      "epoch": 0.008315129307307448,
+      "grad_norm": 1.3745726346969604,
+      "learning_rate": 1.8990552683500128e-05,
+      "loss": 10.3214,
+      "step": 59
+    },
+    {
+      "epoch": 0.008456063702346557,
+      "grad_norm": 1.3971679210662842,
+      "learning_rate": 1.8337814009344716e-05,
+      "loss": 10.3095,
+      "step": 60
+    },
+    {
+      "epoch": 0.008596998097385667,
+      "grad_norm": 1.431792140007019,
+      "learning_rate": 1.767835342197955e-05,
+      "loss": 10.3287,
+      "step": 61
+    },
+    {
+      "epoch": 0.008737932492424777,
+      "grad_norm": 1.5383330583572388,
+      "learning_rate": 1.7013498987264832e-05,
+      "loss": 10.3173,
+      "step": 62
+    },
+    {
+      "epoch": 0.008878866887463886,
+      "grad_norm": 1.3464570045471191,
+      "learning_rate": 1.6344589633551502e-05,
+      "loss": 10.3152,
+      "step": 63
+    },
+    {
+      "epoch": 0.009019801282502994,
+      "grad_norm": 1.4032111167907715,
+      "learning_rate": 1.5672972455257726e-05,
+      "loss": 10.3198,
+      "step": 64
+    },
+    {
+      "epoch": 0.009160735677542104,
+      "grad_norm": 1.377820372581482,
+      "learning_rate": 1.5e-05,
+      "loss": 10.3114,
+      "step": 65
+    },
+    {
+      "epoch": 0.009301670072581213,
+      "grad_norm": 1.3224940299987793,
+      "learning_rate": 1.4327027544742281e-05,
+      "loss": 10.3053,
+      "step": 66
+    },
+    {
+      "epoch": 0.009442604467620323,
+      "grad_norm": 1.3686047792434692,
+      "learning_rate": 1.36554103664485e-05,
+      "loss": 10.3148,
+      "step": 67
+    },
+    {
+      "epoch": 0.009583538862659433,
+      "grad_norm": 1.344351887702942,
+      "learning_rate": 1.2986501012735174e-05,
+      "loss": 10.3046,
+      "step": 68
+    },
+    {
+      "epoch": 0.009724473257698542,
+      "grad_norm": 1.366282343864441,
+      "learning_rate": 1.2321646578020452e-05,
+      "loss": 10.3113,
+      "step": 69
+    },
+    {
+      "epoch": 0.00986540765273765,
+      "grad_norm": 1.3756734132766724,
+      "learning_rate": 1.1662185990655285e-05,
+      "loss": 10.3039,
+      "step": 70
+    },
+    {
+      "epoch": 0.01000634204777676,
+      "grad_norm": 1.3746817111968994,
+      "learning_rate": 1.1009447316499875e-05,
+      "loss": 10.3097,
+      "step": 71
+    },
+    {
+      "epoch": 0.01014727644281587,
+      "grad_norm": 1.416692852973938,
+      "learning_rate": 1.036474508437579e-05,
+      "loss": 10.2987,
+      "step": 72
+    },
+    {
+      "epoch": 0.010288210837854979,
+      "grad_norm": 1.4465065002441406,
+      "learning_rate": 9.729377638779859e-06,
+      "loss": 10.2913,
+      "step": 73
+    },
+    {
+      "epoch": 0.010429145232894088,
+      "grad_norm": 1.4586553573608398,
+      "learning_rate": 9.104624525191147e-06,
+      "loss": 10.3027,
+      "step": 74
+    },
+    {
+      "epoch": 0.010570079627933196,
+      "grad_norm": 1.4745570421218872,
+      "learning_rate": 8.491743913236629e-06,
+      "loss": 10.2936,
+      "step": 75
+    },
+    {
+      "epoch": 0.010711014022972306,
+      "grad_norm": 1.3962432146072388,
+      "learning_rate": 7.89197006290502e-06,
+      "loss": 10.2829,
+      "step": 76
+    },
+    {
+      "epoch": 0.010851948418011416,
+      "grad_norm": 1.4083092212677002,
+      "learning_rate": 7.30651083891141e-06,
+      "loss": 10.3023,
+      "step": 77
+    },
+    {
+      "epoch": 0.010992882813050525,
+      "grad_norm": 1.300138235092163,
+      "learning_rate": 6.736545278218464e-06,
+      "loss": 10.3017,
+      "step": 78
+    },
+    {
+      "epoch": 0.011133817208089635,
+      "grad_norm": 1.4003950357437134,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 10.2933,
+      "step": 79
+    },
+    {
+      "epoch": 0.011274751603128744,
+      "grad_norm": 1.3284310102462769,
+      "learning_rate": 5.647652972118998e-06,
+      "loss": 10.3043,
+      "step": 80
+    },
+    {
+      "epoch": 0.011415685998167852,
+      "grad_norm": 1.3440016508102417,
+      "learning_rate": 5.130919110904311e-06,
+      "loss": 10.2915,
+      "step": 81
+    },
+    {
+      "epoch": 0.011556620393206962,
+      "grad_norm": 1.427344560623169,
+      "learning_rate": 4.6340602651970304e-06,
+      "loss": 10.3187,
+      "step": 82
+    },
+    {
+      "epoch": 0.011697554788246071,
+      "grad_norm": 1.3191053867340088,
+      "learning_rate": 4.158077042589129e-06,
+      "loss": 10.2869,
+      "step": 83
+    },
+    {
+      "epoch": 0.011838489183285181,
+      "grad_norm": 1.3370403051376343,
+      "learning_rate": 3.7039280099458373e-06,
+      "loss": 10.3063,
+      "step": 84
+    },
+    {
+      "epoch": 0.01197942357832429,
+      "grad_norm": 1.3755344152450562,
+      "learning_rate": 3.272527762979553e-06,
+      "loss": 10.3048,
+      "step": 85
+    },
+    {
+      "epoch": 0.012120357973363399,
+      "grad_norm": 1.3229495286941528,
+      "learning_rate": 2.86474508437579e-06,
+      "loss": 10.2869,
+      "step": 86
+    },
+    {
+      "epoch": 0.012261292368402508,
+      "grad_norm": 1.2974941730499268,
+      "learning_rate": 2.4814011941804603e-06,
+      "loss": 10.3094,
+      "step": 87
+    },
+    {
+      "epoch": 0.012402226763441618,
+      "grad_norm": 1.4859426021575928,
+      "learning_rate": 2.1232680959720085e-06,
+      "loss": 10.2934,
+      "step": 88
+    },
+    {
+      "epoch": 0.012543161158480727,
+      "grad_norm": 1.4646068811416626,
+      "learning_rate": 1.79106702214893e-06,
+      "loss": 10.3102,
+      "step": 89
+    },
+    {
+      "epoch": 0.012684095553519837,
+      "grad_norm": 1.3561583757400513,
+      "learning_rate": 1.4854669814637145e-06,
+      "loss": 10.3049,
+      "step": 90
+    },
+    {
+      "epoch": 0.012825029948558947,
+      "grad_norm": 1.357525110244751,
+      "learning_rate": 1.2070834117282414e-06,
+      "loss": 10.309,
+      "step": 91
+    },
+    {
+      "epoch": 0.012965964343598054,
+      "grad_norm": 1.445386528968811,
+      "learning_rate": 9.56476940403942e-07,
+      "loss": 10.3097,
+      "step": 92
+    },
+    {
+      "epoch": 0.013106898738637164,
+      "grad_norm": 1.304282307624817,
+      "learning_rate": 7.341522555726971e-07,
+      "loss": 10.3029,
+      "step": 93
+    },
+    {
+      "epoch": 0.013247833133676274,
+      "grad_norm": 1.5140515565872192,
+      "learning_rate": 5.405570895622014e-07,
+      "loss": 10.3004,
+      "step": 94
+    },
+    {
+      "epoch": 0.013388767528715383,
+      "grad_norm": 1.4801911115646362,
+      "learning_rate": 3.760813172726457e-07,
+      "loss": 10.3047,
+      "step": 95
+    },
+    {
+      "epoch": 0.013529701923754493,
+      "grad_norm": 1.3998249769210815,
+      "learning_rate": 2.41056171020555e-07,
+      "loss": 10.3167,
+      "step": 96
+    },
+    {
+      "epoch": 0.0136706363187936,
+      "grad_norm": 1.4316073656082153,
+      "learning_rate": 1.357535734809795e-07,
+      "loss": 10.2992,
+      "step": 97
+    },
+    {
+      "epoch": 0.01381157071383271,
+      "grad_norm": 1.4810982942581177,
+      "learning_rate": 6.038559007141397e-08,
+      "loss": 10.3182,
+      "step": 98
+    },
+    {
+      "epoch": 0.01395250510887182,
+      "grad_norm": 1.5529173612594604,
+      "learning_rate": 1.510400188028116e-08,
+      "loss": 10.3235,
+      "step": 99
+    },
+    {
+      "epoch": 0.01409343950391093,
+      "grad_norm": 1.5916683673858643,
+      "learning_rate": 0.0,
+      "loss": 10.3158,
+      "step": 100
+    },
+    {
+      "epoch": 0.01409343950391093,
+      "eval_loss": NaN,
+      "eval_runtime": 8.012,
+      "eval_samples_per_second": 372.94,
+      "eval_steps_per_second": 93.235,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2816654180352.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null