filipesantoscv11 commited on
Commit
85f2b47
·
verified ·
1 Parent(s): 86d87a2

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c24cfdac0642280024d08fee535ea2ed3fc0371ea2a35a181eca4013cb2bd3ab
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ba6cd343730e3c5e1a149865096c31ac20cd2f323e1bd178af029fc6810232d
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58ae16ea8b04c504c9c68fb7d2d0fea1d31cc29c00089176ea35abf74ff88dee
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f605717d77142f10bc76b183167207cd5a0fb5822c6f97197a81cbff0626afed
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa9e3011ae95dbb07328cfa992efd78c7dd321fa4004dcd5616c190a230417cf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4e273b41c46ae363b3d5f5d2ef0a5518ca338ea410ed93a3faeb1a9d2062de
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70f9eefb79e5a651e4979588997923d1fc997984394f03e9fb61aca38c6bee05
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6da36b358afefa2fe6fe8e3889efc77dfb89ac577ed7bb55c631123a9ebe149
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.3773066997528076,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.002789166875854182,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -101,6 +101,49 @@
101
  "eval_samples_per_second": 27.811,
102
  "eval_steps_per_second": 6.954,
103
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  }
105
  ],
106
  "logging_steps": 10,
@@ -129,7 +172,7 @@
129
  "attributes": {}
130
  }
131
  },
132
- "total_flos": 2.262770368118784e+16,
133
  "train_batch_size": 8,
134
  "trial_name": null,
135
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.336289644241333,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 0.0041837503137812735,
5
  "eval_steps": 50,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
101
  "eval_samples_per_second": 27.811,
102
  "eval_steps_per_second": 6.954,
103
  "step": 100
104
+ },
105
+ {
106
+ "epoch": 0.0030680835634396006,
107
+ "grad_norm": 2.741830587387085,
108
+ "learning_rate": 5.05e-06,
109
+ "loss": 1.6088,
110
+ "step": 110
111
+ },
112
+ {
113
+ "epoch": 0.0033470002510250186,
114
+ "grad_norm": 2.751880407333374,
115
+ "learning_rate": 4.488888888888889e-06,
116
+ "loss": 1.2966,
117
+ "step": 120
118
+ },
119
+ {
120
+ "epoch": 0.003625916938610437,
121
+ "grad_norm": 3.4367833137512207,
122
+ "learning_rate": 3.927777777777778e-06,
123
+ "loss": 1.349,
124
+ "step": 130
125
+ },
126
+ {
127
+ "epoch": 0.0039048336261958555,
128
+ "grad_norm": 3.5134706497192383,
129
+ "learning_rate": 3.3666666666666665e-06,
130
+ "loss": 1.302,
131
+ "step": 140
132
+ },
133
+ {
134
+ "epoch": 0.0041837503137812735,
135
+ "grad_norm": 5.4509100914001465,
136
+ "learning_rate": 2.8055555555555555e-06,
137
+ "loss": 1.2418,
138
+ "step": 150
139
+ },
140
+ {
141
+ "epoch": 0.0041837503137812735,
142
+ "eval_loss": 1.336289644241333,
143
+ "eval_runtime": 539.6746,
144
+ "eval_samples_per_second": 27.974,
145
+ "eval_steps_per_second": 6.995,
146
+ "step": 150
147
  }
148
  ],
149
  "logging_steps": 10,
 
172
  "attributes": {}
173
  }
174
  },
175
+ "total_flos": 3.394155552178176e+16,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null