mgh6 commited on
Commit
2ffdf58
·
verified ·
1 Parent(s): cb10191

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc191803cd0e53be77c2dba2b0d597f90a12f2e582c0a194c3dc3a33d16375c1
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1bfe3de0e87ac55e372474748650d29469ea43618f69f4b3b1cda7a1c9e5275
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7aa0f2e56e087014b3024e59242f999df31bb9ec854960102dee184fb81de8eb
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e118cafcc459602f3924757e157e3852e7caa20692d2fb027a0bb1e66ba14c
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11465cf536e83fdad93931dcb6689da9268f72da3bfe23713c9605230238e759
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2af69833d69f080b3cc67beaefccdeb3962ea34b1a77f84a64e357eb155a372
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a1e738055e8bc0ebf68a93d55511ca26bd12dd7297a12608356f1b695a95f64
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f3f413b5f7ea3d1f3927898a7f680914683b4dac70ee76ea36b52d5795badb6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.1928865909576416,
3
- "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-600",
4
- "epoch": 0.1741654571843251,
5
  "eval_steps": 100,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -97,6 +97,21 @@
97
  "eval_samples_per_second": 213.077,
98
  "eval_steps_per_second": 3.33,
99
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  }
101
  ],
102
  "logging_steps": 100,
@@ -125,7 +140,7 @@
125
  "attributes": {}
126
  }
127
  },
128
- "total_flos": 6002325646737408.0,
129
  "train_batch_size": 64,
130
  "trial_name": null,
131
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.181251049041748,
3
+ "best_model_checkpoint": "mgh6/TCS_MLM_50/checkpoint-700",
4
+ "epoch": 0.20319303338171263,
5
  "eval_steps": 100,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
97
  "eval_samples_per_second": 213.077,
98
  "eval_steps_per_second": 3.33,
99
  "step": 600
100
+ },
101
+ {
102
+ "epoch": 0.20319303338171263,
103
+ "grad_norm": 0.5164414048194885,
104
+ "learning_rate": 0.0009796806966618288,
105
+ "loss": 2.5121,
106
+ "step": 700
107
+ },
108
+ {
109
+ "epoch": 0.20319303338171263,
110
+ "eval_loss": 1.181251049041748,
111
+ "eval_runtime": 213.4132,
112
+ "eval_samples_per_second": 213.197,
113
+ "eval_steps_per_second": 3.332,
114
+ "step": 700
115
  }
116
  ],
117
  "logging_steps": 100,
 
140
  "attributes": {}
141
  }
142
  },
143
+ "total_flos": 7002713254526976.0,
144
  "train_batch_size": 64,
145
  "trial_name": null,
146
  "trial_params": null