kooff11 committed on
Commit
5b1147a
·
verified ·
1 Parent(s): c35ef54

Training in progress, step 18, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99a56ebedb10978bd4e193f3784da47fe9af8d1296615f5e231c886a16df738c
3
  size 2264640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ace6cb9505b630cc42f296da21a26f3b567183b3a9811fb87fbbb9091993720
3
  size 2264640
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6176f1acdd175c0a7768374262fd2fac06322be9937e16bebeb9b14d46d46cc
3
  size 1183674
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8e094ef751cbd269068b095cde0e43290ede08203abd0c895a15a9461015938
3
  size 1183674
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:007df89bec6daff126d2a1d69c9d4018343e128705fb1d5b2af992fa6385b40b
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a90b6a2c70e12c09fb32cf9d04cacc11c2523c58a742e731a65ec3e279e352
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4b1b142e127cfd32806d5105fed9a95a44b87b67f715dd9a5647e9d18a139c3
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f72b88be9468665c95a0d3c5676292c6feab6bcdb9e687c2b969b2a3bbd6c3c0
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35fabaf6f0b238c38746c1776744ea0f8639b1f15587fe23529faa4cb355d67f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e36117d22c9a63b8dad3fdec6160f4a75b956bf530cd84159b5adbb0baea0ea0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2508710801393728,
5
  "eval_steps": 9,
6
- "global_step": 9,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -86,6 +86,77 @@
86
  "eval_samples_per_second": 192.254,
87
  "eval_steps_per_second": 48.461,
88
  "step": 9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
  ],
91
  "logging_steps": 1,
@@ -105,7 +176,7 @@
105
  "attributes": {}
106
  }
107
  },
108
- "total_flos": 1547005696409600.0,
109
  "train_batch_size": 2,
110
  "trial_name": null,
111
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5017421602787456,
5
  "eval_steps": 9,
6
+ "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
86
  "eval_samples_per_second": 192.254,
87
  "eval_steps_per_second": 48.461,
88
  "step": 9
89
+ },
90
+ {
91
+ "epoch": 0.2787456445993031,
92
+ "grad_norm": 0.7740032076835632,
93
+ "learning_rate": 8.695044586103296e-05,
94
+ "loss": 4.6227,
95
+ "step": 10
96
+ },
97
+ {
98
+ "epoch": 0.30662020905923343,
99
+ "grad_norm": 0.7942211627960205,
100
+ "learning_rate": 8.368478218232787e-05,
101
+ "loss": 4.6837,
102
+ "step": 11
103
+ },
104
+ {
105
+ "epoch": 0.3344947735191638,
106
+ "grad_norm": 0.8654100894927979,
107
+ "learning_rate": 8.013173181896283e-05,
108
+ "loss": 4.6198,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 0.3623693379790941,
113
+ "grad_norm": 0.9776397943496704,
114
+ "learning_rate": 7.63216081438678e-05,
115
+ "loss": 4.7598,
116
+ "step": 13
117
+ },
118
+ {
119
+ "epoch": 0.3902439024390244,
120
+ "grad_norm": 0.828058123588562,
121
+ "learning_rate": 7.228691778882693e-05,
122
+ "loss": 4.6796,
123
+ "step": 14
124
+ },
125
+ {
126
+ "epoch": 0.4181184668989547,
127
+ "grad_norm": 0.8717594146728516,
128
+ "learning_rate": 6.806208330935766e-05,
129
+ "loss": 4.5456,
130
+ "step": 15
131
+ },
132
+ {
133
+ "epoch": 0.445993031358885,
134
+ "grad_norm": 0.9016168117523193,
135
+ "learning_rate": 6.368314950360415e-05,
136
+ "loss": 4.7329,
137
+ "step": 16
138
+ },
139
+ {
140
+ "epoch": 0.4738675958188153,
141
+ "grad_norm": 0.9021468162536621,
142
+ "learning_rate": 5.918747589082853e-05,
143
+ "loss": 4.6807,
144
+ "step": 17
145
+ },
146
+ {
147
+ "epoch": 0.5017421602787456,
148
+ "grad_norm": 0.9048068523406982,
149
+ "learning_rate": 5.4613417973165106e-05,
150
+ "loss": 4.6001,
151
+ "step": 18
152
+ },
153
+ {
154
+ "epoch": 0.5017421602787456,
155
+ "eval_loss": 4.461349010467529,
156
+ "eval_runtime": 1.2645,
157
+ "eval_samples_per_second": 191.38,
158
+ "eval_steps_per_second": 48.24,
159
+ "step": 18
160
  }
161
  ],
162
  "logging_steps": 1,
 
176
  "attributes": {}
177
  }
178
  },
179
+ "total_flos": 3078865861738496.0,
180
  "train_batch_size": 2,
181
  "trial_name": null,
182
  "trial_params": null