Saving weights and log at step 760000
Browse files- README.md +1 -1
- flax_model.msgpack +1 -1
- opt_state.msgpack +3 -0
- pytorch_model.bin +1 -1
- runs/events.out.tfevents.1641156371.t1v-n-2f64d7c8-w-0.13342.0.v2 +2 -2
- training_state.json +1 -0
README.md
CHANGED
@@ -30,7 +30,7 @@ Tokenizer:
|
|
30 |
Training details:
|
31 |
|
32 |
* Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
|
33 |
-
* Trained for
|
34 |
* Training continuing
|
35 |
* Block size: 512
|
36 |
* Optimizer: adafactor
|
|
|
30 |
Training details:
|
31 |
|
32 |
* Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
|
33 |
+
* Trained for 760K steps (batch size 16) to ppl 16.8 on mc4 nl full
|
34 |
* Training continuing
|
35 |
* Block size: 512
|
36 |
* Optimizer: adafactor
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5262314590
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9219656705501e15f9f93b78df01c8a339552af0685161f55febd8dc2edca3fc
|
3 |
size 5262314590
|
opt_state.msgpack
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9415497a6e41b76b0baa60a31beb021fe2a13f11f513106d350c89beda73f7f6
|
3 |
+
size 5778100
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5363100545
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71eac87d5c3e71477204c4b97e36e0beb17c43686afc160ee20955316ab50c80
|
3 |
size 5363100545
|
runs/events.out.tfevents.1641156371.t1v-n-2f64d7c8-w-0.13342.0.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:909e7ac40e6afe9723bd239188da21469a915ed6c44b30318bdca1e48dd9ba04
|
3 |
+
size 114081255
|
training_state.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"step": 760001}
|