Upload folder using huggingface_hub
Browse files
- trainer_0/ae.pt +3 -0
- trainer_0/checkpoints/ae_0.pt +3 -0
- trainer_0/checkpoints/ae_2048.pt +3 -0
- trainer_0/checkpoints/ae_4096.pt +3 -0
- trainer_0/checkpoints/ae_6144.pt +3 -0
- trainer_0/checkpoints/ae_8192.pt +3 -0
- trainer_0/config.json +26 -0
trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa5b042410ca433993da3c045d335629be860addc634ac735fef049b1d3b51e1
|
3 |
+
size 37778216
|
trainer_0/checkpoints/ae_0.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7335e86afac341439c6a69f5c1e31c93d8127a0ddf4120ae80add272d8f154a0
|
3 |
+
size 37778232
|
trainer_0/checkpoints/ae_2048.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7e7a42a0c59172ed870e81ed34a33d87157cc4802fd2e47fafd87bb4341ed61
|
3 |
+
size 37778320
|
trainer_0/checkpoints/ae_4096.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8966e001b92231fe9a2522cd98d567bd1408ced454aa344ed88bc3682cfd754
|
3 |
+
size 37778320
|
trainer_0/checkpoints/ae_6144.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a707603ac4cc676352e30373934986dbba01cf6ce5ffb1fb21e0aa6f2520c578
|
3 |
+
size 37778320
|
trainer_0/checkpoints/ae_8192.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08a4730f06b0fcd73260090f42b63e9c881b9f8d191a002c6cb570e22f07f1d5
|
3 |
+
size 37778320
|
trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.00032659863237109043,
|
6 |
+
"steps": 30000,
|
7 |
+
"seed": null,
|
8 |
+
"activation_dim": 768,
|
9 |
+
"dict_size": 6144,
|
10 |
+
"k": 30,
|
11 |
+
"device": "cuda",
|
12 |
+
"layer": "0",
|
13 |
+
"lm_name": "TinyModel_2L_3E",
|
14 |
+
"wandb_name": "AutoEncoderTopK",
|
15 |
+
"submodule_name": null
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 768,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 30000.0,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 512,
|
23 |
+
"out_batch_size": 8192,
|
24 |
+
"device": "cuda"
|
25 |
+
}
|
26 |
+
}
|