alexandretl
committed on
Commit
•
e2a6f21
1
Parent(s):
f5cc918
Upload folder using huggingface_hub
Browse files
runs/worthy-violet-67/ckpt_120000/model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17994ab51d891a51a7e819eee1c8b207e5d1f904fb088daf187775e7bd48d344
|
3 |
+
size 2191206134
|
runs/worthy-violet-67/ckpt_140000_before_cooldown/model.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd6038fbfde00a11ae92574c458aea59edcd29d25673e3a006a0a14644426f24
|
3 |
+
size 2191206134
|
runs/worthy-violet-67/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"d_model": 1024, "n_layers": 12, "n_heads": 16, "max_len": 512, "dropout": 0.0, "bias": false, "norm_eps": 1e-05, "base_std": 0.02, "d_ff": 3584, "n_kv_heads": 16, "optimised_attn": false, "efficient_attn": false, "super_attn": false, "pos_emb": "rope", "rope_theta": 10000, "mup": false, "mup_base_width": 288, "flash": true, "architecture": "Transformer"}
|