Upload model
Browse files- config.json +50 -0
- pytorch_model-00001-of-00023.bin +3 -0
- pytorch_model-00002-of-00023.bin +3 -0
- pytorch_model-00003-of-00023.bin +3 -0
- pytorch_model-00004-of-00023.bin +3 -0
- pytorch_model-00005-of-00023.bin +3 -0
- pytorch_model-00006-of-00023.bin +3 -0
- pytorch_model-00007-of-00023.bin +3 -0
- pytorch_model-00008-of-00023.bin +3 -0
- pytorch_model-00009-of-00023.bin +3 -0
- pytorch_model-00010-of-00023.bin +3 -0
- pytorch_model-00011-of-00023.bin +3 -0
- pytorch_model-00012-of-00023.bin +3 -0
- pytorch_model-00013-of-00023.bin +3 -0
- pytorch_model-00014-of-00023.bin +3 -0
- pytorch_model-00015-of-00023.bin +3 -0
- pytorch_model-00016-of-00023.bin +3 -0
- pytorch_model-00017-of-00023.bin +3 -0
- pytorch_model-00018-of-00023.bin +3 -0
- pytorch_model-00019-of-00023.bin +3 -0
- pytorch_model-00020-of-00023.bin +3 -0
- pytorch_model-00021-of-00023.bin +3 -0
- pytorch_model-00022-of-00023.bin +3 -0
- pytorch_model-00023-of-00023.bin +3 -0
- pytorch_model.bin.index.json +0 -0
config.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "/home/arthur_huggingface_co/fairseq/weights/checkpoints/hf-converted-moe-54b",
|
| 3 |
+
"activation_dropout": 0.0,
|
| 4 |
+
"activation_function": "relu",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"NllbMoeModel"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.1,
|
| 9 |
+
"batch_prioritized_routing": false,
|
| 10 |
+
"bos_token_id": 0,
|
| 11 |
+
"d_model": 2048,
|
| 12 |
+
"decoder_attention_heads": 16,
|
| 13 |
+
"decoder_ffn_dim": 8192,
|
| 14 |
+
"decoder_layerdrop": 0,
|
| 15 |
+
"decoder_layers": 24,
|
| 16 |
+
"decoder_sparse_step": 4,
|
| 17 |
+
"decoder_start_token_id": 2,
|
| 18 |
+
"dropout": 0.1,
|
| 19 |
+
"encoder_attention_heads": 16,
|
| 20 |
+
"encoder_ffn_dim": 8192,
|
| 21 |
+
"encoder_layerdrop": 0,
|
| 22 |
+
"encoder_layers": 24,
|
| 23 |
+
"encoder_sparse_step": 4,
|
| 24 |
+
"eos_token_id": 2,
|
| 25 |
+
"expert_capacity": 64,
|
| 26 |
+
"init_std": 0.02,
|
| 27 |
+
"is_encoder_decoder": true,
|
| 28 |
+
"max_length": 200,
|
| 29 |
+
"max_position_embeddings": 1024,
|
| 30 |
+
"model_type": "nllb_moe",
|
| 31 |
+
"moe_eval_capacity_token_fraction": 1.0,
|
| 32 |
+
"moe_token_dropout": 0.2,
|
| 33 |
+
"normalize_router_prob_before_dropping": false,
|
| 34 |
+
"num_experts": 128,
|
| 35 |
+
"num_hidden_layers": 24,
|
| 36 |
+
"pad_token_id": 1,
|
| 37 |
+
"router_aux_loss_coef": 0.001,
|
| 38 |
+
"router_bias": false,
|
| 39 |
+
"router_dtype": "float32",
|
| 40 |
+
"router_ignore_padding_tokens": false,
|
| 41 |
+
"router_jitter_noise": 0.01,
|
| 42 |
+
"router_type": "tokens_masked",
|
| 43 |
+
"router_z_loss_coef": 0.001,
|
| 44 |
+
"scale_embedding": true,
|
| 45 |
+
"second_expert_policy": "all",
|
| 46 |
+
"torch_dtype": "float32",
|
| 47 |
+
"transformers_version": "4.27.0.dev0",
|
| 48 |
+
"use_cache": true,
|
| 49 |
+
"vocab_size": 256206
|
| 50 |
+
}
|
pytorch_model-00001-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9a01081279f5270e50b66114b359399d178d3f70c807068a42e728bc9b09ac6
|
| 3 |
+
size 7881650579
|
pytorch_model-00002-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c69aedd9d34c776f077979cf65d6d7c982a4598151646667bd45901532ff0cc8
|
| 3 |
+
size 9935240893
|
pytorch_model-00003-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e87b61b1b17440b137bc5e47105fd13cb3a7f21ee2dea186e492f707b9e8b794
|
| 3 |
+
size 9936482623
|
pytorch_model-00004-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7f5b7b360f8bed14ebf5c3a7c0e01b568af5059b28e4ec306386272fe8eedf9
|
| 3 |
+
size 9935240957
|
pytorch_model-00005-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c904df100b9928282769b9360733783bbeabf5600a122016ca6c4791ae7652ff
|
| 3 |
+
size 9936482987
|
pytorch_model-00006-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1958a842daacff1ce3e1da3bc05a6ec28f06741a14190b1df76c4ac71116bf98
|
| 3 |
+
size 9936483287
|
pytorch_model-00007-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4352bba732c871dc5356dbb4dcdc9de6cd81a1c4b7b3563913b56ebc648d12e0
|
| 3 |
+
size 9935241149
|
pytorch_model-00008-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:57959c787bd138a2c69b337d577ca3a28cb377a2aa86d8bf1f2d8ecfe14a04f7
|
| 3 |
+
size 9936482743
|
pytorch_model-00009-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c2b96fb7b580fc3933e3abe60946a5ff17fc970faf16f2b38373664159bf911
|
| 3 |
+
size 9935241149
|
pytorch_model-00010-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f31ddd24c833696b5600cc8b135dd62a2bcca0090dbc28062391e697aa604548
|
| 3 |
+
size 9936482743
|
pytorch_model-00011-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e463aa0ea26d4d957a99c0e7e831eb663ae59746360a2f122a10989fe448153
|
| 3 |
+
size 9935241213
|
pytorch_model-00012-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eaab08a291e020ce948fbb5bd2b509eda3c2bbf865306ce5708dbf5b1afc1445
|
| 3 |
+
size 9962851615
|
pytorch_model-00013-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6620b4464ca0fd9b59f9bd43679ddea18992a9da01eaf13b9a6fed2b203f90ef
|
| 3 |
+
size 9935240893
|
pytorch_model-00014-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8889365f817ff4da86c77c199af36c3bf64695e1228bb5043f644717cb55bc72
|
| 3 |
+
size 9936607643
|
pytorch_model-00015-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fef1ccdbf6c00eb37cd989d4aca529d3fbff16d30f6dec8e93773fd633ba886
|
| 3 |
+
size 9935240957
|
pytorch_model-00016-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fc0245e4bd3e43ebd01d0c6d79b4faa7b88834cdcf2cbb09ebeb430bde2c8bf
|
| 3 |
+
size 9936608295
|
pytorch_model-00017-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d33268527cfc42cfeb888820f8c75c0d48b76dee487b30ab5364a4d95974e786
|
| 3 |
+
size 9936608727
|
pytorch_model-00018-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b1f46e82e2fbb655dc468fc37b0b1fbf4673e76295f9cf33c48dad828545aca
|
| 3 |
+
size 9935241149
|
pytorch_model-00019-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c7a510a5910470e794fd30e3a91282971f5fbc91bf607ea816c4589e8666932
|
| 3 |
+
size 9936608567
|
pytorch_model-00020-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f7a1ac1eaf762ba49cad65572bdc671597334df3ae5e9d553313dd6b80b083d
|
| 3 |
+
size 9935241149
|
pytorch_model-00021-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afd3ce3b9671096c70c59cdace5eefd70b782ec42f951c44de9bf21d835197d7
|
| 3 |
+
size 9936607787
|
pytorch_model-00022-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eff18afcbd5879cd650d82ee5d2bd391654aaf6d7e8444792312ead42d5557eb
|
| 3 |
+
size 9935241149
|
pytorch_model-00023-of-00023.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:808cebcbf4b6ae4cee04ab20f669a3e2cf4e95faafd3c5116385d961d372f596
|
| 3 |
+
size 3557910629
|
pytorch_model.bin.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|