Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +44 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json +79 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json +55 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json +79 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json +79 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json +79 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json +79 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json +80 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json +75 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/wrapped_neff.hlo +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/wrapped_neff.hlo +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.hlo_module.pb +3 -0
.gitattributes
CHANGED
@@ -2255,3 +2255,47 @@ neuronxcc-2.17.194.0+d312836f/MODULE_892a0bb27ce39228be75+613edded/model.neff fi
|
|
2255 |
neuronxcc-2.17.194.0+d312836f/MODULE_c24af7fcf05443daf3b7+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
|
2256 |
neuronxcc-2.17.194.0+d312836f/MODULE_e3aea5d1517d9896fd33+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
|
2257 |
neuronxcc-2.17.194.0+d312836f/MODULE_fd6170cedb4fe53c8433+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2255 |
neuronxcc-2.17.194.0+d312836f/MODULE_c24af7fcf05443daf3b7+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
|
2256 |
neuronxcc-2.17.194.0+d312836f/MODULE_e3aea5d1517d9896fd33+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
|
2257 |
neuronxcc-2.17.194.0+d312836f/MODULE_fd6170cedb4fe53c8433+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
|
2258 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2259 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2260 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2261 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2262 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2263 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2264 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2265 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2266 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2267 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2268 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2269 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2270 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2271 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_501e832e2a453d315f02+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2272 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_584fbc6f07cc7a3a1ba0+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2273 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_5ced139eb4f9413aa8e0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2274 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2275 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_6a2c4b6116eb07aa27e9+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2276 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_7434c6f37c47044f71f8+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2277 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2278 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_7acb3db8b2fb8dbb1bef+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2279 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_7c383e90d7a81031bcc3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2280 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_87bbc2837c7a34ac7e7d+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2281 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_8cdf0acdee318d4bdf69+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2282 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_8d605ae48e1a3bd443e9+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2283 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2284 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_98af2fdc49cb9249ea3d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2285 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2286 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_9f44958d7c5b8c540952+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2287 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a115386b6164b70d349a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2288 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2289 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a2880de6f3cd7a029740+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2290 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a4ecaba50ace94d96a23+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2291 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2292 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a5b7d8e60c4755ae19be+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2293 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a9ce1b283ebf2388667c+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2294 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a9db44a28b85c5f0cea0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2295 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_b36d95e43385dd760fc5+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2296 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2297 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_bfb1d23ae5ee11d85871+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2298 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_cbaae69f32a2fe71d1d9+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2299 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2300 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2301 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/700f3d6831b945b35649.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 8,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
26 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
27 |
+
"ctx_batch_size": 8,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"is_continuous_batching": false,
|
35 |
+
"local_ranks_size": 8,
|
36 |
+
"logical_nc_config": 1,
|
37 |
+
"max_batch_size": 8,
|
38 |
+
"max_context_length": 4096,
|
39 |
+
"max_topk": 256,
|
40 |
+
"mlp_kernel_enabled": false,
|
41 |
+
"mlp_kernel_fuse_residual_add": false,
|
42 |
+
"n_active_tokens": 4096,
|
43 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
44 |
+
"num_cores_per_group": 1,
|
45 |
+
"on_device_sampling": true,
|
46 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
47 |
+
"output_logits": false,
|
48 |
+
"padding_side": "right",
|
49 |
+
"pp_degree": 1,
|
50 |
+
"qk_layernorm": false,
|
51 |
+
"qkv_kernel_enabled": false,
|
52 |
+
"rpl_reduce_dtype": "bfloat16",
|
53 |
+
"sequence_length": 4096,
|
54 |
+
"sequence_parallel_enabled": false,
|
55 |
+
"speculation_length": 0,
|
56 |
+
"start_rank_id": 0,
|
57 |
+
"target": null,
|
58 |
+
"tkg_batch_size": 8,
|
59 |
+
"torch_dtype": "bfloat16",
|
60 |
+
"tp_degree": 8,
|
61 |
+
"vocab_parallel": false
|
62 |
+
},
|
63 |
+
"num_attention_heads": 32,
|
64 |
+
"num_hidden_layers": 32,
|
65 |
+
"num_key_value_heads": 8,
|
66 |
+
"pretraining_tp": 1,
|
67 |
+
"rms_norm_eps": 1e-05,
|
68 |
+
"rope_scaling": {
|
69 |
+
"factor": 8.0,
|
70 |
+
"high_freq_factor": 4.0,
|
71 |
+
"low_freq_factor": 1.0,
|
72 |
+
"original_max_position_embeddings": 8192,
|
73 |
+
"rope_type": "llama3"
|
74 |
+
},
|
75 |
+
"rope_theta": 500000.0,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 128256
|
79 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Llama-3.1-8B-Instruct/b095f4e1a8142588f557.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "HloNeuronConfig",
|
20 |
+
"all_reduce_dtype": null,
|
21 |
+
"allow_flash_attention": true,
|
22 |
+
"attention_layout": "BSH",
|
23 |
+
"attn_output_transposed": false,
|
24 |
+
"auto_cast_type": "bf16",
|
25 |
+
"batch_size": 8,
|
26 |
+
"checkpoint_id": "meta-llama/Llama-3.1-8B-Instruct",
|
27 |
+
"checkpoint_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
|
28 |
+
"collectives_layout": "HSB",
|
29 |
+
"continuous_batching": true,
|
30 |
+
"fuse_qkv": true,
|
31 |
+
"group_query_attention": "shard-over-heads",
|
32 |
+
"log_softmax_scores": false,
|
33 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
34 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
35 |
+
"output_all_logits": false,
|
36 |
+
"sequence_length": 4096,
|
37 |
+
"tp_degree": 8
|
38 |
+
},
|
39 |
+
"num_attention_heads": 32,
|
40 |
+
"num_hidden_layers": 32,
|
41 |
+
"num_key_value_heads": 8,
|
42 |
+
"pretraining_tp": 1,
|
43 |
+
"rms_norm_eps": 1e-05,
|
44 |
+
"rope_scaling": {
|
45 |
+
"factor": 8.0,
|
46 |
+
"high_freq_factor": 4.0,
|
47 |
+
"low_freq_factor": 1.0,
|
48 |
+
"original_max_position_embeddings": 8192,
|
49 |
+
"rope_type": "llama3"
|
50 |
+
},
|
51 |
+
"rope_theta": 500000.0,
|
52 |
+
"tie_word_embeddings": false,
|
53 |
+
"use_cache": true,
|
54 |
+
"vocab_size": 128256
|
55 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/4c45a685188be76510ca.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 8,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"ctx_batch_size": 8,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": false,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"is_continuous_batching": false,
|
35 |
+
"local_ranks_size": 8,
|
36 |
+
"logical_nc_config": 1,
|
37 |
+
"max_batch_size": 8,
|
38 |
+
"max_context_length": 4096,
|
39 |
+
"max_topk": 256,
|
40 |
+
"mlp_kernel_enabled": false,
|
41 |
+
"mlp_kernel_fuse_residual_add": false,
|
42 |
+
"n_active_tokens": 4096,
|
43 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
44 |
+
"num_cores_per_group": 1,
|
45 |
+
"on_device_sampling": false,
|
46 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
47 |
+
"output_logits": false,
|
48 |
+
"padding_side": "right",
|
49 |
+
"pp_degree": 1,
|
50 |
+
"qk_layernorm": false,
|
51 |
+
"qkv_kernel_enabled": false,
|
52 |
+
"rpl_reduce_dtype": "bfloat16",
|
53 |
+
"sequence_length": 4096,
|
54 |
+
"sequence_parallel_enabled": false,
|
55 |
+
"speculation_length": 0,
|
56 |
+
"start_rank_id": 0,
|
57 |
+
"target": null,
|
58 |
+
"tkg_batch_size": 8,
|
59 |
+
"torch_dtype": "bfloat16",
|
60 |
+
"tp_degree": 8,
|
61 |
+
"vocab_parallel": false
|
62 |
+
},
|
63 |
+
"num_attention_heads": 32,
|
64 |
+
"num_hidden_layers": 32,
|
65 |
+
"num_key_value_heads": 8,
|
66 |
+
"pretraining_tp": 1,
|
67 |
+
"rms_norm_eps": 1e-05,
|
68 |
+
"rope_scaling": {
|
69 |
+
"factor": 8.0,
|
70 |
+
"high_freq_factor": 4.0,
|
71 |
+
"low_freq_factor": 1.0,
|
72 |
+
"original_max_position_embeddings": 8192,
|
73 |
+
"rope_type": "llama3"
|
74 |
+
},
|
75 |
+
"rope_theta": 500000.0,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 128256
|
79 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/5719e2431a03a3a11a76.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 8,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"ctx_batch_size": 8,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"is_continuous_batching": false,
|
35 |
+
"local_ranks_size": 8,
|
36 |
+
"logical_nc_config": 1,
|
37 |
+
"max_batch_size": 8,
|
38 |
+
"max_context_length": 4096,
|
39 |
+
"max_topk": 256,
|
40 |
+
"mlp_kernel_enabled": false,
|
41 |
+
"mlp_kernel_fuse_residual_add": false,
|
42 |
+
"n_active_tokens": 4096,
|
43 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
44 |
+
"num_cores_per_group": 1,
|
45 |
+
"on_device_sampling": true,
|
46 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
47 |
+
"output_logits": false,
|
48 |
+
"padding_side": "right",
|
49 |
+
"pp_degree": 1,
|
50 |
+
"qk_layernorm": false,
|
51 |
+
"qkv_kernel_enabled": false,
|
52 |
+
"rpl_reduce_dtype": "bfloat16",
|
53 |
+
"sequence_length": 4096,
|
54 |
+
"sequence_parallel_enabled": false,
|
55 |
+
"speculation_length": 0,
|
56 |
+
"start_rank_id": 0,
|
57 |
+
"target": null,
|
58 |
+
"tkg_batch_size": 8,
|
59 |
+
"torch_dtype": "bfloat16",
|
60 |
+
"tp_degree": 8,
|
61 |
+
"vocab_parallel": false
|
62 |
+
},
|
63 |
+
"num_attention_heads": 32,
|
64 |
+
"num_hidden_layers": 32,
|
65 |
+
"num_key_value_heads": 8,
|
66 |
+
"pretraining_tp": 1,
|
67 |
+
"rms_norm_eps": 1e-05,
|
68 |
+
"rope_scaling": {
|
69 |
+
"factor": 8.0,
|
70 |
+
"high_freq_factor": 4.0,
|
71 |
+
"low_freq_factor": 1.0,
|
72 |
+
"original_max_position_embeddings": 8192,
|
73 |
+
"rope_type": "llama3"
|
74 |
+
},
|
75 |
+
"rope_theta": 500000.0,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 128256
|
79 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/6ced15a046147a7195f4.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 8,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"ctx_batch_size": 8,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": false,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"is_continuous_batching": false,
|
35 |
+
"local_ranks_size": 8,
|
36 |
+
"logical_nc_config": 1,
|
37 |
+
"max_batch_size": 8,
|
38 |
+
"max_context_length": 4096,
|
39 |
+
"max_topk": 256,
|
40 |
+
"mlp_kernel_enabled": false,
|
41 |
+
"mlp_kernel_fuse_residual_add": false,
|
42 |
+
"n_active_tokens": 4096,
|
43 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
44 |
+
"num_cores_per_group": 1,
|
45 |
+
"on_device_sampling": false,
|
46 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
47 |
+
"output_logits": false,
|
48 |
+
"padding_side": "right",
|
49 |
+
"pp_degree": 1,
|
50 |
+
"qk_layernorm": false,
|
51 |
+
"qkv_kernel_enabled": false,
|
52 |
+
"rpl_reduce_dtype": "bfloat16",
|
53 |
+
"sequence_length": 4096,
|
54 |
+
"sequence_parallel_enabled": false,
|
55 |
+
"speculation_length": 0,
|
56 |
+
"start_rank_id": 0,
|
57 |
+
"target": null,
|
58 |
+
"tkg_batch_size": 8,
|
59 |
+
"torch_dtype": "bfloat16",
|
60 |
+
"tp_degree": 8,
|
61 |
+
"vocab_parallel": true
|
62 |
+
},
|
63 |
+
"num_attention_heads": 32,
|
64 |
+
"num_hidden_layers": 32,
|
65 |
+
"num_key_value_heads": 8,
|
66 |
+
"pretraining_tp": 1,
|
67 |
+
"rms_norm_eps": 1e-05,
|
68 |
+
"rope_scaling": {
|
69 |
+
"factor": 8.0,
|
70 |
+
"high_freq_factor": 4.0,
|
71 |
+
"low_freq_factor": 1.0,
|
72 |
+
"original_max_position_embeddings": 8192,
|
73 |
+
"rope_type": "llama3"
|
74 |
+
},
|
75 |
+
"rope_theta": 500000.0,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 128256
|
79 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/meta-llama/Meta-Llama-3.1-8B/cdcd648610c19bcc53eb.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": true,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 8,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"ctx_batch_size": 8,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": false,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"is_continuous_batching": false,
|
35 |
+
"local_ranks_size": 8,
|
36 |
+
"logical_nc_config": 1,
|
37 |
+
"max_batch_size": 8,
|
38 |
+
"max_context_length": 4096,
|
39 |
+
"max_topk": 256,
|
40 |
+
"mlp_kernel_enabled": false,
|
41 |
+
"mlp_kernel_fuse_residual_add": false,
|
42 |
+
"n_active_tokens": 4096,
|
43 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
44 |
+
"num_cores_per_group": 1,
|
45 |
+
"on_device_sampling": false,
|
46 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
47 |
+
"output_logits": false,
|
48 |
+
"padding_side": "right",
|
49 |
+
"pp_degree": 1,
|
50 |
+
"qk_layernorm": false,
|
51 |
+
"qkv_kernel_enabled": false,
|
52 |
+
"rpl_reduce_dtype": "bfloat16",
|
53 |
+
"sequence_length": 4096,
|
54 |
+
"sequence_parallel_enabled": false,
|
55 |
+
"speculation_length": 0,
|
56 |
+
"start_rank_id": 0,
|
57 |
+
"target": null,
|
58 |
+
"tkg_batch_size": 8,
|
59 |
+
"torch_dtype": "bfloat16",
|
60 |
+
"tp_degree": 8,
|
61 |
+
"vocab_parallel": false
|
62 |
+
},
|
63 |
+
"num_attention_heads": 32,
|
64 |
+
"num_hidden_layers": 32,
|
65 |
+
"num_key_value_heads": 8,
|
66 |
+
"pretraining_tp": 1,
|
67 |
+
"rms_norm_eps": 1e-05,
|
68 |
+
"rope_scaling": {
|
69 |
+
"factor": 8.0,
|
70 |
+
"high_freq_factor": 4.0,
|
71 |
+
"low_freq_factor": 1.0,
|
72 |
+
"original_max_position_embeddings": 8192,
|
73 |
+
"rope_type": "llama3"
|
74 |
+
},
|
75 |
+
"rope_theta": 500000.0,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 128256
|
79 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a1454e46779410eda936.json
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 64,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 2048,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 8192,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 4,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
26 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
27 |
+
"ctx_batch_size": 4,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"is_continuous_batching": false,
|
35 |
+
"local_ranks_size": 2,
|
36 |
+
"logical_nc_config": 1,
|
37 |
+
"max_batch_size": 4,
|
38 |
+
"max_context_length": 4096,
|
39 |
+
"max_topk": 256,
|
40 |
+
"mlp_kernel_enabled": false,
|
41 |
+
"mlp_kernel_fuse_residual_add": false,
|
42 |
+
"n_active_tokens": 4096,
|
43 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
44 |
+
"num_cores_per_group": 1,
|
45 |
+
"on_device_sampling": true,
|
46 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
47 |
+
"output_logits": false,
|
48 |
+
"padding_side": "right",
|
49 |
+
"pp_degree": 1,
|
50 |
+
"qk_layernorm": false,
|
51 |
+
"qkv_kernel_enabled": false,
|
52 |
+
"rpl_reduce_dtype": "float16",
|
53 |
+
"sequence_length": 4096,
|
54 |
+
"sequence_parallel_enabled": false,
|
55 |
+
"speculation_length": 0,
|
56 |
+
"start_rank_id": 0,
|
57 |
+
"target": null,
|
58 |
+
"tkg_batch_size": 4,
|
59 |
+
"torch_dtype": "float16",
|
60 |
+
"tp_degree": 2,
|
61 |
+
"vocab_parallel": false
|
62 |
+
},
|
63 |
+
"num_attention_heads": 32,
|
64 |
+
"num_hidden_layers": 16,
|
65 |
+
"num_key_value_heads": 8,
|
66 |
+
"pretraining_tp": 1,
|
67 |
+
"rms_norm_eps": 1e-05,
|
68 |
+
"rope_scaling": {
|
69 |
+
"factor": 32.0,
|
70 |
+
"high_freq_factor": 4.0,
|
71 |
+
"low_freq_factor": 1.0,
|
72 |
+
"original_max_position_embeddings": 8192,
|
73 |
+
"rope_type": "llama3"
|
74 |
+
},
|
75 |
+
"rope_theta": 500000.0,
|
76 |
+
"tie_word_embeddings": true,
|
77 |
+
"unsloth_fixed": true,
|
78 |
+
"use_cache": true,
|
79 |
+
"vocab_size": 128256
|
80 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/mixtral/dacorvo/Mixtral-tiny/3bd58a8f29b6ca08f6a0.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"MixtralForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.0,
|
9 |
+
"head_dim": 32,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 1024,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3584,
|
14 |
+
"max_position_embeddings": 1024,
|
15 |
+
"model_type": "mixtral",
|
16 |
+
"neuron": {
|
17 |
+
"_serialized_key": "NxDNeuronConfig",
|
18 |
+
"async_mode": false,
|
19 |
+
"attn_kernel_enabled": false,
|
20 |
+
"batch_size": 2,
|
21 |
+
"capacity_factor": null,
|
22 |
+
"cc_pipeline_tiling_factor": 2,
|
23 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
24 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
25 |
+
"ctx_batch_size": 2,
|
26 |
+
"enable_bucketing": false,
|
27 |
+
"ep_degree": 1,
|
28 |
+
"flash_decoding_enabled": false,
|
29 |
+
"fused_qkv": false,
|
30 |
+
"glu_mlp": true,
|
31 |
+
"is_chunked_prefill": false,
|
32 |
+
"is_continuous_batching": false,
|
33 |
+
"local_ranks_size": 2,
|
34 |
+
"logical_nc_config": 1,
|
35 |
+
"max_batch_size": 2,
|
36 |
+
"max_context_length": 100,
|
37 |
+
"max_topk": 256,
|
38 |
+
"mlp_kernel_enabled": false,
|
39 |
+
"mlp_kernel_fuse_residual_add": false,
|
40 |
+
"n_active_tokens": 100,
|
41 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
42 |
+
"num_cores_per_group": 1,
|
43 |
+
"on_device_sampling": false,
|
44 |
+
"optimum_neuron_version": "0.2.0.dev4",
|
45 |
+
"output_logits": false,
|
46 |
+
"padding_side": "right",
|
47 |
+
"pp_degree": 1,
|
48 |
+
"qk_layernorm": false,
|
49 |
+
"qkv_kernel_enabled": false,
|
50 |
+
"rpl_reduce_dtype": "float16",
|
51 |
+
"sequence_length": 100,
|
52 |
+
"sequence_parallel_enabled": false,
|
53 |
+
"speculation_length": 0,
|
54 |
+
"start_rank_id": 0,
|
55 |
+
"target": null,
|
56 |
+
"tkg_batch_size": 2,
|
57 |
+
"torch_dtype": "float16",
|
58 |
+
"tp_degree": 2,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_experts_per_tok": 2,
|
63 |
+
"num_hidden_layers": 2,
|
64 |
+
"num_key_value_heads": 8,
|
65 |
+
"num_local_experts": 8,
|
66 |
+
"output_router_logits": false,
|
67 |
+
"rms_norm_eps": 1e-05,
|
68 |
+
"rope_theta": 10000.0,
|
69 |
+
"router_aux_loss_coef": 0.001,
|
70 |
+
"router_jitter_noise": 0.0,
|
71 |
+
"sliding_window": 4096,
|
72 |
+
"tie_word_embeddings": false,
|
73 |
+
"use_cache": true,
|
74 |
+
"vocab_size": 32000
|
75 |
+
}
|
neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec824a349a9be66053dc28c493bf9e9137c78974212a6e5c05ecdc540083c91e
|
3 |
+
size 809203
|
neuronxcc-2.17.194.0+d312836f/MODULE_0ffcb646a5c3ca8902dc+793f1a96/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7987956410eaaa209e746c31cd9861bc6cfcbac9fb8b0107b4bb52d2ef2515f5
|
3 |
+
size 18514944
|
neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:839ccdee4866452ddb56084de8feae041b2afc80b0026cc0b5f079e35c43971e
|
3 |
+
size 174185
|
neuronxcc-2.17.194.0+d312836f/MODULE_16575d1d23477e66f47c+431f5505/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:193826417c076529fed8c4be4bb3348e2b5407282ba499183eac96b4033101ed
|
3 |
+
size 2233344
|
neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
|
neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05c47bae0e908c0b46bcbf5d427777f5184d87f7b0b3911ead3e32f730dda4e9
|
3 |
+
size 732777
|
neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec2f4ea9e378d40c0cb9e897d365548c76723134d6d765dc0ff96b16957317e9
|
3 |
+
size 3073024
|
neuronxcc-2.17.194.0+d312836f/MODULE_18a02439fa5be899e4e2+7e4da68b/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6e7608b51aad13aab7c9fbf121a2fd9cc275a8f821b0e2f4e18dad517b718da
|
3 |
+
size 3210726
|
neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72b5eb538c86713678f7331dfef2d664995ca12b6f961df905762b4e1c8dc40b
|
3 |
+
size 837628
|
neuronxcc-2.17.194.0+d312836f/MODULE_19677291845d5f9e90e8+793f1a96/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:779e99040a4d1ebc1839ab07114736075bcd27537d80bb820b16b6496aeb585d
|
3 |
+
size 18576384
|
neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
|
neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14c4553fa5b171984dc9319965554cf951a5205d008d3fc52c52d623c34f8b2b
|
3 |
+
size 748501
|
neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87369186b26f9b78b867f8626208080532c4ad7119f0e241668ccb0499d2c7dc
|
3 |
+
size 3124224
|
neuronxcc-2.17.194.0+d312836f/MODULE_1eeab200d3cb011df87f+7e4da68b/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82dd294c24f308caaba609e59919c3794620372a8d33a0c4338225a45fa503d0
|
3 |
+
size 3262041
|
neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12e2aa950891cbdc52179ffcbc1b0981a9d662927b7dc5d80b69616106ce9dd3
|
3 |
+
size 825431
|
neuronxcc-2.17.194.0+d312836f/MODULE_242528f2fa438b512724+793f1a96/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00b8da5fc53d757bac81937a69fcb5412a744f0482aa6ad28d0e9044a3d1c589
|
3 |
+
size 18566144
|
neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3f1844371da7c38b01be9977b0670dd49843be84c622f59851b21396727bedf
|
3 |
+
size 174020
|
neuronxcc-2.17.194.0+d312836f/MODULE_2da0bb9b58becd460cc8+431f5505/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fee9b3694854ceef9f483bed5d3f89602b736ad6e640cc8327f7576df2055e42
|
3 |
+
size 2223104
|
neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
|
neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2274b46e91edd1ff4132b909cbc27238ad4dded829a97cba5a96278a5df018e4
|
3 |
+
size 46052
|
neuronxcc-2.17.194.0+d312836f/MODULE_3c8663c080fcf8ec7355+7e4da68b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05e9156cecf3a71b7230a44ab666a360b3302d6c5e1be3393f93c8ade4a4b4ca
|
3 |
+
size 164864
|
neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:695e975d41dbd4b048c6145861b36e618773e863e071e15a339ee3aad7ff0111
|
3 |
+
size 820388
|
neuronxcc-2.17.194.0+d312836f/MODULE_4226130b1ea4a246ad12+793f1a96/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7b55424923aafef8ee4915f80c2722336963c913835ab88bd150fdaa7f839e5
|
3 |
+
size 18525184
|
neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_436f478c6635f2715703+793f1a96/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8db430893cab774006da95e710d293521588dff3a3b6643508820c80821e88d6
|
3 |
+
size 39597
|