Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +52 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json +77 -0
- neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json +77 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.done +0 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo +3 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json +1 -0
- neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.done +0 -0
.gitattributes
CHANGED
@@ -2374,3 +2374,55 @@ neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.neff fi
|
|
2374 |
neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2375 |
neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2376 |
neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2374 |
neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2375 |
neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2376 |
neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2377 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2378 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2379 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2380 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2381 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2382 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2383 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2384 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2385 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2386 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2387 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2388 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2389 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2390 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2391 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2392 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2393 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2394 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2395 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2396 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2397 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2398 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2399 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2400 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2401 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2402 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2403 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2404 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2405 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2406 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
|
2407 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2408 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
|
2409 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2410 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
|
2411 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2412 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2413 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2414 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
2415 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2416 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2417 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2418 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
|
2419 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2420 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
|
2421 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2422 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2423 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text
|
2424 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
|
2425 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2426 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text
|
2427 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
2428 |
+
neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 1,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 1,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 16,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 16,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 64,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": true,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 64,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 4,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": true,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 4,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 8,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 8,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 16,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": true,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 16,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 4,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": false,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 4,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 8,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": true,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 8,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "meta-llama/Meta-Llama-3.1-8B",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"LlamaForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": false,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 4096,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 14336,
|
15 |
+
"max_position_embeddings": 131072,
|
16 |
+
"mlp_bias": false,
|
17 |
+
"model_type": "llama",
|
18 |
+
"neuron": {
|
19 |
+
"_serialized_key": "NxDNeuronConfig",
|
20 |
+
"async_mode": false,
|
21 |
+
"attn_kernel_enabled": false,
|
22 |
+
"batch_size": 32,
|
23 |
+
"capacity_factor": null,
|
24 |
+
"cc_pipeline_tiling_factor": 2,
|
25 |
+
"checkpoint_id": "meta-llama/Meta-Llama-3.1-8B",
|
26 |
+
"checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b",
|
27 |
+
"continuous_batching": true,
|
28 |
+
"enable_bucketing": false,
|
29 |
+
"ep_degree": 1,
|
30 |
+
"flash_decoding_enabled": false,
|
31 |
+
"fused_qkv": true,
|
32 |
+
"glu_mlp": true,
|
33 |
+
"is_chunked_prefill": false,
|
34 |
+
"local_ranks_size": 8,
|
35 |
+
"logical_nc_config": 1,
|
36 |
+
"max_batch_size": 32,
|
37 |
+
"max_context_length": 4096,
|
38 |
+
"max_topk": 256,
|
39 |
+
"mlp_kernel_enabled": false,
|
40 |
+
"mlp_kernel_fuse_residual_add": false,
|
41 |
+
"n_active_tokens": 4096,
|
42 |
+
"neuronxcc_version": "2.17.194.0+d312836f",
|
43 |
+
"num_cores_per_group": 1,
|
44 |
+
"on_device_sampling": true,
|
45 |
+
"optimum_neuron_version": "0.2.0.dev5",
|
46 |
+
"output_logits": false,
|
47 |
+
"padding_side": "right",
|
48 |
+
"pp_degree": 1,
|
49 |
+
"qk_layernorm": false,
|
50 |
+
"qkv_kernel_enabled": false,
|
51 |
+
"rpl_reduce_dtype": "bfloat16",
|
52 |
+
"sequence_length": 4096,
|
53 |
+
"sequence_parallel_enabled": false,
|
54 |
+
"speculation_length": 0,
|
55 |
+
"start_rank_id": 0,
|
56 |
+
"target": null,
|
57 |
+
"torch_dtype": "bfloat16",
|
58 |
+
"tp_degree": 8,
|
59 |
+
"vocab_parallel": false
|
60 |
+
},
|
61 |
+
"num_attention_heads": 32,
|
62 |
+
"num_hidden_layers": 32,
|
63 |
+
"num_key_value_heads": 8,
|
64 |
+
"pretraining_tp": 1,
|
65 |
+
"rms_norm_eps": 1e-05,
|
66 |
+
"rope_scaling": {
|
67 |
+
"factor": 8.0,
|
68 |
+
"high_freq_factor": 4.0,
|
69 |
+
"low_freq_factor": 1.0,
|
70 |
+
"original_max_position_embeddings": 8192,
|
71 |
+
"rope_type": "llama3"
|
72 |
+
},
|
73 |
+
"rope_theta": 500000.0,
|
74 |
+
"tie_word_embeddings": false,
|
75 |
+
"use_cache": true,
|
76 |
+
"vocab_size": 128256
|
77 |
+
}
|
neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec7544aa0228f9807878b3593b3d9365ff45083234a699e7a6bf47b13bafd78d
|
3 |
+
size 136713
|
neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e37775bcf0f850099391bfb1e72da4cd5ceae7bfc2e9165983bf8ffd016057a2
|
3 |
+
size 2223104
|
neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
|
neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:918100fe462b8b8d4a67a7edf8bc9148a2c0d891ab575554f4055604f3838f10
|
3 |
+
size 777673
|
neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4196cbad5f93409e7072d3f84c49467f52bf5c8401cdc6044319f4d999e3ee1
|
3 |
+
size 2724864
|
neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cc744c22e9c0e5d585da942928d2aed8853154a05d0dd0546c211df5c3b7e74
|
3 |
+
size 2862808
|
neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7c0f4f84c58d9d114a5eaaa32456932c142d3aa585d6196c105de1c1d210d188
|
3 |
+
size 136009
|
neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:943fc968a87a68bb3041bc5845f1465523a85d1a8d40dc14703de0ca92ece72f
|
3 |
+
size 2202624
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb9ce39a84c7039460a99091ac81bc3cae059fa3e84d41889117abb0046577ca
|
3 |
+
size 136016
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:982dbdbe9769b168a9badbde0a61aa4248a9d8e90f9cfd3bb49fadc04bd59929
|
3 |
+
size 2202624
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d306ef1a91c6392e7b30deb0b89e40d577ce203876c1cd05ebf43e75ea116d00
|
3 |
+
size 920277
|
neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:998cf0bdf7be0346d1d879365761c2b59e43e60f7d259d938d7da977733d9953
|
3 |
+
size 32646144
|
neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f3824fa2b74064f4a61d73062b0f353faa3ac1e01342ca30b3854cf84258163
|
3 |
+
size 851466
|
neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0bd57a925819ece1b01115e8151a897c5223e0f9ce66671e4a3662e1646729e5
|
3 |
+
size 36301824
|
neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dc5f8d4de9d781f0419a893cec6dc3bc62ac2040ce88b930d2931ab638a4e78
|
3 |
+
size 920277
|
neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e80fb0c317021aaef844fef7b99e44c023249d59c7425e85fb92ca347198d7bf
|
3 |
+
size 32646144
|
neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"
|
neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00b1571bed76d1fcca273b4f1da1762454696fd38aee799df079f2aa9696cc10
|
3 |
+
size 850865
|
neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7bf03400cd6f77525afba5bbc885e5d0c316eca3ac5f44f8713809e32a49337
|
3 |
+
size 32277504
|
neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
|
neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.done
ADDED
File without changes
|
neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af5da061dda32ca95842d9626ad2065911721bbc14196b4937d3705edde9b64c
|
3 |
+
size 774401
|
neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82431da774c7a22311535f345020110bc1bdc5b5bc60a48cc6cffdce209385b0
|
3 |
+
size 3789824
|
neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b48a951a0fcd160dc01ce3c77887bdcba244ae3e72914001a3c9853835d7d414
|
3 |
+
size 3927641
|
neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
|
neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.done
ADDED
File without changes
|