optimum-internal-testing-user commited on
Commit
24f284a
·
verified ·
1 Parent(s): 1ae721e

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +17 -0
  2. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3744fb0ec890ed0ee45b.json +51 -0
  3. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8ce05e932094a24524a2.json +51 -0
  4. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a2054036498cf7febe73.json +51 -0
  5. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/1db37f9b275c28f31a4c.json +55 -0
  6. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/454a28c3ade838a69bb6.json +55 -0
  7. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/534c886df9d760cd4ee5.json +55 -0
  8. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/dacorvo/Mixtral-tiny/42dbcbe3264236b62e21.json +73 -0
  9. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/dacorvo/Mixtral-tiny/62410cde55b49f22ca2f.json +73 -0
  10. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/dacorvo/Mixtral-tiny/a61f5b20450ca3689552.json +73 -0
  11. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/yujiepan/phi-4-tiny-random/1f86d9b323d4de2f798c.json +52 -0
  12. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/yujiepan/phi-4-tiny-random/cb63b47e3227ecbd6006.json +52 -0
  13. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/yujiepan/phi-4-tiny-random/e96c051b6b9b4c77a743.json +52 -0
  14. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/1f07aa27385a3bacc0ce.json +53 -0
  15. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/a8eda1338b750cadb3a6.json +53 -0
  16. neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/e14b78afc1fa5e13b44a.json +53 -0
  17. neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff +0 -0
  18. neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff +0 -0
  19. neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff +1 -1
  20. neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/compile_flags.json +1 -0
  21. neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.done +0 -0
  22. neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.hlo_module.pb +3 -0
  23. neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff +3 -0
  24. neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/compile_flags.json +1 -0
  25. neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.done +0 -0
  26. neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.hlo_module.pb +3 -0
  27. neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff +3 -0
  28. neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/compile_flags.json +1 -0
  29. neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.done +0 -0
  30. neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.hlo_module.pb +3 -0
  31. neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff +3 -0
  32. neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff +1 -1
  33. neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff +1 -1
  34. neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/compile_flags.json +1 -0
  35. neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.done +0 -0
  36. neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb +3 -0
  37. neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff +3 -0
  38. neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo +3 -0
  39. neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff +1 -1
  40. neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff +1 -1
  41. neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/compile_flags.json +1 -0
  42. neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.done +0 -0
  43. neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.hlo_module.pb +3 -0
  44. neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff +3 -0
  45. neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff +1 -1
  46. neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/compile_flags.json +1 -0
  47. neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.done +0 -0
  48. neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.hlo_module.pb +3 -0
  49. neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff +3 -0
  50. neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff +1 -1
.gitattributes CHANGED
@@ -2450,3 +2450,20 @@ neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.
2450
  neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2451
  neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2452
  neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2450
  neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text
2451
  neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text
2452
  neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2453
+ neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2454
+ neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
2455
+ neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
2456
+ neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
2457
+ neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2458
+ neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
2459
+ neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
2460
+ neuronxcc-2.17.194.0+d312836f/MODULE_51d9fed86504dfbff43c+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
2461
+ neuronxcc-2.17.194.0+d312836f/MODULE_8c063f8f288a908bf850+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
2462
+ neuronxcc-2.17.194.0+d312836f/MODULE_b811ebc7b9aa6e1eb84f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2463
+ neuronxcc-2.17.194.0+d312836f/MODULE_cd4240e56f3558bf8cf0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text
2464
+ neuronxcc-2.17.194.0+d312836f/MODULE_cf41a32ef696654dc19b+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
2465
+ neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
2466
+ neuronxcc-2.17.194.0+d312836f/MODULE_d06255807e916c398b05+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2467
+ neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text
2468
+ neuronxcc-2.17.194.0+d312836f/MODULE_e36f587c697c4d8df3f6+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
2469
+ neuronxcc-2.17.194.0+d312836f/MODULE_fda7fb53c475ba393ed7+613edded/model.neff filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3744fb0ec890ed0ee45b.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "bf16",
27
+ "batch_size": 1,
28
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
29
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": false,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": null,
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.2.0.dev5",
37
+ "output_all_logits": false,
38
+ "sequence_length": 100,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 4,
42
+ "num_hidden_layers": 2,
43
+ "num_key_value_heads": 4,
44
+ "residual_multiplier": 1.0,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 10000.0,
48
+ "tie_word_embeddings": false,
49
+ "use_cache": true,
50
+ "vocab_size": 49152
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/8ce05e932094a24524a2.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "fp16",
27
+ "batch_size": 2,
28
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
29
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": true,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": null,
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.2.0.dev5",
37
+ "output_all_logits": false,
38
+ "sequence_length": 100,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 4,
42
+ "num_hidden_layers": 2,
43
+ "num_key_value_heads": 4,
44
+ "residual_multiplier": 1.0,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 10000.0,
48
+ "tie_word_embeddings": false,
49
+ "use_cache": true,
50
+ "vocab_size": 49152
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a2054036498cf7febe73.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GraniteForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "attention_multiplier": 1.0,
11
+ "embedding_multiplier": 1.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 32,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 64,
16
+ "logits_scaling": 1.0,
17
+ "max_position_embeddings": 2048,
18
+ "mlp_bias": false,
19
+ "model_type": "granite",
20
+ "neuron": {
21
+ "_serialized_key": "HloNeuronConfig",
22
+ "all_reduce_dtype": null,
23
+ "allow_flash_attention": true,
24
+ "attention_layout": "HSB",
25
+ "attn_output_transposed": false,
26
+ "auto_cast_type": "fp16",
27
+ "batch_size": 1,
28
+ "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
29
+ "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
30
+ "collectives_layout": "HSB",
31
+ "continuous_batching": false,
32
+ "fuse_qkv": true,
33
+ "group_query_attention": null,
34
+ "log_softmax_scores": false,
35
+ "neuronxcc_version": "2.17.194.0+d312836f",
36
+ "optimum_neuron_version": "0.2.0.dev5",
37
+ "output_all_logits": false,
38
+ "sequence_length": 100,
39
+ "tp_degree": 2
40
+ },
41
+ "num_attention_heads": 4,
42
+ "num_hidden_layers": 2,
43
+ "num_key_value_heads": 4,
44
+ "residual_multiplier": 1.0,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 10000.0,
48
+ "tie_word_embeddings": false,
49
+ "use_cache": true,
50
+ "vocab_size": 49152
51
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/1db37f9b275c28f31a4c.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 2,
26
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
27
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": true,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": null,
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 4,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 4,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 8.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 128256
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/454a28c3ade838a69bb6.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
27
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": null,
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 4,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 4,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 8.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 128256
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/534c886df9d760cd4ee5.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "llamafactory/tiny-random-Llama-3",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "LlamaForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 4,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 16,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 64,
15
+ "max_position_embeddings": 131072,
16
+ "mlp_bias": false,
17
+ "model_type": "llama",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": true,
22
+ "attention_layout": "BSH",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "bf16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "llamafactory/tiny-random-Llama-3",
27
+ "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": null,
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 4,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 4,
42
+ "pretraining_tp": 1,
43
+ "rms_norm_eps": 1e-05,
44
+ "rope_scaling": {
45
+ "factor": 8.0,
46
+ "high_freq_factor": 4.0,
47
+ "low_freq_factor": 1.0,
48
+ "original_max_position_embeddings": 8192,
49
+ "rope_type": "llama3"
50
+ },
51
+ "rope_theta": 500000.0,
52
+ "tie_word_embeddings": false,
53
+ "use_cache": true,
54
+ "vocab_size": 128256
55
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/dacorvo/Mixtral-tiny/42dbcbe3264236b62e21.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 2,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 2,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.2.0.dev5",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/dacorvo/Mixtral-tiny/62410cde55b49f22ca2f.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.2.0.dev5",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "float16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "float16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/mixtral/dacorvo/Mixtral-tiny/a61f5b20450ca3689552.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "dacorvo/Mixtral-tiny",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "MixtralForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "head_dim": 32,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3584,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "neuron": {
17
+ "_serialized_key": "NxDNeuronConfig",
18
+ "async_mode": false,
19
+ "attn_kernel_enabled": false,
20
+ "batch_size": 1,
21
+ "capacity_factor": null,
22
+ "cc_pipeline_tiling_factor": 2,
23
+ "checkpoint_id": "dacorvo/Mixtral-tiny",
24
+ "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
25
+ "continuous_batching": false,
26
+ "enable_bucketing": false,
27
+ "ep_degree": 1,
28
+ "flash_decoding_enabled": false,
29
+ "fused_qkv": false,
30
+ "glu_mlp": true,
31
+ "is_chunked_prefill": false,
32
+ "local_ranks_size": 2,
33
+ "logical_nc_config": 1,
34
+ "max_batch_size": 1,
35
+ "max_context_length": 100,
36
+ "max_topk": 256,
37
+ "mlp_kernel_enabled": false,
38
+ "mlp_kernel_fuse_residual_add": false,
39
+ "n_active_tokens": 100,
40
+ "neuronxcc_version": "2.17.194.0+d312836f",
41
+ "num_cores_per_group": 1,
42
+ "on_device_sampling": false,
43
+ "optimum_neuron_version": "0.2.0.dev5",
44
+ "output_logits": false,
45
+ "padding_side": "right",
46
+ "pp_degree": 1,
47
+ "qk_layernorm": false,
48
+ "qkv_kernel_enabled": false,
49
+ "rpl_reduce_dtype": "bfloat16",
50
+ "sequence_length": 100,
51
+ "sequence_parallel_enabled": false,
52
+ "speculation_length": 0,
53
+ "start_rank_id": 0,
54
+ "target": null,
55
+ "torch_dtype": "bfloat16",
56
+ "tp_degree": 2,
57
+ "vocab_parallel": false
58
+ },
59
+ "num_attention_heads": 32,
60
+ "num_experts_per_tok": 2,
61
+ "num_hidden_layers": 2,
62
+ "num_key_value_heads": 8,
63
+ "num_local_experts": 8,
64
+ "output_router_logits": false,
65
+ "rms_norm_eps": 1e-05,
66
+ "rope_theta": 10000.0,
67
+ "router_aux_loss_coef": 0.001,
68
+ "router_jitter_noise": 0.0,
69
+ "sliding_window": 4096,
70
+ "tie_word_embeddings": false,
71
+ "use_cache": true,
72
+ "vocab_size": 32000
73
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/yujiepan/phi-4-tiny-random/1f86d9b323d4de2f798c.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": false,
22
+ "attention_layout": "HSB",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 2,
26
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
27
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": true,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "replicated-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 2,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 1,
42
+ "original_max_position_embeddings": 16384,
43
+ "partial_rotary_factor": 1.0,
44
+ "resid_pdrop": 0.0,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 250000,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "vocab_size": 100352
52
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/yujiepan/phi-4-tiny-random/cb63b47e3227ecbd6006.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": false,
22
+ "attention_layout": "HSB",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "bf16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
27
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "replicated-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 2,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 1,
42
+ "original_max_position_embeddings": 16384,
43
+ "partial_rotary_factor": 1.0,
44
+ "resid_pdrop": 0.0,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 250000,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "vocab_size": 100352
52
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/yujiepan/phi-4-tiny-random/e96c051b6b9b4c77a743.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/phi-4-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Phi3ForCausalLM"
7
+ ],
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "auto_map": {},
11
+ "embd_pdrop": 0.0,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 16,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 32,
16
+ "max_position_embeddings": 16384,
17
+ "model_type": "phi3",
18
+ "neuron": {
19
+ "_serialized_key": "HloNeuronConfig",
20
+ "all_reduce_dtype": null,
21
+ "allow_flash_attention": false,
22
+ "attention_layout": "HSB",
23
+ "attn_output_transposed": false,
24
+ "auto_cast_type": "fp16",
25
+ "batch_size": 1,
26
+ "checkpoint_id": "yujiepan/phi-4-tiny-random",
27
+ "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
28
+ "collectives_layout": "HSB",
29
+ "continuous_batching": false,
30
+ "fuse_qkv": true,
31
+ "group_query_attention": "replicated-heads",
32
+ "log_softmax_scores": false,
33
+ "neuronxcc_version": "2.17.194.0+d312836f",
34
+ "optimum_neuron_version": "0.2.0.dev5",
35
+ "output_all_logits": false,
36
+ "sequence_length": 100,
37
+ "tp_degree": 2
38
+ },
39
+ "num_attention_heads": 2,
40
+ "num_hidden_layers": 2,
41
+ "num_key_value_heads": 1,
42
+ "original_max_position_embeddings": 16384,
43
+ "partial_rotary_factor": 1.0,
44
+ "resid_pdrop": 0.0,
45
+ "rms_norm_eps": 1e-05,
46
+ "rope_scaling": null,
47
+ "rope_theta": 250000,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "vocab_size": 100352
52
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/1f07aa27385a3bacc0ce.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "HloNeuronConfig",
18
+ "all_reduce_dtype": null,
19
+ "allow_flash_attention": true,
20
+ "attention_layout": "HSB",
21
+ "attn_output_transposed": false,
22
+ "auto_cast_type": "fp16",
23
+ "batch_size": 2,
24
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
25
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
26
+ "collectives_layout": "HSB",
27
+ "continuous_batching": true,
28
+ "fuse_qkv": false,
29
+ "group_query_attention": "shard-over-heads",
30
+ "log_softmax_scores": false,
31
+ "neuronxcc_version": "2.17.194.0+d312836f",
32
+ "optimum_neuron_version": "0.2.0.dev5",
33
+ "output_all_logits": false,
34
+ "sequence_length": 100,
35
+ "tp_degree": 2
36
+ },
37
+ "num_attention_heads": 4,
38
+ "num_hidden_layers": 2,
39
+ "num_key_value_heads": 2,
40
+ "rms_norm_eps": 1e-06,
41
+ "rope_scaling": {
42
+ "factor": 4.0,
43
+ "original_max_position_embeddings": 32768,
44
+ "rope_type": "yarn",
45
+ "type": "yarn"
46
+ },
47
+ "rope_theta": 1000000.0,
48
+ "sliding_window": 131072,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "use_sliding_window": false,
52
+ "vocab_size": 152064
53
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/a8eda1338b750cadb3a6.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "HloNeuronConfig",
18
+ "all_reduce_dtype": null,
19
+ "allow_flash_attention": true,
20
+ "attention_layout": "HSB",
21
+ "attn_output_transposed": false,
22
+ "auto_cast_type": "bf16",
23
+ "batch_size": 1,
24
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
25
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
26
+ "collectives_layout": "HSB",
27
+ "continuous_batching": false,
28
+ "fuse_qkv": false,
29
+ "group_query_attention": "shard-over-heads",
30
+ "log_softmax_scores": false,
31
+ "neuronxcc_version": "2.17.194.0+d312836f",
32
+ "optimum_neuron_version": "0.2.0.dev5",
33
+ "output_all_logits": false,
34
+ "sequence_length": 100,
35
+ "tp_degree": 2
36
+ },
37
+ "num_attention_heads": 4,
38
+ "num_hidden_layers": 2,
39
+ "num_key_value_heads": 2,
40
+ "rms_norm_eps": 1e-06,
41
+ "rope_scaling": {
42
+ "factor": 4.0,
43
+ "original_max_position_embeddings": 32768,
44
+ "rope_type": "yarn",
45
+ "type": "yarn"
46
+ },
47
+ "rope_theta": 1000000.0,
48
+ "sliding_window": 131072,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "use_sliding_window": false,
52
+ "vocab_size": 152064
53
+ }
neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/yujiepan/qwen2.5-128k-tiny-random/e14b78afc1fa5e13b44a.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "yujiepan/qwen2.5-128k-tiny-random",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "Qwen2ForCausalLM"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 8,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 1,
15
+ "model_type": "qwen2",
16
+ "neuron": {
17
+ "_serialized_key": "HloNeuronConfig",
18
+ "all_reduce_dtype": null,
19
+ "allow_flash_attention": true,
20
+ "attention_layout": "HSB",
21
+ "attn_output_transposed": false,
22
+ "auto_cast_type": "fp16",
23
+ "batch_size": 1,
24
+ "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
25
+ "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
26
+ "collectives_layout": "HSB",
27
+ "continuous_batching": false,
28
+ "fuse_qkv": false,
29
+ "group_query_attention": "shard-over-heads",
30
+ "log_softmax_scores": false,
31
+ "neuronxcc_version": "2.17.194.0+d312836f",
32
+ "optimum_neuron_version": "0.2.0.dev5",
33
+ "output_all_logits": false,
34
+ "sequence_length": 100,
35
+ "tp_degree": 2
36
+ },
37
+ "num_attention_heads": 4,
38
+ "num_hidden_layers": 2,
39
+ "num_key_value_heads": 2,
40
+ "rms_norm_eps": 1e-06,
41
+ "rope_scaling": {
42
+ "factor": 4.0,
43
+ "original_max_position_embeddings": 32768,
44
+ "rope_type": "yarn",
45
+ "type": "yarn"
46
+ },
47
+ "rope_theta": 1000000.0,
48
+ "sliding_window": 131072,
49
+ "tie_word_embeddings": false,
50
+ "use_cache": true,
51
+ "use_sliding_window": false,
52
+ "vocab_size": 152064
53
+ }
neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff CHANGED
Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff differ
 
neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff CHANGED
Binary files a/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff and b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff differ
 
neuronxcc-2.17.194.0+d312836f/MODULE_1649fc77b87fff02e370+613edded/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a42bde1cae14e2adb4b8c3626971bfda3749679e38c4d9ea4d5dbb5b993efa05
3
  size 134144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:104f51ab3675a23b89a21db5993a27e9a5eeb09867470734c4497617e146159f
3
  size 134144
neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54bcdec06d89d654845d062183036a3a4b545c119ab7f8bfcfcf806a54b9b35d
3
+ size 11183
neuronxcc-2.17.194.0+d312836f/MODULE_18642e0fd797db5b7fcb+431f5505/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f539c4d46bd7143bb99ab78bed121b4565d8bbb9b15cd5fb0d86fa42eabeac7
3
+ size 103424
neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"]
neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3978ae0dae4598ee8e9bbe8392fc2e7e13757206db58da945eeacc59ca5ff3de
3
+ size 21402
neuronxcc-2.17.194.0+d312836f/MODULE_1b80b788e3a49498f963+613edded/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4017e0a785242f34f89ad69149e5db39b1a813f6c647b8b31659cc1f0868e99
3
+ size 144384
neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"]
neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89450bb664c81cdf16fe49daafa237c12c1cd71f5d8ddae84827db4a2eee340a
3
+ size 17475
neuronxcc-2.17.194.0+d312836f/MODULE_1df250ef1cf7a7de560f+613edded/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfd560af6819825c7e6f05303464c481e10d74ad257fc87de57528001c694afe
3
+ size 134144
neuronxcc-2.17.194.0+d312836f/MODULE_22cf23062ec53b3fd95d+613edded/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11b12164888b716b67ed46b93830fac4e43f9e7990378269f683e7ec2f468231
3
  size 144384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20351b22fb897e1f04a4638ab32aea1487833c6b89eca18339835ba2fe27093
3
  size 144384
neuronxcc-2.17.194.0+d312836f/MODULE_24ff9ac2787ce9a1d276+613edded/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ead3ebbe1cf12b9c8188be09fee3b1fc68585260e8c7a4404c442657ba6a6055
3
  size 134144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c20e3490f8986d6c6745d61708f73050a5ed63c89abe7876449170ae3914ddc
3
  size 134144
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper"
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc90fd439316fffc61e3d8c3621617633a5cb8d08dda6c0736121c32054c76bf
3
+ size 68277
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b88ebb6d4762cb58b080eb10990ffb1777c6b5dc0aec53275fbe9a973c9fe294
3
+ size 257024
neuronxcc-2.17.194.0+d312836f/MODULE_284ddd1b388e504631b8+bfc62e4c/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dce1d8a73f724c37e0b050e6aabdd2a403608d397273e8ab815bd28129ade55
3
+ size 268322
neuronxcc-2.17.194.0+d312836f/MODULE_2ef52130792b59d66c66+613edded/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0c9cb5dd8f03f5a551d5e99cf274e04fe28815ccd3e879779a7516ce857332f
3
  size 134144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a1c8c7baf1dd72136b79095ec0210f7f7ea3d2a0aec460b4b0d187d2c2d03d
3
  size 134144
neuronxcc-2.17.194.0+d312836f/MODULE_320f2622d4d0c9fdd0f1+613edded/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e35c0d2fdb8ebdf90b9435bc04029b89281e32ed2f251d8fc0c888721056ff
3
  size 144384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d6ee114007d520d9c57754d8e1ea3a59eec00c4e6fa2f8168ded13fe644d38
3
  size 144384
neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"]
neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38184c632616d2476fe6e7794270c8a51ed09eccce54dfc80c4ee171068da566
3
+ size 20501
neuronxcc-2.17.194.0+d312836f/MODULE_3cd14d7a79a82df7bd50+613edded/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba2fd19607592f515c10a8c9aa2bc7fe0a7cfdb61286057096848075d43a85f1
3
+ size 144384
neuronxcc-2.17.194.0+d312836f/MODULE_3da832fdaa3d62981800+613edded/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e8aa683c7d048770a00a8bc0e022488b1ab80ef008cc962d7d4d851eaad0943
3
  size 154624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b956eaad5b3ff2e9453f6a41327cb43195efebec1d36ee916427be098eee31f6
3
  size 154624
neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"]
neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.done ADDED
File without changes
neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0df5d765518a8a37e232a30badf00c24949b6001eda6acdb60c55bbf974e1df
3
+ size 20644
neuronxcc-2.17.194.0+d312836f/MODULE_40a0e75a65ac51fdd01a+613edded/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:287a3588ce8935f52349c24164e47aa539bd56ee6df2ab77d46e95b3ee22d9c2
3
+ size 144384
neuronxcc-2.17.194.0+d312836f/MODULE_48bfe9ceb9631fdca2d4+613edded/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a123eb66616175b59767a873603b31410f8a6e1f11edbc2be8d877baa3164ebc
3
  size 144384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0b07361864dbb9075ea5b55536a3b994ae2afd864ff5545d540a4abcf2cafc
3
  size 144384