diff --git "a/Llama-3.2-1B-Instruct-4bit.mlmodelc/model.mil" "b/Llama-3.2-1B-Instruct-4bit.mlmodelc/model.mil" --- "a/Llama-3.2-1B-Instruct-4bit.mlmodelc/model.mil" +++ "b/Llama-3.2-1B-Instruct-4bit.mlmodelc/model.mil" @@ -2,152 +2,6 @@ program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] { func main(tensor causal_mask, tensor input_ids, state> key_cache, state> value_cache) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1, 1]}, {"input_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 1], [1, 2048], [1, 2048]]}, {"input_ids", [[1, 1], [1, 2048]]}})))] { - tensor model_model_embed_tokens_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131334272))))[name = string("model_model_embed_tokens_weight_quantized")]; - tensor model_model_layers_0_input_layernorm_weight = const()[name = string("model_model_layers_0_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147751104)))]; - tensor model_model_layers_0_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147755264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149852480))))[name = string("model_model_layers_0_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_0_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150114688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150639040))))[name = string("model_model_layers_0_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_0_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150704640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151228992))))[name = string("model_model_layers_0_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_0_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151294592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153391808))))[name = string("model_model_layers_0_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_0_post_attention_layernorm_weight = const()[name = string("model_model_layers_0_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153654016)))]; - tensor model_model_layers_0_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153658176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162046848))))[name = string("model_model_layers_0_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_0_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163095488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171484160))))[name = string("model_model_layers_0_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_0_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172532800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180921472))))[name = string("model_model_layers_0_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_1_input_layernorm_weight = const()[name = string("model_model_layers_1_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181970112)))]; - tensor model_model_layers_1_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181974272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184071488))))[name = string("model_model_layers_1_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_1_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184333696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184858048))))[name = string("model_model_layers_1_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_1_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184923648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185448000))))[name = string("model_model_layers_1_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_1_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185513600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187610816))))[name = string("model_model_layers_1_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_1_post_attention_layernorm_weight = const()[name = string("model_model_layers_1_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187873024)))]; - tensor model_model_layers_1_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187877184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196265856))))[name = string("model_model_layers_1_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_1_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197314496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205703168))))[name = string("model_model_layers_1_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_1_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206751808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215140480))))[name = string("model_model_layers_1_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_2_input_layernorm_weight = const()[name = string("model_model_layers_2_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216189120)))]; - tensor model_model_layers_2_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216193280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218290496))))[name = string("model_model_layers_2_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_2_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218552704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219077056))))[name = string("model_model_layers_2_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_2_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219142656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219667008))))[name = string("model_model_layers_2_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_2_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219732608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221829824))))[name = string("model_model_layers_2_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_2_post_attention_layernorm_weight = const()[name = string("model_model_layers_2_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222092032)))]; - tensor model_model_layers_2_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222096192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230484864))))[name = string("model_model_layers_2_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_2_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231533504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239922176))))[name = string("model_model_layers_2_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_2_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240970816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249359488))))[name = string("model_model_layers_2_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_3_input_layernorm_weight = const()[name = string("model_model_layers_3_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250408128)))]; - tensor model_model_layers_3_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250412288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252509504))))[name = string("model_model_layers_3_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_3_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252771712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253296064))))[name = string("model_model_layers_3_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_3_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253361664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253886016))))[name = string("model_model_layers_3_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_3_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253951616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256048832))))[name = string("model_model_layers_3_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_3_post_attention_layernorm_weight = const()[name = string("model_model_layers_3_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256311040)))]; - tensor model_model_layers_3_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256315200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264703872))))[name = string("model_model_layers_3_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_3_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265752512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274141184))))[name = string("model_model_layers_3_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_3_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275189824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283578496))))[name = string("model_model_layers_3_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_4_input_layernorm_weight = const()[name = string("model_model_layers_4_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284627136)))]; - tensor model_model_layers_4_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284631296))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286728512))))[name = string("model_model_layers_4_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_4_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286990720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287515072))))[name = string("model_model_layers_4_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_4_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287580672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288105024))))[name = string("model_model_layers_4_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_4_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288170624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290267840))))[name = string("model_model_layers_4_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_4_post_attention_layernorm_weight = const()[name = string("model_model_layers_4_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290530048)))]; - tensor model_model_layers_4_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290534208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298922880))))[name = string("model_model_layers_4_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_4_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299971520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308360192))))[name = string("model_model_layers_4_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_4_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309408832))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317797504))))[name = string("model_model_layers_4_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_5_input_layernorm_weight = const()[name = string("model_model_layers_5_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318846144)))]; - tensor model_model_layers_5_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318850304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320947520))))[name = string("model_model_layers_5_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_5_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321209728))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321734080))))[name = string("model_model_layers_5_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_5_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321799680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322324032))))[name = string("model_model_layers_5_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_5_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322389632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324486848))))[name = string("model_model_layers_5_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_5_post_attention_layernorm_weight = const()[name = string("model_model_layers_5_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324749056)))]; - tensor model_model_layers_5_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324753216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333141888))))[name = string("model_model_layers_5_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_5_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334190528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342579200))))[name = string("model_model_layers_5_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_5_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343627840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352016512))))[name = string("model_model_layers_5_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_6_input_layernorm_weight = const()[name = string("model_model_layers_6_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353065152)))]; - tensor model_model_layers_6_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353069312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355166528))))[name = string("model_model_layers_6_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_6_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355428736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355953088))))[name = string("model_model_layers_6_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_6_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356018688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356543040))))[name = string("model_model_layers_6_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_6_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356608640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358705856))))[name = string("model_model_layers_6_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_6_post_attention_layernorm_weight = const()[name = string("model_model_layers_6_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358968064)))]; - tensor model_model_layers_6_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358972224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367360896))))[name = string("model_model_layers_6_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_6_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368409536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376798208))))[name = string("model_model_layers_6_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_6_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377846848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386235520))))[name = string("model_model_layers_6_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_7_input_layernorm_weight = const()[name = string("model_model_layers_7_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387284160)))]; - tensor model_model_layers_7_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387288320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389385536))))[name = string("model_model_layers_7_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_7_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389647744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390172096))))[name = string("model_model_layers_7_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_7_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390237696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390762048))))[name = string("model_model_layers_7_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_7_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390827648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392924864))))[name = string("model_model_layers_7_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_7_post_attention_layernorm_weight = const()[name = string("model_model_layers_7_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393187072)))]; - tensor model_model_layers_7_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393191232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401579904))))[name = string("model_model_layers_7_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_7_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402628544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411017216))))[name = string("model_model_layers_7_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_7_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412065856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420454528))))[name = string("model_model_layers_7_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_8_input_layernorm_weight = const()[name = string("model_model_layers_8_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421503168)))]; - tensor model_model_layers_8_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421507328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423604544))))[name = string("model_model_layers_8_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_8_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423866752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424391104))))[name = string("model_model_layers_8_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_8_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424456704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424981056))))[name = string("model_model_layers_8_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_8_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425046656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427143872))))[name = string("model_model_layers_8_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_8_post_attention_layernorm_weight = const()[name = string("model_model_layers_8_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427406080)))]; - tensor model_model_layers_8_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427410240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435798912))))[name = string("model_model_layers_8_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_8_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436847552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445236224))))[name = string("model_model_layers_8_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_8_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446284864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454673536))))[name = string("model_model_layers_8_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_9_input_layernorm_weight = const()[name = string("model_model_layers_9_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455722176)))]; - tensor model_model_layers_9_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455726336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457823552))))[name = string("model_model_layers_9_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_9_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458085760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458610112))))[name = string("model_model_layers_9_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_9_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458675712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459200064))))[name = string("model_model_layers_9_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_9_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459265664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461362880))))[name = string("model_model_layers_9_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_9_post_attention_layernorm_weight = const()[name = string("model_model_layers_9_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461625088)))]; - tensor model_model_layers_9_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461629248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470017920))))[name = string("model_model_layers_9_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_9_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471066560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479455232))))[name = string("model_model_layers_9_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_9_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480503872))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488892544))))[name = string("model_model_layers_9_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_10_input_layernorm_weight = const()[name = string("model_model_layers_10_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489941184)))]; - tensor model_model_layers_10_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489945344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492042560))))[name = string("model_model_layers_10_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_10_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492304768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492829120))))[name = string("model_model_layers_10_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_10_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492894720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493419072))))[name = string("model_model_layers_10_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_10_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493484672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(495581888))))[name = string("model_model_layers_10_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_10_post_attention_layernorm_weight = const()[name = string("model_model_layers_10_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(495844096)))]; - tensor model_model_layers_10_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(495848256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504236928))))[name = string("model_model_layers_10_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_10_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505285568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513674240))))[name = string("model_model_layers_10_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_10_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(514722880))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523111552))))[name = string("model_model_layers_10_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_11_input_layernorm_weight = const()[name = string("model_model_layers_11_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524160192)))]; - tensor model_model_layers_11_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524164352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526261568))))[name = string("model_model_layers_11_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_11_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526523776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527048128))))[name = string("model_model_layers_11_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_11_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527113728))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527638080))))[name = string("model_model_layers_11_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_11_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527703680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529800896))))[name = string("model_model_layers_11_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_11_post_attention_layernorm_weight = const()[name = string("model_model_layers_11_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530063104)))]; - tensor model_model_layers_11_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530067264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538455936))))[name = string("model_model_layers_11_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_11_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539504576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547893248))))[name = string("model_model_layers_11_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_11_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548941888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557330560))))[name = string("model_model_layers_11_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_12_input_layernorm_weight = const()[name = string("model_model_layers_12_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558379200)))]; - tensor model_model_layers_12_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558383360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560480576))))[name = string("model_model_layers_12_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_12_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560742784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561267136))))[name = string("model_model_layers_12_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_12_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561332736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561857088))))[name = string("model_model_layers_12_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_12_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561922688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564019904))))[name = string("model_model_layers_12_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_12_post_attention_layernorm_weight = const()[name = string("model_model_layers_12_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564282112)))]; - tensor model_model_layers_12_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564286272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572674944))))[name = string("model_model_layers_12_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_12_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573723584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582112256))))[name = string("model_model_layers_12_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_12_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583160896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591549568))))[name = string("model_model_layers_12_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_13_input_layernorm_weight = const()[name = string("model_model_layers_13_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592598208)))]; - tensor model_model_layers_13_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592602368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594699584))))[name = string("model_model_layers_13_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_13_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594961792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595486144))))[name = string("model_model_layers_13_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_13_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595551744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596076096))))[name = string("model_model_layers_13_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_13_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596141696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598238912))))[name = string("model_model_layers_13_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_13_post_attention_layernorm_weight = const()[name = string("model_model_layers_13_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598501120)))]; - tensor model_model_layers_13_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598505280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606893952))))[name = string("model_model_layers_13_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_13_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607942592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616331264))))[name = string("model_model_layers_13_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_13_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617379904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625768576))))[name = string("model_model_layers_13_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_14_input_layernorm_weight = const()[name = string("model_model_layers_14_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626817216)))]; - tensor model_model_layers_14_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626821376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628918592))))[name = string("model_model_layers_14_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_14_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629180800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629705152))))[name = string("model_model_layers_14_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_14_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629770752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630295104))))[name = string("model_model_layers_14_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_14_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630360704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632457920))))[name = string("model_model_layers_14_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_14_post_attention_layernorm_weight = const()[name = string("model_model_layers_14_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632720128)))]; - tensor model_model_layers_14_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632724288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641112960))))[name = string("model_model_layers_14_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_14_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642161600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650550272))))[name = string("model_model_layers_14_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_14_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651598912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(659987584))))[name = string("model_model_layers_14_mlp_down_proj_weight_quantized")]; - tensor model_model_layers_15_input_layernorm_weight = const()[name = string("model_model_layers_15_input_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(661036224)))]; - tensor model_model_layers_15_self_attn_q_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(661040384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663137600))))[name = string("model_model_layers_15_self_attn_q_proj_weight_quantized")]; - tensor model_model_layers_15_self_attn_k_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663399808))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663924160))))[name = string("model_model_layers_15_self_attn_k_proj_weight_quantized")]; - tensor model_model_layers_15_self_attn_v_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663989760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(664514112))))[name = string("model_model_layers_15_self_attn_v_proj_weight_quantized")]; - tensor model_model_layers_15_self_attn_o_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(664579712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666676928))))[name = string("model_model_layers_15_self_attn_o_proj_weight_quantized")]; - tensor model_model_layers_15_post_attention_layernorm_weight = const()[name = string("model_model_layers_15_post_attention_layernorm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666939136)))]; - tensor model_model_layers_15_mlp_gate_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666943296))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675331968))))[name = string("model_model_layers_15_mlp_gate_proj_weight_quantized")]; - tensor model_model_layers_15_mlp_up_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676380608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(684769280))))[name = string("model_model_layers_15_mlp_up_proj_weight_quantized")]; - tensor model_model_layers_15_mlp_down_proj_weight_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685817920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694206592))))[name = string("model_model_layers_15_mlp_down_proj_weight_quantized")]; - tensor model_model_norm_weight = const()[name = string("model_model_norm_weight"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695255232)))]; tensor var_7_shape_cast_fp16 = shape(x = causal_mask)[name = string("op_7_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; @@ -170,19 +24,20 @@ program(1.3) int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_134")]; int32 past_seen_tokens = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("past_seen_tokens")]; int32 var_48 = const()[name = string("op_48"), val = int32(-1)]; - int32 var_60 = const()[name = string("op_60"), val = int32(4)]; + int32 var_59 = const()[name = string("op_59"), val = int32(4)]; int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)]; int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)]; bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)]; - tensor inputs_embeds = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_quantized)[name = string("inputs_embeds")]; - tensor var_113_shape = shape(x = inputs_embeds)[name = string("op_113_shape")]; + tensor model_model_embed_tokens_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(131334272))))[name = string("model_model_embed_tokens_weight_to_fp16_quantized")]; + tensor inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_to_fp16_quantized)[name = string("inputs_embeds_cast_fp16")]; + tensor var_113_shape_cast_fp16 = shape(x = inputs_embeds_cast_fp16)[name = string("op_113_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; - string var_113_shape_to_uint16_dtype_0 = const()[name = string("op_113_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_113_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_113_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; - tensor var_113_shape_to_uint16 = cast(dtype = var_113_shape_to_uint16_dtype_0, x = var_113_shape)[name = string("cast_132")]; - uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_113_shape_to_uint16)[name = string("gather_2_cast_uint16")]; + tensor var_113_shape_cast_fp16_to_uint16 = cast(dtype = var_113_shape_cast_fp16_to_uint16_dtype_0, x = var_113_shape_cast_fp16)[name = string("cast_132")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_113_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_131")]; int32 var_115 = add(x = past_seen_tokens, y = gather_2_cast_uint16_to_int32)[name = string("op_115")]; @@ -194,7 +49,7 @@ program(1.3) tensor var_128 = expand_dims(axes = var_128_axes_0, x = position_ids)[name = string("op_128")]; bool var_133_transpose_x_0 = const()[name = string("op_133_transpose_x_0"), val = bool(false)]; bool var_133_transpose_y_0 = const()[name = string("op_133_transpose_y_0"), val = bool(false)]; - tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695259392)))]; + tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147751104)))]; string cast_2_to_fp16_dtype_0 = const()[name = string("cast_2_to_fp16_dtype_0"), val = string("fp16")]; tensor var_128_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = var_128)[name = string("cast_130")]; tensor var_133_cast_fp16 = matmul(transpose_x = var_133_transpose_x_0, transpose_y = var_133_transpose_y_0, x = const_2_to_fp16, y = var_128_to_fp16)[name = string("op_133_cast_fp16")]; @@ -204,8 +59,8 @@ program(1.3) tensor emb_cast_fp16 = concat(axis = var_48, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; tensor cos_1_cast_fp16 = cos(x = emb_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor sin_1_cast_fp16 = sin(x = emb_cast_fp16)[name = string("sin_1_cast_fp16")]; - fp16 var_55_promoted_to_fp16 = const()[name = string("op_55_promoted_to_fp16"), val = fp16(0x1p+1)]; - tensor var_154_cast_fp16 = pow(x = inputs_embeds, y = var_55_promoted_to_fp16)[name = string("op_154_cast_fp16")]; + fp16 var_54_promoted_to_fp16 = const()[name = string("op_54_promoted_to_fp16"), val = fp16(0x1p+1)]; + tensor var_154_cast_fp16 = pow(x = inputs_embeds_cast_fp16, y = var_54_promoted_to_fp16)[name = string("op_154_cast_fp16")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_154_cast_fp16)[name = string("variance_1_cast_fp16")]; @@ -213,76 +68,80 @@ program(1.3) tensor var_158_cast_fp16 = add(x = variance_1_cast_fp16, y = var_157_to_fp16)[name = string("op_158_cast_fp16")]; fp32 var_159_epsilon_0 = const()[name = string("op_159_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_159_cast_fp16 = rsqrt(epsilon = var_159_epsilon_0, x = var_158_cast_fp16)[name = string("op_159_cast_fp16")]; - tensor hidden_states_3_cast_fp16 = mul(x = inputs_embeds, y = var_159_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; - tensor hidden_states_5 = mul(x = model_model_layers_0_input_layernorm_weight, y = hidden_states_3_cast_fp16)[name = string("hidden_states_5")]; - tensor var_167_shape = shape(x = hidden_states_5)[name = string("op_167_shape")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_embeds_cast_fp16, y = var_159_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147751232)))]; + tensor hidden_states_7_cast_fp16 = mul(x = model_model_layers_0_input_layernorm_weight_to_fp16, y = hidden_states_3_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_170_shape_cast_fp16 = shape(x = hidden_states_7_cast_fp16)[name = string("op_170_shape_cast_fp16")]; int32 gather_4 = const()[name = string("gather_4"), val = int32(1)]; int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; - string var_167_shape_to_uint16_dtype_0 = const()[name = string("op_167_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_170_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_170_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; - tensor var_167_shape_to_uint16 = cast(dtype = var_167_shape_to_uint16_dtype_0, x = var_167_shape)[name = string("cast_129")]; - uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_167_shape_to_uint16)[name = string("gather_5_cast_uint16")]; + tensor var_170_shape_cast_fp16_to_uint16 = cast(dtype = var_170_shape_cast_fp16_to_uint16_dtype_0, x = var_170_shape_cast_fp16)[name = string("cast_129")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_170_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_0_bias_0 = const()[name = string("linear_0_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695259520)))]; - tensor linear_0 = linear(bias = linear_0_bias_0, weight = model_model_layers_0_self_attn_q_proj_weight_quantized, x = hidden_states_5)[name = string("linear_0")]; - tensor linear_1_bias_0 = const()[name = string("linear_1_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695263680)))]; - tensor linear_1 = linear(bias = linear_1_bias_0, weight = model_model_layers_0_self_attn_k_proj_weight_quantized, x = hidden_states_5)[name = string("linear_1")]; - tensor linear_2 = linear(bias = linear_1_bias_0, weight = model_model_layers_0_self_attn_v_proj_weight_quantized, x = hidden_states_5)[name = string("linear_2")]; + tensor model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147755392))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149852608))))[name = string("model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150114816)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150118976))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150643328))))[name = string("model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150708928)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(150710016))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151234368))))[name = string("model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor concat_0x = const()[name = string("concat_0x"), val = tensor([1, -1, 32, 64])]; - tensor var_176 = reshape(shape = concat_0x, x = linear_0)[name = string("op_176")]; + tensor var_179_cast_fp16 = reshape(shape = concat_0x, x = linear_0_cast_fp16)[name = string("op_179_cast_fp16")]; tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_1x = const()[name = string("concat_1x"), val = tensor([1, -1, 8, 64])]; - tensor var_179 = reshape(shape = concat_1x, x = linear_1)[name = string("op_179")]; + tensor var_182_cast_fp16 = reshape(shape = concat_1x, x = linear_1_cast_fp16)[name = string("op_182_cast_fp16")]; tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 8, 64])]; - tensor var_182 = reshape(shape = concat_2x, x = linear_2)[name = string("op_182")]; + tensor var_185_cast_fp16 = reshape(shape = concat_2x, x = linear_2_cast_fp16)[name = string("op_185_cast_fp16")]; tensor v_state_1_perm_0 = const()[name = string("v_state_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_7_axes_0 = const()[name = string("cos_7_axes_0"), val = tensor([1])]; - tensor cos_7 = expand_dims(axes = cos_7_axes_0, x = cos_1_cast_fp16)[name = string("cos_7")]; + tensor cos_7_cast_fp16 = expand_dims(axes = cos_7_axes_0, x = cos_1_cast_fp16)[name = string("cos_7_cast_fp16")]; tensor sin_7_axes_0 = const()[name = string("sin_7_axes_0"), val = tensor([1])]; - tensor sin_7 = expand_dims(axes = sin_7_axes_0, x = sin_1_cast_fp16)[name = string("sin_7")]; - tensor q_1 = transpose(perm = q_1_perm_0, x = var_176)[name = string("transpose_63")]; - tensor var_186 = mul(x = q_1, y = cos_7)[name = string("op_186")]; + tensor sin_7_cast_fp16 = expand_dims(axes = sin_7_axes_0, x = sin_1_cast_fp16)[name = string("sin_7_cast_fp16")]; + tensor q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_179_cast_fp16)[name = string("transpose_63")]; + tensor var_189_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_189_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_1 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1)[name = string("x1_1")]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_1 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1)[name = string("x2_1")]; - fp16 const_3_promoted = const()[name = string("const_3_promoted"), val = fp16(-0x1p+0)]; - tensor var_197 = mul(x = x2_1, y = const_3_promoted)[name = string("op_197")]; - bool var_199_interleave_0 = const()[name = string("op_199_interleave_0"), val = bool(false)]; - tensor var_199 = concat(axis = var_48, interleave = var_199_interleave_0, values = (var_197, x1_1))[name = string("op_199")]; - tensor var_200 = mul(x = var_199, y = sin_7)[name = string("op_200")]; - tensor query_states_3 = add(x = var_186, y = var_200)[name = string("query_states_3")]; - tensor k_1 = transpose(perm = k_1_perm_0, x = var_179)[name = string("transpose_62")]; - tensor var_202 = mul(x = k_1, y = cos_7)[name = string("op_202")]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_200_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_200_cast_fp16")]; + bool var_202_interleave_0 = const()[name = string("op_202_interleave_0"), val = bool(false)]; + tensor var_202_cast_fp16 = concat(axis = var_48, interleave = var_202_interleave_0, values = (var_200_cast_fp16, x1_1_cast_fp16))[name = string("op_202_cast_fp16")]; + tensor var_203_cast_fp16 = mul(x = var_202_cast_fp16, y = sin_7_cast_fp16)[name = string("op_203_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_189_cast_fp16, y = var_203_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_182_cast_fp16)[name = string("transpose_62")]; + tensor var_205_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_205_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_3 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1)[name = string("x1_3")]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_3 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1)[name = string("x2_3")]; - fp16 const_4_promoted = const()[name = string("const_4_promoted"), val = fp16(-0x1p+0)]; - tensor var_213 = mul(x = x2_3, y = const_4_promoted)[name = string("op_213")]; - bool var_215_interleave_0 = const()[name = string("op_215_interleave_0"), val = bool(false)]; - tensor var_215 = concat(axis = var_48, interleave = var_215_interleave_0, values = (var_213, x1_3))[name = string("op_215")]; - tensor var_216 = mul(x = var_215, y = sin_7)[name = string("op_216")]; - tensor k_state_1 = add(x = var_202, y = var_216)[name = string("k_state_1")]; - tensor var_218_shape = shape(x = cache_position)[name = string("op_218_shape")]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_216_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_216_cast_fp16")]; + bool var_218_interleave_0 = const()[name = string("op_218_interleave_0"), val = bool(false)]; + tensor var_218_cast_fp16 = concat(axis = var_48, interleave = var_218_interleave_0, values = (var_216_cast_fp16, x1_3_cast_fp16))[name = string("op_218_cast_fp16")]; + tensor var_219_cast_fp16 = mul(x = var_218_cast_fp16, y = sin_7_cast_fp16)[name = string("op_219_cast_fp16")]; + tensor k_state_1_cast_fp16 = add(x = var_205_cast_fp16, y = var_219_cast_fp16)[name = string("k_state_1_cast_fp16")]; + tensor var_221_shape = shape(x = cache_position)[name = string("op_221_shape")]; int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; - string var_218_shape_to_uint16_dtype_0 = const()[name = string("op_218_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_221_shape_to_uint16_dtype_0 = const()[name = string("op_221_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(0)]; - tensor var_218_shape_to_uint16 = cast(dtype = var_218_shape_to_uint16_dtype_0, x = var_218_shape)[name = string("cast_128")]; - uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_218_shape_to_uint16)[name = string("gather_10_cast_uint16")]; + tensor var_221_shape_to_uint16 = cast(dtype = var_221_shape_to_uint16_dtype_0, x = var_221_shape)[name = string("cast_128")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_221_shape_to_uint16)[name = string("gather_10_cast_uint16")]; string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_127")]; int32 end_1 = add(x = past_seen_tokens, y = gather_10_cast_uint16_to_int32)[name = string("end_1")]; @@ -309,94 +168,94 @@ program(1.3) tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_1 = slice_update(begin = concat_5, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1")]; - write_state(data = key_cache_internal_tensor_assign_1, input = key_cache)[name = string("coreml_update_state_32_write_state")]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_32_write_state")]; tensor coreml_update_state_32 = read_state(input = key_cache)[name = string("coreml_update_state_32")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_1 = transpose(perm = v_state_1_perm_0, x = var_182)[name = string("transpose_61")]; - tensor value_cache_internal_tensor_assign_1 = slice_update(begin = concat_5, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1")]; - write_state(data = value_cache_internal_tensor_assign_1, input = value_cache)[name = string("coreml_update_state_33_write_state")]; + tensor v_state_1_cast_fp16 = transpose(perm = v_state_1_perm_0, x = var_185_cast_fp16)[name = string("transpose_61")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_33_write_state")]; tensor coreml_update_state_33 = read_state(input = value_cache)[name = string("coreml_update_state_33")]; - tensor var_239_begin_0 = const()[name = string("op_239_begin_0"), val = tensor([0, 0, 0, 0, 0])]; - tensor var_239_end_0 = const()[name = string("op_239_end_0"), val = tensor([1, 1, 8, 2048, 64])]; - tensor var_239_end_mask_0 = const()[name = string("op_239_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_239_squeeze_mask_0 = const()[name = string("op_239_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_239 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, squeeze_mask = var_239_squeeze_mask_0, x = coreml_update_state_32)[name = string("op_239")]; + tensor var_242_begin_0 = const()[name = string("op_242_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_242_end_0 = const()[name = string("op_242_end_0"), val = tensor([1, 1, 8, 2048, 64])]; + tensor var_242_end_mask_0 = const()[name = string("op_242_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_242_squeeze_mask_0 = const()[name = string("op_242_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_242_cast_fp16 = slice_by_index(begin = var_242_begin_0, end = var_242_end_0, end_mask = var_242_end_mask_0, squeeze_mask = var_242_squeeze_mask_0, x = coreml_update_state_32)[name = string("op_242_cast_fp16")]; int32 concat_11_values0_0 = const()[name = string("concat_11_values0_0"), val = int32(1)]; int32 concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = int32(8)]; int32 concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = int32(64)]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (concat_11_values0_0, concat_11_values1_0, end_1, concat_11_values3_0))[name = string("concat_11")]; - tensor var_242_begin_0 = const()[name = string("op_242_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_242_end_mask_0 = const()[name = string("op_242_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_242 = slice_by_index(begin = var_242_begin_0, end = concat_11, end_mask = var_242_end_mask_0, x = var_239)[name = string("op_242")]; - tensor var_244_begin_0 = const()[name = string("op_244_begin_0"), val = tensor([0, 0, 0, 0, 0])]; - tensor var_244_end_0 = const()[name = string("op_244_end_0"), val = tensor([1, 1, 8, 2048, 64])]; - tensor var_244_end_mask_0 = const()[name = string("op_244_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_244_squeeze_mask_0 = const()[name = string("op_244_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_244 = slice_by_index(begin = var_244_begin_0, end = var_244_end_0, end_mask = var_244_end_mask_0, squeeze_mask = var_244_squeeze_mask_0, x = coreml_update_state_33)[name = string("op_244")]; - tensor var_247_begin_0 = const()[name = string("op_247_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_247_end_mask_0 = const()[name = string("op_247_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_247 = slice_by_index(begin = var_247_begin_0, end = concat_11, end_mask = var_247_end_mask_0, x = var_244)[name = string("op_247")]; - tensor var_249_shape = shape(x = var_242)[name = string("op_249_shape")]; + tensor var_245_begin_0 = const()[name = string("op_245_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_245_end_mask_0 = const()[name = string("op_245_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_245_cast_fp16 = slice_by_index(begin = var_245_begin_0, end = concat_11, end_mask = var_245_end_mask_0, x = var_242_cast_fp16)[name = string("op_245_cast_fp16")]; + tensor var_247_begin_0 = const()[name = string("op_247_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_247_end_0 = const()[name = string("op_247_end_0"), val = tensor([1, 1, 8, 2048, 64])]; + tensor var_247_end_mask_0 = const()[name = string("op_247_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_247_squeeze_mask_0 = const()[name = string("op_247_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, squeeze_mask = var_247_squeeze_mask_0, x = coreml_update_state_33)[name = string("op_247_cast_fp16")]; + tensor var_250_begin_0 = const()[name = string("op_250_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_250_end_mask_0 = const()[name = string("op_250_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_250_cast_fp16 = slice_by_index(begin = var_250_begin_0, end = concat_11, end_mask = var_250_end_mask_0, x = var_247_cast_fp16)[name = string("op_250_cast_fp16")]; + tensor var_252_shape_cast_fp16 = shape(x = var_245_cast_fp16)[name = string("op_252_shape_cast_fp16")]; int32 gather_13 = const()[name = string("gather_13"), val = int32(1)]; int32 gather_14 = const()[name = string("gather_14"), val = int32(8)]; int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; - string var_249_shape_to_uint16_dtype_0 = const()[name = string("op_249_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_252_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_252_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(2)]; - tensor var_249_shape_to_uint16 = cast(dtype = var_249_shape_to_uint16_dtype_0, x = var_249_shape)[name = string("cast_126")]; - uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_249_shape_to_uint16)[name = string("gather_15_cast_uint16")]; + tensor var_252_shape_cast_fp16_to_uint16 = cast(dtype = var_252_shape_cast_fp16_to_uint16_dtype_0, x = var_252_shape_cast_fp16)[name = string("cast_126")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_252_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_16 = const()[name = string("gather_16"), val = int32(64)]; - tensor var_256_axes_0 = const()[name = string("op_256_axes_0"), val = tensor([2])]; - tensor var_256 = expand_dims(axes = var_256_axes_0, x = var_242)[name = string("op_256")]; - tensor shape_17 = shape(x = var_256)[name = string("shape_17")]; + tensor var_259_axes_0 = const()[name = string("op_259_axes_0"), val = tensor([2])]; + tensor var_259_cast_fp16 = expand_dims(axes = var_259_axes_0, x = var_245_cast_fp16)[name = string("op_259_cast_fp16")]; + tensor shape_17_cast_fp16 = shape(x = var_259_cast_fp16)[name = string("shape_17_cast_fp16")]; int32 concat_13_axis_0 = const()[name = string("concat_13_axis_0"), val = int32(0)]; bool concat_13_interleave_0 = const()[name = string("concat_13_interleave_0"), val = bool(false)]; int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_125")]; - tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (gather_13, gather_14, var_60, gather_15_cast_uint16_to_int32, gather_16))[name = string("concat_13")]; - tensor real_div_0 = real_div(x = concat_13, y = shape_17)[name = string("real_div_0")]; - tensor hidden_states_9 = tile(reps = real_div_0, x = var_256)[name = string("hidden_states_9")]; + tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (gather_13, gather_14, var_59, gather_15_cast_uint16_to_int32, gather_16))[name = string("concat_13")]; + tensor real_div_0 = real_div(x = concat_13, y = shape_17_cast_fp16)[name = string("real_div_0")]; + tensor hidden_states_11_cast_fp16 = tile(reps = real_div_0, x = var_259_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_3 = reshape(shape = concat_14x, x = hidden_states_9)[name = string("key_states_3")]; - tensor var_266_shape = shape(x = var_247)[name = string("op_266_shape")]; + tensor key_states_3_cast_fp16 = reshape(shape = concat_14x, x = hidden_states_11_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_269_shape_cast_fp16 = shape(x = var_250_cast_fp16)[name = string("op_269_shape_cast_fp16")]; int32 gather_17 = const()[name = string("gather_17"), val = int32(1)]; int32 gather_18 = const()[name = string("gather_18"), val = int32(8)]; int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; - string var_266_shape_to_uint16_dtype_0 = const()[name = string("op_266_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_269_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_269_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(2)]; - tensor var_266_shape_to_uint16 = cast(dtype = var_266_shape_to_uint16_dtype_0, x = var_266_shape)[name = string("cast_124")]; - uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_266_shape_to_uint16)[name = string("gather_19_cast_uint16")]; + tensor var_269_shape_cast_fp16_to_uint16 = cast(dtype = var_269_shape_cast_fp16_to_uint16_dtype_0, x = var_269_shape_cast_fp16)[name = string("cast_124")]; + uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_269_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_20 = const()[name = string("gather_20"), val = int32(64)]; - tensor var_273_axes_0 = const()[name = string("op_273_axes_0"), val = tensor([2])]; - tensor var_273 = expand_dims(axes = var_273_axes_0, x = var_247)[name = string("op_273")]; - tensor shape_22 = shape(x = var_273)[name = string("shape_22")]; + tensor var_276_axes_0 = const()[name = string("op_276_axes_0"), val = tensor([2])]; + tensor var_276_cast_fp16 = expand_dims(axes = var_276_axes_0, x = var_250_cast_fp16)[name = string("op_276_cast_fp16")]; + tensor shape_22_cast_fp16 = shape(x = var_276_cast_fp16)[name = string("shape_22_cast_fp16")]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_123")]; - tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_17, gather_18, var_60, gather_19_cast_uint16_to_int32, gather_20))[name = string("concat_15")]; - tensor real_div_1 = real_div(x = concat_15, y = shape_22)[name = string("real_div_1")]; - tensor hidden_states_13 = tile(reps = real_div_1, x = var_273)[name = string("hidden_states_13")]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_17, gather_18, var_59, gather_19_cast_uint16_to_int32, gather_20))[name = string("concat_15")]; + tensor real_div_1 = real_div(x = concat_15, y = shape_22_cast_fp16)[name = string("real_div_1")]; + tensor hidden_states_15_cast_fp16 = tile(reps = real_div_1, x = var_276_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor concat_16x = const()[name = string("concat_16x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_3 = reshape(shape = concat_16x, x = hidden_states_13)[name = string("value_states_3")]; - tensor var_283_shape = shape(x = key_states_3)[name = string("op_283_shape")]; + tensor value_states_3_cast_fp16 = reshape(shape = concat_16x, x = hidden_states_15_cast_fp16)[name = string("value_states_3_cast_fp16")]; + tensor var_286_shape_cast_fp16 = shape(x = key_states_3_cast_fp16)[name = string("op_286_shape_cast_fp16")]; int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; - string var_283_shape_to_uint16_dtype_0 = const()[name = string("op_283_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_286_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_286_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(2)]; - tensor var_283_shape_to_uint16 = cast(dtype = var_283_shape_to_uint16_dtype_0, x = var_283_shape)[name = string("cast_122")]; - uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_283_shape_to_uint16)[name = string("gather_21_cast_uint16")]; + tensor var_286_shape_cast_fp16_to_uint16 = cast(dtype = var_286_shape_cast_fp16_to_uint16_dtype_0, x = var_286_shape_cast_fp16)[name = string("cast_122")]; + uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_286_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_17_values0_0 = const()[name = string("concat_17_values0_0"), val = int32(1)]; int32 concat_17_values1_0 = const()[name = string("concat_17_values1_0"), val = int32(1)]; @@ -408,99 +267,108 @@ program(1.3) tensor causal_mask_3_begin_0 = const()[name = string("causal_mask_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_3_end_mask_0 = const()[name = string("causal_mask_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_3_cast_fp16 = slice_by_index(begin = causal_mask_3_begin_0, end = concat_17, end_mask = causal_mask_3_end_mask_0, x = causal_mask)[name = string("causal_mask_3_cast_fp16")]; - tensor attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_3_cast_fp16, key = key_states_3, query = query_states_3, value = value_states_3)[name = string("attn_output_1_cast_fp16")]; - tensor var_289_perm_0 = const()[name = string("op_289_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_3_cast_fp16, key = key_states_3_cast_fp16, query = query_states_3_cast_fp16, value = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_292_perm_0 = const()[name = string("op_292_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_120")]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (gather_4, gather_5_cast_uint16_to_int32, var_48))[name = string("concat_18")]; - tensor var_289 = transpose(perm = var_289_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_60")]; - tensor input_1 = reshape(shape = concat_18, x = var_289)[name = string("input_1")]; - tensor linear_3 = linear(bias = linear_0_bias_0, weight = model_model_layers_0_self_attn_o_proj_weight_quantized, x = input_1)[name = string("linear_3")]; - tensor hidden_states_17 = add(x = inputs_embeds, y = linear_3)[name = string("hidden_states_17")]; - fp16 var_55_promoted_1_to_fp16 = const()[name = string("op_55_promoted_1_to_fp16"), val = fp16(0x1p+1)]; - tensor var_298_cast_fp16 = pow(x = hidden_states_17, y = var_55_promoted_1_to_fp16)[name = string("op_298_cast_fp16")]; + tensor var_292_cast_fp16 = transpose(perm = var_292_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_60")]; + tensor input_1_cast_fp16 = reshape(shape = concat_18, x = var_292_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151299968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153397184))))[name = string("model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized, x = input_1_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + fp16 var_54_promoted_1_to_fp16 = const()[name = string("op_54_promoted_1_to_fp16"), val = fp16(0x1p+1)]; + tensor var_301_cast_fp16 = pow(x = hidden_states_19_cast_fp16, y = var_54_promoted_1_to_fp16)[name = string("op_301_cast_fp16")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; - tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_298_cast_fp16)[name = string("variance_3_cast_fp16")]; - fp16 var_301_to_fp16 = const()[name = string("op_301_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_302_cast_fp16 = add(x = variance_3_cast_fp16, y = var_301_to_fp16)[name = string("op_302_cast_fp16")]; - fp32 var_303_epsilon_0 = const()[name = string("op_303_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_303_cast_fp16 = rsqrt(epsilon = var_303_epsilon_0, x = var_302_cast_fp16)[name = string("op_303_cast_fp16")]; - tensor hidden_states_21_cast_fp16 = mul(x = hidden_states_17, y = var_303_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; - tensor input_3 = mul(x = model_model_layers_0_post_attention_layernorm_weight, y = hidden_states_21_cast_fp16)[name = string("input_3")]; - tensor linear_4_bias_0 = const()[name = string("linear_4_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695264768)))]; - tensor linear_4 = linear(bias = linear_4_bias_0, weight = model_model_layers_0_mlp_gate_proj_weight_quantized, x = input_3)[name = string("linear_4")]; - tensor var_312 = silu(x = linear_4)[name = string("op_312")]; - tensor linear_5 = linear(bias = linear_4_bias_0, weight = model_model_layers_0_mlp_up_proj_weight_quantized, x = input_3)[name = string("linear_5")]; - tensor input_7 = mul(x = var_312, y = linear_5)[name = string("input_7")]; - tensor linear_6 = linear(bias = linear_0_bias_0, weight = model_model_layers_0_mlp_down_proj_weight_quantized, x = input_7)[name = string("linear_6")]; - tensor hidden_states_25 = add(x = hidden_states_17, y = linear_6)[name = string("hidden_states_25")]; - fp16 var_55_promoted_2_to_fp16 = const()[name = string("op_55_promoted_2_to_fp16"), val = fp16(0x1p+1)]; - tensor var_325_cast_fp16 = pow(x = hidden_states_25, y = var_55_promoted_2_to_fp16)[name = string("op_325_cast_fp16")]; + tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_301_cast_fp16)[name = string("variance_3_cast_fp16")]; + fp16 var_304_to_fp16 = const()[name = string("op_304_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_305_cast_fp16 = add(x = variance_3_cast_fp16, y = var_304_to_fp16)[name = string("op_305_cast_fp16")]; + fp32 var_306_epsilon_0 = const()[name = string("op_306_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_306_cast_fp16 = rsqrt(epsilon = var_306_epsilon_0, x = var_305_cast_fp16)[name = string("op_306_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = var_306_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153659392)))]; + tensor input_3_cast_fp16 = mul(x = model_model_layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_23_cast_fp16)[name = string("input_3_cast_fp16")]; + tensor model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153663552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(162052224))))[name = string("model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163100864)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor var_318_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_318_cast_fp16")]; + tensor model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163117312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(171505984))))[name = string("model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_318_cast_fp16, y = linear_5_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172554624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180943296))))[name = string("model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + fp16 var_54_promoted_2_to_fp16 = const()[name = string("op_54_promoted_2_to_fp16"), val = fp16(0x1p+1)]; + tensor var_331_cast_fp16 = pow(x = hidden_states_29_cast_fp16, y = var_54_promoted_2_to_fp16)[name = string("op_331_cast_fp16")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; - tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_325_cast_fp16)[name = string("variance_5_cast_fp16")]; - fp16 var_328_to_fp16 = const()[name = string("op_328_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_329_cast_fp16 = add(x = variance_5_cast_fp16, y = var_328_to_fp16)[name = string("op_329_cast_fp16")]; - fp32 var_330_epsilon_0 = const()[name = string("op_330_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_330_cast_fp16 = rsqrt(epsilon = var_330_epsilon_0, x = var_329_cast_fp16)[name = string("op_330_cast_fp16")]; - tensor hidden_states_29_cast_fp16 = mul(x = hidden_states_25, y = var_330_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; - tensor hidden_states_31 = mul(x = model_model_layers_1_input_layernorm_weight, y = hidden_states_29_cast_fp16)[name = string("hidden_states_31")]; - tensor var_338_shape = shape(x = hidden_states_31)[name = string("op_338_shape")]; + tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_331_cast_fp16)[name = string("variance_5_cast_fp16")]; + fp16 var_334_to_fp16 = const()[name = string("op_334_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_335_cast_fp16 = add(x = variance_5_cast_fp16, y = var_334_to_fp16)[name = string("op_335_cast_fp16")]; + fp32 var_336_epsilon_0 = const()[name = string("op_336_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_336_cast_fp16 = rsqrt(epsilon = var_336_epsilon_0, x = var_335_cast_fp16)[name = string("op_336_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = var_336_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181991936)))]; + tensor hidden_states_37_cast_fp16 = mul(x = model_model_layers_1_input_layernorm_weight_to_fp16, y = hidden_states_33_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor var_347_shape_cast_fp16 = shape(x = hidden_states_37_cast_fp16)[name = string("op_347_shape_cast_fp16")]; int32 gather_22 = const()[name = string("gather_22"), val = int32(1)]; int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; - string var_338_shape_to_uint16_dtype_0 = const()[name = string("op_338_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_347_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_347_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; - tensor var_338_shape_to_uint16 = cast(dtype = var_338_shape_to_uint16_dtype_0, x = var_338_shape)[name = string("cast_119")]; - uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_338_shape_to_uint16)[name = string("gather_23_cast_uint16")]; + tensor var_347_shape_cast_fp16_to_uint16 = cast(dtype = var_347_shape_cast_fp16_to_uint16_dtype_0, x = var_347_shape_cast_fp16)[name = string("cast_119")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_347_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_7 = linear(bias = linear_0_bias_0, weight = model_model_layers_1_self_attn_q_proj_weight_quantized, x = hidden_states_31)[name = string("linear_7")]; - tensor linear_8 = linear(bias = linear_1_bias_0, weight = model_model_layers_1_self_attn_k_proj_weight_quantized, x = hidden_states_31)[name = string("linear_8")]; - tensor linear_9 = linear(bias = linear_1_bias_0, weight = model_model_layers_1_self_attn_v_proj_weight_quantized, x = hidden_states_31)[name = string("linear_9")]; + tensor model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(181996096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184093312))))[name = string("model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184355520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184879872))))[name = string("model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(184945472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185469824))))[name = string("model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor concat_19x = const()[name = string("concat_19x"), val = tensor([1, -1, 32, 64])]; - tensor var_347 = reshape(shape = concat_19x, x = linear_7)[name = string("op_347")]; + tensor var_356_cast_fp16 = reshape(shape = concat_19x, x = linear_7_cast_fp16)[name = string("op_356_cast_fp16")]; tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_20x = const()[name = string("concat_20x"), val = tensor([1, -1, 8, 64])]; - tensor var_350 = reshape(shape = concat_20x, x = linear_8)[name = string("op_350")]; + tensor var_359_cast_fp16 = reshape(shape = concat_20x, x = linear_8_cast_fp16)[name = string("op_359_cast_fp16")]; tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_21x = const()[name = string("concat_21x"), val = tensor([1, -1, 8, 64])]; - tensor var_353 = reshape(shape = concat_21x, x = linear_9)[name = string("op_353")]; + tensor var_362_cast_fp16 = reshape(shape = concat_21x, x = linear_9_cast_fp16)[name = string("op_362_cast_fp16")]; tensor v_state_3_perm_0 = const()[name = string("v_state_3_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_3 = transpose(perm = q_3_perm_0, x = var_347)[name = string("transpose_59")]; - tensor var_357 = mul(x = q_3, y = cos_7)[name = string("op_357")]; + tensor q_3_cast_fp16 = transpose(perm = q_3_perm_0, x = var_356_cast_fp16)[name = string("transpose_59")]; + tensor var_366_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_366_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_5 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3)[name = string("x1_5")]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_5 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3)[name = string("x2_5")]; - fp16 const_5_promoted = const()[name = string("const_5_promoted"), val = fp16(-0x1p+0)]; - tensor var_368 = mul(x = x2_5, y = const_5_promoted)[name = string("op_368")]; - bool var_370_interleave_0 = const()[name = string("op_370_interleave_0"), val = bool(false)]; - tensor var_370 = concat(axis = var_48, interleave = var_370_interleave_0, values = (var_368, x1_5))[name = string("op_370")]; - tensor var_371 = mul(x = var_370, y = sin_7)[name = string("op_371")]; - tensor query_states_7 = add(x = var_357, y = var_371)[name = string("query_states_7")]; - tensor k_3 = transpose(perm = k_3_perm_0, x = var_350)[name = string("transpose_58")]; - tensor var_373 = mul(x = k_3, y = cos_7)[name = string("op_373")]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_377_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_377_cast_fp16")]; + bool var_379_interleave_0 = const()[name = string("op_379_interleave_0"), val = bool(false)]; + tensor var_379_cast_fp16 = concat(axis = var_48, interleave = var_379_interleave_0, values = (var_377_cast_fp16, x1_5_cast_fp16))[name = string("op_379_cast_fp16")]; + tensor var_380_cast_fp16 = mul(x = var_379_cast_fp16, y = sin_7_cast_fp16)[name = string("op_380_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_366_cast_fp16, y = var_380_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = var_359_cast_fp16)[name = string("transpose_58")]; + tensor var_382_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_382_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_7 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3)[name = string("x1_7")]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_7 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3)[name = string("x2_7")]; - fp16 const_6_promoted = const()[name = string("const_6_promoted"), val = fp16(-0x1p+0)]; - tensor var_384 = mul(x = x2_7, y = const_6_promoted)[name = string("op_384")]; - bool var_386_interleave_0 = const()[name = string("op_386_interleave_0"), val = bool(false)]; - tensor var_386 = concat(axis = var_48, interleave = var_386_interleave_0, values = (var_384, x1_7))[name = string("op_386")]; - tensor var_387 = mul(x = var_386, y = sin_7)[name = string("op_387")]; - tensor k_state_3 = add(x = var_373, y = var_387)[name = string("k_state_3")]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_393_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_393_cast_fp16")]; + bool var_395_interleave_0 = const()[name = string("op_395_interleave_0"), val = bool(false)]; + tensor var_395_cast_fp16 = concat(axis = var_48, interleave = var_395_interleave_0, values = (var_393_cast_fp16, x1_7_cast_fp16))[name = string("op_395_cast_fp16")]; + tensor var_396_cast_fp16 = mul(x = var_395_cast_fp16, y = sin_7_cast_fp16)[name = string("op_396_cast_fp16")]; + tensor k_state_3_cast_fp16 = add(x = var_382_cast_fp16, y = var_396_cast_fp16)[name = string("k_state_3_cast_fp16")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; @@ -512,87 +380,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_2 = slice_update(begin = concat_24, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3, x = coreml_update_state_32)[name = string("key_cache_internal_tensor_assign_2")]; - write_state(data = key_cache_internal_tensor_assign_2, input = key_cache)[name = string("coreml_update_state_34_write_state")]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3_cast_fp16, x = coreml_update_state_32)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_34_write_state")]; tensor coreml_update_state_34 = read_state(input = key_cache)[name = string("coreml_update_state_34")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_3 = transpose(perm = v_state_3_perm_0, x = var_353)[name = string("transpose_57")]; - tensor value_cache_internal_tensor_assign_2 = slice_update(begin = concat_24, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3, x = coreml_update_state_33)[name = string("value_cache_internal_tensor_assign_2")]; - write_state(data = value_cache_internal_tensor_assign_2, input = value_cache)[name = string("coreml_update_state_35_write_state")]; + tensor v_state_3_cast_fp16 = transpose(perm = v_state_3_perm_0, x = var_362_cast_fp16)[name = string("transpose_57")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3_cast_fp16, x = coreml_update_state_33)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_35_write_state")]; tensor coreml_update_state_35 = read_state(input = value_cache)[name = string("coreml_update_state_35")]; - tensor var_410_begin_0 = const()[name = string("op_410_begin_0"), val = tensor([1, 0, 0, 0, 0])]; - tensor var_410_end_0 = const()[name = string("op_410_end_0"), val = tensor([2, 1, 8, 2048, 64])]; - tensor var_410_end_mask_0 = const()[name = string("op_410_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_410_squeeze_mask_0 = const()[name = string("op_410_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_410 = slice_by_index(begin = var_410_begin_0, end = var_410_end_0, end_mask = var_410_end_mask_0, squeeze_mask = var_410_squeeze_mask_0, x = coreml_update_state_34)[name = string("op_410")]; - tensor var_413_begin_0 = const()[name = string("op_413_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_413_end_mask_0 = const()[name = string("op_413_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_413 = slice_by_index(begin = var_413_begin_0, end = concat_11, end_mask = var_413_end_mask_0, x = var_410)[name = string("op_413")]; - tensor var_415_begin_0 = const()[name = string("op_415_begin_0"), val = tensor([1, 0, 0, 0, 0])]; - tensor var_415_end_0 = const()[name = string("op_415_end_0"), val = tensor([2, 1, 8, 2048, 64])]; - tensor var_415_end_mask_0 = const()[name = string("op_415_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_415_squeeze_mask_0 = const()[name = string("op_415_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_415 = slice_by_index(begin = var_415_begin_0, end = var_415_end_0, end_mask = var_415_end_mask_0, squeeze_mask = var_415_squeeze_mask_0, x = coreml_update_state_35)[name = string("op_415")]; - tensor var_418_begin_0 = const()[name = string("op_418_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_418_end_mask_0 = const()[name = string("op_418_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_418 = slice_by_index(begin = var_418_begin_0, end = concat_11, end_mask = var_418_end_mask_0, x = var_415)[name = string("op_418")]; - tensor var_420_shape = shape(x = var_413)[name = string("op_420_shape")]; + tensor var_419_begin_0 = const()[name = string("op_419_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_419_end_0 = const()[name = string("op_419_end_0"), val = tensor([2, 1, 8, 2048, 64])]; + tensor var_419_end_mask_0 = const()[name = string("op_419_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_419_squeeze_mask_0 = const()[name = string("op_419_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, squeeze_mask = var_419_squeeze_mask_0, x = coreml_update_state_34)[name = string("op_419_cast_fp16")]; + tensor var_422_begin_0 = const()[name = string("op_422_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_422_end_mask_0 = const()[name = string("op_422_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_422_cast_fp16 = slice_by_index(begin = var_422_begin_0, end = concat_11, end_mask = var_422_end_mask_0, x = var_419_cast_fp16)[name = string("op_422_cast_fp16")]; + tensor var_424_begin_0 = const()[name = string("op_424_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_424_end_0 = const()[name = string("op_424_end_0"), val = tensor([2, 1, 8, 2048, 64])]; + tensor var_424_end_mask_0 = const()[name = string("op_424_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_424_squeeze_mask_0 = const()[name = string("op_424_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_424_cast_fp16 = slice_by_index(begin = var_424_begin_0, end = var_424_end_0, end_mask = var_424_end_mask_0, squeeze_mask = var_424_squeeze_mask_0, x = coreml_update_state_35)[name = string("op_424_cast_fp16")]; + tensor var_427_begin_0 = const()[name = string("op_427_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_427_end_mask_0 = const()[name = string("op_427_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_427_cast_fp16 = slice_by_index(begin = var_427_begin_0, end = concat_11, end_mask = var_427_end_mask_0, x = var_424_cast_fp16)[name = string("op_427_cast_fp16")]; + tensor var_429_shape_cast_fp16 = shape(x = var_422_cast_fp16)[name = string("op_429_shape_cast_fp16")]; int32 gather_31 = const()[name = string("gather_31"), val = int32(1)]; int32 gather_32 = const()[name = string("gather_32"), val = int32(8)]; int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; - string var_420_shape_to_uint16_dtype_0 = const()[name = string("op_420_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_429_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_429_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(2)]; - tensor var_420_shape_to_uint16 = cast(dtype = var_420_shape_to_uint16_dtype_0, x = var_420_shape)[name = string("cast_118")]; - uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_420_shape_to_uint16)[name = string("gather_33_cast_uint16")]; + tensor var_429_shape_cast_fp16_to_uint16 = cast(dtype = var_429_shape_cast_fp16_to_uint16_dtype_0, x = var_429_shape_cast_fp16)[name = string("cast_118")]; + uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_429_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_34 = const()[name = string("gather_34"), val = int32(64)]; - tensor var_427_axes_0 = const()[name = string("op_427_axes_0"), val = tensor([2])]; - tensor var_427 = expand_dims(axes = var_427_axes_0, x = var_413)[name = string("op_427")]; - tensor shape_37 = shape(x = var_427)[name = string("shape_37")]; + tensor var_436_axes_0 = const()[name = string("op_436_axes_0"), val = tensor([2])]; + tensor var_436_cast_fp16 = expand_dims(axes = var_436_axes_0, x = var_422_cast_fp16)[name = string("op_436_cast_fp16")]; + tensor shape_37_cast_fp16 = shape(x = var_436_cast_fp16)[name = string("shape_37_cast_fp16")]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_117")]; - tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (gather_31, gather_32, var_60, gather_33_cast_uint16_to_int32, gather_34))[name = string("concat_32")]; - tensor real_div_2 = real_div(x = concat_32, y = shape_37)[name = string("real_div_2")]; - tensor hidden_states_35 = tile(reps = real_div_2, x = var_427)[name = string("hidden_states_35")]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (gather_31, gather_32, var_59, gather_33_cast_uint16_to_int32, gather_34))[name = string("concat_32")]; + tensor real_div_2 = real_div(x = concat_32, y = shape_37_cast_fp16)[name = string("real_div_2")]; + tensor hidden_states_41_cast_fp16 = tile(reps = real_div_2, x = var_436_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor concat_33x = const()[name = string("concat_33x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_7 = reshape(shape = concat_33x, x = hidden_states_35)[name = string("key_states_7")]; - tensor var_437_shape = shape(x = var_418)[name = string("op_437_shape")]; + tensor key_states_7_cast_fp16 = reshape(shape = concat_33x, x = hidden_states_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor var_446_shape_cast_fp16 = shape(x = var_427_cast_fp16)[name = string("op_446_shape_cast_fp16")]; int32 gather_35 = const()[name = string("gather_35"), val = int32(1)]; int32 gather_36 = const()[name = string("gather_36"), val = int32(8)]; int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; - string var_437_shape_to_uint16_dtype_0 = const()[name = string("op_437_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_446_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_446_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(2)]; - tensor var_437_shape_to_uint16 = cast(dtype = var_437_shape_to_uint16_dtype_0, x = var_437_shape)[name = string("cast_116")]; - uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_437_shape_to_uint16)[name = string("gather_37_cast_uint16")]; + tensor var_446_shape_cast_fp16_to_uint16 = cast(dtype = var_446_shape_cast_fp16_to_uint16_dtype_0, x = var_446_shape_cast_fp16)[name = string("cast_116")]; + uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_446_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_38 = const()[name = string("gather_38"), val = int32(64)]; - tensor var_444_axes_0 = const()[name = string("op_444_axes_0"), val = tensor([2])]; - tensor var_444 = expand_dims(axes = var_444_axes_0, x = var_418)[name = string("op_444")]; - tensor shape_42 = shape(x = var_444)[name = string("shape_42")]; + tensor var_453_axes_0 = const()[name = string("op_453_axes_0"), val = tensor([2])]; + tensor var_453_cast_fp16 = expand_dims(axes = var_453_axes_0, x = var_427_cast_fp16)[name = string("op_453_cast_fp16")]; + tensor shape_42_cast_fp16 = shape(x = var_453_cast_fp16)[name = string("shape_42_cast_fp16")]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_115")]; - tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (gather_35, gather_36, var_60, gather_37_cast_uint16_to_int32, gather_38))[name = string("concat_34")]; - tensor real_div_3 = real_div(x = concat_34, y = shape_42)[name = string("real_div_3")]; - tensor hidden_states_39 = tile(reps = real_div_3, x = var_444)[name = string("hidden_states_39")]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (gather_35, gather_36, var_59, gather_37_cast_uint16_to_int32, gather_38))[name = string("concat_34")]; + tensor real_div_3 = real_div(x = concat_34, y = shape_42_cast_fp16)[name = string("real_div_3")]; + tensor hidden_states_45_cast_fp16 = tile(reps = real_div_3, x = var_453_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_7 = reshape(shape = concat_35x, x = hidden_states_39)[name = string("value_states_7")]; - tensor var_454_shape = shape(x = key_states_7)[name = string("op_454_shape")]; + tensor value_states_7_cast_fp16 = reshape(shape = concat_35x, x = hidden_states_45_cast_fp16)[name = string("value_states_7_cast_fp16")]; + tensor var_463_shape_cast_fp16 = shape(x = key_states_7_cast_fp16)[name = string("op_463_shape_cast_fp16")]; int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; - string var_454_shape_to_uint16_dtype_0 = const()[name = string("op_454_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_463_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_463_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(2)]; - tensor var_454_shape_to_uint16 = cast(dtype = var_454_shape_to_uint16_dtype_0, x = var_454_shape)[name = string("cast_114")]; - uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_454_shape_to_uint16)[name = string("gather_39_cast_uint16")]; + tensor var_463_shape_cast_fp16_to_uint16 = cast(dtype = var_463_shape_cast_fp16_to_uint16_dtype_0, x = var_463_shape_cast_fp16)[name = string("cast_114")]; + uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_463_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = int32(1)]; int32 concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = int32(1)]; @@ -604,98 +472,107 @@ program(1.3) tensor causal_mask_5_begin_0 = const()[name = string("causal_mask_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_5_end_mask_0 = const()[name = string("causal_mask_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_5_cast_fp16 = slice_by_index(begin = causal_mask_5_begin_0, end = concat_36, end_mask = causal_mask_5_end_mask_0, x = causal_mask)[name = string("causal_mask_5_cast_fp16")]; - tensor attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_5_cast_fp16, key = key_states_7, query = query_states_7, value = value_states_7)[name = string("attn_output_5_cast_fp16")]; - tensor var_460_perm_0 = const()[name = string("op_460_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_5_cast_fp16, key = key_states_7_cast_fp16, query = query_states_7_cast_fp16, value = value_states_7_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_469_perm_0 = const()[name = string("op_469_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_112")]; tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_22, gather_23_cast_uint16_to_int32, var_48))[name = string("concat_37")]; - tensor var_460 = transpose(perm = var_460_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_56")]; - tensor input_9 = reshape(shape = concat_37, x = var_460)[name = string("input_9")]; - tensor linear_10 = linear(bias = linear_0_bias_0, weight = model_model_layers_1_self_attn_o_proj_weight_quantized, x = input_9)[name = string("linear_10")]; - tensor hidden_states_43 = add(x = hidden_states_25, y = linear_10)[name = string("hidden_states_43")]; - fp16 var_55_promoted_3_to_fp16 = const()[name = string("op_55_promoted_3_to_fp16"), val = fp16(0x1p+1)]; - tensor var_469_cast_fp16 = pow(x = hidden_states_43, y = var_55_promoted_3_to_fp16)[name = string("op_469_cast_fp16")]; + tensor var_469_cast_fp16 = transpose(perm = var_469_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_56")]; + tensor input_9_cast_fp16 = reshape(shape = concat_37, x = var_469_cast_fp16)[name = string("input_9_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185535424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187632640))))[name = string("model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + fp16 var_54_promoted_3_to_fp16 = const()[name = string("op_54_promoted_3_to_fp16"), val = fp16(0x1p+1)]; + tensor var_478_cast_fp16 = pow(x = hidden_states_49_cast_fp16, y = var_54_promoted_3_to_fp16)[name = string("op_478_cast_fp16")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; - tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_469_cast_fp16)[name = string("variance_7_cast_fp16")]; - fp16 var_472_to_fp16 = const()[name = string("op_472_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_473_cast_fp16 = add(x = variance_7_cast_fp16, y = var_472_to_fp16)[name = string("op_473_cast_fp16")]; - fp32 var_474_epsilon_0 = const()[name = string("op_474_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_474_cast_fp16 = rsqrt(epsilon = var_474_epsilon_0, x = var_473_cast_fp16)[name = string("op_474_cast_fp16")]; - tensor hidden_states_47_cast_fp16 = mul(x = hidden_states_43, y = var_474_cast_fp16)[name = string("hidden_states_47_cast_fp16")]; - tensor input_11 = mul(x = model_model_layers_1_post_attention_layernorm_weight, y = hidden_states_47_cast_fp16)[name = string("input_11")]; - tensor linear_11 = linear(bias = linear_4_bias_0, weight = model_model_layers_1_mlp_gate_proj_weight_quantized, x = input_11)[name = string("linear_11")]; - tensor var_483 = silu(x = linear_11)[name = string("op_483")]; - tensor linear_12 = linear(bias = linear_4_bias_0, weight = model_model_layers_1_mlp_up_proj_weight_quantized, x = input_11)[name = string("linear_12")]; - tensor input_15 = mul(x = var_483, y = linear_12)[name = string("input_15")]; - tensor linear_13 = linear(bias = linear_0_bias_0, weight = model_model_layers_1_mlp_down_proj_weight_quantized, x = input_15)[name = string("linear_13")]; - tensor hidden_states_51 = add(x = hidden_states_43, y = linear_13)[name = string("hidden_states_51")]; - fp16 var_55_promoted_4_to_fp16 = const()[name = string("op_55_promoted_4_to_fp16"), val = fp16(0x1p+1)]; - tensor var_496_cast_fp16 = pow(x = hidden_states_51, y = var_55_promoted_4_to_fp16)[name = string("op_496_cast_fp16")]; + tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_478_cast_fp16)[name = string("variance_7_cast_fp16")]; + fp16 var_481_to_fp16 = const()[name = string("op_481_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_482_cast_fp16 = add(x = variance_7_cast_fp16, y = var_481_to_fp16)[name = string("op_482_cast_fp16")]; + fp32 var_483_epsilon_0 = const()[name = string("op_483_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_483_cast_fp16 = rsqrt(epsilon = var_483_epsilon_0, x = var_482_cast_fp16)[name = string("op_483_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = var_483_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187894848)))]; + tensor input_11_cast_fp16 = mul(x = model_model_layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_53_cast_fp16)[name = string("input_11_cast_fp16")]; + tensor model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(187899008))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(196287680))))[name = string("model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor var_495_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_495_cast_fp16")]; + tensor model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197336320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205724992))))[name = string("model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_495_cast_fp16, y = linear_12_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(206773632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(215162304))))[name = string("model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_13_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; + fp16 var_54_promoted_4_to_fp16 = const()[name = string("op_54_promoted_4_to_fp16"), val = fp16(0x1p+1)]; + tensor var_508_cast_fp16 = pow(x = hidden_states_59_cast_fp16, y = var_54_promoted_4_to_fp16)[name = string("op_508_cast_fp16")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; - tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_496_cast_fp16)[name = string("variance_9_cast_fp16")]; - fp16 var_499_to_fp16 = const()[name = string("op_499_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_500_cast_fp16 = add(x = variance_9_cast_fp16, y = var_499_to_fp16)[name = string("op_500_cast_fp16")]; - fp32 var_501_epsilon_0 = const()[name = string("op_501_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_501_cast_fp16 = rsqrt(epsilon = var_501_epsilon_0, x = var_500_cast_fp16)[name = string("op_501_cast_fp16")]; - tensor hidden_states_55_cast_fp16 = mul(x = hidden_states_51, y = var_501_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; - tensor hidden_states_57 = mul(x = model_model_layers_2_input_layernorm_weight, y = hidden_states_55_cast_fp16)[name = string("hidden_states_57")]; - tensor var_509_shape = shape(x = hidden_states_57)[name = string("op_509_shape")]; + tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_508_cast_fp16)[name = string("variance_9_cast_fp16")]; + fp16 var_511_to_fp16 = const()[name = string("op_511_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_512_cast_fp16 = add(x = variance_9_cast_fp16, y = var_511_to_fp16)[name = string("op_512_cast_fp16")]; + fp32 var_513_epsilon_0 = const()[name = string("op_513_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_513_cast_fp16 = rsqrt(epsilon = var_513_epsilon_0, x = var_512_cast_fp16)[name = string("op_513_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = var_513_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216210944)))]; + tensor hidden_states_67_cast_fp16 = mul(x = model_model_layers_2_input_layernorm_weight_to_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor var_524_shape_cast_fp16 = shape(x = hidden_states_67_cast_fp16)[name = string("op_524_shape_cast_fp16")]; int32 gather_40 = const()[name = string("gather_40"), val = int32(1)]; int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)]; int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)]; bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)]; - string var_509_shape_to_uint16_dtype_0 = const()[name = string("op_509_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_524_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_524_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)]; - tensor var_509_shape_to_uint16 = cast(dtype = var_509_shape_to_uint16_dtype_0, x = var_509_shape)[name = string("cast_111")]; - uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_509_shape_to_uint16)[name = string("gather_41_cast_uint16")]; + tensor var_524_shape_cast_fp16_to_uint16 = cast(dtype = var_524_shape_cast_fp16_to_uint16_dtype_0, x = var_524_shape_cast_fp16)[name = string("cast_111")]; + uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_524_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")]; string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_14 = linear(bias = linear_0_bias_0, weight = model_model_layers_2_self_attn_q_proj_weight_quantized, x = hidden_states_57)[name = string("linear_14")]; - tensor linear_15 = linear(bias = linear_1_bias_0, weight = model_model_layers_2_self_attn_k_proj_weight_quantized, x = hidden_states_57)[name = string("linear_15")]; - tensor linear_16 = linear(bias = linear_1_bias_0, weight = model_model_layers_2_self_attn_v_proj_weight_quantized, x = hidden_states_57)[name = string("linear_16")]; + tensor model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216215104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218312320))))[name = string("model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(218574528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219098880))))[name = string("model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219164480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219688832))))[name = string("model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor concat_38x = const()[name = string("concat_38x"), val = tensor([1, -1, 32, 64])]; - tensor var_518 = reshape(shape = concat_38x, x = linear_14)[name = string("op_518")]; + tensor var_533_cast_fp16 = reshape(shape = concat_38x, x = linear_14_cast_fp16)[name = string("op_533_cast_fp16")]; tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 8, 64])]; - tensor var_521 = reshape(shape = concat_39x, x = linear_15)[name = string("op_521")]; + tensor var_536_cast_fp16 = reshape(shape = concat_39x, x = linear_15_cast_fp16)[name = string("op_536_cast_fp16")]; tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_40x = const()[name = string("concat_40x"), val = tensor([1, -1, 8, 64])]; - tensor var_524 = reshape(shape = concat_40x, x = linear_16)[name = string("op_524")]; + tensor var_539_cast_fp16 = reshape(shape = concat_40x, x = linear_16_cast_fp16)[name = string("op_539_cast_fp16")]; tensor v_state_5_perm_0 = const()[name = string("v_state_5_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_5 = transpose(perm = q_5_perm_0, x = var_518)[name = string("transpose_55")]; - tensor var_528 = mul(x = q_5, y = cos_7)[name = string("op_528")]; + tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = var_533_cast_fp16)[name = string("transpose_55")]; + tensor var_543_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_543_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_9 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5)[name = string("x1_9")]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_9 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5)[name = string("x2_9")]; - fp16 const_7_promoted = const()[name = string("const_7_promoted"), val = fp16(-0x1p+0)]; - tensor var_539 = mul(x = x2_9, y = const_7_promoted)[name = string("op_539")]; - bool var_541_interleave_0 = const()[name = string("op_541_interleave_0"), val = bool(false)]; - tensor var_541 = concat(axis = var_48, interleave = var_541_interleave_0, values = (var_539, x1_9))[name = string("op_541")]; - tensor var_542 = mul(x = var_541, y = sin_7)[name = string("op_542")]; - tensor query_states_11 = add(x = var_528, y = var_542)[name = string("query_states_11")]; - tensor k_5 = transpose(perm = k_5_perm_0, x = var_521)[name = string("transpose_54")]; - tensor var_544 = mul(x = k_5, y = cos_7)[name = string("op_544")]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_554_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_554_cast_fp16")]; + bool var_556_interleave_0 = const()[name = string("op_556_interleave_0"), val = bool(false)]; + tensor var_556_cast_fp16 = concat(axis = var_48, interleave = var_556_interleave_0, values = (var_554_cast_fp16, x1_9_cast_fp16))[name = string("op_556_cast_fp16")]; + tensor var_557_cast_fp16 = mul(x = var_556_cast_fp16, y = sin_7_cast_fp16)[name = string("op_557_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_543_cast_fp16, y = var_557_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = var_536_cast_fp16)[name = string("transpose_54")]; + tensor var_559_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_559_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_11 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5)[name = string("x1_11")]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_11 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5)[name = string("x2_11")]; - fp16 const_8_promoted = const()[name = string("const_8_promoted"), val = fp16(-0x1p+0)]; - tensor var_555 = mul(x = x2_11, y = const_8_promoted)[name = string("op_555")]; - bool var_557_interleave_0 = const()[name = string("op_557_interleave_0"), val = bool(false)]; - tensor var_557 = concat(axis = var_48, interleave = var_557_interleave_0, values = (var_555, x1_11))[name = string("op_557")]; - tensor var_558 = mul(x = var_557, y = sin_7)[name = string("op_558")]; - tensor k_state_5 = add(x = var_544, y = var_558)[name = string("k_state_5")]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_570_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_570_cast_fp16")]; + bool var_572_interleave_0 = const()[name = string("op_572_interleave_0"), val = bool(false)]; + tensor var_572_cast_fp16 = concat(axis = var_48, interleave = var_572_interleave_0, values = (var_570_cast_fp16, x1_11_cast_fp16))[name = string("op_572_cast_fp16")]; + tensor var_573_cast_fp16 = mul(x = var_572_cast_fp16, y = sin_7_cast_fp16)[name = string("op_573_cast_fp16")]; + tensor k_state_5_cast_fp16 = add(x = var_559_cast_fp16, y = var_573_cast_fp16)[name = string("k_state_5_cast_fp16")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([0])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; @@ -707,87 +584,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_3 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5, x = coreml_update_state_34)[name = string("key_cache_internal_tensor_assign_3")]; - write_state(data = key_cache_internal_tensor_assign_3, input = key_cache)[name = string("coreml_update_state_36_write_state")]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5_cast_fp16, x = coreml_update_state_34)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_36_write_state")]; tensor coreml_update_state_36 = read_state(input = key_cache)[name = string("coreml_update_state_36")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_5 = transpose(perm = v_state_5_perm_0, x = var_524)[name = string("transpose_53")]; - tensor value_cache_internal_tensor_assign_3 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5, x = coreml_update_state_35)[name = string("value_cache_internal_tensor_assign_3")]; - write_state(data = value_cache_internal_tensor_assign_3, input = value_cache)[name = string("coreml_update_state_37_write_state")]; + tensor v_state_5_cast_fp16 = transpose(perm = v_state_5_perm_0, x = var_539_cast_fp16)[name = string("transpose_53")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5_cast_fp16, x = coreml_update_state_35)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_37_write_state")]; tensor coreml_update_state_37 = read_state(input = value_cache)[name = string("coreml_update_state_37")]; - tensor var_581_begin_0 = const()[name = string("op_581_begin_0"), val = tensor([2, 0, 0, 0, 0])]; - tensor var_581_end_0 = const()[name = string("op_581_end_0"), val = tensor([3, 1, 8, 2048, 64])]; - tensor var_581_end_mask_0 = const()[name = string("op_581_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_581_squeeze_mask_0 = const()[name = string("op_581_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_581 = slice_by_index(begin = var_581_begin_0, end = var_581_end_0, end_mask = var_581_end_mask_0, squeeze_mask = var_581_squeeze_mask_0, x = coreml_update_state_36)[name = string("op_581")]; - tensor var_584_begin_0 = const()[name = string("op_584_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_584_end_mask_0 = const()[name = string("op_584_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_584 = slice_by_index(begin = var_584_begin_0, end = concat_11, end_mask = var_584_end_mask_0, x = var_581)[name = string("op_584")]; - tensor var_586_begin_0 = const()[name = string("op_586_begin_0"), val = tensor([2, 0, 0, 0, 0])]; - tensor var_586_end_0 = const()[name = string("op_586_end_0"), val = tensor([3, 1, 8, 2048, 64])]; - tensor var_586_end_mask_0 = const()[name = string("op_586_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_586_squeeze_mask_0 = const()[name = string("op_586_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_586 = slice_by_index(begin = var_586_begin_0, end = var_586_end_0, end_mask = var_586_end_mask_0, squeeze_mask = var_586_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_586")]; - tensor var_589_begin_0 = const()[name = string("op_589_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_589_end_mask_0 = const()[name = string("op_589_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_589 = slice_by_index(begin = var_589_begin_0, end = concat_11, end_mask = var_589_end_mask_0, x = var_586)[name = string("op_589")]; - tensor var_591_shape = shape(x = var_584)[name = string("op_591_shape")]; + tensor var_596_begin_0 = const()[name = string("op_596_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_596_end_0 = const()[name = string("op_596_end_0"), val = tensor([3, 1, 8, 2048, 64])]; + tensor var_596_end_mask_0 = const()[name = string("op_596_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_596_squeeze_mask_0 = const()[name = string("op_596_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_596_cast_fp16 = slice_by_index(begin = var_596_begin_0, end = var_596_end_0, end_mask = var_596_end_mask_0, squeeze_mask = var_596_squeeze_mask_0, x = coreml_update_state_36)[name = string("op_596_cast_fp16")]; + tensor var_599_begin_0 = const()[name = string("op_599_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_599_end_mask_0 = const()[name = string("op_599_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_599_cast_fp16 = slice_by_index(begin = var_599_begin_0, end = concat_11, end_mask = var_599_end_mask_0, x = var_596_cast_fp16)[name = string("op_599_cast_fp16")]; + tensor var_601_begin_0 = const()[name = string("op_601_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_601_end_0 = const()[name = string("op_601_end_0"), val = tensor([3, 1, 8, 2048, 64])]; + tensor var_601_end_mask_0 = const()[name = string("op_601_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_601_squeeze_mask_0 = const()[name = string("op_601_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_601_cast_fp16 = slice_by_index(begin = var_601_begin_0, end = var_601_end_0, end_mask = var_601_end_mask_0, squeeze_mask = var_601_squeeze_mask_0, x = coreml_update_state_37)[name = string("op_601_cast_fp16")]; + tensor var_604_begin_0 = const()[name = string("op_604_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_604_end_mask_0 = const()[name = string("op_604_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_604_cast_fp16 = slice_by_index(begin = var_604_begin_0, end = concat_11, end_mask = var_604_end_mask_0, x = var_601_cast_fp16)[name = string("op_604_cast_fp16")]; + tensor var_606_shape_cast_fp16 = shape(x = var_599_cast_fp16)[name = string("op_606_shape_cast_fp16")]; int32 gather_49 = const()[name = string("gather_49"), val = int32(1)]; int32 gather_50 = const()[name = string("gather_50"), val = int32(8)]; int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; - string var_591_shape_to_uint16_dtype_0 = const()[name = string("op_591_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_606_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_606_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(2)]; - tensor var_591_shape_to_uint16 = cast(dtype = var_591_shape_to_uint16_dtype_0, x = var_591_shape)[name = string("cast_110")]; - uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_591_shape_to_uint16)[name = string("gather_51_cast_uint16")]; + tensor var_606_shape_cast_fp16_to_uint16 = cast(dtype = var_606_shape_cast_fp16_to_uint16_dtype_0, x = var_606_shape_cast_fp16)[name = string("cast_110")]; + uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_606_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")]; string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_52 = const()[name = string("gather_52"), val = int32(64)]; - tensor var_598_axes_0 = const()[name = string("op_598_axes_0"), val = tensor([2])]; - tensor var_598 = expand_dims(axes = var_598_axes_0, x = var_584)[name = string("op_598")]; - tensor shape_57 = shape(x = var_598)[name = string("shape_57")]; + tensor var_613_axes_0 = const()[name = string("op_613_axes_0"), val = tensor([2])]; + tensor var_613_cast_fp16 = expand_dims(axes = var_613_axes_0, x = var_599_cast_fp16)[name = string("op_613_cast_fp16")]; + tensor shape_57_cast_fp16 = shape(x = var_613_cast_fp16)[name = string("shape_57_cast_fp16")]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_109")]; - tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (gather_49, gather_50, var_60, gather_51_cast_uint16_to_int32, gather_52))[name = string("concat_51")]; - tensor real_div_4 = real_div(x = concat_51, y = shape_57)[name = string("real_div_4")]; - tensor hidden_states_61 = tile(reps = real_div_4, x = var_598)[name = string("hidden_states_61")]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (gather_49, gather_50, var_59, gather_51_cast_uint16_to_int32, gather_52))[name = string("concat_51")]; + tensor real_div_4 = real_div(x = concat_51, y = shape_57_cast_fp16)[name = string("real_div_4")]; + tensor hidden_states_71_cast_fp16 = tile(reps = real_div_4, x = var_613_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor concat_52x = const()[name = string("concat_52x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_11 = reshape(shape = concat_52x, x = hidden_states_61)[name = string("key_states_11")]; - tensor var_608_shape = shape(x = var_589)[name = string("op_608_shape")]; + tensor key_states_11_cast_fp16 = reshape(shape = concat_52x, x = hidden_states_71_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor var_623_shape_cast_fp16 = shape(x = var_604_cast_fp16)[name = string("op_623_shape_cast_fp16")]; int32 gather_53 = const()[name = string("gather_53"), val = int32(1)]; int32 gather_54 = const()[name = string("gather_54"), val = int32(8)]; int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; - string var_608_shape_to_uint16_dtype_0 = const()[name = string("op_608_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_623_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_623_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(2)]; - tensor var_608_shape_to_uint16 = cast(dtype = var_608_shape_to_uint16_dtype_0, x = var_608_shape)[name = string("cast_108")]; - uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_608_shape_to_uint16)[name = string("gather_55_cast_uint16")]; + tensor var_623_shape_cast_fp16_to_uint16 = cast(dtype = var_623_shape_cast_fp16_to_uint16_dtype_0, x = var_623_shape_cast_fp16)[name = string("cast_108")]; + uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_623_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_56 = const()[name = string("gather_56"), val = int32(64)]; - tensor var_615_axes_0 = const()[name = string("op_615_axes_0"), val = tensor([2])]; - tensor var_615 = expand_dims(axes = var_615_axes_0, x = var_589)[name = string("op_615")]; - tensor shape_62 = shape(x = var_615)[name = string("shape_62")]; + tensor var_630_axes_0 = const()[name = string("op_630_axes_0"), val = tensor([2])]; + tensor var_630_cast_fp16 = expand_dims(axes = var_630_axes_0, x = var_604_cast_fp16)[name = string("op_630_cast_fp16")]; + tensor shape_62_cast_fp16 = shape(x = var_630_cast_fp16)[name = string("shape_62_cast_fp16")]; int32 concat_53_axis_0 = const()[name = string("concat_53_axis_0"), val = int32(0)]; bool concat_53_interleave_0 = const()[name = string("concat_53_interleave_0"), val = bool(false)]; int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_107")]; - tensor concat_53 = concat(axis = concat_53_axis_0, interleave = concat_53_interleave_0, values = (gather_53, gather_54, var_60, gather_55_cast_uint16_to_int32, gather_56))[name = string("concat_53")]; - tensor real_div_5 = real_div(x = concat_53, y = shape_62)[name = string("real_div_5")]; - tensor hidden_states_65 = tile(reps = real_div_5, x = var_615)[name = string("hidden_states_65")]; + tensor concat_53 = concat(axis = concat_53_axis_0, interleave = concat_53_interleave_0, values = (gather_53, gather_54, var_59, gather_55_cast_uint16_to_int32, gather_56))[name = string("concat_53")]; + tensor real_div_5 = real_div(x = concat_53, y = shape_62_cast_fp16)[name = string("real_div_5")]; + tensor hidden_states_75_cast_fp16 = tile(reps = real_div_5, x = var_630_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor concat_54x = const()[name = string("concat_54x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_11 = reshape(shape = concat_54x, x = hidden_states_65)[name = string("value_states_11")]; - tensor var_625_shape = shape(x = key_states_11)[name = string("op_625_shape")]; + tensor value_states_11_cast_fp16 = reshape(shape = concat_54x, x = hidden_states_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + tensor var_640_shape_cast_fp16 = shape(x = key_states_11_cast_fp16)[name = string("op_640_shape_cast_fp16")]; int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; - string var_625_shape_to_uint16_dtype_0 = const()[name = string("op_625_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_640_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_640_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(2)]; - tensor var_625_shape_to_uint16 = cast(dtype = var_625_shape_to_uint16_dtype_0, x = var_625_shape)[name = string("cast_106")]; - uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_625_shape_to_uint16)[name = string("gather_57_cast_uint16")]; + tensor var_640_shape_cast_fp16_to_uint16 = cast(dtype = var_640_shape_cast_fp16_to_uint16_dtype_0, x = var_640_shape_cast_fp16)[name = string("cast_106")]; + uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_640_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")]; string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_55_values0_0 = const()[name = string("concat_55_values0_0"), val = int32(1)]; int32 concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = int32(1)]; @@ -799,98 +676,107 @@ program(1.3) tensor causal_mask_7_begin_0 = const()[name = string("causal_mask_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_7_end_mask_0 = const()[name = string("causal_mask_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_7_cast_fp16 = slice_by_index(begin = causal_mask_7_begin_0, end = concat_55, end_mask = causal_mask_7_end_mask_0, x = causal_mask)[name = string("causal_mask_7_cast_fp16")]; - tensor attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_7_cast_fp16, key = key_states_11, query = query_states_11, value = value_states_11)[name = string("attn_output_9_cast_fp16")]; - tensor var_631_perm_0 = const()[name = string("op_631_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_7_cast_fp16, key = key_states_11_cast_fp16, query = query_states_11_cast_fp16, value = value_states_11_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_646_perm_0 = const()[name = string("op_646_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_104")]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (gather_40, gather_41_cast_uint16_to_int32, var_48))[name = string("concat_56")]; - tensor var_631 = transpose(perm = var_631_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_52")]; - tensor input_17 = reshape(shape = concat_56, x = var_631)[name = string("input_17")]; - tensor linear_17 = linear(bias = linear_0_bias_0, weight = model_model_layers_2_self_attn_o_proj_weight_quantized, x = input_17)[name = string("linear_17")]; - tensor hidden_states_69 = add(x = hidden_states_51, y = linear_17)[name = string("hidden_states_69")]; - fp16 var_55_promoted_5_to_fp16 = const()[name = string("op_55_promoted_5_to_fp16"), val = fp16(0x1p+1)]; - tensor var_640_cast_fp16 = pow(x = hidden_states_69, y = var_55_promoted_5_to_fp16)[name = string("op_640_cast_fp16")]; + tensor var_646_cast_fp16 = transpose(perm = var_646_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_52")]; + tensor input_17_cast_fp16 = reshape(shape = concat_56, x = var_646_cast_fp16)[name = string("input_17_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219754432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(221851648))))[name = string("model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_17_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized, x = input_17_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = linear_17_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + fp16 var_54_promoted_5_to_fp16 = const()[name = string("op_54_promoted_5_to_fp16"), val = fp16(0x1p+1)]; + tensor var_655_cast_fp16 = pow(x = hidden_states_79_cast_fp16, y = var_54_promoted_5_to_fp16)[name = string("op_655_cast_fp16")]; tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; - tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_640_cast_fp16)[name = string("variance_11_cast_fp16")]; - fp16 var_643_to_fp16 = const()[name = string("op_643_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_644_cast_fp16 = add(x = variance_11_cast_fp16, y = var_643_to_fp16)[name = string("op_644_cast_fp16")]; - fp32 var_645_epsilon_0 = const()[name = string("op_645_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_645_cast_fp16 = rsqrt(epsilon = var_645_epsilon_0, x = var_644_cast_fp16)[name = string("op_645_cast_fp16")]; - tensor hidden_states_73_cast_fp16 = mul(x = hidden_states_69, y = var_645_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; - tensor input_19 = mul(x = model_model_layers_2_post_attention_layernorm_weight, y = hidden_states_73_cast_fp16)[name = string("input_19")]; - tensor linear_18 = linear(bias = linear_4_bias_0, weight = model_model_layers_2_mlp_gate_proj_weight_quantized, x = input_19)[name = string("linear_18")]; - tensor var_654 = silu(x = linear_18)[name = string("op_654")]; - tensor linear_19 = linear(bias = linear_4_bias_0, weight = model_model_layers_2_mlp_up_proj_weight_quantized, x = input_19)[name = string("linear_19")]; - tensor input_23 = mul(x = var_654, y = linear_19)[name = string("input_23")]; - tensor linear_20 = linear(bias = linear_0_bias_0, weight = model_model_layers_2_mlp_down_proj_weight_quantized, x = input_23)[name = string("linear_20")]; - tensor hidden_states_77 = add(x = hidden_states_69, y = linear_20)[name = string("hidden_states_77")]; - fp16 var_55_promoted_6_to_fp16 = const()[name = string("op_55_promoted_6_to_fp16"), val = fp16(0x1p+1)]; - tensor var_667_cast_fp16 = pow(x = hidden_states_77, y = var_55_promoted_6_to_fp16)[name = string("op_667_cast_fp16")]; + tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_655_cast_fp16)[name = string("variance_11_cast_fp16")]; + fp16 var_658_to_fp16 = const()[name = string("op_658_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_659_cast_fp16 = add(x = variance_11_cast_fp16, y = var_658_to_fp16)[name = string("op_659_cast_fp16")]; + fp32 var_660_epsilon_0 = const()[name = string("op_660_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_660_cast_fp16 = rsqrt(epsilon = var_660_epsilon_0, x = var_659_cast_fp16)[name = string("op_660_cast_fp16")]; + tensor hidden_states_83_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = var_660_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222113856)))]; + tensor input_19_cast_fp16 = mul(x = model_model_layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_83_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222118016))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230506688))))[name = string("model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_672_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_672_cast_fp16")]; + tensor model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231555328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239944000))))[name = string("model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_672_cast_fp16, y = linear_19_cast_fp16)[name = string("input_23_cast_fp16")]; + tensor model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(240992640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(249381312))))[name = string("model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor hidden_states_89_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = linear_20_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; + fp16 var_54_promoted_6_to_fp16 = const()[name = string("op_54_promoted_6_to_fp16"), val = fp16(0x1p+1)]; + tensor var_685_cast_fp16 = pow(x = hidden_states_89_cast_fp16, y = var_54_promoted_6_to_fp16)[name = string("op_685_cast_fp16")]; tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; - tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_667_cast_fp16)[name = string("variance_13_cast_fp16")]; - fp16 var_670_to_fp16 = const()[name = string("op_670_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_671_cast_fp16 = add(x = variance_13_cast_fp16, y = var_670_to_fp16)[name = string("op_671_cast_fp16")]; - fp32 var_672_epsilon_0 = const()[name = string("op_672_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_672_cast_fp16 = rsqrt(epsilon = var_672_epsilon_0, x = var_671_cast_fp16)[name = string("op_672_cast_fp16")]; - tensor hidden_states_81_cast_fp16 = mul(x = hidden_states_77, y = var_672_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; - tensor hidden_states_83 = mul(x = model_model_layers_3_input_layernorm_weight, y = hidden_states_81_cast_fp16)[name = string("hidden_states_83")]; - tensor var_680_shape = shape(x = hidden_states_83)[name = string("op_680_shape")]; + tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_685_cast_fp16)[name = string("variance_13_cast_fp16")]; + fp16 var_688_to_fp16 = const()[name = string("op_688_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_689_cast_fp16 = add(x = variance_13_cast_fp16, y = var_688_to_fp16)[name = string("op_689_cast_fp16")]; + fp32 var_690_epsilon_0 = const()[name = string("op_690_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_690_cast_fp16 = rsqrt(epsilon = var_690_epsilon_0, x = var_689_cast_fp16)[name = string("op_690_cast_fp16")]; + tensor hidden_states_93_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = var_690_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250429952)))]; + tensor hidden_states_97_cast_fp16 = mul(x = model_model_layers_3_input_layernorm_weight_to_fp16, y = hidden_states_93_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; + tensor var_701_shape_cast_fp16 = shape(x = hidden_states_97_cast_fp16)[name = string("op_701_shape_cast_fp16")]; int32 gather_58 = const()[name = string("gather_58"), val = int32(1)]; int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)]; int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)]; bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)]; - string var_680_shape_to_uint16_dtype_0 = const()[name = string("op_680_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_701_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_701_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)]; - tensor var_680_shape_to_uint16 = cast(dtype = var_680_shape_to_uint16_dtype_0, x = var_680_shape)[name = string("cast_103")]; - uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_680_shape_to_uint16)[name = string("gather_59_cast_uint16")]; + tensor var_701_shape_cast_fp16_to_uint16 = cast(dtype = var_701_shape_cast_fp16_to_uint16_dtype_0, x = var_701_shape_cast_fp16)[name = string("cast_103")]; + uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_701_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")]; string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_21 = linear(bias = linear_0_bias_0, weight = model_model_layers_3_self_attn_q_proj_weight_quantized, x = hidden_states_83)[name = string("linear_21")]; - tensor linear_22 = linear(bias = linear_1_bias_0, weight = model_model_layers_3_self_attn_k_proj_weight_quantized, x = hidden_states_83)[name = string("linear_22")]; - tensor linear_23 = linear(bias = linear_1_bias_0, weight = model_model_layers_3_self_attn_v_proj_weight_quantized, x = hidden_states_83)[name = string("linear_23")]; + tensor model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250434112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252531328))))[name = string("model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252793536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253317888))))[name = string("model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_22_cast_fp16")]; + tensor model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253383488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253907840))))[name = string("model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 32, 64])]; - tensor var_689 = reshape(shape = concat_57x, x = linear_21)[name = string("op_689")]; + tensor var_710_cast_fp16 = reshape(shape = concat_57x, x = linear_21_cast_fp16)[name = string("op_710_cast_fp16")]; tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 8, 64])]; - tensor var_692 = reshape(shape = concat_58x, x = linear_22)[name = string("op_692")]; + tensor var_713_cast_fp16 = reshape(shape = concat_58x, x = linear_22_cast_fp16)[name = string("op_713_cast_fp16")]; tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_59x = const()[name = string("concat_59x"), val = tensor([1, -1, 8, 64])]; - tensor var_695 = reshape(shape = concat_59x, x = linear_23)[name = string("op_695")]; + tensor var_716_cast_fp16 = reshape(shape = concat_59x, x = linear_23_cast_fp16)[name = string("op_716_cast_fp16")]; tensor v_state_7_perm_0 = const()[name = string("v_state_7_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_7 = transpose(perm = q_7_perm_0, x = var_689)[name = string("transpose_51")]; - tensor var_699 = mul(x = q_7, y = cos_7)[name = string("op_699")]; + tensor q_7_cast_fp16 = transpose(perm = q_7_perm_0, x = var_710_cast_fp16)[name = string("transpose_51")]; + tensor var_720_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_720_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_13 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7)[name = string("x1_13")]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_13 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7)[name = string("x2_13")]; - fp16 const_9_promoted = const()[name = string("const_9_promoted"), val = fp16(-0x1p+0)]; - tensor var_710 = mul(x = x2_13, y = const_9_promoted)[name = string("op_710")]; - bool var_712_interleave_0 = const()[name = string("op_712_interleave_0"), val = bool(false)]; - tensor var_712 = concat(axis = var_48, interleave = var_712_interleave_0, values = (var_710, x1_13))[name = string("op_712")]; - tensor var_713 = mul(x = var_712, y = sin_7)[name = string("op_713")]; - tensor query_states_15 = add(x = var_699, y = var_713)[name = string("query_states_15")]; - tensor k_7 = transpose(perm = k_7_perm_0, x = var_692)[name = string("transpose_50")]; - tensor var_715 = mul(x = k_7, y = cos_7)[name = string("op_715")]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_731_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_731_cast_fp16")]; + bool var_733_interleave_0 = const()[name = string("op_733_interleave_0"), val = bool(false)]; + tensor var_733_cast_fp16 = concat(axis = var_48, interleave = var_733_interleave_0, values = (var_731_cast_fp16, x1_13_cast_fp16))[name = string("op_733_cast_fp16")]; + tensor var_734_cast_fp16 = mul(x = var_733_cast_fp16, y = sin_7_cast_fp16)[name = string("op_734_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_720_cast_fp16, y = var_734_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = var_713_cast_fp16)[name = string("transpose_50")]; + tensor var_736_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_736_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_15 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7)[name = string("x1_15")]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_15 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7)[name = string("x2_15")]; - fp16 const_10_promoted = const()[name = string("const_10_promoted"), val = fp16(-0x1p+0)]; - tensor var_726 = mul(x = x2_15, y = const_10_promoted)[name = string("op_726")]; - bool var_728_interleave_0 = const()[name = string("op_728_interleave_0"), val = bool(false)]; - tensor var_728 = concat(axis = var_48, interleave = var_728_interleave_0, values = (var_726, x1_15))[name = string("op_728")]; - tensor var_729 = mul(x = var_728, y = sin_7)[name = string("op_729")]; - tensor k_state_7 = add(x = var_715, y = var_729)[name = string("k_state_7")]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_747_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_747_cast_fp16")]; + bool var_749_interleave_0 = const()[name = string("op_749_interleave_0"), val = bool(false)]; + tensor var_749_cast_fp16 = concat(axis = var_48, interleave = var_749_interleave_0, values = (var_747_cast_fp16, x1_15_cast_fp16))[name = string("op_749_cast_fp16")]; + tensor var_750_cast_fp16 = mul(x = var_749_cast_fp16, y = sin_7_cast_fp16)[name = string("op_750_cast_fp16")]; + tensor k_state_7_cast_fp16 = add(x = var_736_cast_fp16, y = var_750_cast_fp16)[name = string("k_state_7_cast_fp16")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([0])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; @@ -902,87 +788,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_4 = slice_update(begin = concat_62, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7, x = coreml_update_state_36)[name = string("key_cache_internal_tensor_assign_4")]; - write_state(data = key_cache_internal_tensor_assign_4, input = key_cache)[name = string("coreml_update_state_38_write_state")]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7_cast_fp16, x = coreml_update_state_36)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_38_write_state")]; tensor coreml_update_state_38 = read_state(input = key_cache)[name = string("coreml_update_state_38")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_7 = transpose(perm = v_state_7_perm_0, x = var_695)[name = string("transpose_49")]; - tensor value_cache_internal_tensor_assign_4 = slice_update(begin = concat_62, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7, x = coreml_update_state_37)[name = string("value_cache_internal_tensor_assign_4")]; - write_state(data = value_cache_internal_tensor_assign_4, input = value_cache)[name = string("coreml_update_state_39_write_state")]; + tensor v_state_7_cast_fp16 = transpose(perm = v_state_7_perm_0, x = var_716_cast_fp16)[name = string("transpose_49")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7_cast_fp16, x = coreml_update_state_37)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_39_write_state")]; tensor coreml_update_state_39 = read_state(input = value_cache)[name = string("coreml_update_state_39")]; - tensor var_752_begin_0 = const()[name = string("op_752_begin_0"), val = tensor([3, 0, 0, 0, 0])]; - tensor var_752_end_0 = const()[name = string("op_752_end_0"), val = tensor([4, 1, 8, 2048, 64])]; - tensor var_752_end_mask_0 = const()[name = string("op_752_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_752_squeeze_mask_0 = const()[name = string("op_752_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_752 = slice_by_index(begin = var_752_begin_0, end = var_752_end_0, end_mask = var_752_end_mask_0, squeeze_mask = var_752_squeeze_mask_0, x = coreml_update_state_38)[name = string("op_752")]; - tensor var_755_begin_0 = const()[name = string("op_755_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_755_end_mask_0 = const()[name = string("op_755_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_755 = slice_by_index(begin = var_755_begin_0, end = concat_11, end_mask = var_755_end_mask_0, x = var_752)[name = string("op_755")]; - tensor var_757_begin_0 = const()[name = string("op_757_begin_0"), val = tensor([3, 0, 0, 0, 0])]; - tensor var_757_end_0 = const()[name = string("op_757_end_0"), val = tensor([4, 1, 8, 2048, 64])]; - tensor var_757_end_mask_0 = const()[name = string("op_757_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_757_squeeze_mask_0 = const()[name = string("op_757_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_757 = slice_by_index(begin = var_757_begin_0, end = var_757_end_0, end_mask = var_757_end_mask_0, squeeze_mask = var_757_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_757")]; - tensor var_760_begin_0 = const()[name = string("op_760_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_760_end_mask_0 = const()[name = string("op_760_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_760 = slice_by_index(begin = var_760_begin_0, end = concat_11, end_mask = var_760_end_mask_0, x = var_757)[name = string("op_760")]; - tensor var_762_shape = shape(x = var_755)[name = string("op_762_shape")]; + tensor var_773_begin_0 = const()[name = string("op_773_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_773_end_0 = const()[name = string("op_773_end_0"), val = tensor([4, 1, 8, 2048, 64])]; + tensor var_773_end_mask_0 = const()[name = string("op_773_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_773_squeeze_mask_0 = const()[name = string("op_773_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_773_cast_fp16 = slice_by_index(begin = var_773_begin_0, end = var_773_end_0, end_mask = var_773_end_mask_0, squeeze_mask = var_773_squeeze_mask_0, x = coreml_update_state_38)[name = string("op_773_cast_fp16")]; + tensor var_776_begin_0 = const()[name = string("op_776_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_776_end_mask_0 = const()[name = string("op_776_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_776_cast_fp16 = slice_by_index(begin = var_776_begin_0, end = concat_11, end_mask = var_776_end_mask_0, x = var_773_cast_fp16)[name = string("op_776_cast_fp16")]; + tensor var_778_begin_0 = const()[name = string("op_778_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_778_end_0 = const()[name = string("op_778_end_0"), val = tensor([4, 1, 8, 2048, 64])]; + tensor var_778_end_mask_0 = const()[name = string("op_778_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_778_squeeze_mask_0 = const()[name = string("op_778_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_778_cast_fp16 = slice_by_index(begin = var_778_begin_0, end = var_778_end_0, end_mask = var_778_end_mask_0, squeeze_mask = var_778_squeeze_mask_0, x = coreml_update_state_39)[name = string("op_778_cast_fp16")]; + tensor var_781_begin_0 = const()[name = string("op_781_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_781_end_mask_0 = const()[name = string("op_781_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_781_cast_fp16 = slice_by_index(begin = var_781_begin_0, end = concat_11, end_mask = var_781_end_mask_0, x = var_778_cast_fp16)[name = string("op_781_cast_fp16")]; + tensor var_783_shape_cast_fp16 = shape(x = var_776_cast_fp16)[name = string("op_783_shape_cast_fp16")]; int32 gather_67 = const()[name = string("gather_67"), val = int32(1)]; int32 gather_68 = const()[name = string("gather_68"), val = int32(8)]; int32 gather_69_axis_0 = const()[name = string("gather_69_axis_0"), val = int32(0)]; int32 gather_69_batch_dims_0 = const()[name = string("gather_69_batch_dims_0"), val = int32(0)]; bool gather_69_validate_indices_0 = const()[name = string("gather_69_validate_indices_0"), val = bool(false)]; - string var_762_shape_to_uint16_dtype_0 = const()[name = string("op_762_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_783_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_783_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_69_to_uint16 = const()[name = string("select_69_to_uint16"), val = uint16(2)]; - tensor var_762_shape_to_uint16 = cast(dtype = var_762_shape_to_uint16_dtype_0, x = var_762_shape)[name = string("cast_102")]; - uint16 gather_69_cast_uint16 = gather(axis = gather_69_axis_0, batch_dims = gather_69_batch_dims_0, indices = select_69_to_uint16, validate_indices = gather_69_validate_indices_0, x = var_762_shape_to_uint16)[name = string("gather_69_cast_uint16")]; + tensor var_783_shape_cast_fp16_to_uint16 = cast(dtype = var_783_shape_cast_fp16_to_uint16_dtype_0, x = var_783_shape_cast_fp16)[name = string("cast_102")]; + uint16 gather_69_cast_uint16 = gather(axis = gather_69_axis_0, batch_dims = gather_69_batch_dims_0, indices = select_69_to_uint16, validate_indices = gather_69_validate_indices_0, x = var_783_shape_cast_fp16_to_uint16)[name = string("gather_69_cast_uint16")]; string gather_69_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_69_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_70 = const()[name = string("gather_70"), val = int32(64)]; - tensor var_769_axes_0 = const()[name = string("op_769_axes_0"), val = tensor([2])]; - tensor var_769 = expand_dims(axes = var_769_axes_0, x = var_755)[name = string("op_769")]; - tensor shape_77 = shape(x = var_769)[name = string("shape_77")]; + tensor var_790_axes_0 = const()[name = string("op_790_axes_0"), val = tensor([2])]; + tensor var_790_cast_fp16 = expand_dims(axes = var_790_axes_0, x = var_776_cast_fp16)[name = string("op_790_cast_fp16")]; + tensor shape_77_cast_fp16 = shape(x = var_790_cast_fp16)[name = string("shape_77_cast_fp16")]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; int32 gather_69_cast_uint16_to_int32 = cast(dtype = gather_69_cast_uint16_to_int32_dtype_0, x = gather_69_cast_uint16)[name = string("cast_101")]; - tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (gather_67, gather_68, var_60, gather_69_cast_uint16_to_int32, gather_70))[name = string("concat_70")]; - tensor real_div_6 = real_div(x = concat_70, y = shape_77)[name = string("real_div_6")]; - tensor hidden_states_87 = tile(reps = real_div_6, x = var_769)[name = string("hidden_states_87")]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (gather_67, gather_68, var_59, gather_69_cast_uint16_to_int32, gather_70))[name = string("concat_70")]; + tensor real_div_6 = real_div(x = concat_70, y = shape_77_cast_fp16)[name = string("real_div_6")]; + tensor hidden_states_101_cast_fp16 = tile(reps = real_div_6, x = var_790_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor concat_71x = const()[name = string("concat_71x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_15 = reshape(shape = concat_71x, x = hidden_states_87)[name = string("key_states_15")]; - tensor var_779_shape = shape(x = var_760)[name = string("op_779_shape")]; + tensor key_states_15_cast_fp16 = reshape(shape = concat_71x, x = hidden_states_101_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor var_800_shape_cast_fp16 = shape(x = var_781_cast_fp16)[name = string("op_800_shape_cast_fp16")]; int32 gather_71 = const()[name = string("gather_71"), val = int32(1)]; int32 gather_72 = const()[name = string("gather_72"), val = int32(8)]; int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; - string var_779_shape_to_uint16_dtype_0 = const()[name = string("op_779_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_800_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_800_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_73_to_uint16 = const()[name = string("select_73_to_uint16"), val = uint16(2)]; - tensor var_779_shape_to_uint16 = cast(dtype = var_779_shape_to_uint16_dtype_0, x = var_779_shape)[name = string("cast_100")]; - uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_779_shape_to_uint16)[name = string("gather_73_cast_uint16")]; + tensor var_800_shape_cast_fp16_to_uint16 = cast(dtype = var_800_shape_cast_fp16_to_uint16_dtype_0, x = var_800_shape_cast_fp16)[name = string("cast_100")]; + uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_800_shape_cast_fp16_to_uint16)[name = string("gather_73_cast_uint16")]; string gather_73_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_73_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_74 = const()[name = string("gather_74"), val = int32(64)]; - tensor var_786_axes_0 = const()[name = string("op_786_axes_0"), val = tensor([2])]; - tensor var_786 = expand_dims(axes = var_786_axes_0, x = var_760)[name = string("op_786")]; - tensor shape_82 = shape(x = var_786)[name = string("shape_82")]; + tensor var_807_axes_0 = const()[name = string("op_807_axes_0"), val = tensor([2])]; + tensor var_807_cast_fp16 = expand_dims(axes = var_807_axes_0, x = var_781_cast_fp16)[name = string("op_807_cast_fp16")]; + tensor shape_82_cast_fp16 = shape(x = var_807_cast_fp16)[name = string("shape_82_cast_fp16")]; int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; int32 gather_73_cast_uint16_to_int32 = cast(dtype = gather_73_cast_uint16_to_int32_dtype_0, x = gather_73_cast_uint16)[name = string("cast_99")]; - tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (gather_71, gather_72, var_60, gather_73_cast_uint16_to_int32, gather_74))[name = string("concat_72")]; - tensor real_div_7 = real_div(x = concat_72, y = shape_82)[name = string("real_div_7")]; - tensor hidden_states_91 = tile(reps = real_div_7, x = var_786)[name = string("hidden_states_91")]; + tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (gather_71, gather_72, var_59, gather_73_cast_uint16_to_int32, gather_74))[name = string("concat_72")]; + tensor real_div_7 = real_div(x = concat_72, y = shape_82_cast_fp16)[name = string("real_div_7")]; + tensor hidden_states_105_cast_fp16 = tile(reps = real_div_7, x = var_807_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor concat_73x = const()[name = string("concat_73x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_15 = reshape(shape = concat_73x, x = hidden_states_91)[name = string("value_states_15")]; - tensor var_796_shape = shape(x = key_states_15)[name = string("op_796_shape")]; + tensor value_states_15_cast_fp16 = reshape(shape = concat_73x, x = hidden_states_105_cast_fp16)[name = string("value_states_15_cast_fp16")]; + tensor var_817_shape_cast_fp16 = shape(x = key_states_15_cast_fp16)[name = string("op_817_shape_cast_fp16")]; int32 gather_75_axis_0 = const()[name = string("gather_75_axis_0"), val = int32(0)]; int32 gather_75_batch_dims_0 = const()[name = string("gather_75_batch_dims_0"), val = int32(0)]; bool gather_75_validate_indices_0 = const()[name = string("gather_75_validate_indices_0"), val = bool(false)]; - string var_796_shape_to_uint16_dtype_0 = const()[name = string("op_796_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_817_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_817_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_75_to_uint16 = const()[name = string("select_75_to_uint16"), val = uint16(2)]; - tensor var_796_shape_to_uint16 = cast(dtype = var_796_shape_to_uint16_dtype_0, x = var_796_shape)[name = string("cast_98")]; - uint16 gather_75_cast_uint16 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75_to_uint16, validate_indices = gather_75_validate_indices_0, x = var_796_shape_to_uint16)[name = string("gather_75_cast_uint16")]; + tensor var_817_shape_cast_fp16_to_uint16 = cast(dtype = var_817_shape_cast_fp16_to_uint16_dtype_0, x = var_817_shape_cast_fp16)[name = string("cast_98")]; + uint16 gather_75_cast_uint16 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75_to_uint16, validate_indices = gather_75_validate_indices_0, x = var_817_shape_cast_fp16_to_uint16)[name = string("gather_75_cast_uint16")]; string gather_75_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_75_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_74_values0_0 = const()[name = string("concat_74_values0_0"), val = int32(1)]; int32 concat_74_values1_0 = const()[name = string("concat_74_values1_0"), val = int32(1)]; @@ -994,98 +880,107 @@ program(1.3) tensor causal_mask_9_begin_0 = const()[name = string("causal_mask_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_9_end_mask_0 = const()[name = string("causal_mask_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_9_cast_fp16 = slice_by_index(begin = causal_mask_9_begin_0, end = concat_74, end_mask = causal_mask_9_end_mask_0, x = causal_mask)[name = string("causal_mask_9_cast_fp16")]; - tensor attn_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_9_cast_fp16, key = key_states_15, query = query_states_15, value = value_states_15)[name = string("attn_output_13_cast_fp16")]; - tensor var_802_perm_0 = const()[name = string("op_802_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_9_cast_fp16, key = key_states_15_cast_fp16, query = query_states_15_cast_fp16, value = value_states_15_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_823_perm_0 = const()[name = string("op_823_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_96")]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (gather_58, gather_59_cast_uint16_to_int32, var_48))[name = string("concat_75")]; - tensor var_802 = transpose(perm = var_802_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_48")]; - tensor input_25 = reshape(shape = concat_75, x = var_802)[name = string("input_25")]; - tensor linear_24 = linear(bias = linear_0_bias_0, weight = model_model_layers_3_self_attn_o_proj_weight_quantized, x = input_25)[name = string("linear_24")]; - tensor hidden_states_95 = add(x = hidden_states_77, y = linear_24)[name = string("hidden_states_95")]; - fp16 var_55_promoted_7_to_fp16 = const()[name = string("op_55_promoted_7_to_fp16"), val = fp16(0x1p+1)]; - tensor var_811_cast_fp16 = pow(x = hidden_states_95, y = var_55_promoted_7_to_fp16)[name = string("op_811_cast_fp16")]; + tensor var_823_cast_fp16 = transpose(perm = var_823_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_48")]; + tensor input_25_cast_fp16 = reshape(shape = concat_75, x = var_823_cast_fp16)[name = string("input_25_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253973440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256070656))))[name = string("model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor hidden_states_109_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = linear_24_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; + fp16 var_54_promoted_7_to_fp16 = const()[name = string("op_54_promoted_7_to_fp16"), val = fp16(0x1p+1)]; + tensor var_832_cast_fp16 = pow(x = hidden_states_109_cast_fp16, y = var_54_promoted_7_to_fp16)[name = string("op_832_cast_fp16")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; - tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_811_cast_fp16)[name = string("variance_15_cast_fp16")]; - fp16 var_814_to_fp16 = const()[name = string("op_814_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_815_cast_fp16 = add(x = variance_15_cast_fp16, y = var_814_to_fp16)[name = string("op_815_cast_fp16")]; - fp32 var_816_epsilon_0 = const()[name = string("op_816_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_816_cast_fp16 = rsqrt(epsilon = var_816_epsilon_0, x = var_815_cast_fp16)[name = string("op_816_cast_fp16")]; - tensor hidden_states_99_cast_fp16 = mul(x = hidden_states_95, y = var_816_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; - tensor input_27 = mul(x = model_model_layers_3_post_attention_layernorm_weight, y = hidden_states_99_cast_fp16)[name = string("input_27")]; - tensor linear_25 = linear(bias = linear_4_bias_0, weight = model_model_layers_3_mlp_gate_proj_weight_quantized, x = input_27)[name = string("linear_25")]; - tensor var_825 = silu(x = linear_25)[name = string("op_825")]; - tensor linear_26 = linear(bias = linear_4_bias_0, weight = model_model_layers_3_mlp_up_proj_weight_quantized, x = input_27)[name = string("linear_26")]; - tensor input_31 = mul(x = var_825, y = linear_26)[name = string("input_31")]; - tensor linear_27 = linear(bias = linear_0_bias_0, weight = model_model_layers_3_mlp_down_proj_weight_quantized, x = input_31)[name = string("linear_27")]; - tensor hidden_states_103 = add(x = hidden_states_95, y = linear_27)[name = string("hidden_states_103")]; - fp16 var_55_promoted_8_to_fp16 = const()[name = string("op_55_promoted_8_to_fp16"), val = fp16(0x1p+1)]; - tensor var_838_cast_fp16 = pow(x = hidden_states_103, y = var_55_promoted_8_to_fp16)[name = string("op_838_cast_fp16")]; + tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_832_cast_fp16)[name = string("variance_15_cast_fp16")]; + fp16 var_835_to_fp16 = const()[name = string("op_835_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_836_cast_fp16 = add(x = variance_15_cast_fp16, y = var_835_to_fp16)[name = string("op_836_cast_fp16")]; + fp32 var_837_epsilon_0 = const()[name = string("op_837_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_837_cast_fp16 = rsqrt(epsilon = var_837_epsilon_0, x = var_836_cast_fp16)[name = string("op_837_cast_fp16")]; + tensor hidden_states_113_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = var_837_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256332864)))]; + tensor input_27_cast_fp16 = mul(x = model_model_layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_113_cast_fp16)[name = string("input_27_cast_fp16")]; + tensor model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256337024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264725696))))[name = string("model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_849_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_849_cast_fp16")]; + tensor model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(265774336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(274163008))))[name = string("model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = var_849_cast_fp16, y = linear_26_cast_fp16)[name = string("input_31_cast_fp16")]; + tensor model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275211648))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(283600320))))[name = string("model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_27_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized, x = input_31_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor hidden_states_119_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = linear_27_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + fp16 var_54_promoted_8_to_fp16 = const()[name = string("op_54_promoted_8_to_fp16"), val = fp16(0x1p+1)]; + tensor var_862_cast_fp16 = pow(x = hidden_states_119_cast_fp16, y = var_54_promoted_8_to_fp16)[name = string("op_862_cast_fp16")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; - tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_838_cast_fp16)[name = string("variance_17_cast_fp16")]; - fp16 var_841_to_fp16 = const()[name = string("op_841_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_842_cast_fp16 = add(x = variance_17_cast_fp16, y = var_841_to_fp16)[name = string("op_842_cast_fp16")]; - fp32 var_843_epsilon_0 = const()[name = string("op_843_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_843_cast_fp16 = rsqrt(epsilon = var_843_epsilon_0, x = var_842_cast_fp16)[name = string("op_843_cast_fp16")]; - tensor hidden_states_107_cast_fp16 = mul(x = hidden_states_103, y = var_843_cast_fp16)[name = string("hidden_states_107_cast_fp16")]; - tensor hidden_states_109 = mul(x = model_model_layers_4_input_layernorm_weight, y = hidden_states_107_cast_fp16)[name = string("hidden_states_109")]; - tensor var_851_shape = shape(x = hidden_states_109)[name = string("op_851_shape")]; + tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_862_cast_fp16)[name = string("variance_17_cast_fp16")]; + fp16 var_865_to_fp16 = const()[name = string("op_865_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_866_cast_fp16 = add(x = variance_17_cast_fp16, y = var_865_to_fp16)[name = string("op_866_cast_fp16")]; + fp32 var_867_epsilon_0 = const()[name = string("op_867_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_867_cast_fp16 = rsqrt(epsilon = var_867_epsilon_0, x = var_866_cast_fp16)[name = string("op_867_cast_fp16")]; + tensor hidden_states_123_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = var_867_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284648960)))]; + tensor hidden_states_127_cast_fp16 = mul(x = model_model_layers_4_input_layernorm_weight_to_fp16, y = hidden_states_123_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + tensor var_878_shape_cast_fp16 = shape(x = hidden_states_127_cast_fp16)[name = string("op_878_shape_cast_fp16")]; int32 gather_76 = const()[name = string("gather_76"), val = int32(1)]; int32 gather_77_axis_0 = const()[name = string("gather_77_axis_0"), val = int32(0)]; int32 gather_77_batch_dims_0 = const()[name = string("gather_77_batch_dims_0"), val = int32(0)]; bool gather_77_validate_indices_0 = const()[name = string("gather_77_validate_indices_0"), val = bool(false)]; - string var_851_shape_to_uint16_dtype_0 = const()[name = string("op_851_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_878_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_878_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_77_to_uint16 = const()[name = string("select_77_to_uint16"), val = uint16(1)]; - tensor var_851_shape_to_uint16 = cast(dtype = var_851_shape_to_uint16_dtype_0, x = var_851_shape)[name = string("cast_95")]; - uint16 gather_77_cast_uint16 = gather(axis = gather_77_axis_0, batch_dims = gather_77_batch_dims_0, indices = select_77_to_uint16, validate_indices = gather_77_validate_indices_0, x = var_851_shape_to_uint16)[name = string("gather_77_cast_uint16")]; + tensor var_878_shape_cast_fp16_to_uint16 = cast(dtype = var_878_shape_cast_fp16_to_uint16_dtype_0, x = var_878_shape_cast_fp16)[name = string("cast_95")]; + uint16 gather_77_cast_uint16 = gather(axis = gather_77_axis_0, batch_dims = gather_77_batch_dims_0, indices = select_77_to_uint16, validate_indices = gather_77_validate_indices_0, x = var_878_shape_cast_fp16_to_uint16)[name = string("gather_77_cast_uint16")]; string gather_77_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_77_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_28 = linear(bias = linear_0_bias_0, weight = model_model_layers_4_self_attn_q_proj_weight_quantized, x = hidden_states_109)[name = string("linear_28")]; - tensor linear_29 = linear(bias = linear_1_bias_0, weight = model_model_layers_4_self_attn_k_proj_weight_quantized, x = hidden_states_109)[name = string("linear_29")]; - tensor linear_30 = linear(bias = linear_1_bias_0, weight = model_model_layers_4_self_attn_v_proj_weight_quantized, x = hidden_states_109)[name = string("linear_30")]; + tensor model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(284653120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(286750336))))[name = string("model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287012544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287536896))))[name = string("model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(287602496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288126848))))[name = string("model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor concat_76x = const()[name = string("concat_76x"), val = tensor([1, -1, 32, 64])]; - tensor var_860 = reshape(shape = concat_76x, x = linear_28)[name = string("op_860")]; + tensor var_887_cast_fp16 = reshape(shape = concat_76x, x = linear_28_cast_fp16)[name = string("op_887_cast_fp16")]; tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_77x = const()[name = string("concat_77x"), val = tensor([1, -1, 8, 64])]; - tensor var_863 = reshape(shape = concat_77x, x = linear_29)[name = string("op_863")]; + tensor var_890_cast_fp16 = reshape(shape = concat_77x, x = linear_29_cast_fp16)[name = string("op_890_cast_fp16")]; tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 8, 64])]; - tensor var_866 = reshape(shape = concat_78x, x = linear_30)[name = string("op_866")]; + tensor var_893_cast_fp16 = reshape(shape = concat_78x, x = linear_30_cast_fp16)[name = string("op_893_cast_fp16")]; tensor v_state_9_perm_0 = const()[name = string("v_state_9_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_9 = transpose(perm = q_9_perm_0, x = var_860)[name = string("transpose_47")]; - tensor var_870 = mul(x = q_9, y = cos_7)[name = string("op_870")]; + tensor q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_887_cast_fp16)[name = string("transpose_47")]; + tensor var_897_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_897_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_17 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9)[name = string("x1_17")]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_17 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9)[name = string("x2_17")]; - fp16 const_11_promoted = const()[name = string("const_11_promoted"), val = fp16(-0x1p+0)]; - tensor var_881 = mul(x = x2_17, y = const_11_promoted)[name = string("op_881")]; - bool var_883_interleave_0 = const()[name = string("op_883_interleave_0"), val = bool(false)]; - tensor var_883 = concat(axis = var_48, interleave = var_883_interleave_0, values = (var_881, x1_17))[name = string("op_883")]; - tensor var_884 = mul(x = var_883, y = sin_7)[name = string("op_884")]; - tensor query_states_19 = add(x = var_870, y = var_884)[name = string("query_states_19")]; - tensor k_9 = transpose(perm = k_9_perm_0, x = var_863)[name = string("transpose_46")]; - tensor var_886 = mul(x = k_9, y = cos_7)[name = string("op_886")]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_908_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_908_cast_fp16")]; + bool var_910_interleave_0 = const()[name = string("op_910_interleave_0"), val = bool(false)]; + tensor var_910_cast_fp16 = concat(axis = var_48, interleave = var_910_interleave_0, values = (var_908_cast_fp16, x1_17_cast_fp16))[name = string("op_910_cast_fp16")]; + tensor var_911_cast_fp16 = mul(x = var_910_cast_fp16, y = sin_7_cast_fp16)[name = string("op_911_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_897_cast_fp16, y = var_911_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_890_cast_fp16)[name = string("transpose_46")]; + tensor var_913_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_913_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_19 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9)[name = string("x1_19")]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_19 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9)[name = string("x2_19")]; - fp16 const_12_promoted = const()[name = string("const_12_promoted"), val = fp16(-0x1p+0)]; - tensor var_897 = mul(x = x2_19, y = const_12_promoted)[name = string("op_897")]; - bool var_899_interleave_0 = const()[name = string("op_899_interleave_0"), val = bool(false)]; - tensor var_899 = concat(axis = var_48, interleave = var_899_interleave_0, values = (var_897, x1_19))[name = string("op_899")]; - tensor var_900 = mul(x = var_899, y = sin_7)[name = string("op_900")]; - tensor k_state_9 = add(x = var_886, y = var_900)[name = string("k_state_9")]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_924_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_924_cast_fp16")]; + bool var_926_interleave_0 = const()[name = string("op_926_interleave_0"), val = bool(false)]; + tensor var_926_cast_fp16 = concat(axis = var_48, interleave = var_926_interleave_0, values = (var_924_cast_fp16, x1_19_cast_fp16))[name = string("op_926_cast_fp16")]; + tensor var_927_cast_fp16 = mul(x = var_926_cast_fp16, y = sin_7_cast_fp16)[name = string("op_927_cast_fp16")]; + tensor k_state_9_cast_fp16 = add(x = var_913_cast_fp16, y = var_927_cast_fp16)[name = string("k_state_9_cast_fp16")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; @@ -1097,87 +992,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_5 = slice_update(begin = concat_81, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9, x = coreml_update_state_38)[name = string("key_cache_internal_tensor_assign_5")]; - write_state(data = key_cache_internal_tensor_assign_5, input = key_cache)[name = string("coreml_update_state_40_write_state")]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9_cast_fp16, x = coreml_update_state_38)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_40_write_state")]; tensor coreml_update_state_40 = read_state(input = key_cache)[name = string("coreml_update_state_40")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_9 = transpose(perm = v_state_9_perm_0, x = var_866)[name = string("transpose_45")]; - tensor value_cache_internal_tensor_assign_5 = slice_update(begin = concat_81, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9, x = coreml_update_state_39)[name = string("value_cache_internal_tensor_assign_5")]; - write_state(data = value_cache_internal_tensor_assign_5, input = value_cache)[name = string("coreml_update_state_41_write_state")]; + tensor v_state_9_cast_fp16 = transpose(perm = v_state_9_perm_0, x = var_893_cast_fp16)[name = string("transpose_45")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9_cast_fp16, x = coreml_update_state_39)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_41_write_state")]; tensor coreml_update_state_41 = read_state(input = value_cache)[name = string("coreml_update_state_41")]; - tensor var_923_begin_0 = const()[name = string("op_923_begin_0"), val = tensor([4, 0, 0, 0, 0])]; - tensor var_923_end_0 = const()[name = string("op_923_end_0"), val = tensor([5, 1, 8, 2048, 64])]; - tensor var_923_end_mask_0 = const()[name = string("op_923_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_923_squeeze_mask_0 = const()[name = string("op_923_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_923 = slice_by_index(begin = var_923_begin_0, end = var_923_end_0, end_mask = var_923_end_mask_0, squeeze_mask = var_923_squeeze_mask_0, x = coreml_update_state_40)[name = string("op_923")]; - tensor var_926_begin_0 = const()[name = string("op_926_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_926_end_mask_0 = const()[name = string("op_926_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_926 = slice_by_index(begin = var_926_begin_0, end = concat_11, end_mask = var_926_end_mask_0, x = var_923)[name = string("op_926")]; - tensor var_928_begin_0 = const()[name = string("op_928_begin_0"), val = tensor([4, 0, 0, 0, 0])]; - tensor var_928_end_0 = const()[name = string("op_928_end_0"), val = tensor([5, 1, 8, 2048, 64])]; - tensor var_928_end_mask_0 = const()[name = string("op_928_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_928_squeeze_mask_0 = const()[name = string("op_928_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_928 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, squeeze_mask = var_928_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_928")]; - tensor var_931_begin_0 = const()[name = string("op_931_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_931_end_mask_0 = const()[name = string("op_931_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_931 = slice_by_index(begin = var_931_begin_0, end = concat_11, end_mask = var_931_end_mask_0, x = var_928)[name = string("op_931")]; - tensor var_933_shape = shape(x = var_926)[name = string("op_933_shape")]; + tensor var_950_begin_0 = const()[name = string("op_950_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_950_end_0 = const()[name = string("op_950_end_0"), val = tensor([5, 1, 8, 2048, 64])]; + tensor var_950_end_mask_0 = const()[name = string("op_950_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_950_squeeze_mask_0 = const()[name = string("op_950_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_950_cast_fp16 = slice_by_index(begin = var_950_begin_0, end = var_950_end_0, end_mask = var_950_end_mask_0, squeeze_mask = var_950_squeeze_mask_0, x = coreml_update_state_40)[name = string("op_950_cast_fp16")]; + tensor var_953_begin_0 = const()[name = string("op_953_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_953_end_mask_0 = const()[name = string("op_953_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_953_cast_fp16 = slice_by_index(begin = var_953_begin_0, end = concat_11, end_mask = var_953_end_mask_0, x = var_950_cast_fp16)[name = string("op_953_cast_fp16")]; + tensor var_955_begin_0 = const()[name = string("op_955_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_955_end_0 = const()[name = string("op_955_end_0"), val = tensor([5, 1, 8, 2048, 64])]; + tensor var_955_end_mask_0 = const()[name = string("op_955_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_955_squeeze_mask_0 = const()[name = string("op_955_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_955_cast_fp16 = slice_by_index(begin = var_955_begin_0, end = var_955_end_0, end_mask = var_955_end_mask_0, squeeze_mask = var_955_squeeze_mask_0, x = coreml_update_state_41)[name = string("op_955_cast_fp16")]; + tensor var_958_begin_0 = const()[name = string("op_958_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_958_end_mask_0 = const()[name = string("op_958_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_958_cast_fp16 = slice_by_index(begin = var_958_begin_0, end = concat_11, end_mask = var_958_end_mask_0, x = var_955_cast_fp16)[name = string("op_958_cast_fp16")]; + tensor var_960_shape_cast_fp16 = shape(x = var_953_cast_fp16)[name = string("op_960_shape_cast_fp16")]; int32 gather_85 = const()[name = string("gather_85"), val = int32(1)]; int32 gather_86 = const()[name = string("gather_86"), val = int32(8)]; int32 gather_87_axis_0 = const()[name = string("gather_87_axis_0"), val = int32(0)]; int32 gather_87_batch_dims_0 = const()[name = string("gather_87_batch_dims_0"), val = int32(0)]; bool gather_87_validate_indices_0 = const()[name = string("gather_87_validate_indices_0"), val = bool(false)]; - string var_933_shape_to_uint16_dtype_0 = const()[name = string("op_933_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_960_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_960_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_87_to_uint16 = const()[name = string("select_87_to_uint16"), val = uint16(2)]; - tensor var_933_shape_to_uint16 = cast(dtype = var_933_shape_to_uint16_dtype_0, x = var_933_shape)[name = string("cast_94")]; - uint16 gather_87_cast_uint16 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = select_87_to_uint16, validate_indices = gather_87_validate_indices_0, x = var_933_shape_to_uint16)[name = string("gather_87_cast_uint16")]; + tensor var_960_shape_cast_fp16_to_uint16 = cast(dtype = var_960_shape_cast_fp16_to_uint16_dtype_0, x = var_960_shape_cast_fp16)[name = string("cast_94")]; + uint16 gather_87_cast_uint16 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = select_87_to_uint16, validate_indices = gather_87_validate_indices_0, x = var_960_shape_cast_fp16_to_uint16)[name = string("gather_87_cast_uint16")]; string gather_87_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_87_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_88 = const()[name = string("gather_88"), val = int32(64)]; - tensor var_940_axes_0 = const()[name = string("op_940_axes_0"), val = tensor([2])]; - tensor var_940 = expand_dims(axes = var_940_axes_0, x = var_926)[name = string("op_940")]; - tensor shape_97 = shape(x = var_940)[name = string("shape_97")]; + tensor var_967_axes_0 = const()[name = string("op_967_axes_0"), val = tensor([2])]; + tensor var_967_cast_fp16 = expand_dims(axes = var_967_axes_0, x = var_953_cast_fp16)[name = string("op_967_cast_fp16")]; + tensor shape_97_cast_fp16 = shape(x = var_967_cast_fp16)[name = string("shape_97_cast_fp16")]; int32 concat_89_axis_0 = const()[name = string("concat_89_axis_0"), val = int32(0)]; bool concat_89_interleave_0 = const()[name = string("concat_89_interleave_0"), val = bool(false)]; int32 gather_87_cast_uint16_to_int32 = cast(dtype = gather_87_cast_uint16_to_int32_dtype_0, x = gather_87_cast_uint16)[name = string("cast_93")]; - tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_85, gather_86, var_60, gather_87_cast_uint16_to_int32, gather_88))[name = string("concat_89")]; - tensor real_div_8 = real_div(x = concat_89, y = shape_97)[name = string("real_div_8")]; - tensor hidden_states_113 = tile(reps = real_div_8, x = var_940)[name = string("hidden_states_113")]; + tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_85, gather_86, var_59, gather_87_cast_uint16_to_int32, gather_88))[name = string("concat_89")]; + tensor real_div_8 = real_div(x = concat_89, y = shape_97_cast_fp16)[name = string("real_div_8")]; + tensor hidden_states_131_cast_fp16 = tile(reps = real_div_8, x = var_967_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor concat_90x = const()[name = string("concat_90x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_19 = reshape(shape = concat_90x, x = hidden_states_113)[name = string("key_states_19")]; - tensor var_950_shape = shape(x = var_931)[name = string("op_950_shape")]; + tensor key_states_19_cast_fp16 = reshape(shape = concat_90x, x = hidden_states_131_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor var_977_shape_cast_fp16 = shape(x = var_958_cast_fp16)[name = string("op_977_shape_cast_fp16")]; int32 gather_89 = const()[name = string("gather_89"), val = int32(1)]; int32 gather_90 = const()[name = string("gather_90"), val = int32(8)]; int32 gather_91_axis_0 = const()[name = string("gather_91_axis_0"), val = int32(0)]; int32 gather_91_batch_dims_0 = const()[name = string("gather_91_batch_dims_0"), val = int32(0)]; bool gather_91_validate_indices_0 = const()[name = string("gather_91_validate_indices_0"), val = bool(false)]; - string var_950_shape_to_uint16_dtype_0 = const()[name = string("op_950_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_977_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_977_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_91_to_uint16 = const()[name = string("select_91_to_uint16"), val = uint16(2)]; - tensor var_950_shape_to_uint16 = cast(dtype = var_950_shape_to_uint16_dtype_0, x = var_950_shape)[name = string("cast_92")]; - uint16 gather_91_cast_uint16 = gather(axis = gather_91_axis_0, batch_dims = gather_91_batch_dims_0, indices = select_91_to_uint16, validate_indices = gather_91_validate_indices_0, x = var_950_shape_to_uint16)[name = string("gather_91_cast_uint16")]; + tensor var_977_shape_cast_fp16_to_uint16 = cast(dtype = var_977_shape_cast_fp16_to_uint16_dtype_0, x = var_977_shape_cast_fp16)[name = string("cast_92")]; + uint16 gather_91_cast_uint16 = gather(axis = gather_91_axis_0, batch_dims = gather_91_batch_dims_0, indices = select_91_to_uint16, validate_indices = gather_91_validate_indices_0, x = var_977_shape_cast_fp16_to_uint16)[name = string("gather_91_cast_uint16")]; string gather_91_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_91_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_92 = const()[name = string("gather_92"), val = int32(64)]; - tensor var_957_axes_0 = const()[name = string("op_957_axes_0"), val = tensor([2])]; - tensor var_957 = expand_dims(axes = var_957_axes_0, x = var_931)[name = string("op_957")]; - tensor shape_102 = shape(x = var_957)[name = string("shape_102")]; + tensor var_984_axes_0 = const()[name = string("op_984_axes_0"), val = tensor([2])]; + tensor var_984_cast_fp16 = expand_dims(axes = var_984_axes_0, x = var_958_cast_fp16)[name = string("op_984_cast_fp16")]; + tensor shape_102_cast_fp16 = shape(x = var_984_cast_fp16)[name = string("shape_102_cast_fp16")]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; int32 gather_91_cast_uint16_to_int32 = cast(dtype = gather_91_cast_uint16_to_int32_dtype_0, x = gather_91_cast_uint16)[name = string("cast_91")]; - tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (gather_89, gather_90, var_60, gather_91_cast_uint16_to_int32, gather_92))[name = string("concat_91")]; - tensor real_div_9 = real_div(x = concat_91, y = shape_102)[name = string("real_div_9")]; - tensor hidden_states_117 = tile(reps = real_div_9, x = var_957)[name = string("hidden_states_117")]; + tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (gather_89, gather_90, var_59, gather_91_cast_uint16_to_int32, gather_92))[name = string("concat_91")]; + tensor real_div_9 = real_div(x = concat_91, y = shape_102_cast_fp16)[name = string("real_div_9")]; + tensor hidden_states_135_cast_fp16 = tile(reps = real_div_9, x = var_984_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor concat_92x = const()[name = string("concat_92x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_19 = reshape(shape = concat_92x, x = hidden_states_117)[name = string("value_states_19")]; - tensor var_967_shape = shape(x = key_states_19)[name = string("op_967_shape")]; + tensor value_states_19_cast_fp16 = reshape(shape = concat_92x, x = hidden_states_135_cast_fp16)[name = string("value_states_19_cast_fp16")]; + tensor var_994_shape_cast_fp16 = shape(x = key_states_19_cast_fp16)[name = string("op_994_shape_cast_fp16")]; int32 gather_93_axis_0 = const()[name = string("gather_93_axis_0"), val = int32(0)]; int32 gather_93_batch_dims_0 = const()[name = string("gather_93_batch_dims_0"), val = int32(0)]; bool gather_93_validate_indices_0 = const()[name = string("gather_93_validate_indices_0"), val = bool(false)]; - string var_967_shape_to_uint16_dtype_0 = const()[name = string("op_967_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_994_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_994_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_93_to_uint16 = const()[name = string("select_93_to_uint16"), val = uint16(2)]; - tensor var_967_shape_to_uint16 = cast(dtype = var_967_shape_to_uint16_dtype_0, x = var_967_shape)[name = string("cast_90")]; - uint16 gather_93_cast_uint16 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93_to_uint16, validate_indices = gather_93_validate_indices_0, x = var_967_shape_to_uint16)[name = string("gather_93_cast_uint16")]; + tensor var_994_shape_cast_fp16_to_uint16 = cast(dtype = var_994_shape_cast_fp16_to_uint16_dtype_0, x = var_994_shape_cast_fp16)[name = string("cast_90")]; + uint16 gather_93_cast_uint16 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93_to_uint16, validate_indices = gather_93_validate_indices_0, x = var_994_shape_cast_fp16_to_uint16)[name = string("gather_93_cast_uint16")]; string gather_93_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_93_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = int32(1)]; int32 concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = int32(1)]; @@ -1189,98 +1084,107 @@ program(1.3) tensor causal_mask_11_begin_0 = const()[name = string("causal_mask_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_11_end_mask_0 = const()[name = string("causal_mask_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_11_cast_fp16 = slice_by_index(begin = causal_mask_11_begin_0, end = concat_93, end_mask = causal_mask_11_end_mask_0, x = causal_mask)[name = string("causal_mask_11_cast_fp16")]; - tensor attn_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_11_cast_fp16, key = key_states_19, query = query_states_19, value = value_states_19)[name = string("attn_output_17_cast_fp16")]; - tensor var_973_perm_0 = const()[name = string("op_973_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_11_cast_fp16, key = key_states_19_cast_fp16, query = query_states_19_cast_fp16, value = value_states_19_cast_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor var_1000_perm_0 = const()[name = string("op_1000_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; int32 gather_77_cast_uint16_to_int32 = cast(dtype = gather_77_cast_uint16_to_int32_dtype_0, x = gather_77_cast_uint16)[name = string("cast_88")]; tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (gather_76, gather_77_cast_uint16_to_int32, var_48))[name = string("concat_94")]; - tensor var_973 = transpose(perm = var_973_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_44")]; - tensor input_33 = reshape(shape = concat_94, x = var_973)[name = string("input_33")]; - tensor linear_31 = linear(bias = linear_0_bias_0, weight = model_model_layers_4_self_attn_o_proj_weight_quantized, x = input_33)[name = string("linear_31")]; - tensor hidden_states_121 = add(x = hidden_states_103, y = linear_31)[name = string("hidden_states_121")]; - fp16 var_55_promoted_9_to_fp16 = const()[name = string("op_55_promoted_9_to_fp16"), val = fp16(0x1p+1)]; - tensor var_982_cast_fp16 = pow(x = hidden_states_121, y = var_55_promoted_9_to_fp16)[name = string("op_982_cast_fp16")]; + tensor var_1000_cast_fp16 = transpose(perm = var_1000_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_44")]; + tensor input_33_cast_fp16 = reshape(shape = concat_94, x = var_1000_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(288192448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290289664))))[name = string("model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_31_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor hidden_states_139_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = linear_31_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; + fp16 var_54_promoted_9_to_fp16 = const()[name = string("op_54_promoted_9_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1009_cast_fp16 = pow(x = hidden_states_139_cast_fp16, y = var_54_promoted_9_to_fp16)[name = string("op_1009_cast_fp16")]; tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; - tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_982_cast_fp16)[name = string("variance_19_cast_fp16")]; - fp16 var_985_to_fp16 = const()[name = string("op_985_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_986_cast_fp16 = add(x = variance_19_cast_fp16, y = var_985_to_fp16)[name = string("op_986_cast_fp16")]; - fp32 var_987_epsilon_0 = const()[name = string("op_987_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_987_cast_fp16 = rsqrt(epsilon = var_987_epsilon_0, x = var_986_cast_fp16)[name = string("op_987_cast_fp16")]; - tensor hidden_states_125_cast_fp16 = mul(x = hidden_states_121, y = var_987_cast_fp16)[name = string("hidden_states_125_cast_fp16")]; - tensor input_35 = mul(x = model_model_layers_4_post_attention_layernorm_weight, y = hidden_states_125_cast_fp16)[name = string("input_35")]; - tensor linear_32 = linear(bias = linear_4_bias_0, weight = model_model_layers_4_mlp_gate_proj_weight_quantized, x = input_35)[name = string("linear_32")]; - tensor var_996 = silu(x = linear_32)[name = string("op_996")]; - tensor linear_33 = linear(bias = linear_4_bias_0, weight = model_model_layers_4_mlp_up_proj_weight_quantized, x = input_35)[name = string("linear_33")]; - tensor input_39 = mul(x = var_996, y = linear_33)[name = string("input_39")]; - tensor linear_34 = linear(bias = linear_0_bias_0, weight = model_model_layers_4_mlp_down_proj_weight_quantized, x = input_39)[name = string("linear_34")]; - tensor hidden_states_129 = add(x = hidden_states_121, y = linear_34)[name = string("hidden_states_129")]; - fp16 var_55_promoted_10_to_fp16 = const()[name = string("op_55_promoted_10_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1009_cast_fp16 = pow(x = hidden_states_129, y = var_55_promoted_10_to_fp16)[name = string("op_1009_cast_fp16")]; - tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; - bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; - tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1009_cast_fp16)[name = string("variance_21_cast_fp16")]; + tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1009_cast_fp16)[name = string("variance_19_cast_fp16")]; fp16 var_1012_to_fp16 = const()[name = string("op_1012_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1013_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1012_to_fp16)[name = string("op_1013_cast_fp16")]; + tensor var_1013_cast_fp16 = add(x = variance_19_cast_fp16, y = var_1012_to_fp16)[name = string("op_1013_cast_fp16")]; fp32 var_1014_epsilon_0 = const()[name = string("op_1014_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1014_cast_fp16 = rsqrt(epsilon = var_1014_epsilon_0, x = var_1013_cast_fp16)[name = string("op_1014_cast_fp16")]; - tensor hidden_states_133_cast_fp16 = mul(x = hidden_states_129, y = var_1014_cast_fp16)[name = string("hidden_states_133_cast_fp16")]; - tensor hidden_states_135 = mul(x = model_model_layers_5_input_layernorm_weight, y = hidden_states_133_cast_fp16)[name = string("hidden_states_135")]; - tensor var_1022_shape = shape(x = hidden_states_135)[name = string("op_1022_shape")]; + tensor hidden_states_143_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = var_1014_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290551872)))]; + tensor input_35_cast_fp16 = mul(x = model_model_layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_143_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(290556032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(298944704))))[name = string("model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_1026_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1026_cast_fp16")]; + tensor model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(299993344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(308382016))))[name = string("model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor input_39_cast_fp16 = mul(x = var_1026_cast_fp16, y = linear_33_cast_fp16)[name = string("input_39_cast_fp16")]; + tensor model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(309430656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317819328))))[name = string("model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = linear_34_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + fp16 var_54_promoted_10_to_fp16 = const()[name = string("op_54_promoted_10_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1039_cast_fp16 = pow(x = hidden_states_149_cast_fp16, y = var_54_promoted_10_to_fp16)[name = string("op_1039_cast_fp16")]; + tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; + bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; + tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1039_cast_fp16)[name = string("variance_21_cast_fp16")]; + fp16 var_1042_to_fp16 = const()[name = string("op_1042_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1043_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1042_to_fp16)[name = string("op_1043_cast_fp16")]; + fp32 var_1044_epsilon_0 = const()[name = string("op_1044_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1044_cast_fp16 = rsqrt(epsilon = var_1044_epsilon_0, x = var_1043_cast_fp16)[name = string("op_1044_cast_fp16")]; + tensor hidden_states_153_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = var_1044_cast_fp16)[name = string("hidden_states_153_cast_fp16")]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318867968)))]; + tensor hidden_states_157_cast_fp16 = mul(x = model_model_layers_5_input_layernorm_weight_to_fp16, y = hidden_states_153_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; + tensor var_1055_shape_cast_fp16 = shape(x = hidden_states_157_cast_fp16)[name = string("op_1055_shape_cast_fp16")]; int32 gather_94 = const()[name = string("gather_94"), val = int32(1)]; int32 gather_95_axis_0 = const()[name = string("gather_95_axis_0"), val = int32(0)]; int32 gather_95_batch_dims_0 = const()[name = string("gather_95_batch_dims_0"), val = int32(0)]; bool gather_95_validate_indices_0 = const()[name = string("gather_95_validate_indices_0"), val = bool(false)]; - string var_1022_shape_to_uint16_dtype_0 = const()[name = string("op_1022_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1055_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1055_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_95_to_uint16 = const()[name = string("select_95_to_uint16"), val = uint16(1)]; - tensor var_1022_shape_to_uint16 = cast(dtype = var_1022_shape_to_uint16_dtype_0, x = var_1022_shape)[name = string("cast_87")]; - uint16 gather_95_cast_uint16 = gather(axis = gather_95_axis_0, batch_dims = gather_95_batch_dims_0, indices = select_95_to_uint16, validate_indices = gather_95_validate_indices_0, x = var_1022_shape_to_uint16)[name = string("gather_95_cast_uint16")]; + tensor var_1055_shape_cast_fp16_to_uint16 = cast(dtype = var_1055_shape_cast_fp16_to_uint16_dtype_0, x = var_1055_shape_cast_fp16)[name = string("cast_87")]; + uint16 gather_95_cast_uint16 = gather(axis = gather_95_axis_0, batch_dims = gather_95_batch_dims_0, indices = select_95_to_uint16, validate_indices = gather_95_validate_indices_0, x = var_1055_shape_cast_fp16_to_uint16)[name = string("gather_95_cast_uint16")]; string gather_95_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_95_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_35 = linear(bias = linear_0_bias_0, weight = model_model_layers_5_self_attn_q_proj_weight_quantized, x = hidden_states_135)[name = string("linear_35")]; - tensor linear_36 = linear(bias = linear_1_bias_0, weight = model_model_layers_5_self_attn_k_proj_weight_quantized, x = hidden_states_135)[name = string("linear_36")]; - tensor linear_37 = linear(bias = linear_1_bias_0, weight = model_model_layers_5_self_attn_v_proj_weight_quantized, x = hidden_states_135)[name = string("linear_37")]; + tensor model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(318872128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(320969344))))[name = string("model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321231552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321755904))))[name = string("model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(321821504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322345856))))[name = string("model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor concat_95x = const()[name = string("concat_95x"), val = tensor([1, -1, 32, 64])]; - tensor var_1031 = reshape(shape = concat_95x, x = linear_35)[name = string("op_1031")]; + tensor var_1064_cast_fp16 = reshape(shape = concat_95x, x = linear_35_cast_fp16)[name = string("op_1064_cast_fp16")]; tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_96x = const()[name = string("concat_96x"), val = tensor([1, -1, 8, 64])]; - tensor var_1034 = reshape(shape = concat_96x, x = linear_36)[name = string("op_1034")]; + tensor var_1067_cast_fp16 = reshape(shape = concat_96x, x = linear_36_cast_fp16)[name = string("op_1067_cast_fp16")]; tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_97x = const()[name = string("concat_97x"), val = tensor([1, -1, 8, 64])]; - tensor var_1037 = reshape(shape = concat_97x, x = linear_37)[name = string("op_1037")]; + tensor var_1070_cast_fp16 = reshape(shape = concat_97x, x = linear_37_cast_fp16)[name = string("op_1070_cast_fp16")]; tensor v_state_11_perm_0 = const()[name = string("v_state_11_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_11 = transpose(perm = q_11_perm_0, x = var_1031)[name = string("transpose_43")]; - tensor var_1041 = mul(x = q_11, y = cos_7)[name = string("op_1041")]; + tensor q_11_cast_fp16 = transpose(perm = q_11_perm_0, x = var_1064_cast_fp16)[name = string("transpose_43")]; + tensor var_1074_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1074_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_21 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11)[name = string("x1_21")]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_21 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11)[name = string("x2_21")]; - fp16 const_13_promoted = const()[name = string("const_13_promoted"), val = fp16(-0x1p+0)]; - tensor var_1052 = mul(x = x2_21, y = const_13_promoted)[name = string("op_1052")]; - bool var_1054_interleave_0 = const()[name = string("op_1054_interleave_0"), val = bool(false)]; - tensor var_1054 = concat(axis = var_48, interleave = var_1054_interleave_0, values = (var_1052, x1_21))[name = string("op_1054")]; - tensor var_1055 = mul(x = var_1054, y = sin_7)[name = string("op_1055")]; - tensor query_states_23 = add(x = var_1041, y = var_1055)[name = string("query_states_23")]; - tensor k_11 = transpose(perm = k_11_perm_0, x = var_1034)[name = string("transpose_42")]; - tensor var_1057 = mul(x = k_11, y = cos_7)[name = string("op_1057")]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1085_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1085_cast_fp16")]; + bool var_1087_interleave_0 = const()[name = string("op_1087_interleave_0"), val = bool(false)]; + tensor var_1087_cast_fp16 = concat(axis = var_48, interleave = var_1087_interleave_0, values = (var_1085_cast_fp16, x1_21_cast_fp16))[name = string("op_1087_cast_fp16")]; + tensor var_1088_cast_fp16 = mul(x = var_1087_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1088_cast_fp16")]; + tensor query_states_23_cast_fp16 = add(x = var_1074_cast_fp16, y = var_1088_cast_fp16)[name = string("query_states_23_cast_fp16")]; + tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = var_1067_cast_fp16)[name = string("transpose_42")]; + tensor var_1090_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1090_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_23 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11)[name = string("x1_23")]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_23 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11)[name = string("x2_23")]; - fp16 const_14_promoted = const()[name = string("const_14_promoted"), val = fp16(-0x1p+0)]; - tensor var_1068 = mul(x = x2_23, y = const_14_promoted)[name = string("op_1068")]; - bool var_1070_interleave_0 = const()[name = string("op_1070_interleave_0"), val = bool(false)]; - tensor var_1070 = concat(axis = var_48, interleave = var_1070_interleave_0, values = (var_1068, x1_23))[name = string("op_1070")]; - tensor var_1071 = mul(x = var_1070, y = sin_7)[name = string("op_1071")]; - tensor k_state_11 = add(x = var_1057, y = var_1071)[name = string("k_state_11")]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1101_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1101_cast_fp16")]; + bool var_1103_interleave_0 = const()[name = string("op_1103_interleave_0"), val = bool(false)]; + tensor var_1103_cast_fp16 = concat(axis = var_48, interleave = var_1103_interleave_0, values = (var_1101_cast_fp16, x1_23_cast_fp16))[name = string("op_1103_cast_fp16")]; + tensor var_1104_cast_fp16 = mul(x = var_1103_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1104_cast_fp16")]; + tensor k_state_11_cast_fp16 = add(x = var_1090_cast_fp16, y = var_1104_cast_fp16)[name = string("k_state_11_cast_fp16")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([0])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; @@ -1292,87 +1196,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_6 = slice_update(begin = concat_100, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11, x = coreml_update_state_40)[name = string("key_cache_internal_tensor_assign_6")]; - write_state(data = key_cache_internal_tensor_assign_6, input = key_cache)[name = string("coreml_update_state_42_write_state")]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11_cast_fp16, x = coreml_update_state_40)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_42_write_state")]; tensor coreml_update_state_42 = read_state(input = key_cache)[name = string("coreml_update_state_42")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_11 = transpose(perm = v_state_11_perm_0, x = var_1037)[name = string("transpose_41")]; - tensor value_cache_internal_tensor_assign_6 = slice_update(begin = concat_100, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11, x = coreml_update_state_41)[name = string("value_cache_internal_tensor_assign_6")]; - write_state(data = value_cache_internal_tensor_assign_6, input = value_cache)[name = string("coreml_update_state_43_write_state")]; + tensor v_state_11_cast_fp16 = transpose(perm = v_state_11_perm_0, x = var_1070_cast_fp16)[name = string("transpose_41")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11_cast_fp16, x = coreml_update_state_41)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_43_write_state")]; tensor coreml_update_state_43 = read_state(input = value_cache)[name = string("coreml_update_state_43")]; - tensor var_1094_begin_0 = const()[name = string("op_1094_begin_0"), val = tensor([5, 0, 0, 0, 0])]; - tensor var_1094_end_0 = const()[name = string("op_1094_end_0"), val = tensor([6, 1, 8, 2048, 64])]; - tensor var_1094_end_mask_0 = const()[name = string("op_1094_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1094_squeeze_mask_0 = const()[name = string("op_1094_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1094 = slice_by_index(begin = var_1094_begin_0, end = var_1094_end_0, end_mask = var_1094_end_mask_0, squeeze_mask = var_1094_squeeze_mask_0, x = coreml_update_state_42)[name = string("op_1094")]; - tensor var_1097_begin_0 = const()[name = string("op_1097_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1097_end_mask_0 = const()[name = string("op_1097_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1097 = slice_by_index(begin = var_1097_begin_0, end = concat_11, end_mask = var_1097_end_mask_0, x = var_1094)[name = string("op_1097")]; - tensor var_1099_begin_0 = const()[name = string("op_1099_begin_0"), val = tensor([5, 0, 0, 0, 0])]; - tensor var_1099_end_0 = const()[name = string("op_1099_end_0"), val = tensor([6, 1, 8, 2048, 64])]; - tensor var_1099_end_mask_0 = const()[name = string("op_1099_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1099_squeeze_mask_0 = const()[name = string("op_1099_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1099 = slice_by_index(begin = var_1099_begin_0, end = var_1099_end_0, end_mask = var_1099_end_mask_0, squeeze_mask = var_1099_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_1099")]; - tensor var_1102_begin_0 = const()[name = string("op_1102_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1102_end_mask_0 = const()[name = string("op_1102_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1102 = slice_by_index(begin = var_1102_begin_0, end = concat_11, end_mask = var_1102_end_mask_0, x = var_1099)[name = string("op_1102")]; - tensor var_1104_shape = shape(x = var_1097)[name = string("op_1104_shape")]; + tensor var_1127_begin_0 = const()[name = string("op_1127_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_1127_end_0 = const()[name = string("op_1127_end_0"), val = tensor([6, 1, 8, 2048, 64])]; + tensor var_1127_end_mask_0 = const()[name = string("op_1127_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1127_squeeze_mask_0 = const()[name = string("op_1127_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1127_cast_fp16 = slice_by_index(begin = var_1127_begin_0, end = var_1127_end_0, end_mask = var_1127_end_mask_0, squeeze_mask = var_1127_squeeze_mask_0, x = coreml_update_state_42)[name = string("op_1127_cast_fp16")]; + tensor var_1130_begin_0 = const()[name = string("op_1130_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1130_end_mask_0 = const()[name = string("op_1130_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1130_cast_fp16 = slice_by_index(begin = var_1130_begin_0, end = concat_11, end_mask = var_1130_end_mask_0, x = var_1127_cast_fp16)[name = string("op_1130_cast_fp16")]; + tensor var_1132_begin_0 = const()[name = string("op_1132_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_1132_end_0 = const()[name = string("op_1132_end_0"), val = tensor([6, 1, 8, 2048, 64])]; + tensor var_1132_end_mask_0 = const()[name = string("op_1132_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1132_squeeze_mask_0 = const()[name = string("op_1132_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1132_cast_fp16 = slice_by_index(begin = var_1132_begin_0, end = var_1132_end_0, end_mask = var_1132_end_mask_0, squeeze_mask = var_1132_squeeze_mask_0, x = coreml_update_state_43)[name = string("op_1132_cast_fp16")]; + tensor var_1135_begin_0 = const()[name = string("op_1135_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1135_end_mask_0 = const()[name = string("op_1135_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1135_cast_fp16 = slice_by_index(begin = var_1135_begin_0, end = concat_11, end_mask = var_1135_end_mask_0, x = var_1132_cast_fp16)[name = string("op_1135_cast_fp16")]; + tensor var_1137_shape_cast_fp16 = shape(x = var_1130_cast_fp16)[name = string("op_1137_shape_cast_fp16")]; int32 gather_103 = const()[name = string("gather_103"), val = int32(1)]; int32 gather_104 = const()[name = string("gather_104"), val = int32(8)]; int32 gather_105_axis_0 = const()[name = string("gather_105_axis_0"), val = int32(0)]; int32 gather_105_batch_dims_0 = const()[name = string("gather_105_batch_dims_0"), val = int32(0)]; bool gather_105_validate_indices_0 = const()[name = string("gather_105_validate_indices_0"), val = bool(false)]; - string var_1104_shape_to_uint16_dtype_0 = const()[name = string("op_1104_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1137_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1137_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_105_to_uint16 = const()[name = string("select_105_to_uint16"), val = uint16(2)]; - tensor var_1104_shape_to_uint16 = cast(dtype = var_1104_shape_to_uint16_dtype_0, x = var_1104_shape)[name = string("cast_86")]; - uint16 gather_105_cast_uint16 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105_to_uint16, validate_indices = gather_105_validate_indices_0, x = var_1104_shape_to_uint16)[name = string("gather_105_cast_uint16")]; + tensor var_1137_shape_cast_fp16_to_uint16 = cast(dtype = var_1137_shape_cast_fp16_to_uint16_dtype_0, x = var_1137_shape_cast_fp16)[name = string("cast_86")]; + uint16 gather_105_cast_uint16 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105_to_uint16, validate_indices = gather_105_validate_indices_0, x = var_1137_shape_cast_fp16_to_uint16)[name = string("gather_105_cast_uint16")]; string gather_105_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_105_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_106 = const()[name = string("gather_106"), val = int32(64)]; - tensor var_1111_axes_0 = const()[name = string("op_1111_axes_0"), val = tensor([2])]; - tensor var_1111 = expand_dims(axes = var_1111_axes_0, x = var_1097)[name = string("op_1111")]; - tensor shape_117 = shape(x = var_1111)[name = string("shape_117")]; + tensor var_1144_axes_0 = const()[name = string("op_1144_axes_0"), val = tensor([2])]; + tensor var_1144_cast_fp16 = expand_dims(axes = var_1144_axes_0, x = var_1130_cast_fp16)[name = string("op_1144_cast_fp16")]; + tensor shape_117_cast_fp16 = shape(x = var_1144_cast_fp16)[name = string("shape_117_cast_fp16")]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; int32 gather_105_cast_uint16_to_int32 = cast(dtype = gather_105_cast_uint16_to_int32_dtype_0, x = gather_105_cast_uint16)[name = string("cast_85")]; - tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (gather_103, gather_104, var_60, gather_105_cast_uint16_to_int32, gather_106))[name = string("concat_108")]; - tensor real_div_10 = real_div(x = concat_108, y = shape_117)[name = string("real_div_10")]; - tensor hidden_states_139 = tile(reps = real_div_10, x = var_1111)[name = string("hidden_states_139")]; + tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (gather_103, gather_104, var_59, gather_105_cast_uint16_to_int32, gather_106))[name = string("concat_108")]; + tensor real_div_10 = real_div(x = concat_108, y = shape_117_cast_fp16)[name = string("real_div_10")]; + tensor hidden_states_161_cast_fp16 = tile(reps = real_div_10, x = var_1144_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; tensor concat_109x = const()[name = string("concat_109x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_23 = reshape(shape = concat_109x, x = hidden_states_139)[name = string("key_states_23")]; - tensor var_1121_shape = shape(x = var_1102)[name = string("op_1121_shape")]; + tensor key_states_23_cast_fp16 = reshape(shape = concat_109x, x = hidden_states_161_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor var_1154_shape_cast_fp16 = shape(x = var_1135_cast_fp16)[name = string("op_1154_shape_cast_fp16")]; int32 gather_107 = const()[name = string("gather_107"), val = int32(1)]; int32 gather_108 = const()[name = string("gather_108"), val = int32(8)]; int32 gather_109_axis_0 = const()[name = string("gather_109_axis_0"), val = int32(0)]; int32 gather_109_batch_dims_0 = const()[name = string("gather_109_batch_dims_0"), val = int32(0)]; bool gather_109_validate_indices_0 = const()[name = string("gather_109_validate_indices_0"), val = bool(false)]; - string var_1121_shape_to_uint16_dtype_0 = const()[name = string("op_1121_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1154_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1154_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_109_to_uint16 = const()[name = string("select_109_to_uint16"), val = uint16(2)]; - tensor var_1121_shape_to_uint16 = cast(dtype = var_1121_shape_to_uint16_dtype_0, x = var_1121_shape)[name = string("cast_84")]; - uint16 gather_109_cast_uint16 = gather(axis = gather_109_axis_0, batch_dims = gather_109_batch_dims_0, indices = select_109_to_uint16, validate_indices = gather_109_validate_indices_0, x = var_1121_shape_to_uint16)[name = string("gather_109_cast_uint16")]; + tensor var_1154_shape_cast_fp16_to_uint16 = cast(dtype = var_1154_shape_cast_fp16_to_uint16_dtype_0, x = var_1154_shape_cast_fp16)[name = string("cast_84")]; + uint16 gather_109_cast_uint16 = gather(axis = gather_109_axis_0, batch_dims = gather_109_batch_dims_0, indices = select_109_to_uint16, validate_indices = gather_109_validate_indices_0, x = var_1154_shape_cast_fp16_to_uint16)[name = string("gather_109_cast_uint16")]; string gather_109_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_109_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_110 = const()[name = string("gather_110"), val = int32(64)]; - tensor var_1128_axes_0 = const()[name = string("op_1128_axes_0"), val = tensor([2])]; - tensor var_1128 = expand_dims(axes = var_1128_axes_0, x = var_1102)[name = string("op_1128")]; - tensor shape_122 = shape(x = var_1128)[name = string("shape_122")]; + tensor var_1161_axes_0 = const()[name = string("op_1161_axes_0"), val = tensor([2])]; + tensor var_1161_cast_fp16 = expand_dims(axes = var_1161_axes_0, x = var_1135_cast_fp16)[name = string("op_1161_cast_fp16")]; + tensor shape_122_cast_fp16 = shape(x = var_1161_cast_fp16)[name = string("shape_122_cast_fp16")]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; int32 gather_109_cast_uint16_to_int32 = cast(dtype = gather_109_cast_uint16_to_int32_dtype_0, x = gather_109_cast_uint16)[name = string("cast_83")]; - tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (gather_107, gather_108, var_60, gather_109_cast_uint16_to_int32, gather_110))[name = string("concat_110")]; - tensor real_div_11 = real_div(x = concat_110, y = shape_122)[name = string("real_div_11")]; - tensor hidden_states_143 = tile(reps = real_div_11, x = var_1128)[name = string("hidden_states_143")]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (gather_107, gather_108, var_59, gather_109_cast_uint16_to_int32, gather_110))[name = string("concat_110")]; + tensor real_div_11 = real_div(x = concat_110, y = shape_122_cast_fp16)[name = string("real_div_11")]; + tensor hidden_states_165_cast_fp16 = tile(reps = real_div_11, x = var_1161_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_23 = reshape(shape = concat_111x, x = hidden_states_143)[name = string("value_states_23")]; - tensor var_1138_shape = shape(x = key_states_23)[name = string("op_1138_shape")]; + tensor value_states_23_cast_fp16 = reshape(shape = concat_111x, x = hidden_states_165_cast_fp16)[name = string("value_states_23_cast_fp16")]; + tensor var_1171_shape_cast_fp16 = shape(x = key_states_23_cast_fp16)[name = string("op_1171_shape_cast_fp16")]; int32 gather_111_axis_0 = const()[name = string("gather_111_axis_0"), val = int32(0)]; int32 gather_111_batch_dims_0 = const()[name = string("gather_111_batch_dims_0"), val = int32(0)]; bool gather_111_validate_indices_0 = const()[name = string("gather_111_validate_indices_0"), val = bool(false)]; - string var_1138_shape_to_uint16_dtype_0 = const()[name = string("op_1138_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_111_to_uint16 = const()[name = string("select_111_to_uint16"), val = uint16(2)]; - tensor var_1138_shape_to_uint16 = cast(dtype = var_1138_shape_to_uint16_dtype_0, x = var_1138_shape)[name = string("cast_82")]; - uint16 gather_111_cast_uint16 = gather(axis = gather_111_axis_0, batch_dims = gather_111_batch_dims_0, indices = select_111_to_uint16, validate_indices = gather_111_validate_indices_0, x = var_1138_shape_to_uint16)[name = string("gather_111_cast_uint16")]; + tensor var_1171_shape_cast_fp16_to_uint16 = cast(dtype = var_1171_shape_cast_fp16_to_uint16_dtype_0, x = var_1171_shape_cast_fp16)[name = string("cast_82")]; + uint16 gather_111_cast_uint16 = gather(axis = gather_111_axis_0, batch_dims = gather_111_batch_dims_0, indices = select_111_to_uint16, validate_indices = gather_111_validate_indices_0, x = var_1171_shape_cast_fp16_to_uint16)[name = string("gather_111_cast_uint16")]; string gather_111_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_111_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_112_values0_0 = const()[name = string("concat_112_values0_0"), val = int32(1)]; int32 concat_112_values1_0 = const()[name = string("concat_112_values1_0"), val = int32(1)]; @@ -1384,98 +1288,107 @@ program(1.3) tensor causal_mask_13_begin_0 = const()[name = string("causal_mask_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_13_end_mask_0 = const()[name = string("causal_mask_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_13_cast_fp16 = slice_by_index(begin = causal_mask_13_begin_0, end = concat_112, end_mask = causal_mask_13_end_mask_0, x = causal_mask)[name = string("causal_mask_13_cast_fp16")]; - tensor attn_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_13_cast_fp16, key = key_states_23, query = query_states_23, value = value_states_23)[name = string("attn_output_21_cast_fp16")]; - tensor var_1144_perm_0 = const()[name = string("op_1144_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_13_cast_fp16, key = key_states_23_cast_fp16, query = query_states_23_cast_fp16, value = value_states_23_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1177_perm_0 = const()[name = string("op_1177_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_113_axis_0 = const()[name = string("concat_113_axis_0"), val = int32(0)]; bool concat_113_interleave_0 = const()[name = string("concat_113_interleave_0"), val = bool(false)]; int32 gather_95_cast_uint16_to_int32 = cast(dtype = gather_95_cast_uint16_to_int32_dtype_0, x = gather_95_cast_uint16)[name = string("cast_80")]; tensor concat_113 = concat(axis = concat_113_axis_0, interleave = concat_113_interleave_0, values = (gather_94, gather_95_cast_uint16_to_int32, var_48))[name = string("concat_113")]; - tensor var_1144 = transpose(perm = var_1144_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_40")]; - tensor input_41 = reshape(shape = concat_113, x = var_1144)[name = string("input_41")]; - tensor linear_38 = linear(bias = linear_0_bias_0, weight = model_model_layers_5_self_attn_o_proj_weight_quantized, x = input_41)[name = string("linear_38")]; - tensor hidden_states_147 = add(x = hidden_states_129, y = linear_38)[name = string("hidden_states_147")]; - fp16 var_55_promoted_11_to_fp16 = const()[name = string("op_55_promoted_11_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1153_cast_fp16 = pow(x = hidden_states_147, y = var_55_promoted_11_to_fp16)[name = string("op_1153_cast_fp16")]; + tensor var_1177_cast_fp16 = transpose(perm = var_1177_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_40")]; + tensor input_41_cast_fp16 = reshape(shape = concat_113, x = var_1177_cast_fp16)[name = string("input_41_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(322411456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324508672))))[name = string("model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor hidden_states_169_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = linear_38_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; + fp16 var_54_promoted_11_to_fp16 = const()[name = string("op_54_promoted_11_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1186_cast_fp16 = pow(x = hidden_states_169_cast_fp16, y = var_54_promoted_11_to_fp16)[name = string("op_1186_cast_fp16")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; - tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1153_cast_fp16)[name = string("variance_23_cast_fp16")]; - fp16 var_1156_to_fp16 = const()[name = string("op_1156_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1157_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1156_to_fp16)[name = string("op_1157_cast_fp16")]; - fp32 var_1158_epsilon_0 = const()[name = string("op_1158_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1158_cast_fp16 = rsqrt(epsilon = var_1158_epsilon_0, x = var_1157_cast_fp16)[name = string("op_1158_cast_fp16")]; - tensor hidden_states_151_cast_fp16 = mul(x = hidden_states_147, y = var_1158_cast_fp16)[name = string("hidden_states_151_cast_fp16")]; - tensor input_43 = mul(x = model_model_layers_5_post_attention_layernorm_weight, y = hidden_states_151_cast_fp16)[name = string("input_43")]; - tensor linear_39 = linear(bias = linear_4_bias_0, weight = model_model_layers_5_mlp_gate_proj_weight_quantized, x = input_43)[name = string("linear_39")]; - tensor var_1167 = silu(x = linear_39)[name = string("op_1167")]; - tensor linear_40 = linear(bias = linear_4_bias_0, weight = model_model_layers_5_mlp_up_proj_weight_quantized, x = input_43)[name = string("linear_40")]; - tensor input_47 = mul(x = var_1167, y = linear_40)[name = string("input_47")]; - tensor linear_41 = linear(bias = linear_0_bias_0, weight = model_model_layers_5_mlp_down_proj_weight_quantized, x = input_47)[name = string("linear_41")]; - tensor hidden_states_155 = add(x = hidden_states_147, y = linear_41)[name = string("hidden_states_155")]; - fp16 var_55_promoted_12_to_fp16 = const()[name = string("op_55_promoted_12_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1180_cast_fp16 = pow(x = hidden_states_155, y = var_55_promoted_12_to_fp16)[name = string("op_1180_cast_fp16")]; + tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1186_cast_fp16)[name = string("variance_23_cast_fp16")]; + fp16 var_1189_to_fp16 = const()[name = string("op_1189_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1190_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1189_to_fp16)[name = string("op_1190_cast_fp16")]; + fp32 var_1191_epsilon_0 = const()[name = string("op_1191_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1191_cast_fp16 = rsqrt(epsilon = var_1191_epsilon_0, x = var_1190_cast_fp16)[name = string("op_1191_cast_fp16")]; + tensor hidden_states_173_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = var_1191_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324770880)))]; + tensor input_43_cast_fp16 = mul(x = model_model_layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_173_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(324775040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(333163712))))[name = string("model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor var_1203_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1203_cast_fp16")]; + tensor model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(334212352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342601024))))[name = string("model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor input_47_cast_fp16 = mul(x = var_1203_cast_fp16, y = linear_40_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(343649664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(352038336))))[name = string("model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_41_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor hidden_states_179_cast_fp16 = add(x = hidden_states_169_cast_fp16, y = linear_41_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; + fp16 var_54_promoted_12_to_fp16 = const()[name = string("op_54_promoted_12_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1216_cast_fp16 = pow(x = hidden_states_179_cast_fp16, y = var_54_promoted_12_to_fp16)[name = string("op_1216_cast_fp16")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; - tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1180_cast_fp16)[name = string("variance_25_cast_fp16")]; - fp16 var_1183_to_fp16 = const()[name = string("op_1183_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1184_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1183_to_fp16)[name = string("op_1184_cast_fp16")]; - fp32 var_1185_epsilon_0 = const()[name = string("op_1185_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1185_cast_fp16 = rsqrt(epsilon = var_1185_epsilon_0, x = var_1184_cast_fp16)[name = string("op_1185_cast_fp16")]; - tensor hidden_states_159_cast_fp16 = mul(x = hidden_states_155, y = var_1185_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; - tensor hidden_states_161 = mul(x = model_model_layers_6_input_layernorm_weight, y = hidden_states_159_cast_fp16)[name = string("hidden_states_161")]; - tensor var_1193_shape = shape(x = hidden_states_161)[name = string("op_1193_shape")]; + tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1216_cast_fp16)[name = string("variance_25_cast_fp16")]; + fp16 var_1219_to_fp16 = const()[name = string("op_1219_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1220_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1219_to_fp16)[name = string("op_1220_cast_fp16")]; + fp32 var_1221_epsilon_0 = const()[name = string("op_1221_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1221_cast_fp16 = rsqrt(epsilon = var_1221_epsilon_0, x = var_1220_cast_fp16)[name = string("op_1221_cast_fp16")]; + tensor hidden_states_183_cast_fp16 = mul(x = hidden_states_179_cast_fp16, y = var_1221_cast_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353086976)))]; + tensor hidden_states_187_cast_fp16 = mul(x = model_model_layers_6_input_layernorm_weight_to_fp16, y = hidden_states_183_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; + tensor var_1232_shape_cast_fp16 = shape(x = hidden_states_187_cast_fp16)[name = string("op_1232_shape_cast_fp16")]; int32 gather_112 = const()[name = string("gather_112"), val = int32(1)]; int32 gather_113_axis_0 = const()[name = string("gather_113_axis_0"), val = int32(0)]; int32 gather_113_batch_dims_0 = const()[name = string("gather_113_batch_dims_0"), val = int32(0)]; bool gather_113_validate_indices_0 = const()[name = string("gather_113_validate_indices_0"), val = bool(false)]; - string var_1193_shape_to_uint16_dtype_0 = const()[name = string("op_1193_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1232_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1232_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_113_to_uint16 = const()[name = string("select_113_to_uint16"), val = uint16(1)]; - tensor var_1193_shape_to_uint16 = cast(dtype = var_1193_shape_to_uint16_dtype_0, x = var_1193_shape)[name = string("cast_79")]; - uint16 gather_113_cast_uint16 = gather(axis = gather_113_axis_0, batch_dims = gather_113_batch_dims_0, indices = select_113_to_uint16, validate_indices = gather_113_validate_indices_0, x = var_1193_shape_to_uint16)[name = string("gather_113_cast_uint16")]; + tensor var_1232_shape_cast_fp16_to_uint16 = cast(dtype = var_1232_shape_cast_fp16_to_uint16_dtype_0, x = var_1232_shape_cast_fp16)[name = string("cast_79")]; + uint16 gather_113_cast_uint16 = gather(axis = gather_113_axis_0, batch_dims = gather_113_batch_dims_0, indices = select_113_to_uint16, validate_indices = gather_113_validate_indices_0, x = var_1232_shape_cast_fp16_to_uint16)[name = string("gather_113_cast_uint16")]; string gather_113_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_113_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_42 = linear(bias = linear_0_bias_0, weight = model_model_layers_6_self_attn_q_proj_weight_quantized, x = hidden_states_161)[name = string("linear_42")]; - tensor linear_43 = linear(bias = linear_1_bias_0, weight = model_model_layers_6_self_attn_k_proj_weight_quantized, x = hidden_states_161)[name = string("linear_43")]; - tensor linear_44 = linear(bias = linear_1_bias_0, weight = model_model_layers_6_self_attn_v_proj_weight_quantized, x = hidden_states_161)[name = string("linear_44")]; + tensor model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(353091136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355188352))))[name = string("model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355450560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355974912))))[name = string("model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356040512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356564864))))[name = string("model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_114x = const()[name = string("concat_114x"), val = tensor([1, -1, 32, 64])]; - tensor var_1202 = reshape(shape = concat_114x, x = linear_42)[name = string("op_1202")]; + tensor var_1241_cast_fp16 = reshape(shape = concat_114x, x = linear_42_cast_fp16)[name = string("op_1241_cast_fp16")]; tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_115x = const()[name = string("concat_115x"), val = tensor([1, -1, 8, 64])]; - tensor var_1205 = reshape(shape = concat_115x, x = linear_43)[name = string("op_1205")]; + tensor var_1244_cast_fp16 = reshape(shape = concat_115x, x = linear_43_cast_fp16)[name = string("op_1244_cast_fp16")]; tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_116x = const()[name = string("concat_116x"), val = tensor([1, -1, 8, 64])]; - tensor var_1208 = reshape(shape = concat_116x, x = linear_44)[name = string("op_1208")]; + tensor var_1247_cast_fp16 = reshape(shape = concat_116x, x = linear_44_cast_fp16)[name = string("op_1247_cast_fp16")]; tensor v_state_13_perm_0 = const()[name = string("v_state_13_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_13 = transpose(perm = q_13_perm_0, x = var_1202)[name = string("transpose_39")]; - tensor var_1212 = mul(x = q_13, y = cos_7)[name = string("op_1212")]; + tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = var_1241_cast_fp16)[name = string("transpose_39")]; + tensor var_1251_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1251_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_25 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13)[name = string("x1_25")]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_25 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13)[name = string("x2_25")]; - fp16 const_15_promoted = const()[name = string("const_15_promoted"), val = fp16(-0x1p+0)]; - tensor var_1223 = mul(x = x2_25, y = const_15_promoted)[name = string("op_1223")]; - bool var_1225_interleave_0 = const()[name = string("op_1225_interleave_0"), val = bool(false)]; - tensor var_1225 = concat(axis = var_48, interleave = var_1225_interleave_0, values = (var_1223, x1_25))[name = string("op_1225")]; - tensor var_1226 = mul(x = var_1225, y = sin_7)[name = string("op_1226")]; - tensor query_states_27 = add(x = var_1212, y = var_1226)[name = string("query_states_27")]; - tensor k_13 = transpose(perm = k_13_perm_0, x = var_1205)[name = string("transpose_38")]; - tensor var_1228 = mul(x = k_13, y = cos_7)[name = string("op_1228")]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1262_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_15_promoted_to_fp16)[name = string("op_1262_cast_fp16")]; + bool var_1264_interleave_0 = const()[name = string("op_1264_interleave_0"), val = bool(false)]; + tensor var_1264_cast_fp16 = concat(axis = var_48, interleave = var_1264_interleave_0, values = (var_1262_cast_fp16, x1_25_cast_fp16))[name = string("op_1264_cast_fp16")]; + tensor var_1265_cast_fp16 = mul(x = var_1264_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1265_cast_fp16")]; + tensor query_states_27_cast_fp16 = add(x = var_1251_cast_fp16, y = var_1265_cast_fp16)[name = string("query_states_27_cast_fp16")]; + tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = var_1244_cast_fp16)[name = string("transpose_38")]; + tensor var_1267_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1267_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_27 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13)[name = string("x1_27")]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_27 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13)[name = string("x2_27")]; - fp16 const_16_promoted = const()[name = string("const_16_promoted"), val = fp16(-0x1p+0)]; - tensor var_1239 = mul(x = x2_27, y = const_16_promoted)[name = string("op_1239")]; - bool var_1241_interleave_0 = const()[name = string("op_1241_interleave_0"), val = bool(false)]; - tensor var_1241 = concat(axis = var_48, interleave = var_1241_interleave_0, values = (var_1239, x1_27))[name = string("op_1241")]; - tensor var_1242 = mul(x = var_1241, y = sin_7)[name = string("op_1242")]; - tensor k_state_13 = add(x = var_1228, y = var_1242)[name = string("k_state_13")]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1278_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1278_cast_fp16")]; + bool var_1280_interleave_0 = const()[name = string("op_1280_interleave_0"), val = bool(false)]; + tensor var_1280_cast_fp16 = concat(axis = var_48, interleave = var_1280_interleave_0, values = (var_1278_cast_fp16, x1_27_cast_fp16))[name = string("op_1280_cast_fp16")]; + tensor var_1281_cast_fp16 = mul(x = var_1280_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1281_cast_fp16")]; + tensor k_state_13_cast_fp16 = add(x = var_1267_cast_fp16, y = var_1281_cast_fp16)[name = string("k_state_13_cast_fp16")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; @@ -1487,87 +1400,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_7 = slice_update(begin = concat_119, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13, x = coreml_update_state_42)[name = string("key_cache_internal_tensor_assign_7")]; - write_state(data = key_cache_internal_tensor_assign_7, input = key_cache)[name = string("coreml_update_state_44_write_state")]; + tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13_cast_fp16, x = coreml_update_state_42)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_44_write_state")]; tensor coreml_update_state_44 = read_state(input = key_cache)[name = string("coreml_update_state_44")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_13 = transpose(perm = v_state_13_perm_0, x = var_1208)[name = string("transpose_37")]; - tensor value_cache_internal_tensor_assign_7 = slice_update(begin = concat_119, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13, x = coreml_update_state_43)[name = string("value_cache_internal_tensor_assign_7")]; - write_state(data = value_cache_internal_tensor_assign_7, input = value_cache)[name = string("coreml_update_state_45_write_state")]; + tensor v_state_13_cast_fp16 = transpose(perm = v_state_13_perm_0, x = var_1247_cast_fp16)[name = string("transpose_37")]; + tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13_cast_fp16, x = coreml_update_state_43)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_45_write_state")]; tensor coreml_update_state_45 = read_state(input = value_cache)[name = string("coreml_update_state_45")]; - tensor var_1265_begin_0 = const()[name = string("op_1265_begin_0"), val = tensor([6, 0, 0, 0, 0])]; - tensor var_1265_end_0 = const()[name = string("op_1265_end_0"), val = tensor([7, 1, 8, 2048, 64])]; - tensor var_1265_end_mask_0 = const()[name = string("op_1265_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1265_squeeze_mask_0 = const()[name = string("op_1265_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1265 = slice_by_index(begin = var_1265_begin_0, end = var_1265_end_0, end_mask = var_1265_end_mask_0, squeeze_mask = var_1265_squeeze_mask_0, x = coreml_update_state_44)[name = string("op_1265")]; - tensor var_1268_begin_0 = const()[name = string("op_1268_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1268_end_mask_0 = const()[name = string("op_1268_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1268 = slice_by_index(begin = var_1268_begin_0, end = concat_11, end_mask = var_1268_end_mask_0, x = var_1265)[name = string("op_1268")]; - tensor var_1270_begin_0 = const()[name = string("op_1270_begin_0"), val = tensor([6, 0, 0, 0, 0])]; - tensor var_1270_end_0 = const()[name = string("op_1270_end_0"), val = tensor([7, 1, 8, 2048, 64])]; - tensor var_1270_end_mask_0 = const()[name = string("op_1270_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1270_squeeze_mask_0 = const()[name = string("op_1270_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1270 = slice_by_index(begin = var_1270_begin_0, end = var_1270_end_0, end_mask = var_1270_end_mask_0, squeeze_mask = var_1270_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_1270")]; - tensor var_1273_begin_0 = const()[name = string("op_1273_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1273_end_mask_0 = const()[name = string("op_1273_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1273 = slice_by_index(begin = var_1273_begin_0, end = concat_11, end_mask = var_1273_end_mask_0, x = var_1270)[name = string("op_1273")]; - tensor var_1275_shape = shape(x = var_1268)[name = string("op_1275_shape")]; + tensor var_1304_begin_0 = const()[name = string("op_1304_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1304_end_0 = const()[name = string("op_1304_end_0"), val = tensor([7, 1, 8, 2048, 64])]; + tensor var_1304_end_mask_0 = const()[name = string("op_1304_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1304_squeeze_mask_0 = const()[name = string("op_1304_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1304_cast_fp16 = slice_by_index(begin = var_1304_begin_0, end = var_1304_end_0, end_mask = var_1304_end_mask_0, squeeze_mask = var_1304_squeeze_mask_0, x = coreml_update_state_44)[name = string("op_1304_cast_fp16")]; + tensor var_1307_begin_0 = const()[name = string("op_1307_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1307_end_mask_0 = const()[name = string("op_1307_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1307_cast_fp16 = slice_by_index(begin = var_1307_begin_0, end = concat_11, end_mask = var_1307_end_mask_0, x = var_1304_cast_fp16)[name = string("op_1307_cast_fp16")]; + tensor var_1309_begin_0 = const()[name = string("op_1309_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1309_end_0 = const()[name = string("op_1309_end_0"), val = tensor([7, 1, 8, 2048, 64])]; + tensor var_1309_end_mask_0 = const()[name = string("op_1309_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1309_squeeze_mask_0 = const()[name = string("op_1309_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1309_cast_fp16 = slice_by_index(begin = var_1309_begin_0, end = var_1309_end_0, end_mask = var_1309_end_mask_0, squeeze_mask = var_1309_squeeze_mask_0, x = coreml_update_state_45)[name = string("op_1309_cast_fp16")]; + tensor var_1312_begin_0 = const()[name = string("op_1312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1312_end_mask_0 = const()[name = string("op_1312_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1312_cast_fp16 = slice_by_index(begin = var_1312_begin_0, end = concat_11, end_mask = var_1312_end_mask_0, x = var_1309_cast_fp16)[name = string("op_1312_cast_fp16")]; + tensor var_1314_shape_cast_fp16 = shape(x = var_1307_cast_fp16)[name = string("op_1314_shape_cast_fp16")]; int32 gather_121 = const()[name = string("gather_121"), val = int32(1)]; int32 gather_122 = const()[name = string("gather_122"), val = int32(8)]; int32 gather_123_axis_0 = const()[name = string("gather_123_axis_0"), val = int32(0)]; int32 gather_123_batch_dims_0 = const()[name = string("gather_123_batch_dims_0"), val = int32(0)]; bool gather_123_validate_indices_0 = const()[name = string("gather_123_validate_indices_0"), val = bool(false)]; - string var_1275_shape_to_uint16_dtype_0 = const()[name = string("op_1275_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1314_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1314_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_123_to_uint16 = const()[name = string("select_123_to_uint16"), val = uint16(2)]; - tensor var_1275_shape_to_uint16 = cast(dtype = var_1275_shape_to_uint16_dtype_0, x = var_1275_shape)[name = string("cast_78")]; - uint16 gather_123_cast_uint16 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123_to_uint16, validate_indices = gather_123_validate_indices_0, x = var_1275_shape_to_uint16)[name = string("gather_123_cast_uint16")]; + tensor var_1314_shape_cast_fp16_to_uint16 = cast(dtype = var_1314_shape_cast_fp16_to_uint16_dtype_0, x = var_1314_shape_cast_fp16)[name = string("cast_78")]; + uint16 gather_123_cast_uint16 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123_to_uint16, validate_indices = gather_123_validate_indices_0, x = var_1314_shape_cast_fp16_to_uint16)[name = string("gather_123_cast_uint16")]; string gather_123_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_123_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_124 = const()[name = string("gather_124"), val = int32(64)]; - tensor var_1282_axes_0 = const()[name = string("op_1282_axes_0"), val = tensor([2])]; - tensor var_1282 = expand_dims(axes = var_1282_axes_0, x = var_1268)[name = string("op_1282")]; - tensor shape_137 = shape(x = var_1282)[name = string("shape_137")]; + tensor var_1321_axes_0 = const()[name = string("op_1321_axes_0"), val = tensor([2])]; + tensor var_1321_cast_fp16 = expand_dims(axes = var_1321_axes_0, x = var_1307_cast_fp16)[name = string("op_1321_cast_fp16")]; + tensor shape_137_cast_fp16 = shape(x = var_1321_cast_fp16)[name = string("shape_137_cast_fp16")]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; int32 gather_123_cast_uint16_to_int32 = cast(dtype = gather_123_cast_uint16_to_int32_dtype_0, x = gather_123_cast_uint16)[name = string("cast_77")]; - tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (gather_121, gather_122, var_60, gather_123_cast_uint16_to_int32, gather_124))[name = string("concat_127")]; - tensor real_div_12 = real_div(x = concat_127, y = shape_137)[name = string("real_div_12")]; - tensor hidden_states_165 = tile(reps = real_div_12, x = var_1282)[name = string("hidden_states_165")]; + tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (gather_121, gather_122, var_59, gather_123_cast_uint16_to_int32, gather_124))[name = string("concat_127")]; + tensor real_div_12 = real_div(x = concat_127, y = shape_137_cast_fp16)[name = string("real_div_12")]; + tensor hidden_states_191_cast_fp16 = tile(reps = real_div_12, x = var_1321_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; tensor concat_128x = const()[name = string("concat_128x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_27 = reshape(shape = concat_128x, x = hidden_states_165)[name = string("key_states_27")]; - tensor var_1292_shape = shape(x = var_1273)[name = string("op_1292_shape")]; + tensor key_states_27_cast_fp16 = reshape(shape = concat_128x, x = hidden_states_191_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor var_1331_shape_cast_fp16 = shape(x = var_1312_cast_fp16)[name = string("op_1331_shape_cast_fp16")]; int32 gather_125 = const()[name = string("gather_125"), val = int32(1)]; int32 gather_126 = const()[name = string("gather_126"), val = int32(8)]; int32 gather_127_axis_0 = const()[name = string("gather_127_axis_0"), val = int32(0)]; int32 gather_127_batch_dims_0 = const()[name = string("gather_127_batch_dims_0"), val = int32(0)]; bool gather_127_validate_indices_0 = const()[name = string("gather_127_validate_indices_0"), val = bool(false)]; - string var_1292_shape_to_uint16_dtype_0 = const()[name = string("op_1292_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1331_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1331_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_127_to_uint16 = const()[name = string("select_127_to_uint16"), val = uint16(2)]; - tensor var_1292_shape_to_uint16 = cast(dtype = var_1292_shape_to_uint16_dtype_0, x = var_1292_shape)[name = string("cast_76")]; - uint16 gather_127_cast_uint16 = gather(axis = gather_127_axis_0, batch_dims = gather_127_batch_dims_0, indices = select_127_to_uint16, validate_indices = gather_127_validate_indices_0, x = var_1292_shape_to_uint16)[name = string("gather_127_cast_uint16")]; + tensor var_1331_shape_cast_fp16_to_uint16 = cast(dtype = var_1331_shape_cast_fp16_to_uint16_dtype_0, x = var_1331_shape_cast_fp16)[name = string("cast_76")]; + uint16 gather_127_cast_uint16 = gather(axis = gather_127_axis_0, batch_dims = gather_127_batch_dims_0, indices = select_127_to_uint16, validate_indices = gather_127_validate_indices_0, x = var_1331_shape_cast_fp16_to_uint16)[name = string("gather_127_cast_uint16")]; string gather_127_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_127_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_128 = const()[name = string("gather_128"), val = int32(64)]; - tensor var_1299_axes_0 = const()[name = string("op_1299_axes_0"), val = tensor([2])]; - tensor var_1299 = expand_dims(axes = var_1299_axes_0, x = var_1273)[name = string("op_1299")]; - tensor shape_142 = shape(x = var_1299)[name = string("shape_142")]; + tensor var_1338_axes_0 = const()[name = string("op_1338_axes_0"), val = tensor([2])]; + tensor var_1338_cast_fp16 = expand_dims(axes = var_1338_axes_0, x = var_1312_cast_fp16)[name = string("op_1338_cast_fp16")]; + tensor shape_142_cast_fp16 = shape(x = var_1338_cast_fp16)[name = string("shape_142_cast_fp16")]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; int32 gather_127_cast_uint16_to_int32 = cast(dtype = gather_127_cast_uint16_to_int32_dtype_0, x = gather_127_cast_uint16)[name = string("cast_75")]; - tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (gather_125, gather_126, var_60, gather_127_cast_uint16_to_int32, gather_128))[name = string("concat_129")]; - tensor real_div_13 = real_div(x = concat_129, y = shape_142)[name = string("real_div_13")]; - tensor hidden_states_169 = tile(reps = real_div_13, x = var_1299)[name = string("hidden_states_169")]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (gather_125, gather_126, var_59, gather_127_cast_uint16_to_int32, gather_128))[name = string("concat_129")]; + tensor real_div_13 = real_div(x = concat_129, y = shape_142_cast_fp16)[name = string("real_div_13")]; + tensor hidden_states_195_cast_fp16 = tile(reps = real_div_13, x = var_1338_cast_fp16)[name = string("hidden_states_195_cast_fp16")]; tensor concat_130x = const()[name = string("concat_130x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_27 = reshape(shape = concat_130x, x = hidden_states_169)[name = string("value_states_27")]; - tensor var_1309_shape = shape(x = key_states_27)[name = string("op_1309_shape")]; + tensor value_states_27_cast_fp16 = reshape(shape = concat_130x, x = hidden_states_195_cast_fp16)[name = string("value_states_27_cast_fp16")]; + tensor var_1348_shape_cast_fp16 = shape(x = key_states_27_cast_fp16)[name = string("op_1348_shape_cast_fp16")]; int32 gather_129_axis_0 = const()[name = string("gather_129_axis_0"), val = int32(0)]; int32 gather_129_batch_dims_0 = const()[name = string("gather_129_batch_dims_0"), val = int32(0)]; bool gather_129_validate_indices_0 = const()[name = string("gather_129_validate_indices_0"), val = bool(false)]; - string var_1309_shape_to_uint16_dtype_0 = const()[name = string("op_1309_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1348_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1348_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_129_to_uint16 = const()[name = string("select_129_to_uint16"), val = uint16(2)]; - tensor var_1309_shape_to_uint16 = cast(dtype = var_1309_shape_to_uint16_dtype_0, x = var_1309_shape)[name = string("cast_74")]; - uint16 gather_129_cast_uint16 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = select_129_to_uint16, validate_indices = gather_129_validate_indices_0, x = var_1309_shape_to_uint16)[name = string("gather_129_cast_uint16")]; + tensor var_1348_shape_cast_fp16_to_uint16 = cast(dtype = var_1348_shape_cast_fp16_to_uint16_dtype_0, x = var_1348_shape_cast_fp16)[name = string("cast_74")]; + uint16 gather_129_cast_uint16 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = select_129_to_uint16, validate_indices = gather_129_validate_indices_0, x = var_1348_shape_cast_fp16_to_uint16)[name = string("gather_129_cast_uint16")]; string gather_129_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_129_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_131_values0_0 = const()[name = string("concat_131_values0_0"), val = int32(1)]; int32 concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = int32(1)]; @@ -1579,98 +1492,107 @@ program(1.3) tensor causal_mask_15_begin_0 = const()[name = string("causal_mask_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_15_end_mask_0 = const()[name = string("causal_mask_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_15_cast_fp16 = slice_by_index(begin = causal_mask_15_begin_0, end = concat_131, end_mask = causal_mask_15_end_mask_0, x = causal_mask)[name = string("causal_mask_15_cast_fp16")]; - tensor attn_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_15_cast_fp16, key = key_states_27, query = query_states_27, value = value_states_27)[name = string("attn_output_25_cast_fp16")]; - tensor var_1315_perm_0 = const()[name = string("op_1315_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_15_cast_fp16, key = key_states_27_cast_fp16, query = query_states_27_cast_fp16, value = value_states_27_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1354_perm_0 = const()[name = string("op_1354_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; int32 gather_113_cast_uint16_to_int32 = cast(dtype = gather_113_cast_uint16_to_int32_dtype_0, x = gather_113_cast_uint16)[name = string("cast_72")]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (gather_112, gather_113_cast_uint16_to_int32, var_48))[name = string("concat_132")]; - tensor var_1315 = transpose(perm = var_1315_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_36")]; - tensor input_49 = reshape(shape = concat_132, x = var_1315)[name = string("input_49")]; - tensor linear_45 = linear(bias = linear_0_bias_0, weight = model_model_layers_6_self_attn_o_proj_weight_quantized, x = input_49)[name = string("linear_45")]; - tensor hidden_states_173 = add(x = hidden_states_155, y = linear_45)[name = string("hidden_states_173")]; - fp16 var_55_promoted_13_to_fp16 = const()[name = string("op_55_promoted_13_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1324_cast_fp16 = pow(x = hidden_states_173, y = var_55_promoted_13_to_fp16)[name = string("op_1324_cast_fp16")]; + tensor var_1354_cast_fp16 = transpose(perm = var_1354_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_36")]; + tensor input_49_cast_fp16 = reshape(shape = concat_132, x = var_1354_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(356630464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358727680))))[name = string("model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor hidden_states_199_cast_fp16 = add(x = hidden_states_179_cast_fp16, y = linear_45_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; + fp16 var_54_promoted_13_to_fp16 = const()[name = string("op_54_promoted_13_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1363_cast_fp16 = pow(x = hidden_states_199_cast_fp16, y = var_54_promoted_13_to_fp16)[name = string("op_1363_cast_fp16")]; tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; - tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1324_cast_fp16)[name = string("variance_27_cast_fp16")]; - fp16 var_1327_to_fp16 = const()[name = string("op_1327_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1328_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1327_to_fp16)[name = string("op_1328_cast_fp16")]; - fp32 var_1329_epsilon_0 = const()[name = string("op_1329_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1329_cast_fp16 = rsqrt(epsilon = var_1329_epsilon_0, x = var_1328_cast_fp16)[name = string("op_1329_cast_fp16")]; - tensor hidden_states_177_cast_fp16 = mul(x = hidden_states_173, y = var_1329_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; - tensor input_51 = mul(x = model_model_layers_6_post_attention_layernorm_weight, y = hidden_states_177_cast_fp16)[name = string("input_51")]; - tensor linear_46 = linear(bias = linear_4_bias_0, weight = model_model_layers_6_mlp_gate_proj_weight_quantized, x = input_51)[name = string("linear_46")]; - tensor var_1338 = silu(x = linear_46)[name = string("op_1338")]; - tensor linear_47 = linear(bias = linear_4_bias_0, weight = model_model_layers_6_mlp_up_proj_weight_quantized, x = input_51)[name = string("linear_47")]; - tensor input_55 = mul(x = var_1338, y = linear_47)[name = string("input_55")]; - tensor linear_48 = linear(bias = linear_0_bias_0, weight = model_model_layers_6_mlp_down_proj_weight_quantized, x = input_55)[name = string("linear_48")]; - tensor hidden_states_181 = add(x = hidden_states_173, y = linear_48)[name = string("hidden_states_181")]; - fp16 var_55_promoted_14_to_fp16 = const()[name = string("op_55_promoted_14_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1351_cast_fp16 = pow(x = hidden_states_181, y = var_55_promoted_14_to_fp16)[name = string("op_1351_cast_fp16")]; + tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1363_cast_fp16)[name = string("variance_27_cast_fp16")]; + fp16 var_1366_to_fp16 = const()[name = string("op_1366_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1367_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1366_to_fp16)[name = string("op_1367_cast_fp16")]; + fp32 var_1368_epsilon_0 = const()[name = string("op_1368_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1368_cast_fp16 = rsqrt(epsilon = var_1368_epsilon_0, x = var_1367_cast_fp16)[name = string("op_1368_cast_fp16")]; + tensor hidden_states_203_cast_fp16 = mul(x = hidden_states_199_cast_fp16, y = var_1368_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358989888)))]; + tensor input_51_cast_fp16 = mul(x = model_model_layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_203_cast_fp16)[name = string("input_51_cast_fp16")]; + tensor model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(358994048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367382720))))[name = string("model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_46_cast_fp16")]; + tensor var_1380_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1380_cast_fp16")]; + tensor model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(368431360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(376820032))))[name = string("model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor input_55_cast_fp16 = mul(x = var_1380_cast_fp16, y = linear_47_cast_fp16)[name = string("input_55_cast_fp16")]; + tensor model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(377868672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386257344))))[name = string("model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor hidden_states_209_cast_fp16 = add(x = hidden_states_199_cast_fp16, y = linear_48_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; + fp16 var_54_promoted_14_to_fp16 = const()[name = string("op_54_promoted_14_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1393_cast_fp16 = pow(x = hidden_states_209_cast_fp16, y = var_54_promoted_14_to_fp16)[name = string("op_1393_cast_fp16")]; tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; - tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1351_cast_fp16)[name = string("variance_29_cast_fp16")]; - fp16 var_1354_to_fp16 = const()[name = string("op_1354_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1355_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1354_to_fp16)[name = string("op_1355_cast_fp16")]; - fp32 var_1356_epsilon_0 = const()[name = string("op_1356_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1356_cast_fp16 = rsqrt(epsilon = var_1356_epsilon_0, x = var_1355_cast_fp16)[name = string("op_1356_cast_fp16")]; - tensor hidden_states_185_cast_fp16 = mul(x = hidden_states_181, y = var_1356_cast_fp16)[name = string("hidden_states_185_cast_fp16")]; - tensor hidden_states_187 = mul(x = model_model_layers_7_input_layernorm_weight, y = hidden_states_185_cast_fp16)[name = string("hidden_states_187")]; - tensor var_1364_shape = shape(x = hidden_states_187)[name = string("op_1364_shape")]; + tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1393_cast_fp16)[name = string("variance_29_cast_fp16")]; + fp16 var_1396_to_fp16 = const()[name = string("op_1396_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1397_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1396_to_fp16)[name = string("op_1397_cast_fp16")]; + fp32 var_1398_epsilon_0 = const()[name = string("op_1398_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1398_cast_fp16 = rsqrt(epsilon = var_1398_epsilon_0, x = var_1397_cast_fp16)[name = string("op_1398_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = var_1398_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387305984)))]; + tensor hidden_states_217_cast_fp16 = mul(x = model_model_layers_7_input_layernorm_weight_to_fp16, y = hidden_states_213_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; + tensor var_1409_shape_cast_fp16 = shape(x = hidden_states_217_cast_fp16)[name = string("op_1409_shape_cast_fp16")]; int32 gather_130 = const()[name = string("gather_130"), val = int32(1)]; int32 gather_131_axis_0 = const()[name = string("gather_131_axis_0"), val = int32(0)]; int32 gather_131_batch_dims_0 = const()[name = string("gather_131_batch_dims_0"), val = int32(0)]; bool gather_131_validate_indices_0 = const()[name = string("gather_131_validate_indices_0"), val = bool(false)]; - string var_1364_shape_to_uint16_dtype_0 = const()[name = string("op_1364_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1409_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1409_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_131_to_uint16 = const()[name = string("select_131_to_uint16"), val = uint16(1)]; - tensor var_1364_shape_to_uint16 = cast(dtype = var_1364_shape_to_uint16_dtype_0, x = var_1364_shape)[name = string("cast_71")]; - uint16 gather_131_cast_uint16 = gather(axis = gather_131_axis_0, batch_dims = gather_131_batch_dims_0, indices = select_131_to_uint16, validate_indices = gather_131_validate_indices_0, x = var_1364_shape_to_uint16)[name = string("gather_131_cast_uint16")]; + tensor var_1409_shape_cast_fp16_to_uint16 = cast(dtype = var_1409_shape_cast_fp16_to_uint16_dtype_0, x = var_1409_shape_cast_fp16)[name = string("cast_71")]; + uint16 gather_131_cast_uint16 = gather(axis = gather_131_axis_0, batch_dims = gather_131_batch_dims_0, indices = select_131_to_uint16, validate_indices = gather_131_validate_indices_0, x = var_1409_shape_cast_fp16_to_uint16)[name = string("gather_131_cast_uint16")]; string gather_131_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_131_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_49 = linear(bias = linear_0_bias_0, weight = model_model_layers_7_self_attn_q_proj_weight_quantized, x = hidden_states_187)[name = string("linear_49")]; - tensor linear_50 = linear(bias = linear_1_bias_0, weight = model_model_layers_7_self_attn_k_proj_weight_quantized, x = hidden_states_187)[name = string("linear_50")]; - tensor linear_51 = linear(bias = linear_1_bias_0, weight = model_model_layers_7_self_attn_v_proj_weight_quantized, x = hidden_states_187)[name = string("linear_51")]; + tensor model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(387310144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389407360))))[name = string("model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(389669568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390193920))))[name = string("model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390259520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390783872))))[name = string("model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 32, 64])]; - tensor var_1373 = reshape(shape = concat_133x, x = linear_49)[name = string("op_1373")]; + tensor var_1418_cast_fp16 = reshape(shape = concat_133x, x = linear_49_cast_fp16)[name = string("op_1418_cast_fp16")]; tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_134x = const()[name = string("concat_134x"), val = tensor([1, -1, 8, 64])]; - tensor var_1376 = reshape(shape = concat_134x, x = linear_50)[name = string("op_1376")]; + tensor var_1421_cast_fp16 = reshape(shape = concat_134x, x = linear_50_cast_fp16)[name = string("op_1421_cast_fp16")]; tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_135x = const()[name = string("concat_135x"), val = tensor([1, -1, 8, 64])]; - tensor var_1379 = reshape(shape = concat_135x, x = linear_51)[name = string("op_1379")]; + tensor var_1424_cast_fp16 = reshape(shape = concat_135x, x = linear_51_cast_fp16)[name = string("op_1424_cast_fp16")]; tensor v_state_15_perm_0 = const()[name = string("v_state_15_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_15 = transpose(perm = q_15_perm_0, x = var_1373)[name = string("transpose_35")]; - tensor var_1383 = mul(x = q_15, y = cos_7)[name = string("op_1383")]; + tensor q_15_cast_fp16 = transpose(perm = q_15_perm_0, x = var_1418_cast_fp16)[name = string("transpose_35")]; + tensor var_1428_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1428_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_29 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15)[name = string("x1_29")]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_29 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15)[name = string("x2_29")]; - fp16 const_17_promoted = const()[name = string("const_17_promoted"), val = fp16(-0x1p+0)]; - tensor var_1394 = mul(x = x2_29, y = const_17_promoted)[name = string("op_1394")]; - bool var_1396_interleave_0 = const()[name = string("op_1396_interleave_0"), val = bool(false)]; - tensor var_1396 = concat(axis = var_48, interleave = var_1396_interleave_0, values = (var_1394, x1_29))[name = string("op_1396")]; - tensor var_1397 = mul(x = var_1396, y = sin_7)[name = string("op_1397")]; - tensor query_states_31 = add(x = var_1383, y = var_1397)[name = string("query_states_31")]; - tensor k_15 = transpose(perm = k_15_perm_0, x = var_1376)[name = string("transpose_34")]; - tensor var_1399 = mul(x = k_15, y = cos_7)[name = string("op_1399")]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1439_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1439_cast_fp16")]; + bool var_1441_interleave_0 = const()[name = string("op_1441_interleave_0"), val = bool(false)]; + tensor var_1441_cast_fp16 = concat(axis = var_48, interleave = var_1441_interleave_0, values = (var_1439_cast_fp16, x1_29_cast_fp16))[name = string("op_1441_cast_fp16")]; + tensor var_1442_cast_fp16 = mul(x = var_1441_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1442_cast_fp16")]; + tensor query_states_31_cast_fp16 = add(x = var_1428_cast_fp16, y = var_1442_cast_fp16)[name = string("query_states_31_cast_fp16")]; + tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = var_1421_cast_fp16)[name = string("transpose_34")]; + tensor var_1444_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1444_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_31 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15)[name = string("x1_31")]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_31 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15)[name = string("x2_31")]; - fp16 const_18_promoted = const()[name = string("const_18_promoted"), val = fp16(-0x1p+0)]; - tensor var_1410 = mul(x = x2_31, y = const_18_promoted)[name = string("op_1410")]; - bool var_1412_interleave_0 = const()[name = string("op_1412_interleave_0"), val = bool(false)]; - tensor var_1412 = concat(axis = var_48, interleave = var_1412_interleave_0, values = (var_1410, x1_31))[name = string("op_1412")]; - tensor var_1413 = mul(x = var_1412, y = sin_7)[name = string("op_1413")]; - tensor k_state_15 = add(x = var_1399, y = var_1413)[name = string("k_state_15")]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1455_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1455_cast_fp16")]; + bool var_1457_interleave_0 = const()[name = string("op_1457_interleave_0"), val = bool(false)]; + tensor var_1457_cast_fp16 = concat(axis = var_48, interleave = var_1457_interleave_0, values = (var_1455_cast_fp16, x1_31_cast_fp16))[name = string("op_1457_cast_fp16")]; + tensor var_1458_cast_fp16 = mul(x = var_1457_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1458_cast_fp16")]; + tensor k_state_15_cast_fp16 = add(x = var_1444_cast_fp16, y = var_1458_cast_fp16)[name = string("k_state_15_cast_fp16")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([0])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; @@ -1682,87 +1604,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_8 = slice_update(begin = concat_138, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15, x = coreml_update_state_44)[name = string("key_cache_internal_tensor_assign_8")]; - write_state(data = key_cache_internal_tensor_assign_8, input = key_cache)[name = string("coreml_update_state_46_write_state")]; + tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15_cast_fp16, x = coreml_update_state_44)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_46_write_state")]; tensor coreml_update_state_46 = read_state(input = key_cache)[name = string("coreml_update_state_46")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_15 = transpose(perm = v_state_15_perm_0, x = var_1379)[name = string("transpose_33")]; - tensor value_cache_internal_tensor_assign_8 = slice_update(begin = concat_138, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15, x = coreml_update_state_45)[name = string("value_cache_internal_tensor_assign_8")]; - write_state(data = value_cache_internal_tensor_assign_8, input = value_cache)[name = string("coreml_update_state_47_write_state")]; + tensor v_state_15_cast_fp16 = transpose(perm = v_state_15_perm_0, x = var_1424_cast_fp16)[name = string("transpose_33")]; + tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15_cast_fp16, x = coreml_update_state_45)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_47_write_state")]; tensor coreml_update_state_47 = read_state(input = value_cache)[name = string("coreml_update_state_47")]; - tensor var_1436_begin_0 = const()[name = string("op_1436_begin_0"), val = tensor([7, 0, 0, 0, 0])]; - tensor var_1436_end_0 = const()[name = string("op_1436_end_0"), val = tensor([8, 1, 8, 2048, 64])]; - tensor var_1436_end_mask_0 = const()[name = string("op_1436_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1436_squeeze_mask_0 = const()[name = string("op_1436_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1436 = slice_by_index(begin = var_1436_begin_0, end = var_1436_end_0, end_mask = var_1436_end_mask_0, squeeze_mask = var_1436_squeeze_mask_0, x = coreml_update_state_46)[name = string("op_1436")]; - tensor var_1439_begin_0 = const()[name = string("op_1439_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1439_end_mask_0 = const()[name = string("op_1439_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1439 = slice_by_index(begin = var_1439_begin_0, end = concat_11, end_mask = var_1439_end_mask_0, x = var_1436)[name = string("op_1439")]; - tensor var_1441_begin_0 = const()[name = string("op_1441_begin_0"), val = tensor([7, 0, 0, 0, 0])]; - tensor var_1441_end_0 = const()[name = string("op_1441_end_0"), val = tensor([8, 1, 8, 2048, 64])]; - tensor var_1441_end_mask_0 = const()[name = string("op_1441_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1441_squeeze_mask_0 = const()[name = string("op_1441_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1441 = slice_by_index(begin = var_1441_begin_0, end = var_1441_end_0, end_mask = var_1441_end_mask_0, squeeze_mask = var_1441_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_1441")]; - tensor var_1444_begin_0 = const()[name = string("op_1444_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1444_end_mask_0 = const()[name = string("op_1444_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1444 = slice_by_index(begin = var_1444_begin_0, end = concat_11, end_mask = var_1444_end_mask_0, x = var_1441)[name = string("op_1444")]; - tensor var_1446_shape = shape(x = var_1439)[name = string("op_1446_shape")]; + tensor var_1481_begin_0 = const()[name = string("op_1481_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1481_end_0 = const()[name = string("op_1481_end_0"), val = tensor([8, 1, 8, 2048, 64])]; + tensor var_1481_end_mask_0 = const()[name = string("op_1481_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1481_squeeze_mask_0 = const()[name = string("op_1481_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1481_cast_fp16 = slice_by_index(begin = var_1481_begin_0, end = var_1481_end_0, end_mask = var_1481_end_mask_0, squeeze_mask = var_1481_squeeze_mask_0, x = coreml_update_state_46)[name = string("op_1481_cast_fp16")]; + tensor var_1484_begin_0 = const()[name = string("op_1484_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1484_end_mask_0 = const()[name = string("op_1484_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1484_cast_fp16 = slice_by_index(begin = var_1484_begin_0, end = concat_11, end_mask = var_1484_end_mask_0, x = var_1481_cast_fp16)[name = string("op_1484_cast_fp16")]; + tensor var_1486_begin_0 = const()[name = string("op_1486_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1486_end_0 = const()[name = string("op_1486_end_0"), val = tensor([8, 1, 8, 2048, 64])]; + tensor var_1486_end_mask_0 = const()[name = string("op_1486_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1486_squeeze_mask_0 = const()[name = string("op_1486_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1486_cast_fp16 = slice_by_index(begin = var_1486_begin_0, end = var_1486_end_0, end_mask = var_1486_end_mask_0, squeeze_mask = var_1486_squeeze_mask_0, x = coreml_update_state_47)[name = string("op_1486_cast_fp16")]; + tensor var_1489_begin_0 = const()[name = string("op_1489_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1489_end_mask_0 = const()[name = string("op_1489_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1489_cast_fp16 = slice_by_index(begin = var_1489_begin_0, end = concat_11, end_mask = var_1489_end_mask_0, x = var_1486_cast_fp16)[name = string("op_1489_cast_fp16")]; + tensor var_1491_shape_cast_fp16 = shape(x = var_1484_cast_fp16)[name = string("op_1491_shape_cast_fp16")]; int32 gather_139 = const()[name = string("gather_139"), val = int32(1)]; int32 gather_140 = const()[name = string("gather_140"), val = int32(8)]; int32 gather_141_axis_0 = const()[name = string("gather_141_axis_0"), val = int32(0)]; int32 gather_141_batch_dims_0 = const()[name = string("gather_141_batch_dims_0"), val = int32(0)]; bool gather_141_validate_indices_0 = const()[name = string("gather_141_validate_indices_0"), val = bool(false)]; - string var_1446_shape_to_uint16_dtype_0 = const()[name = string("op_1446_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1491_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1491_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_141_to_uint16 = const()[name = string("select_141_to_uint16"), val = uint16(2)]; - tensor var_1446_shape_to_uint16 = cast(dtype = var_1446_shape_to_uint16_dtype_0, x = var_1446_shape)[name = string("cast_70")]; - uint16 gather_141_cast_uint16 = gather(axis = gather_141_axis_0, batch_dims = gather_141_batch_dims_0, indices = select_141_to_uint16, validate_indices = gather_141_validate_indices_0, x = var_1446_shape_to_uint16)[name = string("gather_141_cast_uint16")]; + tensor var_1491_shape_cast_fp16_to_uint16 = cast(dtype = var_1491_shape_cast_fp16_to_uint16_dtype_0, x = var_1491_shape_cast_fp16)[name = string("cast_70")]; + uint16 gather_141_cast_uint16 = gather(axis = gather_141_axis_0, batch_dims = gather_141_batch_dims_0, indices = select_141_to_uint16, validate_indices = gather_141_validate_indices_0, x = var_1491_shape_cast_fp16_to_uint16)[name = string("gather_141_cast_uint16")]; string gather_141_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_141_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_142 = const()[name = string("gather_142"), val = int32(64)]; - tensor var_1453_axes_0 = const()[name = string("op_1453_axes_0"), val = tensor([2])]; - tensor var_1453 = expand_dims(axes = var_1453_axes_0, x = var_1439)[name = string("op_1453")]; - tensor shape_157 = shape(x = var_1453)[name = string("shape_157")]; + tensor var_1498_axes_0 = const()[name = string("op_1498_axes_0"), val = tensor([2])]; + tensor var_1498_cast_fp16 = expand_dims(axes = var_1498_axes_0, x = var_1484_cast_fp16)[name = string("op_1498_cast_fp16")]; + tensor shape_157_cast_fp16 = shape(x = var_1498_cast_fp16)[name = string("shape_157_cast_fp16")]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; int32 gather_141_cast_uint16_to_int32 = cast(dtype = gather_141_cast_uint16_to_int32_dtype_0, x = gather_141_cast_uint16)[name = string("cast_69")]; - tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (gather_139, gather_140, var_60, gather_141_cast_uint16_to_int32, gather_142))[name = string("concat_146")]; - tensor real_div_14 = real_div(x = concat_146, y = shape_157)[name = string("real_div_14")]; - tensor hidden_states_191 = tile(reps = real_div_14, x = var_1453)[name = string("hidden_states_191")]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (gather_139, gather_140, var_59, gather_141_cast_uint16_to_int32, gather_142))[name = string("concat_146")]; + tensor real_div_14 = real_div(x = concat_146, y = shape_157_cast_fp16)[name = string("real_div_14")]; + tensor hidden_states_221_cast_fp16 = tile(reps = real_div_14, x = var_1498_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; tensor concat_147x = const()[name = string("concat_147x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_31 = reshape(shape = concat_147x, x = hidden_states_191)[name = string("key_states_31")]; - tensor var_1463_shape = shape(x = var_1444)[name = string("op_1463_shape")]; + tensor key_states_31_cast_fp16 = reshape(shape = concat_147x, x = hidden_states_221_cast_fp16)[name = string("key_states_31_cast_fp16")]; + tensor var_1508_shape_cast_fp16 = shape(x = var_1489_cast_fp16)[name = string("op_1508_shape_cast_fp16")]; int32 gather_143 = const()[name = string("gather_143"), val = int32(1)]; int32 gather_144 = const()[name = string("gather_144"), val = int32(8)]; int32 gather_145_axis_0 = const()[name = string("gather_145_axis_0"), val = int32(0)]; int32 gather_145_batch_dims_0 = const()[name = string("gather_145_batch_dims_0"), val = int32(0)]; bool gather_145_validate_indices_0 = const()[name = string("gather_145_validate_indices_0"), val = bool(false)]; - string var_1463_shape_to_uint16_dtype_0 = const()[name = string("op_1463_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1508_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1508_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_145_to_uint16 = const()[name = string("select_145_to_uint16"), val = uint16(2)]; - tensor var_1463_shape_to_uint16 = cast(dtype = var_1463_shape_to_uint16_dtype_0, x = var_1463_shape)[name = string("cast_68")]; - uint16 gather_145_cast_uint16 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145_to_uint16, validate_indices = gather_145_validate_indices_0, x = var_1463_shape_to_uint16)[name = string("gather_145_cast_uint16")]; + tensor var_1508_shape_cast_fp16_to_uint16 = cast(dtype = var_1508_shape_cast_fp16_to_uint16_dtype_0, x = var_1508_shape_cast_fp16)[name = string("cast_68")]; + uint16 gather_145_cast_uint16 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145_to_uint16, validate_indices = gather_145_validate_indices_0, x = var_1508_shape_cast_fp16_to_uint16)[name = string("gather_145_cast_uint16")]; string gather_145_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_145_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_146 = const()[name = string("gather_146"), val = int32(64)]; - tensor var_1470_axes_0 = const()[name = string("op_1470_axes_0"), val = tensor([2])]; - tensor var_1470 = expand_dims(axes = var_1470_axes_0, x = var_1444)[name = string("op_1470")]; - tensor shape_162 = shape(x = var_1470)[name = string("shape_162")]; + tensor var_1515_axes_0 = const()[name = string("op_1515_axes_0"), val = tensor([2])]; + tensor var_1515_cast_fp16 = expand_dims(axes = var_1515_axes_0, x = var_1489_cast_fp16)[name = string("op_1515_cast_fp16")]; + tensor shape_162_cast_fp16 = shape(x = var_1515_cast_fp16)[name = string("shape_162_cast_fp16")]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; int32 gather_145_cast_uint16_to_int32 = cast(dtype = gather_145_cast_uint16_to_int32_dtype_0, x = gather_145_cast_uint16)[name = string("cast_67")]; - tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (gather_143, gather_144, var_60, gather_145_cast_uint16_to_int32, gather_146))[name = string("concat_148")]; - tensor real_div_15 = real_div(x = concat_148, y = shape_162)[name = string("real_div_15")]; - tensor hidden_states_195 = tile(reps = real_div_15, x = var_1470)[name = string("hidden_states_195")]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (gather_143, gather_144, var_59, gather_145_cast_uint16_to_int32, gather_146))[name = string("concat_148")]; + tensor real_div_15 = real_div(x = concat_148, y = shape_162_cast_fp16)[name = string("real_div_15")]; + tensor hidden_states_225_cast_fp16 = tile(reps = real_div_15, x = var_1515_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_31 = reshape(shape = concat_149x, x = hidden_states_195)[name = string("value_states_31")]; - tensor var_1480_shape = shape(x = key_states_31)[name = string("op_1480_shape")]; + tensor value_states_31_cast_fp16 = reshape(shape = concat_149x, x = hidden_states_225_cast_fp16)[name = string("value_states_31_cast_fp16")]; + tensor var_1525_shape_cast_fp16 = shape(x = key_states_31_cast_fp16)[name = string("op_1525_shape_cast_fp16")]; int32 gather_147_axis_0 = const()[name = string("gather_147_axis_0"), val = int32(0)]; int32 gather_147_batch_dims_0 = const()[name = string("gather_147_batch_dims_0"), val = int32(0)]; bool gather_147_validate_indices_0 = const()[name = string("gather_147_validate_indices_0"), val = bool(false)]; - string var_1480_shape_to_uint16_dtype_0 = const()[name = string("op_1480_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1525_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1525_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_147_to_uint16 = const()[name = string("select_147_to_uint16"), val = uint16(2)]; - tensor var_1480_shape_to_uint16 = cast(dtype = var_1480_shape_to_uint16_dtype_0, x = var_1480_shape)[name = string("cast_66")]; - uint16 gather_147_cast_uint16 = gather(axis = gather_147_axis_0, batch_dims = gather_147_batch_dims_0, indices = select_147_to_uint16, validate_indices = gather_147_validate_indices_0, x = var_1480_shape_to_uint16)[name = string("gather_147_cast_uint16")]; + tensor var_1525_shape_cast_fp16_to_uint16 = cast(dtype = var_1525_shape_cast_fp16_to_uint16_dtype_0, x = var_1525_shape_cast_fp16)[name = string("cast_66")]; + uint16 gather_147_cast_uint16 = gather(axis = gather_147_axis_0, batch_dims = gather_147_batch_dims_0, indices = select_147_to_uint16, validate_indices = gather_147_validate_indices_0, x = var_1525_shape_cast_fp16_to_uint16)[name = string("gather_147_cast_uint16")]; string gather_147_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_147_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = int32(1)]; int32 concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = int32(1)]; @@ -1774,98 +1696,107 @@ program(1.3) tensor causal_mask_17_begin_0 = const()[name = string("causal_mask_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_17_end_mask_0 = const()[name = string("causal_mask_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_17_cast_fp16 = slice_by_index(begin = causal_mask_17_begin_0, end = concat_150, end_mask = causal_mask_17_end_mask_0, x = causal_mask)[name = string("causal_mask_17_cast_fp16")]; - tensor attn_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_17_cast_fp16, key = key_states_31, query = query_states_31, value = value_states_31)[name = string("attn_output_29_cast_fp16")]; - tensor var_1486_perm_0 = const()[name = string("op_1486_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_17_cast_fp16, key = key_states_31_cast_fp16, query = query_states_31_cast_fp16, value = value_states_31_cast_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor var_1531_perm_0 = const()[name = string("op_1531_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; int32 gather_131_cast_uint16_to_int32 = cast(dtype = gather_131_cast_uint16_to_int32_dtype_0, x = gather_131_cast_uint16)[name = string("cast_64")]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (gather_130, gather_131_cast_uint16_to_int32, var_48))[name = string("concat_151")]; - tensor var_1486 = transpose(perm = var_1486_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_32")]; - tensor input_57 = reshape(shape = concat_151, x = var_1486)[name = string("input_57")]; - tensor linear_52 = linear(bias = linear_0_bias_0, weight = model_model_layers_7_self_attn_o_proj_weight_quantized, x = input_57)[name = string("linear_52")]; - tensor hidden_states_199 = add(x = hidden_states_181, y = linear_52)[name = string("hidden_states_199")]; - fp16 var_55_promoted_15_to_fp16 = const()[name = string("op_55_promoted_15_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1495_cast_fp16 = pow(x = hidden_states_199, y = var_55_promoted_15_to_fp16)[name = string("op_1495_cast_fp16")]; + tensor var_1531_cast_fp16 = transpose(perm = var_1531_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_32")]; + tensor input_57_cast_fp16 = reshape(shape = concat_151, x = var_1531_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(390849472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392946688))))[name = string("model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = linear_52_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + fp16 var_54_promoted_15_to_fp16 = const()[name = string("op_54_promoted_15_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1540_cast_fp16 = pow(x = hidden_states_229_cast_fp16, y = var_54_promoted_15_to_fp16)[name = string("op_1540_cast_fp16")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; - tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1495_cast_fp16)[name = string("variance_31_cast_fp16")]; - fp16 var_1498_to_fp16 = const()[name = string("op_1498_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1499_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1498_to_fp16)[name = string("op_1499_cast_fp16")]; - fp32 var_1500_epsilon_0 = const()[name = string("op_1500_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1500_cast_fp16 = rsqrt(epsilon = var_1500_epsilon_0, x = var_1499_cast_fp16)[name = string("op_1500_cast_fp16")]; - tensor hidden_states_203_cast_fp16 = mul(x = hidden_states_199, y = var_1500_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; - tensor input_59 = mul(x = model_model_layers_7_post_attention_layernorm_weight, y = hidden_states_203_cast_fp16)[name = string("input_59")]; - tensor linear_53 = linear(bias = linear_4_bias_0, weight = model_model_layers_7_mlp_gate_proj_weight_quantized, x = input_59)[name = string("linear_53")]; - tensor var_1509 = silu(x = linear_53)[name = string("op_1509")]; - tensor linear_54 = linear(bias = linear_4_bias_0, weight = model_model_layers_7_mlp_up_proj_weight_quantized, x = input_59)[name = string("linear_54")]; - tensor input_63 = mul(x = var_1509, y = linear_54)[name = string("input_63")]; - tensor linear_55 = linear(bias = linear_0_bias_0, weight = model_model_layers_7_mlp_down_proj_weight_quantized, x = input_63)[name = string("linear_55")]; - tensor hidden_states_207 = add(x = hidden_states_199, y = linear_55)[name = string("hidden_states_207")]; - fp16 var_55_promoted_16_to_fp16 = const()[name = string("op_55_promoted_16_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1522_cast_fp16 = pow(x = hidden_states_207, y = var_55_promoted_16_to_fp16)[name = string("op_1522_cast_fp16")]; + tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1540_cast_fp16)[name = string("variance_31_cast_fp16")]; + fp16 var_1543_to_fp16 = const()[name = string("op_1543_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1544_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1543_to_fp16)[name = string("op_1544_cast_fp16")]; + fp32 var_1545_epsilon_0 = const()[name = string("op_1545_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1545_cast_fp16 = rsqrt(epsilon = var_1545_epsilon_0, x = var_1544_cast_fp16)[name = string("op_1545_cast_fp16")]; + tensor hidden_states_233_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = var_1545_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393208896)))]; + tensor input_59_cast_fp16 = mul(x = model_model_layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_233_cast_fp16)[name = string("input_59_cast_fp16")]; + tensor model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(393213056))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(401601728))))[name = string("model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor var_1557_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1557_cast_fp16")]; + tensor model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(402650368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411039040))))[name = string("model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor input_63_cast_fp16 = mul(x = var_1557_cast_fp16, y = linear_54_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(412087680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(420476352))))[name = string("model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = linear_55_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + fp16 var_54_promoted_16_to_fp16 = const()[name = string("op_54_promoted_16_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1570_cast_fp16 = pow(x = hidden_states_239_cast_fp16, y = var_54_promoted_16_to_fp16)[name = string("op_1570_cast_fp16")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; - tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1522_cast_fp16)[name = string("variance_33_cast_fp16")]; - fp16 var_1525_to_fp16 = const()[name = string("op_1525_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1526_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1525_to_fp16)[name = string("op_1526_cast_fp16")]; - fp32 var_1527_epsilon_0 = const()[name = string("op_1527_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1527_cast_fp16 = rsqrt(epsilon = var_1527_epsilon_0, x = var_1526_cast_fp16)[name = string("op_1527_cast_fp16")]; - tensor hidden_states_211_cast_fp16 = mul(x = hidden_states_207, y = var_1527_cast_fp16)[name = string("hidden_states_211_cast_fp16")]; - tensor hidden_states_213 = mul(x = model_model_layers_8_input_layernorm_weight, y = hidden_states_211_cast_fp16)[name = string("hidden_states_213")]; - tensor var_1535_shape = shape(x = hidden_states_213)[name = string("op_1535_shape")]; + tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1570_cast_fp16)[name = string("variance_33_cast_fp16")]; + fp16 var_1573_to_fp16 = const()[name = string("op_1573_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1574_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1573_to_fp16)[name = string("op_1574_cast_fp16")]; + fp32 var_1575_epsilon_0 = const()[name = string("op_1575_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1575_cast_fp16 = rsqrt(epsilon = var_1575_epsilon_0, x = var_1574_cast_fp16)[name = string("op_1575_cast_fp16")]; + tensor hidden_states_243_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = var_1575_cast_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421524992)))]; + tensor hidden_states_247_cast_fp16 = mul(x = model_model_layers_8_input_layernorm_weight_to_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor var_1586_shape_cast_fp16 = shape(x = hidden_states_247_cast_fp16)[name = string("op_1586_shape_cast_fp16")]; int32 gather_148 = const()[name = string("gather_148"), val = int32(1)]; int32 gather_149_axis_0 = const()[name = string("gather_149_axis_0"), val = int32(0)]; int32 gather_149_batch_dims_0 = const()[name = string("gather_149_batch_dims_0"), val = int32(0)]; bool gather_149_validate_indices_0 = const()[name = string("gather_149_validate_indices_0"), val = bool(false)]; - string var_1535_shape_to_uint16_dtype_0 = const()[name = string("op_1535_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1586_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1586_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_149_to_uint16 = const()[name = string("select_149_to_uint16"), val = uint16(1)]; - tensor var_1535_shape_to_uint16 = cast(dtype = var_1535_shape_to_uint16_dtype_0, x = var_1535_shape)[name = string("cast_63")]; - uint16 gather_149_cast_uint16 = gather(axis = gather_149_axis_0, batch_dims = gather_149_batch_dims_0, indices = select_149_to_uint16, validate_indices = gather_149_validate_indices_0, x = var_1535_shape_to_uint16)[name = string("gather_149_cast_uint16")]; + tensor var_1586_shape_cast_fp16_to_uint16 = cast(dtype = var_1586_shape_cast_fp16_to_uint16_dtype_0, x = var_1586_shape_cast_fp16)[name = string("cast_63")]; + uint16 gather_149_cast_uint16 = gather(axis = gather_149_axis_0, batch_dims = gather_149_batch_dims_0, indices = select_149_to_uint16, validate_indices = gather_149_validate_indices_0, x = var_1586_shape_cast_fp16_to_uint16)[name = string("gather_149_cast_uint16")]; string gather_149_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_149_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_56 = linear(bias = linear_0_bias_0, weight = model_model_layers_8_self_attn_q_proj_weight_quantized, x = hidden_states_213)[name = string("linear_56")]; - tensor linear_57 = linear(bias = linear_1_bias_0, weight = model_model_layers_8_self_attn_k_proj_weight_quantized, x = hidden_states_213)[name = string("linear_57")]; - tensor linear_58 = linear(bias = linear_1_bias_0, weight = model_model_layers_8_self_attn_v_proj_weight_quantized, x = hidden_states_213)[name = string("linear_58")]; + tensor model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(421529152))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423626368))))[name = string("model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(423888576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424412928))))[name = string("model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424478528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425002880))))[name = string("model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor concat_152x = const()[name = string("concat_152x"), val = tensor([1, -1, 32, 64])]; - tensor var_1544 = reshape(shape = concat_152x, x = linear_56)[name = string("op_1544")]; + tensor var_1595_cast_fp16 = reshape(shape = concat_152x, x = linear_56_cast_fp16)[name = string("op_1595_cast_fp16")]; tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_153x = const()[name = string("concat_153x"), val = tensor([1, -1, 8, 64])]; - tensor var_1547 = reshape(shape = concat_153x, x = linear_57)[name = string("op_1547")]; + tensor var_1598_cast_fp16 = reshape(shape = concat_153x, x = linear_57_cast_fp16)[name = string("op_1598_cast_fp16")]; tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 8, 64])]; - tensor var_1550 = reshape(shape = concat_154x, x = linear_58)[name = string("op_1550")]; + tensor var_1601_cast_fp16 = reshape(shape = concat_154x, x = linear_58_cast_fp16)[name = string("op_1601_cast_fp16")]; tensor v_state_17_perm_0 = const()[name = string("v_state_17_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_17 = transpose(perm = q_17_perm_0, x = var_1544)[name = string("transpose_31")]; - tensor var_1554 = mul(x = q_17, y = cos_7)[name = string("op_1554")]; + tensor q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_1595_cast_fp16)[name = string("transpose_31")]; + tensor var_1605_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1605_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_33 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17)[name = string("x1_33")]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_33 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17)[name = string("x2_33")]; - fp16 const_19_promoted = const()[name = string("const_19_promoted"), val = fp16(-0x1p+0)]; - tensor var_1565 = mul(x = x2_33, y = const_19_promoted)[name = string("op_1565")]; - bool var_1567_interleave_0 = const()[name = string("op_1567_interleave_0"), val = bool(false)]; - tensor var_1567 = concat(axis = var_48, interleave = var_1567_interleave_0, values = (var_1565, x1_33))[name = string("op_1567")]; - tensor var_1568 = mul(x = var_1567, y = sin_7)[name = string("op_1568")]; - tensor query_states_35 = add(x = var_1554, y = var_1568)[name = string("query_states_35")]; - tensor k_17 = transpose(perm = k_17_perm_0, x = var_1547)[name = string("transpose_30")]; - tensor var_1570 = mul(x = k_17, y = cos_7)[name = string("op_1570")]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1616_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1616_cast_fp16")]; + bool var_1618_interleave_0 = const()[name = string("op_1618_interleave_0"), val = bool(false)]; + tensor var_1618_cast_fp16 = concat(axis = var_48, interleave = var_1618_interleave_0, values = (var_1616_cast_fp16, x1_33_cast_fp16))[name = string("op_1618_cast_fp16")]; + tensor var_1619_cast_fp16 = mul(x = var_1618_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1619_cast_fp16")]; + tensor query_states_35_cast_fp16 = add(x = var_1605_cast_fp16, y = var_1619_cast_fp16)[name = string("query_states_35_cast_fp16")]; + tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_1598_cast_fp16)[name = string("transpose_30")]; + tensor var_1621_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1621_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_35 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17)[name = string("x1_35")]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_35 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17)[name = string("x2_35")]; - fp16 const_20_promoted = const()[name = string("const_20_promoted"), val = fp16(-0x1p+0)]; - tensor var_1581 = mul(x = x2_35, y = const_20_promoted)[name = string("op_1581")]; - bool var_1583_interleave_0 = const()[name = string("op_1583_interleave_0"), val = bool(false)]; - tensor var_1583 = concat(axis = var_48, interleave = var_1583_interleave_0, values = (var_1581, x1_35))[name = string("op_1583")]; - tensor var_1584 = mul(x = var_1583, y = sin_7)[name = string("op_1584")]; - tensor k_state_17 = add(x = var_1570, y = var_1584)[name = string("k_state_17")]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1632_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1632_cast_fp16")]; + bool var_1634_interleave_0 = const()[name = string("op_1634_interleave_0"), val = bool(false)]; + tensor var_1634_cast_fp16 = concat(axis = var_48, interleave = var_1634_interleave_0, values = (var_1632_cast_fp16, x1_35_cast_fp16))[name = string("op_1634_cast_fp16")]; + tensor var_1635_cast_fp16 = mul(x = var_1634_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1635_cast_fp16")]; + tensor k_state_17_cast_fp16 = add(x = var_1621_cast_fp16, y = var_1635_cast_fp16)[name = string("k_state_17_cast_fp16")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; @@ -1877,87 +1808,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_9 = slice_update(begin = concat_157, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17, x = coreml_update_state_46)[name = string("key_cache_internal_tensor_assign_9")]; - write_state(data = key_cache_internal_tensor_assign_9, input = key_cache)[name = string("coreml_update_state_48_write_state")]; + tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17_cast_fp16, x = coreml_update_state_46)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_48")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_17 = transpose(perm = v_state_17_perm_0, x = var_1550)[name = string("transpose_29")]; - tensor value_cache_internal_tensor_assign_9 = slice_update(begin = concat_157, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17, x = coreml_update_state_47)[name = string("value_cache_internal_tensor_assign_9")]; - write_state(data = value_cache_internal_tensor_assign_9, input = value_cache)[name = string("coreml_update_state_49_write_state")]; + tensor v_state_17_cast_fp16 = transpose(perm = v_state_17_perm_0, x = var_1601_cast_fp16)[name = string("transpose_29")]; + tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17_cast_fp16, x = coreml_update_state_47)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_49")]; - tensor var_1607_begin_0 = const()[name = string("op_1607_begin_0"), val = tensor([8, 0, 0, 0, 0])]; - tensor var_1607_end_0 = const()[name = string("op_1607_end_0"), val = tensor([9, 1, 8, 2048, 64])]; - tensor var_1607_end_mask_0 = const()[name = string("op_1607_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1607_squeeze_mask_0 = const()[name = string("op_1607_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1607 = slice_by_index(begin = var_1607_begin_0, end = var_1607_end_0, end_mask = var_1607_end_mask_0, squeeze_mask = var_1607_squeeze_mask_0, x = coreml_update_state_48)[name = string("op_1607")]; - tensor var_1610_begin_0 = const()[name = string("op_1610_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1610_end_mask_0 = const()[name = string("op_1610_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1610 = slice_by_index(begin = var_1610_begin_0, end = concat_11, end_mask = var_1610_end_mask_0, x = var_1607)[name = string("op_1610")]; - tensor var_1612_begin_0 = const()[name = string("op_1612_begin_0"), val = tensor([8, 0, 0, 0, 0])]; - tensor var_1612_end_0 = const()[name = string("op_1612_end_0"), val = tensor([9, 1, 8, 2048, 64])]; - tensor var_1612_end_mask_0 = const()[name = string("op_1612_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1612_squeeze_mask_0 = const()[name = string("op_1612_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1612 = slice_by_index(begin = var_1612_begin_0, end = var_1612_end_0, end_mask = var_1612_end_mask_0, squeeze_mask = var_1612_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_1612")]; - tensor var_1615_begin_0 = const()[name = string("op_1615_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1615_end_mask_0 = const()[name = string("op_1615_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1615 = slice_by_index(begin = var_1615_begin_0, end = concat_11, end_mask = var_1615_end_mask_0, x = var_1612)[name = string("op_1615")]; - tensor var_1617_shape = shape(x = var_1610)[name = string("op_1617_shape")]; + tensor var_1658_begin_0 = const()[name = string("op_1658_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1658_end_0 = const()[name = string("op_1658_end_0"), val = tensor([9, 1, 8, 2048, 64])]; + tensor var_1658_end_mask_0 = const()[name = string("op_1658_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1658_squeeze_mask_0 = const()[name = string("op_1658_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1658_cast_fp16 = slice_by_index(begin = var_1658_begin_0, end = var_1658_end_0, end_mask = var_1658_end_mask_0, squeeze_mask = var_1658_squeeze_mask_0, x = coreml_update_state_48)[name = string("op_1658_cast_fp16")]; + tensor var_1661_begin_0 = const()[name = string("op_1661_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1661_end_mask_0 = const()[name = string("op_1661_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1661_cast_fp16 = slice_by_index(begin = var_1661_begin_0, end = concat_11, end_mask = var_1661_end_mask_0, x = var_1658_cast_fp16)[name = string("op_1661_cast_fp16")]; + tensor var_1663_begin_0 = const()[name = string("op_1663_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1663_end_0 = const()[name = string("op_1663_end_0"), val = tensor([9, 1, 8, 2048, 64])]; + tensor var_1663_end_mask_0 = const()[name = string("op_1663_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1663_squeeze_mask_0 = const()[name = string("op_1663_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1663_cast_fp16 = slice_by_index(begin = var_1663_begin_0, end = var_1663_end_0, end_mask = var_1663_end_mask_0, squeeze_mask = var_1663_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_1663_cast_fp16")]; + tensor var_1666_begin_0 = const()[name = string("op_1666_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1666_end_mask_0 = const()[name = string("op_1666_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1666_cast_fp16 = slice_by_index(begin = var_1666_begin_0, end = concat_11, end_mask = var_1666_end_mask_0, x = var_1663_cast_fp16)[name = string("op_1666_cast_fp16")]; + tensor var_1668_shape_cast_fp16 = shape(x = var_1661_cast_fp16)[name = string("op_1668_shape_cast_fp16")]; int32 gather_157 = const()[name = string("gather_157"), val = int32(1)]; int32 gather_158 = const()[name = string("gather_158"), val = int32(8)]; int32 gather_159_axis_0 = const()[name = string("gather_159_axis_0"), val = int32(0)]; int32 gather_159_batch_dims_0 = const()[name = string("gather_159_batch_dims_0"), val = int32(0)]; bool gather_159_validate_indices_0 = const()[name = string("gather_159_validate_indices_0"), val = bool(false)]; - string var_1617_shape_to_uint16_dtype_0 = const()[name = string("op_1617_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1668_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1668_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_159_to_uint16 = const()[name = string("select_159_to_uint16"), val = uint16(2)]; - tensor var_1617_shape_to_uint16 = cast(dtype = var_1617_shape_to_uint16_dtype_0, x = var_1617_shape)[name = string("cast_62")]; - uint16 gather_159_cast_uint16 = gather(axis = gather_159_axis_0, batch_dims = gather_159_batch_dims_0, indices = select_159_to_uint16, validate_indices = gather_159_validate_indices_0, x = var_1617_shape_to_uint16)[name = string("gather_159_cast_uint16")]; + tensor var_1668_shape_cast_fp16_to_uint16 = cast(dtype = var_1668_shape_cast_fp16_to_uint16_dtype_0, x = var_1668_shape_cast_fp16)[name = string("cast_62")]; + uint16 gather_159_cast_uint16 = gather(axis = gather_159_axis_0, batch_dims = gather_159_batch_dims_0, indices = select_159_to_uint16, validate_indices = gather_159_validate_indices_0, x = var_1668_shape_cast_fp16_to_uint16)[name = string("gather_159_cast_uint16")]; string gather_159_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_159_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_160 = const()[name = string("gather_160"), val = int32(64)]; - tensor var_1624_axes_0 = const()[name = string("op_1624_axes_0"), val = tensor([2])]; - tensor var_1624 = expand_dims(axes = var_1624_axes_0, x = var_1610)[name = string("op_1624")]; - tensor shape_177 = shape(x = var_1624)[name = string("shape_177")]; + tensor var_1675_axes_0 = const()[name = string("op_1675_axes_0"), val = tensor([2])]; + tensor var_1675_cast_fp16 = expand_dims(axes = var_1675_axes_0, x = var_1661_cast_fp16)[name = string("op_1675_cast_fp16")]; + tensor shape_177_cast_fp16 = shape(x = var_1675_cast_fp16)[name = string("shape_177_cast_fp16")]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; int32 gather_159_cast_uint16_to_int32 = cast(dtype = gather_159_cast_uint16_to_int32_dtype_0, x = gather_159_cast_uint16)[name = string("cast_61")]; - tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (gather_157, gather_158, var_60, gather_159_cast_uint16_to_int32, gather_160))[name = string("concat_165")]; - tensor real_div_16 = real_div(x = concat_165, y = shape_177)[name = string("real_div_16")]; - tensor hidden_states_217 = tile(reps = real_div_16, x = var_1624)[name = string("hidden_states_217")]; + tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (gather_157, gather_158, var_59, gather_159_cast_uint16_to_int32, gather_160))[name = string("concat_165")]; + tensor real_div_16 = real_div(x = concat_165, y = shape_177_cast_fp16)[name = string("real_div_16")]; + tensor hidden_states_251_cast_fp16 = tile(reps = real_div_16, x = var_1675_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_35 = reshape(shape = concat_166x, x = hidden_states_217)[name = string("key_states_35")]; - tensor var_1634_shape = shape(x = var_1615)[name = string("op_1634_shape")]; + tensor key_states_35_cast_fp16 = reshape(shape = concat_166x, x = hidden_states_251_cast_fp16)[name = string("key_states_35_cast_fp16")]; + tensor var_1685_shape_cast_fp16 = shape(x = var_1666_cast_fp16)[name = string("op_1685_shape_cast_fp16")]; int32 gather_161 = const()[name = string("gather_161"), val = int32(1)]; int32 gather_162 = const()[name = string("gather_162"), val = int32(8)]; int32 gather_163_axis_0 = const()[name = string("gather_163_axis_0"), val = int32(0)]; int32 gather_163_batch_dims_0 = const()[name = string("gather_163_batch_dims_0"), val = int32(0)]; bool gather_163_validate_indices_0 = const()[name = string("gather_163_validate_indices_0"), val = bool(false)]; - string var_1634_shape_to_uint16_dtype_0 = const()[name = string("op_1634_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1685_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1685_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_163_to_uint16 = const()[name = string("select_163_to_uint16"), val = uint16(2)]; - tensor var_1634_shape_to_uint16 = cast(dtype = var_1634_shape_to_uint16_dtype_0, x = var_1634_shape)[name = string("cast_60")]; - uint16 gather_163_cast_uint16 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163_to_uint16, validate_indices = gather_163_validate_indices_0, x = var_1634_shape_to_uint16)[name = string("gather_163_cast_uint16")]; + tensor var_1685_shape_cast_fp16_to_uint16 = cast(dtype = var_1685_shape_cast_fp16_to_uint16_dtype_0, x = var_1685_shape_cast_fp16)[name = string("cast_60")]; + uint16 gather_163_cast_uint16 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163_to_uint16, validate_indices = gather_163_validate_indices_0, x = var_1685_shape_cast_fp16_to_uint16)[name = string("gather_163_cast_uint16")]; string gather_163_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_163_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_164 = const()[name = string("gather_164"), val = int32(64)]; - tensor var_1641_axes_0 = const()[name = string("op_1641_axes_0"), val = tensor([2])]; - tensor var_1641 = expand_dims(axes = var_1641_axes_0, x = var_1615)[name = string("op_1641")]; - tensor shape_182 = shape(x = var_1641)[name = string("shape_182")]; + tensor var_1692_axes_0 = const()[name = string("op_1692_axes_0"), val = tensor([2])]; + tensor var_1692_cast_fp16 = expand_dims(axes = var_1692_axes_0, x = var_1666_cast_fp16)[name = string("op_1692_cast_fp16")]; + tensor shape_182_cast_fp16 = shape(x = var_1692_cast_fp16)[name = string("shape_182_cast_fp16")]; int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; int32 gather_163_cast_uint16_to_int32 = cast(dtype = gather_163_cast_uint16_to_int32_dtype_0, x = gather_163_cast_uint16)[name = string("cast_59")]; - tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (gather_161, gather_162, var_60, gather_163_cast_uint16_to_int32, gather_164))[name = string("concat_167")]; - tensor real_div_17 = real_div(x = concat_167, y = shape_182)[name = string("real_div_17")]; - tensor hidden_states_221 = tile(reps = real_div_17, x = var_1641)[name = string("hidden_states_221")]; + tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (gather_161, gather_162, var_59, gather_163_cast_uint16_to_int32, gather_164))[name = string("concat_167")]; + tensor real_div_17 = real_div(x = concat_167, y = shape_182_cast_fp16)[name = string("real_div_17")]; + tensor hidden_states_255_cast_fp16 = tile(reps = real_div_17, x = var_1692_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_35 = reshape(shape = concat_168x, x = hidden_states_221)[name = string("value_states_35")]; - tensor var_1651_shape = shape(x = key_states_35)[name = string("op_1651_shape")]; + tensor value_states_35_cast_fp16 = reshape(shape = concat_168x, x = hidden_states_255_cast_fp16)[name = string("value_states_35_cast_fp16")]; + tensor var_1702_shape_cast_fp16 = shape(x = key_states_35_cast_fp16)[name = string("op_1702_shape_cast_fp16")]; int32 gather_165_axis_0 = const()[name = string("gather_165_axis_0"), val = int32(0)]; int32 gather_165_batch_dims_0 = const()[name = string("gather_165_batch_dims_0"), val = int32(0)]; bool gather_165_validate_indices_0 = const()[name = string("gather_165_validate_indices_0"), val = bool(false)]; - string var_1651_shape_to_uint16_dtype_0 = const()[name = string("op_1651_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1702_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1702_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_165_to_uint16 = const()[name = string("select_165_to_uint16"), val = uint16(2)]; - tensor var_1651_shape_to_uint16 = cast(dtype = var_1651_shape_to_uint16_dtype_0, x = var_1651_shape)[name = string("cast_58")]; - uint16 gather_165_cast_uint16 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165_to_uint16, validate_indices = gather_165_validate_indices_0, x = var_1651_shape_to_uint16)[name = string("gather_165_cast_uint16")]; + tensor var_1702_shape_cast_fp16_to_uint16 = cast(dtype = var_1702_shape_cast_fp16_to_uint16_dtype_0, x = var_1702_shape_cast_fp16)[name = string("cast_58")]; + uint16 gather_165_cast_uint16 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165_to_uint16, validate_indices = gather_165_validate_indices_0, x = var_1702_shape_cast_fp16_to_uint16)[name = string("gather_165_cast_uint16")]; string gather_165_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_165_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_169_values0_0 = const()[name = string("concat_169_values0_0"), val = int32(1)]; int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(1)]; @@ -1969,98 +1900,107 @@ program(1.3) tensor causal_mask_19_begin_0 = const()[name = string("causal_mask_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_19_end_mask_0 = const()[name = string("causal_mask_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_19_cast_fp16 = slice_by_index(begin = causal_mask_19_begin_0, end = concat_169, end_mask = causal_mask_19_end_mask_0, x = causal_mask)[name = string("causal_mask_19_cast_fp16")]; - tensor attn_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_19_cast_fp16, key = key_states_35, query = query_states_35, value = value_states_35)[name = string("attn_output_33_cast_fp16")]; - tensor var_1657_perm_0 = const()[name = string("op_1657_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_19_cast_fp16, key = key_states_35_cast_fp16, query = query_states_35_cast_fp16, value = value_states_35_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_1708_perm_0 = const()[name = string("op_1708_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; int32 gather_149_cast_uint16_to_int32 = cast(dtype = gather_149_cast_uint16_to_int32_dtype_0, x = gather_149_cast_uint16)[name = string("cast_56")]; tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (gather_148, gather_149_cast_uint16_to_int32, var_48))[name = string("concat_170")]; - tensor var_1657 = transpose(perm = var_1657_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_28")]; - tensor input_65 = reshape(shape = concat_170, x = var_1657)[name = string("input_65")]; - tensor linear_59 = linear(bias = linear_0_bias_0, weight = model_model_layers_8_self_attn_o_proj_weight_quantized, x = input_65)[name = string("linear_59")]; - tensor hidden_states_225 = add(x = hidden_states_207, y = linear_59)[name = string("hidden_states_225")]; - fp16 var_55_promoted_17_to_fp16 = const()[name = string("op_55_promoted_17_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1666_cast_fp16 = pow(x = hidden_states_225, y = var_55_promoted_17_to_fp16)[name = string("op_1666_cast_fp16")]; + tensor var_1708_cast_fp16 = transpose(perm = var_1708_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_28")]; + tensor input_65_cast_fp16 = reshape(shape = concat_170, x = var_1708_cast_fp16)[name = string("input_65_cast_fp16")]; + tensor model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(425068480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427165696))))[name = string("model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_59_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized, x = input_65_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor hidden_states_259_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = linear_59_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; + fp16 var_54_promoted_17_to_fp16 = const()[name = string("op_54_promoted_17_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1717_cast_fp16 = pow(x = hidden_states_259_cast_fp16, y = var_54_promoted_17_to_fp16)[name = string("op_1717_cast_fp16")]; tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; - tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1666_cast_fp16)[name = string("variance_35_cast_fp16")]; - fp16 var_1669_to_fp16 = const()[name = string("op_1669_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1670_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1669_to_fp16)[name = string("op_1670_cast_fp16")]; - fp32 var_1671_epsilon_0 = const()[name = string("op_1671_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1671_cast_fp16 = rsqrt(epsilon = var_1671_epsilon_0, x = var_1670_cast_fp16)[name = string("op_1671_cast_fp16")]; - tensor hidden_states_229_cast_fp16 = mul(x = hidden_states_225, y = var_1671_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; - tensor input_67 = mul(x = model_model_layers_8_post_attention_layernorm_weight, y = hidden_states_229_cast_fp16)[name = string("input_67")]; - tensor linear_60 = linear(bias = linear_4_bias_0, weight = model_model_layers_8_mlp_gate_proj_weight_quantized, x = input_67)[name = string("linear_60")]; - tensor var_1680 = silu(x = linear_60)[name = string("op_1680")]; - tensor linear_61 = linear(bias = linear_4_bias_0, weight = model_model_layers_8_mlp_up_proj_weight_quantized, x = input_67)[name = string("linear_61")]; - tensor input_71 = mul(x = var_1680, y = linear_61)[name = string("input_71")]; - tensor linear_62 = linear(bias = linear_0_bias_0, weight = model_model_layers_8_mlp_down_proj_weight_quantized, x = input_71)[name = string("linear_62")]; - tensor hidden_states_233 = add(x = hidden_states_225, y = linear_62)[name = string("hidden_states_233")]; - fp16 var_55_promoted_18_to_fp16 = const()[name = string("op_55_promoted_18_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1693_cast_fp16 = pow(x = hidden_states_233, y = var_55_promoted_18_to_fp16)[name = string("op_1693_cast_fp16")]; + tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1717_cast_fp16)[name = string("variance_35_cast_fp16")]; + fp16 var_1720_to_fp16 = const()[name = string("op_1720_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1721_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1720_to_fp16)[name = string("op_1721_cast_fp16")]; + fp32 var_1722_epsilon_0 = const()[name = string("op_1722_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1722_cast_fp16 = rsqrt(epsilon = var_1722_epsilon_0, x = var_1721_cast_fp16)[name = string("op_1722_cast_fp16")]; + tensor hidden_states_263_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = var_1722_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427427904)))]; + tensor input_67_cast_fp16 = mul(x = model_model_layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_263_cast_fp16)[name = string("input_67_cast_fp16")]; + tensor model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(427432064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(435820736))))[name = string("model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1734_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1734_cast_fp16")]; + tensor model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(436869376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445258048))))[name = string("model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_1734_cast_fp16, y = linear_61_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(446306688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(454695360))))[name = string("model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor hidden_states_269_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = linear_62_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; + fp16 var_54_promoted_18_to_fp16 = const()[name = string("op_54_promoted_18_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1747_cast_fp16 = pow(x = hidden_states_269_cast_fp16, y = var_54_promoted_18_to_fp16)[name = string("op_1747_cast_fp16")]; tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; - tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1693_cast_fp16)[name = string("variance_37_cast_fp16")]; - fp16 var_1696_to_fp16 = const()[name = string("op_1696_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1697_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1696_to_fp16)[name = string("op_1697_cast_fp16")]; - fp32 var_1698_epsilon_0 = const()[name = string("op_1698_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1698_cast_fp16 = rsqrt(epsilon = var_1698_epsilon_0, x = var_1697_cast_fp16)[name = string("op_1698_cast_fp16")]; - tensor hidden_states_237_cast_fp16 = mul(x = hidden_states_233, y = var_1698_cast_fp16)[name = string("hidden_states_237_cast_fp16")]; - tensor hidden_states_239 = mul(x = model_model_layers_9_input_layernorm_weight, y = hidden_states_237_cast_fp16)[name = string("hidden_states_239")]; - tensor var_1706_shape = shape(x = hidden_states_239)[name = string("op_1706_shape")]; + tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1747_cast_fp16)[name = string("variance_37_cast_fp16")]; + fp16 var_1750_to_fp16 = const()[name = string("op_1750_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1751_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1750_to_fp16)[name = string("op_1751_cast_fp16")]; + fp32 var_1752_epsilon_0 = const()[name = string("op_1752_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1752_cast_fp16 = rsqrt(epsilon = var_1752_epsilon_0, x = var_1751_cast_fp16)[name = string("op_1752_cast_fp16")]; + tensor hidden_states_273_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = var_1752_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; + tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455744000)))]; + tensor hidden_states_277_cast_fp16 = mul(x = model_model_layers_9_input_layernorm_weight_to_fp16, y = hidden_states_273_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + tensor var_1763_shape_cast_fp16 = shape(x = hidden_states_277_cast_fp16)[name = string("op_1763_shape_cast_fp16")]; int32 gather_166 = const()[name = string("gather_166"), val = int32(1)]; int32 gather_167_axis_0 = const()[name = string("gather_167_axis_0"), val = int32(0)]; int32 gather_167_batch_dims_0 = const()[name = string("gather_167_batch_dims_0"), val = int32(0)]; bool gather_167_validate_indices_0 = const()[name = string("gather_167_validate_indices_0"), val = bool(false)]; - string var_1706_shape_to_uint16_dtype_0 = const()[name = string("op_1706_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1763_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1763_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_167_to_uint16 = const()[name = string("select_167_to_uint16"), val = uint16(1)]; - tensor var_1706_shape_to_uint16 = cast(dtype = var_1706_shape_to_uint16_dtype_0, x = var_1706_shape)[name = string("cast_55")]; - uint16 gather_167_cast_uint16 = gather(axis = gather_167_axis_0, batch_dims = gather_167_batch_dims_0, indices = select_167_to_uint16, validate_indices = gather_167_validate_indices_0, x = var_1706_shape_to_uint16)[name = string("gather_167_cast_uint16")]; + tensor var_1763_shape_cast_fp16_to_uint16 = cast(dtype = var_1763_shape_cast_fp16_to_uint16_dtype_0, x = var_1763_shape_cast_fp16)[name = string("cast_55")]; + uint16 gather_167_cast_uint16 = gather(axis = gather_167_axis_0, batch_dims = gather_167_batch_dims_0, indices = select_167_to_uint16, validate_indices = gather_167_validate_indices_0, x = var_1763_shape_cast_fp16_to_uint16)[name = string("gather_167_cast_uint16")]; string gather_167_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_167_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_63 = linear(bias = linear_0_bias_0, weight = model_model_layers_9_self_attn_q_proj_weight_quantized, x = hidden_states_239)[name = string("linear_63")]; - tensor linear_64 = linear(bias = linear_1_bias_0, weight = model_model_layers_9_self_attn_k_proj_weight_quantized, x = hidden_states_239)[name = string("linear_64")]; - tensor linear_65 = linear(bias = linear_1_bias_0, weight = model_model_layers_9_self_attn_v_proj_weight_quantized, x = hidden_states_239)[name = string("linear_65")]; + tensor model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455748160))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(457845376))))[name = string("model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458107584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458631936))))[name = string("model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(458697536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459221888))))[name = string("model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 32, 64])]; - tensor var_1715 = reshape(shape = concat_171x, x = linear_63)[name = string("op_1715")]; + tensor var_1772_cast_fp16 = reshape(shape = concat_171x, x = linear_63_cast_fp16)[name = string("op_1772_cast_fp16")]; tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_172x = const()[name = string("concat_172x"), val = tensor([1, -1, 8, 64])]; - tensor var_1718 = reshape(shape = concat_172x, x = linear_64)[name = string("op_1718")]; + tensor var_1775_cast_fp16 = reshape(shape = concat_172x, x = linear_64_cast_fp16)[name = string("op_1775_cast_fp16")]; tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_173x = const()[name = string("concat_173x"), val = tensor([1, -1, 8, 64])]; - tensor var_1721 = reshape(shape = concat_173x, x = linear_65)[name = string("op_1721")]; + tensor var_1778_cast_fp16 = reshape(shape = concat_173x, x = linear_65_cast_fp16)[name = string("op_1778_cast_fp16")]; tensor v_state_19_perm_0 = const()[name = string("v_state_19_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_19 = transpose(perm = q_19_perm_0, x = var_1715)[name = string("transpose_27")]; - tensor var_1725 = mul(x = q_19, y = cos_7)[name = string("op_1725")]; + tensor q_19_cast_fp16 = transpose(perm = q_19_perm_0, x = var_1772_cast_fp16)[name = string("transpose_27")]; + tensor var_1782_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1782_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_37 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19)[name = string("x1_37")]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_37 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19)[name = string("x2_37")]; - fp16 const_21_promoted = const()[name = string("const_21_promoted"), val = fp16(-0x1p+0)]; - tensor var_1736 = mul(x = x2_37, y = const_21_promoted)[name = string("op_1736")]; - bool var_1738_interleave_0 = const()[name = string("op_1738_interleave_0"), val = bool(false)]; - tensor var_1738 = concat(axis = var_48, interleave = var_1738_interleave_0, values = (var_1736, x1_37))[name = string("op_1738")]; - tensor var_1739 = mul(x = var_1738, y = sin_7)[name = string("op_1739")]; - tensor query_states_39 = add(x = var_1725, y = var_1739)[name = string("query_states_39")]; - tensor k_19 = transpose(perm = k_19_perm_0, x = var_1718)[name = string("transpose_26")]; - tensor var_1741 = mul(x = k_19, y = cos_7)[name = string("op_1741")]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1793_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1793_cast_fp16")]; + bool var_1795_interleave_0 = const()[name = string("op_1795_interleave_0"), val = bool(false)]; + tensor var_1795_cast_fp16 = concat(axis = var_48, interleave = var_1795_interleave_0, values = (var_1793_cast_fp16, x1_37_cast_fp16))[name = string("op_1795_cast_fp16")]; + tensor var_1796_cast_fp16 = mul(x = var_1795_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1796_cast_fp16")]; + tensor query_states_39_cast_fp16 = add(x = var_1782_cast_fp16, y = var_1796_cast_fp16)[name = string("query_states_39_cast_fp16")]; + tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = var_1775_cast_fp16)[name = string("transpose_26")]; + tensor var_1798_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1798_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_39 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19)[name = string("x1_39")]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_39 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19)[name = string("x2_39")]; - fp16 const_22_promoted = const()[name = string("const_22_promoted"), val = fp16(-0x1p+0)]; - tensor var_1752 = mul(x = x2_39, y = const_22_promoted)[name = string("op_1752")]; - bool var_1754_interleave_0 = const()[name = string("op_1754_interleave_0"), val = bool(false)]; - tensor var_1754 = concat(axis = var_48, interleave = var_1754_interleave_0, values = (var_1752, x1_39))[name = string("op_1754")]; - tensor var_1755 = mul(x = var_1754, y = sin_7)[name = string("op_1755")]; - tensor k_state_19 = add(x = var_1741, y = var_1755)[name = string("k_state_19")]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1809_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1809_cast_fp16")]; + bool var_1811_interleave_0 = const()[name = string("op_1811_interleave_0"), val = bool(false)]; + tensor var_1811_cast_fp16 = concat(axis = var_48, interleave = var_1811_interleave_0, values = (var_1809_cast_fp16, x1_39_cast_fp16))[name = string("op_1811_cast_fp16")]; + tensor var_1812_cast_fp16 = mul(x = var_1811_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1812_cast_fp16")]; + tensor k_state_19_cast_fp16 = add(x = var_1798_cast_fp16, y = var_1812_cast_fp16)[name = string("k_state_19_cast_fp16")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([0])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; @@ -2072,87 +2012,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_10 = slice_update(begin = concat_176, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_10")]; - write_state(data = key_cache_internal_tensor_assign_10, input = key_cache)[name = string("coreml_update_state_50_write_state")]; + tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_50")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_19 = transpose(perm = v_state_19_perm_0, x = var_1721)[name = string("transpose_25")]; - tensor value_cache_internal_tensor_assign_10 = slice_update(begin = concat_176, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_10")]; - write_state(data = value_cache_internal_tensor_assign_10, input = value_cache)[name = string("coreml_update_state_51_write_state")]; + tensor v_state_19_cast_fp16 = transpose(perm = v_state_19_perm_0, x = var_1778_cast_fp16)[name = string("transpose_25")]; + tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_51")]; - tensor var_1778_begin_0 = const()[name = string("op_1778_begin_0"), val = tensor([9, 0, 0, 0, 0])]; - tensor var_1778_end_0 = const()[name = string("op_1778_end_0"), val = tensor([10, 1, 8, 2048, 64])]; - tensor var_1778_end_mask_0 = const()[name = string("op_1778_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1778_squeeze_mask_0 = const()[name = string("op_1778_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1778 = slice_by_index(begin = var_1778_begin_0, end = var_1778_end_0, end_mask = var_1778_end_mask_0, squeeze_mask = var_1778_squeeze_mask_0, x = coreml_update_state_50)[name = string("op_1778")]; - tensor var_1781_begin_0 = const()[name = string("op_1781_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1781_end_mask_0 = const()[name = string("op_1781_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1781 = slice_by_index(begin = var_1781_begin_0, end = concat_11, end_mask = var_1781_end_mask_0, x = var_1778)[name = string("op_1781")]; - tensor var_1783_begin_0 = const()[name = string("op_1783_begin_0"), val = tensor([9, 0, 0, 0, 0])]; - tensor var_1783_end_0 = const()[name = string("op_1783_end_0"), val = tensor([10, 1, 8, 2048, 64])]; - tensor var_1783_end_mask_0 = const()[name = string("op_1783_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1783_squeeze_mask_0 = const()[name = string("op_1783_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1783 = slice_by_index(begin = var_1783_begin_0, end = var_1783_end_0, end_mask = var_1783_end_mask_0, squeeze_mask = var_1783_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_1783")]; - tensor var_1786_begin_0 = const()[name = string("op_1786_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1786_end_mask_0 = const()[name = string("op_1786_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1786 = slice_by_index(begin = var_1786_begin_0, end = concat_11, end_mask = var_1786_end_mask_0, x = var_1783)[name = string("op_1786")]; - tensor var_1788_shape = shape(x = var_1781)[name = string("op_1788_shape")]; + tensor var_1835_begin_0 = const()[name = string("op_1835_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1835_end_0 = const()[name = string("op_1835_end_0"), val = tensor([10, 1, 8, 2048, 64])]; + tensor var_1835_end_mask_0 = const()[name = string("op_1835_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1835_squeeze_mask_0 = const()[name = string("op_1835_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1835_cast_fp16 = slice_by_index(begin = var_1835_begin_0, end = var_1835_end_0, end_mask = var_1835_end_mask_0, squeeze_mask = var_1835_squeeze_mask_0, x = coreml_update_state_50)[name = string("op_1835_cast_fp16")]; + tensor var_1838_begin_0 = const()[name = string("op_1838_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1838_end_mask_0 = const()[name = string("op_1838_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1838_cast_fp16 = slice_by_index(begin = var_1838_begin_0, end = concat_11, end_mask = var_1838_end_mask_0, x = var_1835_cast_fp16)[name = string("op_1838_cast_fp16")]; + tensor var_1840_begin_0 = const()[name = string("op_1840_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1840_end_0 = const()[name = string("op_1840_end_0"), val = tensor([10, 1, 8, 2048, 64])]; + tensor var_1840_end_mask_0 = const()[name = string("op_1840_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1840_squeeze_mask_0 = const()[name = string("op_1840_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1840_cast_fp16 = slice_by_index(begin = var_1840_begin_0, end = var_1840_end_0, end_mask = var_1840_end_mask_0, squeeze_mask = var_1840_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_1840_cast_fp16")]; + tensor var_1843_begin_0 = const()[name = string("op_1843_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1843_end_mask_0 = const()[name = string("op_1843_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1843_cast_fp16 = slice_by_index(begin = var_1843_begin_0, end = concat_11, end_mask = var_1843_end_mask_0, x = var_1840_cast_fp16)[name = string("op_1843_cast_fp16")]; + tensor var_1845_shape_cast_fp16 = shape(x = var_1838_cast_fp16)[name = string("op_1845_shape_cast_fp16")]; int32 gather_175 = const()[name = string("gather_175"), val = int32(1)]; int32 gather_176 = const()[name = string("gather_176"), val = int32(8)]; int32 gather_177_axis_0 = const()[name = string("gather_177_axis_0"), val = int32(0)]; int32 gather_177_batch_dims_0 = const()[name = string("gather_177_batch_dims_0"), val = int32(0)]; bool gather_177_validate_indices_0 = const()[name = string("gather_177_validate_indices_0"), val = bool(false)]; - string var_1788_shape_to_uint16_dtype_0 = const()[name = string("op_1788_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1845_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1845_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_177_to_uint16 = const()[name = string("select_177_to_uint16"), val = uint16(2)]; - tensor var_1788_shape_to_uint16 = cast(dtype = var_1788_shape_to_uint16_dtype_0, x = var_1788_shape)[name = string("cast_54")]; - uint16 gather_177_cast_uint16 = gather(axis = gather_177_axis_0, batch_dims = gather_177_batch_dims_0, indices = select_177_to_uint16, validate_indices = gather_177_validate_indices_0, x = var_1788_shape_to_uint16)[name = string("gather_177_cast_uint16")]; + tensor var_1845_shape_cast_fp16_to_uint16 = cast(dtype = var_1845_shape_cast_fp16_to_uint16_dtype_0, x = var_1845_shape_cast_fp16)[name = string("cast_54")]; + uint16 gather_177_cast_uint16 = gather(axis = gather_177_axis_0, batch_dims = gather_177_batch_dims_0, indices = select_177_to_uint16, validate_indices = gather_177_validate_indices_0, x = var_1845_shape_cast_fp16_to_uint16)[name = string("gather_177_cast_uint16")]; string gather_177_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_177_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_178 = const()[name = string("gather_178"), val = int32(64)]; - tensor var_1795_axes_0 = const()[name = string("op_1795_axes_0"), val = tensor([2])]; - tensor var_1795 = expand_dims(axes = var_1795_axes_0, x = var_1781)[name = string("op_1795")]; - tensor shape_197 = shape(x = var_1795)[name = string("shape_197")]; + tensor var_1852_axes_0 = const()[name = string("op_1852_axes_0"), val = tensor([2])]; + tensor var_1852_cast_fp16 = expand_dims(axes = var_1852_axes_0, x = var_1838_cast_fp16)[name = string("op_1852_cast_fp16")]; + tensor shape_197_cast_fp16 = shape(x = var_1852_cast_fp16)[name = string("shape_197_cast_fp16")]; int32 concat_184_axis_0 = const()[name = string("concat_184_axis_0"), val = int32(0)]; bool concat_184_interleave_0 = const()[name = string("concat_184_interleave_0"), val = bool(false)]; int32 gather_177_cast_uint16_to_int32 = cast(dtype = gather_177_cast_uint16_to_int32_dtype_0, x = gather_177_cast_uint16)[name = string("cast_53")]; - tensor concat_184 = concat(axis = concat_184_axis_0, interleave = concat_184_interleave_0, values = (gather_175, gather_176, var_60, gather_177_cast_uint16_to_int32, gather_178))[name = string("concat_184")]; - tensor real_div_18 = real_div(x = concat_184, y = shape_197)[name = string("real_div_18")]; - tensor hidden_states_243 = tile(reps = real_div_18, x = var_1795)[name = string("hidden_states_243")]; + tensor concat_184 = concat(axis = concat_184_axis_0, interleave = concat_184_interleave_0, values = (gather_175, gather_176, var_59, gather_177_cast_uint16_to_int32, gather_178))[name = string("concat_184")]; + tensor real_div_18 = real_div(x = concat_184, y = shape_197_cast_fp16)[name = string("real_div_18")]; + tensor hidden_states_281_cast_fp16 = tile(reps = real_div_18, x = var_1852_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; tensor concat_185x = const()[name = string("concat_185x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_39 = reshape(shape = concat_185x, x = hidden_states_243)[name = string("key_states_39")]; - tensor var_1805_shape = shape(x = var_1786)[name = string("op_1805_shape")]; + tensor key_states_39_cast_fp16 = reshape(shape = concat_185x, x = hidden_states_281_cast_fp16)[name = string("key_states_39_cast_fp16")]; + tensor var_1862_shape_cast_fp16 = shape(x = var_1843_cast_fp16)[name = string("op_1862_shape_cast_fp16")]; int32 gather_179 = const()[name = string("gather_179"), val = int32(1)]; int32 gather_180 = const()[name = string("gather_180"), val = int32(8)]; int32 gather_181_axis_0 = const()[name = string("gather_181_axis_0"), val = int32(0)]; int32 gather_181_batch_dims_0 = const()[name = string("gather_181_batch_dims_0"), val = int32(0)]; bool gather_181_validate_indices_0 = const()[name = string("gather_181_validate_indices_0"), val = bool(false)]; - string var_1805_shape_to_uint16_dtype_0 = const()[name = string("op_1805_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1862_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1862_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_181_to_uint16 = const()[name = string("select_181_to_uint16"), val = uint16(2)]; - tensor var_1805_shape_to_uint16 = cast(dtype = var_1805_shape_to_uint16_dtype_0, x = var_1805_shape)[name = string("cast_52")]; - uint16 gather_181_cast_uint16 = gather(axis = gather_181_axis_0, batch_dims = gather_181_batch_dims_0, indices = select_181_to_uint16, validate_indices = gather_181_validate_indices_0, x = var_1805_shape_to_uint16)[name = string("gather_181_cast_uint16")]; + tensor var_1862_shape_cast_fp16_to_uint16 = cast(dtype = var_1862_shape_cast_fp16_to_uint16_dtype_0, x = var_1862_shape_cast_fp16)[name = string("cast_52")]; + uint16 gather_181_cast_uint16 = gather(axis = gather_181_axis_0, batch_dims = gather_181_batch_dims_0, indices = select_181_to_uint16, validate_indices = gather_181_validate_indices_0, x = var_1862_shape_cast_fp16_to_uint16)[name = string("gather_181_cast_uint16")]; string gather_181_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_181_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_182 = const()[name = string("gather_182"), val = int32(64)]; - tensor var_1812_axes_0 = const()[name = string("op_1812_axes_0"), val = tensor([2])]; - tensor var_1812 = expand_dims(axes = var_1812_axes_0, x = var_1786)[name = string("op_1812")]; - tensor shape_202 = shape(x = var_1812)[name = string("shape_202")]; + tensor var_1869_axes_0 = const()[name = string("op_1869_axes_0"), val = tensor([2])]; + tensor var_1869_cast_fp16 = expand_dims(axes = var_1869_axes_0, x = var_1843_cast_fp16)[name = string("op_1869_cast_fp16")]; + tensor shape_202_cast_fp16 = shape(x = var_1869_cast_fp16)[name = string("shape_202_cast_fp16")]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; int32 gather_181_cast_uint16_to_int32 = cast(dtype = gather_181_cast_uint16_to_int32_dtype_0, x = gather_181_cast_uint16)[name = string("cast_51")]; - tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (gather_179, gather_180, var_60, gather_181_cast_uint16_to_int32, gather_182))[name = string("concat_186")]; - tensor real_div_19 = real_div(x = concat_186, y = shape_202)[name = string("real_div_19")]; - tensor hidden_states_247 = tile(reps = real_div_19, x = var_1812)[name = string("hidden_states_247")]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (gather_179, gather_180, var_59, gather_181_cast_uint16_to_int32, gather_182))[name = string("concat_186")]; + tensor real_div_19 = real_div(x = concat_186, y = shape_202_cast_fp16)[name = string("real_div_19")]; + tensor hidden_states_285_cast_fp16 = tile(reps = real_div_19, x = var_1869_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; tensor concat_187x = const()[name = string("concat_187x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_39 = reshape(shape = concat_187x, x = hidden_states_247)[name = string("value_states_39")]; - tensor var_1822_shape = shape(x = key_states_39)[name = string("op_1822_shape")]; + tensor value_states_39_cast_fp16 = reshape(shape = concat_187x, x = hidden_states_285_cast_fp16)[name = string("value_states_39_cast_fp16")]; + tensor var_1879_shape_cast_fp16 = shape(x = key_states_39_cast_fp16)[name = string("op_1879_shape_cast_fp16")]; int32 gather_183_axis_0 = const()[name = string("gather_183_axis_0"), val = int32(0)]; int32 gather_183_batch_dims_0 = const()[name = string("gather_183_batch_dims_0"), val = int32(0)]; bool gather_183_validate_indices_0 = const()[name = string("gather_183_validate_indices_0"), val = bool(false)]; - string var_1822_shape_to_uint16_dtype_0 = const()[name = string("op_1822_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1879_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1879_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_183_to_uint16 = const()[name = string("select_183_to_uint16"), val = uint16(2)]; - tensor var_1822_shape_to_uint16 = cast(dtype = var_1822_shape_to_uint16_dtype_0, x = var_1822_shape)[name = string("cast_50")]; - uint16 gather_183_cast_uint16 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183_to_uint16, validate_indices = gather_183_validate_indices_0, x = var_1822_shape_to_uint16)[name = string("gather_183_cast_uint16")]; + tensor var_1879_shape_cast_fp16_to_uint16 = cast(dtype = var_1879_shape_cast_fp16_to_uint16_dtype_0, x = var_1879_shape_cast_fp16)[name = string("cast_50")]; + uint16 gather_183_cast_uint16 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183_to_uint16, validate_indices = gather_183_validate_indices_0, x = var_1879_shape_cast_fp16_to_uint16)[name = string("gather_183_cast_uint16")]; string gather_183_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_183_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_188_values0_0 = const()[name = string("concat_188_values0_0"), val = int32(1)]; int32 concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = int32(1)]; @@ -2164,98 +2104,107 @@ program(1.3) tensor causal_mask_21_begin_0 = const()[name = string("causal_mask_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_21_end_mask_0 = const()[name = string("causal_mask_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_21_cast_fp16 = slice_by_index(begin = causal_mask_21_begin_0, end = concat_188, end_mask = causal_mask_21_end_mask_0, x = causal_mask)[name = string("causal_mask_21_cast_fp16")]; - tensor attn_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_21_cast_fp16, key = key_states_39, query = query_states_39, value = value_states_39)[name = string("attn_output_37_cast_fp16")]; - tensor var_1828_perm_0 = const()[name = string("op_1828_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_21_cast_fp16, key = key_states_39_cast_fp16, query = query_states_39_cast_fp16, value = value_states_39_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1885_perm_0 = const()[name = string("op_1885_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)]; bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)]; int32 gather_167_cast_uint16_to_int32 = cast(dtype = gather_167_cast_uint16_to_int32_dtype_0, x = gather_167_cast_uint16)[name = string("cast_48")]; tensor concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (gather_166, gather_167_cast_uint16_to_int32, var_48))[name = string("concat_189")]; - tensor var_1828 = transpose(perm = var_1828_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_24")]; - tensor input_73 = reshape(shape = concat_189, x = var_1828)[name = string("input_73")]; - tensor linear_66 = linear(bias = linear_0_bias_0, weight = model_model_layers_9_self_attn_o_proj_weight_quantized, x = input_73)[name = string("linear_66")]; - tensor hidden_states_251 = add(x = hidden_states_233, y = linear_66)[name = string("hidden_states_251")]; - fp16 var_55_promoted_19_to_fp16 = const()[name = string("op_55_promoted_19_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1837_cast_fp16 = pow(x = hidden_states_251, y = var_55_promoted_19_to_fp16)[name = string("op_1837_cast_fp16")]; + tensor var_1885_cast_fp16 = transpose(perm = var_1885_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_24")]; + tensor input_73_cast_fp16 = reshape(shape = concat_189, x = var_1885_cast_fp16)[name = string("input_73_cast_fp16")]; + tensor model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(459287488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461384704))))[name = string("model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_66_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized, x = input_73_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor hidden_states_289_cast_fp16 = add(x = hidden_states_269_cast_fp16, y = linear_66_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; + fp16 var_54_promoted_19_to_fp16 = const()[name = string("op_54_promoted_19_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1894_cast_fp16 = pow(x = hidden_states_289_cast_fp16, y = var_54_promoted_19_to_fp16)[name = string("op_1894_cast_fp16")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; - tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1837_cast_fp16)[name = string("variance_39_cast_fp16")]; - fp16 var_1840_to_fp16 = const()[name = string("op_1840_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1841_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1840_to_fp16)[name = string("op_1841_cast_fp16")]; - fp32 var_1842_epsilon_0 = const()[name = string("op_1842_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1842_cast_fp16 = rsqrt(epsilon = var_1842_epsilon_0, x = var_1841_cast_fp16)[name = string("op_1842_cast_fp16")]; - tensor hidden_states_255_cast_fp16 = mul(x = hidden_states_251, y = var_1842_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; - tensor input_75 = mul(x = model_model_layers_9_post_attention_layernorm_weight, y = hidden_states_255_cast_fp16)[name = string("input_75")]; - tensor linear_67 = linear(bias = linear_4_bias_0, weight = model_model_layers_9_mlp_gate_proj_weight_quantized, x = input_75)[name = string("linear_67")]; - tensor var_1851 = silu(x = linear_67)[name = string("op_1851")]; - tensor linear_68 = linear(bias = linear_4_bias_0, weight = model_model_layers_9_mlp_up_proj_weight_quantized, x = input_75)[name = string("linear_68")]; - tensor input_79 = mul(x = var_1851, y = linear_68)[name = string("input_79")]; - tensor linear_69 = linear(bias = linear_0_bias_0, weight = model_model_layers_9_mlp_down_proj_weight_quantized, x = input_79)[name = string("linear_69")]; - tensor hidden_states_259 = add(x = hidden_states_251, y = linear_69)[name = string("hidden_states_259")]; - fp16 var_55_promoted_20_to_fp16 = const()[name = string("op_55_promoted_20_to_fp16"), val = fp16(0x1p+1)]; - tensor var_1864_cast_fp16 = pow(x = hidden_states_259, y = var_55_promoted_20_to_fp16)[name = string("op_1864_cast_fp16")]; + tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1894_cast_fp16)[name = string("variance_39_cast_fp16")]; + fp16 var_1897_to_fp16 = const()[name = string("op_1897_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1898_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1897_to_fp16)[name = string("op_1898_cast_fp16")]; + fp32 var_1899_epsilon_0 = const()[name = string("op_1899_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1899_cast_fp16 = rsqrt(epsilon = var_1899_epsilon_0, x = var_1898_cast_fp16)[name = string("op_1899_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = var_1899_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461646912)))]; + tensor input_75_cast_fp16 = mul(x = model_model_layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_293_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(461651072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(470039744))))[name = string("model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1911_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_1911_cast_fp16")]; + tensor model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(471088384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(479477056))))[name = string("model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor input_79_cast_fp16 = mul(x = var_1911_cast_fp16, y = linear_68_cast_fp16)[name = string("input_79_cast_fp16")]; + tensor model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(480525696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(488914368))))[name = string("model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_69_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized, x = input_79_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = linear_69_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; + fp16 var_54_promoted_20_to_fp16 = const()[name = string("op_54_promoted_20_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1924_cast_fp16 = pow(x = hidden_states_299_cast_fp16, y = var_54_promoted_20_to_fp16)[name = string("op_1924_cast_fp16")]; tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; - tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1864_cast_fp16)[name = string("variance_41_cast_fp16")]; - fp16 var_1867_to_fp16 = const()[name = string("op_1867_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_1868_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1867_to_fp16)[name = string("op_1868_cast_fp16")]; - fp32 var_1869_epsilon_0 = const()[name = string("op_1869_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_1869_cast_fp16 = rsqrt(epsilon = var_1869_epsilon_0, x = var_1868_cast_fp16)[name = string("op_1869_cast_fp16")]; - tensor hidden_states_263_cast_fp16 = mul(x = hidden_states_259, y = var_1869_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; - tensor hidden_states_265 = mul(x = model_model_layers_10_input_layernorm_weight, y = hidden_states_263_cast_fp16)[name = string("hidden_states_265")]; - tensor var_1877_shape = shape(x = hidden_states_265)[name = string("op_1877_shape")]; + tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1924_cast_fp16)[name = string("variance_41_cast_fp16")]; + fp16 var_1927_to_fp16 = const()[name = string("op_1927_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1928_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1927_to_fp16)[name = string("op_1928_cast_fp16")]; + fp32 var_1929_epsilon_0 = const()[name = string("op_1929_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1929_cast_fp16 = rsqrt(epsilon = var_1929_epsilon_0, x = var_1928_cast_fp16)[name = string("op_1929_cast_fp16")]; + tensor hidden_states_303_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = var_1929_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; + tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489963008)))]; + tensor hidden_states_307_cast_fp16 = mul(x = model_model_layers_10_input_layernorm_weight_to_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; + tensor var_1940_shape_cast_fp16 = shape(x = hidden_states_307_cast_fp16)[name = string("op_1940_shape_cast_fp16")]; int32 gather_184 = const()[name = string("gather_184"), val = int32(1)]; int32 gather_185_axis_0 = const()[name = string("gather_185_axis_0"), val = int32(0)]; int32 gather_185_batch_dims_0 = const()[name = string("gather_185_batch_dims_0"), val = int32(0)]; bool gather_185_validate_indices_0 = const()[name = string("gather_185_validate_indices_0"), val = bool(false)]; - string var_1877_shape_to_uint16_dtype_0 = const()[name = string("op_1877_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_1940_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1940_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_185_to_uint16 = const()[name = string("select_185_to_uint16"), val = uint16(1)]; - tensor var_1877_shape_to_uint16 = cast(dtype = var_1877_shape_to_uint16_dtype_0, x = var_1877_shape)[name = string("cast_47")]; - uint16 gather_185_cast_uint16 = gather(axis = gather_185_axis_0, batch_dims = gather_185_batch_dims_0, indices = select_185_to_uint16, validate_indices = gather_185_validate_indices_0, x = var_1877_shape_to_uint16)[name = string("gather_185_cast_uint16")]; + tensor var_1940_shape_cast_fp16_to_uint16 = cast(dtype = var_1940_shape_cast_fp16_to_uint16_dtype_0, x = var_1940_shape_cast_fp16)[name = string("cast_47")]; + uint16 gather_185_cast_uint16 = gather(axis = gather_185_axis_0, batch_dims = gather_185_batch_dims_0, indices = select_185_to_uint16, validate_indices = gather_185_validate_indices_0, x = var_1940_shape_cast_fp16_to_uint16)[name = string("gather_185_cast_uint16")]; string gather_185_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_185_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_70 = linear(bias = linear_0_bias_0, weight = model_model_layers_10_self_attn_q_proj_weight_quantized, x = hidden_states_265)[name = string("linear_70")]; - tensor linear_71 = linear(bias = linear_1_bias_0, weight = model_model_layers_10_self_attn_k_proj_weight_quantized, x = hidden_states_265)[name = string("linear_71")]; - tensor linear_72 = linear(bias = linear_1_bias_0, weight = model_model_layers_10_self_attn_v_proj_weight_quantized, x = hidden_states_265)[name = string("linear_72")]; + tensor model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(489967168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492064384))))[name = string("model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_70_cast_fp16")]; + tensor model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492326592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492850944))))[name = string("model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(492916544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493440896))))[name = string("model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 32, 64])]; - tensor var_1886 = reshape(shape = concat_190x, x = linear_70)[name = string("op_1886")]; + tensor var_1949_cast_fp16 = reshape(shape = concat_190x, x = linear_70_cast_fp16)[name = string("op_1949_cast_fp16")]; tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_191x = const()[name = string("concat_191x"), val = tensor([1, -1, 8, 64])]; - tensor var_1889 = reshape(shape = concat_191x, x = linear_71)[name = string("op_1889")]; + tensor var_1952_cast_fp16 = reshape(shape = concat_191x, x = linear_71_cast_fp16)[name = string("op_1952_cast_fp16")]; tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_192x = const()[name = string("concat_192x"), val = tensor([1, -1, 8, 64])]; - tensor var_1892 = reshape(shape = concat_192x, x = linear_72)[name = string("op_1892")]; + tensor var_1955_cast_fp16 = reshape(shape = concat_192x, x = linear_72_cast_fp16)[name = string("op_1955_cast_fp16")]; tensor v_state_21_perm_0 = const()[name = string("v_state_21_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_21 = transpose(perm = q_21_perm_0, x = var_1886)[name = string("transpose_23")]; - tensor var_1896 = mul(x = q_21, y = cos_7)[name = string("op_1896")]; + tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = var_1949_cast_fp16)[name = string("transpose_23")]; + tensor var_1959_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1959_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_41 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21)[name = string("x1_41")]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_41 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21)[name = string("x2_41")]; - fp16 const_23_promoted = const()[name = string("const_23_promoted"), val = fp16(-0x1p+0)]; - tensor var_1907 = mul(x = x2_41, y = const_23_promoted)[name = string("op_1907")]; - bool var_1909_interleave_0 = const()[name = string("op_1909_interleave_0"), val = bool(false)]; - tensor var_1909 = concat(axis = var_48, interleave = var_1909_interleave_0, values = (var_1907, x1_41))[name = string("op_1909")]; - tensor var_1910 = mul(x = var_1909, y = sin_7)[name = string("op_1910")]; - tensor query_states_43 = add(x = var_1896, y = var_1910)[name = string("query_states_43")]; - tensor k_21 = transpose(perm = k_21_perm_0, x = var_1889)[name = string("transpose_22")]; - tensor var_1912 = mul(x = k_21, y = cos_7)[name = string("op_1912")]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1970_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_1970_cast_fp16")]; + bool var_1972_interleave_0 = const()[name = string("op_1972_interleave_0"), val = bool(false)]; + tensor var_1972_cast_fp16 = concat(axis = var_48, interleave = var_1972_interleave_0, values = (var_1970_cast_fp16, x1_41_cast_fp16))[name = string("op_1972_cast_fp16")]; + tensor var_1973_cast_fp16 = mul(x = var_1972_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1973_cast_fp16")]; + tensor query_states_43_cast_fp16 = add(x = var_1959_cast_fp16, y = var_1973_cast_fp16)[name = string("query_states_43_cast_fp16")]; + tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = var_1952_cast_fp16)[name = string("transpose_22")]; + tensor var_1975_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1975_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_43 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21)[name = string("x1_43")]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_43 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21)[name = string("x2_43")]; - fp16 const_24_promoted = const()[name = string("const_24_promoted"), val = fp16(-0x1p+0)]; - tensor var_1923 = mul(x = x2_43, y = const_24_promoted)[name = string("op_1923")]; - bool var_1925_interleave_0 = const()[name = string("op_1925_interleave_0"), val = bool(false)]; - tensor var_1925 = concat(axis = var_48, interleave = var_1925_interleave_0, values = (var_1923, x1_43))[name = string("op_1925")]; - tensor var_1926 = mul(x = var_1925, y = sin_7)[name = string("op_1926")]; - tensor k_state_21 = add(x = var_1912, y = var_1926)[name = string("k_state_21")]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1986_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_1986_cast_fp16")]; + bool var_1988_interleave_0 = const()[name = string("op_1988_interleave_0"), val = bool(false)]; + tensor var_1988_cast_fp16 = concat(axis = var_48, interleave = var_1988_interleave_0, values = (var_1986_cast_fp16, x1_43_cast_fp16))[name = string("op_1988_cast_fp16")]; + tensor var_1989_cast_fp16 = mul(x = var_1988_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1989_cast_fp16")]; + tensor k_state_21_cast_fp16 = add(x = var_1975_cast_fp16, y = var_1989_cast_fp16)[name = string("k_state_21_cast_fp16")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([0])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; @@ -2267,87 +2216,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_11 = slice_update(begin = concat_195, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_11")]; - write_state(data = key_cache_internal_tensor_assign_11, input = key_cache)[name = string("coreml_update_state_52_write_state")]; + tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_52")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_21 = transpose(perm = v_state_21_perm_0, x = var_1892)[name = string("transpose_21")]; - tensor value_cache_internal_tensor_assign_11 = slice_update(begin = concat_195, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_11")]; - write_state(data = value_cache_internal_tensor_assign_11, input = value_cache)[name = string("coreml_update_state_53_write_state")]; + tensor v_state_21_cast_fp16 = transpose(perm = v_state_21_perm_0, x = var_1955_cast_fp16)[name = string("transpose_21")]; + tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_53")]; - tensor var_1949_begin_0 = const()[name = string("op_1949_begin_0"), val = tensor([10, 0, 0, 0, 0])]; - tensor var_1949_end_0 = const()[name = string("op_1949_end_0"), val = tensor([11, 1, 8, 2048, 64])]; - tensor var_1949_end_mask_0 = const()[name = string("op_1949_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1949_squeeze_mask_0 = const()[name = string("op_1949_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1949 = slice_by_index(begin = var_1949_begin_0, end = var_1949_end_0, end_mask = var_1949_end_mask_0, squeeze_mask = var_1949_squeeze_mask_0, x = coreml_update_state_52)[name = string("op_1949")]; - tensor var_1952_begin_0 = const()[name = string("op_1952_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1952_end_mask_0 = const()[name = string("op_1952_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1952 = slice_by_index(begin = var_1952_begin_0, end = concat_11, end_mask = var_1952_end_mask_0, x = var_1949)[name = string("op_1952")]; - tensor var_1954_begin_0 = const()[name = string("op_1954_begin_0"), val = tensor([10, 0, 0, 0, 0])]; - tensor var_1954_end_0 = const()[name = string("op_1954_end_0"), val = tensor([11, 1, 8, 2048, 64])]; - tensor var_1954_end_mask_0 = const()[name = string("op_1954_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_1954_squeeze_mask_0 = const()[name = string("op_1954_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_1954 = slice_by_index(begin = var_1954_begin_0, end = var_1954_end_0, end_mask = var_1954_end_mask_0, squeeze_mask = var_1954_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_1954")]; - tensor var_1957_begin_0 = const()[name = string("op_1957_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1957_end_mask_0 = const()[name = string("op_1957_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_1957 = slice_by_index(begin = var_1957_begin_0, end = concat_11, end_mask = var_1957_end_mask_0, x = var_1954)[name = string("op_1957")]; - tensor var_1959_shape = shape(x = var_1952)[name = string("op_1959_shape")]; + tensor var_2012_begin_0 = const()[name = string("op_2012_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_2012_end_0 = const()[name = string("op_2012_end_0"), val = tensor([11, 1, 8, 2048, 64])]; + tensor var_2012_end_mask_0 = const()[name = string("op_2012_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2012_squeeze_mask_0 = const()[name = string("op_2012_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2012_cast_fp16 = slice_by_index(begin = var_2012_begin_0, end = var_2012_end_0, end_mask = var_2012_end_mask_0, squeeze_mask = var_2012_squeeze_mask_0, x = coreml_update_state_52)[name = string("op_2012_cast_fp16")]; + tensor var_2015_begin_0 = const()[name = string("op_2015_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2015_end_mask_0 = const()[name = string("op_2015_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2015_cast_fp16 = slice_by_index(begin = var_2015_begin_0, end = concat_11, end_mask = var_2015_end_mask_0, x = var_2012_cast_fp16)[name = string("op_2015_cast_fp16")]; + tensor var_2017_begin_0 = const()[name = string("op_2017_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_2017_end_0 = const()[name = string("op_2017_end_0"), val = tensor([11, 1, 8, 2048, 64])]; + tensor var_2017_end_mask_0 = const()[name = string("op_2017_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2017_squeeze_mask_0 = const()[name = string("op_2017_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2017_cast_fp16 = slice_by_index(begin = var_2017_begin_0, end = var_2017_end_0, end_mask = var_2017_end_mask_0, squeeze_mask = var_2017_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_2017_cast_fp16")]; + tensor var_2020_begin_0 = const()[name = string("op_2020_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2020_end_mask_0 = const()[name = string("op_2020_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2020_cast_fp16 = slice_by_index(begin = var_2020_begin_0, end = concat_11, end_mask = var_2020_end_mask_0, x = var_2017_cast_fp16)[name = string("op_2020_cast_fp16")]; + tensor var_2022_shape_cast_fp16 = shape(x = var_2015_cast_fp16)[name = string("op_2022_shape_cast_fp16")]; int32 gather_193 = const()[name = string("gather_193"), val = int32(1)]; int32 gather_194 = const()[name = string("gather_194"), val = int32(8)]; int32 gather_195_axis_0 = const()[name = string("gather_195_axis_0"), val = int32(0)]; int32 gather_195_batch_dims_0 = const()[name = string("gather_195_batch_dims_0"), val = int32(0)]; bool gather_195_validate_indices_0 = const()[name = string("gather_195_validate_indices_0"), val = bool(false)]; - string var_1959_shape_to_uint16_dtype_0 = const()[name = string("op_1959_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2022_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2022_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_195_to_uint16 = const()[name = string("select_195_to_uint16"), val = uint16(2)]; - tensor var_1959_shape_to_uint16 = cast(dtype = var_1959_shape_to_uint16_dtype_0, x = var_1959_shape)[name = string("cast_46")]; - uint16 gather_195_cast_uint16 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195_to_uint16, validate_indices = gather_195_validate_indices_0, x = var_1959_shape_to_uint16)[name = string("gather_195_cast_uint16")]; + tensor var_2022_shape_cast_fp16_to_uint16 = cast(dtype = var_2022_shape_cast_fp16_to_uint16_dtype_0, x = var_2022_shape_cast_fp16)[name = string("cast_46")]; + uint16 gather_195_cast_uint16 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195_to_uint16, validate_indices = gather_195_validate_indices_0, x = var_2022_shape_cast_fp16_to_uint16)[name = string("gather_195_cast_uint16")]; string gather_195_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_195_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_196 = const()[name = string("gather_196"), val = int32(64)]; - tensor var_1966_axes_0 = const()[name = string("op_1966_axes_0"), val = tensor([2])]; - tensor var_1966 = expand_dims(axes = var_1966_axes_0, x = var_1952)[name = string("op_1966")]; - tensor shape_217 = shape(x = var_1966)[name = string("shape_217")]; + tensor var_2029_axes_0 = const()[name = string("op_2029_axes_0"), val = tensor([2])]; + tensor var_2029_cast_fp16 = expand_dims(axes = var_2029_axes_0, x = var_2015_cast_fp16)[name = string("op_2029_cast_fp16")]; + tensor shape_217_cast_fp16 = shape(x = var_2029_cast_fp16)[name = string("shape_217_cast_fp16")]; int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; int32 gather_195_cast_uint16_to_int32 = cast(dtype = gather_195_cast_uint16_to_int32_dtype_0, x = gather_195_cast_uint16)[name = string("cast_45")]; - tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (gather_193, gather_194, var_60, gather_195_cast_uint16_to_int32, gather_196))[name = string("concat_203")]; - tensor real_div_20 = real_div(x = concat_203, y = shape_217)[name = string("real_div_20")]; - tensor hidden_states_269 = tile(reps = real_div_20, x = var_1966)[name = string("hidden_states_269")]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (gather_193, gather_194, var_59, gather_195_cast_uint16_to_int32, gather_196))[name = string("concat_203")]; + tensor real_div_20 = real_div(x = concat_203, y = shape_217_cast_fp16)[name = string("real_div_20")]; + tensor hidden_states_311_cast_fp16 = tile(reps = real_div_20, x = var_2029_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; tensor concat_204x = const()[name = string("concat_204x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_43 = reshape(shape = concat_204x, x = hidden_states_269)[name = string("key_states_43")]; - tensor var_1976_shape = shape(x = var_1957)[name = string("op_1976_shape")]; + tensor key_states_43_cast_fp16 = reshape(shape = concat_204x, x = hidden_states_311_cast_fp16)[name = string("key_states_43_cast_fp16")]; + tensor var_2039_shape_cast_fp16 = shape(x = var_2020_cast_fp16)[name = string("op_2039_shape_cast_fp16")]; int32 gather_197 = const()[name = string("gather_197"), val = int32(1)]; int32 gather_198 = const()[name = string("gather_198"), val = int32(8)]; int32 gather_199_axis_0 = const()[name = string("gather_199_axis_0"), val = int32(0)]; int32 gather_199_batch_dims_0 = const()[name = string("gather_199_batch_dims_0"), val = int32(0)]; bool gather_199_validate_indices_0 = const()[name = string("gather_199_validate_indices_0"), val = bool(false)]; - string var_1976_shape_to_uint16_dtype_0 = const()[name = string("op_1976_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2039_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2039_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_199_to_uint16 = const()[name = string("select_199_to_uint16"), val = uint16(2)]; - tensor var_1976_shape_to_uint16 = cast(dtype = var_1976_shape_to_uint16_dtype_0, x = var_1976_shape)[name = string("cast_44")]; - uint16 gather_199_cast_uint16 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = select_199_to_uint16, validate_indices = gather_199_validate_indices_0, x = var_1976_shape_to_uint16)[name = string("gather_199_cast_uint16")]; + tensor var_2039_shape_cast_fp16_to_uint16 = cast(dtype = var_2039_shape_cast_fp16_to_uint16_dtype_0, x = var_2039_shape_cast_fp16)[name = string("cast_44")]; + uint16 gather_199_cast_uint16 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = select_199_to_uint16, validate_indices = gather_199_validate_indices_0, x = var_2039_shape_cast_fp16_to_uint16)[name = string("gather_199_cast_uint16")]; string gather_199_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_199_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_200 = const()[name = string("gather_200"), val = int32(64)]; - tensor var_1983_axes_0 = const()[name = string("op_1983_axes_0"), val = tensor([2])]; - tensor var_1983 = expand_dims(axes = var_1983_axes_0, x = var_1957)[name = string("op_1983")]; - tensor shape_222 = shape(x = var_1983)[name = string("shape_222")]; + tensor var_2046_axes_0 = const()[name = string("op_2046_axes_0"), val = tensor([2])]; + tensor var_2046_cast_fp16 = expand_dims(axes = var_2046_axes_0, x = var_2020_cast_fp16)[name = string("op_2046_cast_fp16")]; + tensor shape_222_cast_fp16 = shape(x = var_2046_cast_fp16)[name = string("shape_222_cast_fp16")]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; int32 gather_199_cast_uint16_to_int32 = cast(dtype = gather_199_cast_uint16_to_int32_dtype_0, x = gather_199_cast_uint16)[name = string("cast_43")]; - tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (gather_197, gather_198, var_60, gather_199_cast_uint16_to_int32, gather_200))[name = string("concat_205")]; - tensor real_div_21 = real_div(x = concat_205, y = shape_222)[name = string("real_div_21")]; - tensor hidden_states_273 = tile(reps = real_div_21, x = var_1983)[name = string("hidden_states_273")]; + tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (gather_197, gather_198, var_59, gather_199_cast_uint16_to_int32, gather_200))[name = string("concat_205")]; + tensor real_div_21 = real_div(x = concat_205, y = shape_222_cast_fp16)[name = string("real_div_21")]; + tensor hidden_states_315_cast_fp16 = tile(reps = real_div_21, x = var_2046_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; tensor concat_206x = const()[name = string("concat_206x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_43 = reshape(shape = concat_206x, x = hidden_states_273)[name = string("value_states_43")]; - tensor var_1993_shape = shape(x = key_states_43)[name = string("op_1993_shape")]; + tensor value_states_43_cast_fp16 = reshape(shape = concat_206x, x = hidden_states_315_cast_fp16)[name = string("value_states_43_cast_fp16")]; + tensor var_2056_shape_cast_fp16 = shape(x = key_states_43_cast_fp16)[name = string("op_2056_shape_cast_fp16")]; int32 gather_201_axis_0 = const()[name = string("gather_201_axis_0"), val = int32(0)]; int32 gather_201_batch_dims_0 = const()[name = string("gather_201_batch_dims_0"), val = int32(0)]; bool gather_201_validate_indices_0 = const()[name = string("gather_201_validate_indices_0"), val = bool(false)]; - string var_1993_shape_to_uint16_dtype_0 = const()[name = string("op_1993_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2056_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2056_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_201_to_uint16 = const()[name = string("select_201_to_uint16"), val = uint16(2)]; - tensor var_1993_shape_to_uint16 = cast(dtype = var_1993_shape_to_uint16_dtype_0, x = var_1993_shape)[name = string("cast_42")]; - uint16 gather_201_cast_uint16 = gather(axis = gather_201_axis_0, batch_dims = gather_201_batch_dims_0, indices = select_201_to_uint16, validate_indices = gather_201_validate_indices_0, x = var_1993_shape_to_uint16)[name = string("gather_201_cast_uint16")]; + tensor var_2056_shape_cast_fp16_to_uint16 = cast(dtype = var_2056_shape_cast_fp16_to_uint16_dtype_0, x = var_2056_shape_cast_fp16)[name = string("cast_42")]; + uint16 gather_201_cast_uint16 = gather(axis = gather_201_axis_0, batch_dims = gather_201_batch_dims_0, indices = select_201_to_uint16, validate_indices = gather_201_validate_indices_0, x = var_2056_shape_cast_fp16_to_uint16)[name = string("gather_201_cast_uint16")]; string gather_201_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_201_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_207_values0_0 = const()[name = string("concat_207_values0_0"), val = int32(1)]; int32 concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = int32(1)]; @@ -2359,98 +2308,107 @@ program(1.3) tensor causal_mask_23_begin_0 = const()[name = string("causal_mask_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_23_end_mask_0 = const()[name = string("causal_mask_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_23_cast_fp16 = slice_by_index(begin = causal_mask_23_begin_0, end = concat_207, end_mask = causal_mask_23_end_mask_0, x = causal_mask)[name = string("causal_mask_23_cast_fp16")]; - tensor attn_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_23_cast_fp16, key = key_states_43, query = query_states_43, value = value_states_43)[name = string("attn_output_41_cast_fp16")]; - tensor var_1999_perm_0 = const()[name = string("op_1999_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_23_cast_fp16, key = key_states_43_cast_fp16, query = query_states_43_cast_fp16, value = value_states_43_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_2062_perm_0 = const()[name = string("op_2062_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; int32 gather_185_cast_uint16_to_int32 = cast(dtype = gather_185_cast_uint16_to_int32_dtype_0, x = gather_185_cast_uint16)[name = string("cast_40")]; tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (gather_184, gather_185_cast_uint16_to_int32, var_48))[name = string("concat_208")]; - tensor var_1999 = transpose(perm = var_1999_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_20")]; - tensor input_81 = reshape(shape = concat_208, x = var_1999)[name = string("input_81")]; - tensor linear_73 = linear(bias = linear_0_bias_0, weight = model_model_layers_10_self_attn_o_proj_weight_quantized, x = input_81)[name = string("linear_73")]; - tensor hidden_states_277 = add(x = hidden_states_259, y = linear_73)[name = string("hidden_states_277")]; - fp16 var_55_promoted_21_to_fp16 = const()[name = string("op_55_promoted_21_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2008_cast_fp16 = pow(x = hidden_states_277, y = var_55_promoted_21_to_fp16)[name = string("op_2008_cast_fp16")]; + tensor var_2062_cast_fp16 = transpose(perm = var_2062_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_20")]; + tensor input_81_cast_fp16 = reshape(shape = concat_208, x = var_2062_cast_fp16)[name = string("input_81_cast_fp16")]; + tensor model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493506496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(495603712))))[name = string("model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_73_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor hidden_states_319_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = linear_73_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + fp16 var_54_promoted_21_to_fp16 = const()[name = string("op_54_promoted_21_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2071_cast_fp16 = pow(x = hidden_states_319_cast_fp16, y = var_54_promoted_21_to_fp16)[name = string("op_2071_cast_fp16")]; tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; - tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_2008_cast_fp16)[name = string("variance_43_cast_fp16")]; - fp16 var_2011_to_fp16 = const()[name = string("op_2011_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2012_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2011_to_fp16)[name = string("op_2012_cast_fp16")]; - fp32 var_2013_epsilon_0 = const()[name = string("op_2013_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2013_cast_fp16 = rsqrt(epsilon = var_2013_epsilon_0, x = var_2012_cast_fp16)[name = string("op_2013_cast_fp16")]; - tensor hidden_states_281_cast_fp16 = mul(x = hidden_states_277, y = var_2013_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; - tensor input_83 = mul(x = model_model_layers_10_post_attention_layernorm_weight, y = hidden_states_281_cast_fp16)[name = string("input_83")]; - tensor linear_74 = linear(bias = linear_4_bias_0, weight = model_model_layers_10_mlp_gate_proj_weight_quantized, x = input_83)[name = string("linear_74")]; - tensor var_2022 = silu(x = linear_74)[name = string("op_2022")]; - tensor linear_75 = linear(bias = linear_4_bias_0, weight = model_model_layers_10_mlp_up_proj_weight_quantized, x = input_83)[name = string("linear_75")]; - tensor input_87 = mul(x = var_2022, y = linear_75)[name = string("input_87")]; - tensor linear_76 = linear(bias = linear_0_bias_0, weight = model_model_layers_10_mlp_down_proj_weight_quantized, x = input_87)[name = string("linear_76")]; - tensor hidden_states_285 = add(x = hidden_states_277, y = linear_76)[name = string("hidden_states_285")]; - fp16 var_55_promoted_22_to_fp16 = const()[name = string("op_55_promoted_22_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2035_cast_fp16 = pow(x = hidden_states_285, y = var_55_promoted_22_to_fp16)[name = string("op_2035_cast_fp16")]; + tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_2071_cast_fp16)[name = string("variance_43_cast_fp16")]; + fp16 var_2074_to_fp16 = const()[name = string("op_2074_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2075_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2074_to_fp16)[name = string("op_2075_cast_fp16")]; + fp32 var_2076_epsilon_0 = const()[name = string("op_2076_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2076_cast_fp16 = rsqrt(epsilon = var_2076_epsilon_0, x = var_2075_cast_fp16)[name = string("op_2076_cast_fp16")]; + tensor hidden_states_323_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = var_2076_cast_fp16)[name = string("hidden_states_323_cast_fp16")]; + tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(495865920)))]; + tensor input_83_cast_fp16 = mul(x = model_model_layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_323_cast_fp16)[name = string("input_83_cast_fp16")]; + tensor model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(495870080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504258752))))[name = string("model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_2088_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2088_cast_fp16")]; + tensor model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(505307392))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(513696064))))[name = string("model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = var_2088_cast_fp16, y = linear_75_cast_fp16)[name = string("input_87_cast_fp16")]; + tensor model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(514744704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(523133376))))[name = string("model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_76_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor hidden_states_329_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = linear_76_cast_fp16)[name = string("hidden_states_329_cast_fp16")]; + fp16 var_54_promoted_22_to_fp16 = const()[name = string("op_54_promoted_22_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2101_cast_fp16 = pow(x = hidden_states_329_cast_fp16, y = var_54_promoted_22_to_fp16)[name = string("op_2101_cast_fp16")]; tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; - tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_2035_cast_fp16)[name = string("variance_45_cast_fp16")]; - fp16 var_2038_to_fp16 = const()[name = string("op_2038_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2039_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2038_to_fp16)[name = string("op_2039_cast_fp16")]; - fp32 var_2040_epsilon_0 = const()[name = string("op_2040_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2040_cast_fp16 = rsqrt(epsilon = var_2040_epsilon_0, x = var_2039_cast_fp16)[name = string("op_2040_cast_fp16")]; - tensor hidden_states_289_cast_fp16 = mul(x = hidden_states_285, y = var_2040_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; - tensor hidden_states_291 = mul(x = model_model_layers_11_input_layernorm_weight, y = hidden_states_289_cast_fp16)[name = string("hidden_states_291")]; - tensor var_2048_shape = shape(x = hidden_states_291)[name = string("op_2048_shape")]; + tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_2101_cast_fp16)[name = string("variance_45_cast_fp16")]; + fp16 var_2104_to_fp16 = const()[name = string("op_2104_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2105_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2104_to_fp16)[name = string("op_2105_cast_fp16")]; + fp32 var_2106_epsilon_0 = const()[name = string("op_2106_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2106_cast_fp16 = rsqrt(epsilon = var_2106_epsilon_0, x = var_2105_cast_fp16)[name = string("op_2106_cast_fp16")]; + tensor hidden_states_333_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = var_2106_cast_fp16)[name = string("hidden_states_333_cast_fp16")]; + tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524182016)))]; + tensor hidden_states_337_cast_fp16 = mul(x = model_model_layers_11_input_layernorm_weight_to_fp16, y = hidden_states_333_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; + tensor var_2117_shape_cast_fp16 = shape(x = hidden_states_337_cast_fp16)[name = string("op_2117_shape_cast_fp16")]; int32 gather_202 = const()[name = string("gather_202"), val = int32(1)]; int32 gather_203_axis_0 = const()[name = string("gather_203_axis_0"), val = int32(0)]; int32 gather_203_batch_dims_0 = const()[name = string("gather_203_batch_dims_0"), val = int32(0)]; bool gather_203_validate_indices_0 = const()[name = string("gather_203_validate_indices_0"), val = bool(false)]; - string var_2048_shape_to_uint16_dtype_0 = const()[name = string("op_2048_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2117_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2117_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_203_to_uint16 = const()[name = string("select_203_to_uint16"), val = uint16(1)]; - tensor var_2048_shape_to_uint16 = cast(dtype = var_2048_shape_to_uint16_dtype_0, x = var_2048_shape)[name = string("cast_39")]; - uint16 gather_203_cast_uint16 = gather(axis = gather_203_axis_0, batch_dims = gather_203_batch_dims_0, indices = select_203_to_uint16, validate_indices = gather_203_validate_indices_0, x = var_2048_shape_to_uint16)[name = string("gather_203_cast_uint16")]; + tensor var_2117_shape_cast_fp16_to_uint16 = cast(dtype = var_2117_shape_cast_fp16_to_uint16_dtype_0, x = var_2117_shape_cast_fp16)[name = string("cast_39")]; + uint16 gather_203_cast_uint16 = gather(axis = gather_203_axis_0, batch_dims = gather_203_batch_dims_0, indices = select_203_to_uint16, validate_indices = gather_203_validate_indices_0, x = var_2117_shape_cast_fp16_to_uint16)[name = string("gather_203_cast_uint16")]; string gather_203_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_203_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_77 = linear(bias = linear_0_bias_0, weight = model_model_layers_11_self_attn_q_proj_weight_quantized, x = hidden_states_291)[name = string("linear_77")]; - tensor linear_78 = linear(bias = linear_1_bias_0, weight = model_model_layers_11_self_attn_k_proj_weight_quantized, x = hidden_states_291)[name = string("linear_78")]; - tensor linear_79 = linear(bias = linear_1_bias_0, weight = model_model_layers_11_self_attn_v_proj_weight_quantized, x = hidden_states_291)[name = string("linear_79")]; + tensor model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(524186176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526283392))))[name = string("model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(526545600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527069952))))[name = string("model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_78_cast_fp16")]; + tensor model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527135552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527659904))))[name = string("model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor concat_209x = const()[name = string("concat_209x"), val = tensor([1, -1, 32, 64])]; - tensor var_2057 = reshape(shape = concat_209x, x = linear_77)[name = string("op_2057")]; + tensor var_2126_cast_fp16 = reshape(shape = concat_209x, x = linear_77_cast_fp16)[name = string("op_2126_cast_fp16")]; tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 8, 64])]; - tensor var_2060 = reshape(shape = concat_210x, x = linear_78)[name = string("op_2060")]; + tensor var_2129_cast_fp16 = reshape(shape = concat_210x, x = linear_78_cast_fp16)[name = string("op_2129_cast_fp16")]; tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 8, 64])]; - tensor var_2063 = reshape(shape = concat_211x, x = linear_79)[name = string("op_2063")]; + tensor var_2132_cast_fp16 = reshape(shape = concat_211x, x = linear_79_cast_fp16)[name = string("op_2132_cast_fp16")]; tensor v_state_23_perm_0 = const()[name = string("v_state_23_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_23 = transpose(perm = q_23_perm_0, x = var_2057)[name = string("transpose_19")]; - tensor var_2067 = mul(x = q_23, y = cos_7)[name = string("op_2067")]; + tensor q_23_cast_fp16 = transpose(perm = q_23_perm_0, x = var_2126_cast_fp16)[name = string("transpose_19")]; + tensor var_2136_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2136_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_45 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23)[name = string("x1_45")]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_45 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23)[name = string("x2_45")]; - fp16 const_25_promoted = const()[name = string("const_25_promoted"), val = fp16(-0x1p+0)]; - tensor var_2078 = mul(x = x2_45, y = const_25_promoted)[name = string("op_2078")]; - bool var_2080_interleave_0 = const()[name = string("op_2080_interleave_0"), val = bool(false)]; - tensor var_2080 = concat(axis = var_48, interleave = var_2080_interleave_0, values = (var_2078, x1_45))[name = string("op_2080")]; - tensor var_2081 = mul(x = var_2080, y = sin_7)[name = string("op_2081")]; - tensor query_states_47 = add(x = var_2067, y = var_2081)[name = string("query_states_47")]; - tensor k_23 = transpose(perm = k_23_perm_0, x = var_2060)[name = string("transpose_18")]; - tensor var_2083 = mul(x = k_23, y = cos_7)[name = string("op_2083")]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2147_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_2147_cast_fp16")]; + bool var_2149_interleave_0 = const()[name = string("op_2149_interleave_0"), val = bool(false)]; + tensor var_2149_cast_fp16 = concat(axis = var_48, interleave = var_2149_interleave_0, values = (var_2147_cast_fp16, x1_45_cast_fp16))[name = string("op_2149_cast_fp16")]; + tensor var_2150_cast_fp16 = mul(x = var_2149_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2150_cast_fp16")]; + tensor query_states_47_cast_fp16 = add(x = var_2136_cast_fp16, y = var_2150_cast_fp16)[name = string("query_states_47_cast_fp16")]; + tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = var_2129_cast_fp16)[name = string("transpose_18")]; + tensor var_2152_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2152_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_47 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23)[name = string("x1_47")]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_47 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23)[name = string("x2_47")]; - fp16 const_26_promoted = const()[name = string("const_26_promoted"), val = fp16(-0x1p+0)]; - tensor var_2094 = mul(x = x2_47, y = const_26_promoted)[name = string("op_2094")]; - bool var_2096_interleave_0 = const()[name = string("op_2096_interleave_0"), val = bool(false)]; - tensor var_2096 = concat(axis = var_48, interleave = var_2096_interleave_0, values = (var_2094, x1_47))[name = string("op_2096")]; - tensor var_2097 = mul(x = var_2096, y = sin_7)[name = string("op_2097")]; - tensor k_state_23 = add(x = var_2083, y = var_2097)[name = string("k_state_23")]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2163_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2163_cast_fp16")]; + bool var_2165_interleave_0 = const()[name = string("op_2165_interleave_0"), val = bool(false)]; + tensor var_2165_cast_fp16 = concat(axis = var_48, interleave = var_2165_interleave_0, values = (var_2163_cast_fp16, x1_47_cast_fp16))[name = string("op_2165_cast_fp16")]; + tensor var_2166_cast_fp16 = mul(x = var_2165_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2166_cast_fp16")]; + tensor k_state_23_cast_fp16 = add(x = var_2152_cast_fp16, y = var_2166_cast_fp16)[name = string("k_state_23_cast_fp16")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; @@ -2462,87 +2420,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_12 = slice_update(begin = concat_214, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_12")]; - write_state(data = key_cache_internal_tensor_assign_12, input = key_cache)[name = string("coreml_update_state_54_write_state")]; + tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_54")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_23 = transpose(perm = v_state_23_perm_0, x = var_2063)[name = string("transpose_17")]; - tensor value_cache_internal_tensor_assign_12 = slice_update(begin = concat_214, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_12")]; - write_state(data = value_cache_internal_tensor_assign_12, input = value_cache)[name = string("coreml_update_state_55_write_state")]; + tensor v_state_23_cast_fp16 = transpose(perm = v_state_23_perm_0, x = var_2132_cast_fp16)[name = string("transpose_17")]; + tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_55")]; - tensor var_2120_begin_0 = const()[name = string("op_2120_begin_0"), val = tensor([11, 0, 0, 0, 0])]; - tensor var_2120_end_0 = const()[name = string("op_2120_end_0"), val = tensor([12, 1, 8, 2048, 64])]; - tensor var_2120_end_mask_0 = const()[name = string("op_2120_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2120_squeeze_mask_0 = const()[name = string("op_2120_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2120 = slice_by_index(begin = var_2120_begin_0, end = var_2120_end_0, end_mask = var_2120_end_mask_0, squeeze_mask = var_2120_squeeze_mask_0, x = coreml_update_state_54)[name = string("op_2120")]; - tensor var_2123_begin_0 = const()[name = string("op_2123_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2123_end_mask_0 = const()[name = string("op_2123_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2123 = slice_by_index(begin = var_2123_begin_0, end = concat_11, end_mask = var_2123_end_mask_0, x = var_2120)[name = string("op_2123")]; - tensor var_2125_begin_0 = const()[name = string("op_2125_begin_0"), val = tensor([11, 0, 0, 0, 0])]; - tensor var_2125_end_0 = const()[name = string("op_2125_end_0"), val = tensor([12, 1, 8, 2048, 64])]; - tensor var_2125_end_mask_0 = const()[name = string("op_2125_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2125_squeeze_mask_0 = const()[name = string("op_2125_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2125 = slice_by_index(begin = var_2125_begin_0, end = var_2125_end_0, end_mask = var_2125_end_mask_0, squeeze_mask = var_2125_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_2125")]; - tensor var_2128_begin_0 = const()[name = string("op_2128_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2128_end_mask_0 = const()[name = string("op_2128_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2128 = slice_by_index(begin = var_2128_begin_0, end = concat_11, end_mask = var_2128_end_mask_0, x = var_2125)[name = string("op_2128")]; - tensor var_2130_shape = shape(x = var_2123)[name = string("op_2130_shape")]; + tensor var_2189_begin_0 = const()[name = string("op_2189_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_2189_end_0 = const()[name = string("op_2189_end_0"), val = tensor([12, 1, 8, 2048, 64])]; + tensor var_2189_end_mask_0 = const()[name = string("op_2189_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2189_squeeze_mask_0 = const()[name = string("op_2189_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2189_cast_fp16 = slice_by_index(begin = var_2189_begin_0, end = var_2189_end_0, end_mask = var_2189_end_mask_0, squeeze_mask = var_2189_squeeze_mask_0, x = coreml_update_state_54)[name = string("op_2189_cast_fp16")]; + tensor var_2192_begin_0 = const()[name = string("op_2192_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2192_end_mask_0 = const()[name = string("op_2192_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = concat_11, end_mask = var_2192_end_mask_0, x = var_2189_cast_fp16)[name = string("op_2192_cast_fp16")]; + tensor var_2194_begin_0 = const()[name = string("op_2194_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_2194_end_0 = const()[name = string("op_2194_end_0"), val = tensor([12, 1, 8, 2048, 64])]; + tensor var_2194_end_mask_0 = const()[name = string("op_2194_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2194_squeeze_mask_0 = const()[name = string("op_2194_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2194_cast_fp16 = slice_by_index(begin = var_2194_begin_0, end = var_2194_end_0, end_mask = var_2194_end_mask_0, squeeze_mask = var_2194_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_2194_cast_fp16")]; + tensor var_2197_begin_0 = const()[name = string("op_2197_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2197_end_mask_0 = const()[name = string("op_2197_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2197_cast_fp16 = slice_by_index(begin = var_2197_begin_0, end = concat_11, end_mask = var_2197_end_mask_0, x = var_2194_cast_fp16)[name = string("op_2197_cast_fp16")]; + tensor var_2199_shape_cast_fp16 = shape(x = var_2192_cast_fp16)[name = string("op_2199_shape_cast_fp16")]; int32 gather_211 = const()[name = string("gather_211"), val = int32(1)]; int32 gather_212 = const()[name = string("gather_212"), val = int32(8)]; int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; - string var_2130_shape_to_uint16_dtype_0 = const()[name = string("op_2130_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2199_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2199_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_213_to_uint16 = const()[name = string("select_213_to_uint16"), val = uint16(2)]; - tensor var_2130_shape_to_uint16 = cast(dtype = var_2130_shape_to_uint16_dtype_0, x = var_2130_shape)[name = string("cast_38")]; - uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_2130_shape_to_uint16)[name = string("gather_213_cast_uint16")]; + tensor var_2199_shape_cast_fp16_to_uint16 = cast(dtype = var_2199_shape_cast_fp16_to_uint16_dtype_0, x = var_2199_shape_cast_fp16)[name = string("cast_38")]; + uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_2199_shape_cast_fp16_to_uint16)[name = string("gather_213_cast_uint16")]; string gather_213_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_213_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_214 = const()[name = string("gather_214"), val = int32(64)]; - tensor var_2137_axes_0 = const()[name = string("op_2137_axes_0"), val = tensor([2])]; - tensor var_2137 = expand_dims(axes = var_2137_axes_0, x = var_2123)[name = string("op_2137")]; - tensor shape_237 = shape(x = var_2137)[name = string("shape_237")]; + tensor var_2206_axes_0 = const()[name = string("op_2206_axes_0"), val = tensor([2])]; + tensor var_2206_cast_fp16 = expand_dims(axes = var_2206_axes_0, x = var_2192_cast_fp16)[name = string("op_2206_cast_fp16")]; + tensor shape_237_cast_fp16 = shape(x = var_2206_cast_fp16)[name = string("shape_237_cast_fp16")]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; int32 gather_213_cast_uint16_to_int32 = cast(dtype = gather_213_cast_uint16_to_int32_dtype_0, x = gather_213_cast_uint16)[name = string("cast_37")]; - tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (gather_211, gather_212, var_60, gather_213_cast_uint16_to_int32, gather_214))[name = string("concat_222")]; - tensor real_div_22 = real_div(x = concat_222, y = shape_237)[name = string("real_div_22")]; - tensor hidden_states_295 = tile(reps = real_div_22, x = var_2137)[name = string("hidden_states_295")]; + tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (gather_211, gather_212, var_59, gather_213_cast_uint16_to_int32, gather_214))[name = string("concat_222")]; + tensor real_div_22 = real_div(x = concat_222, y = shape_237_cast_fp16)[name = string("real_div_22")]; + tensor hidden_states_341_cast_fp16 = tile(reps = real_div_22, x = var_2206_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; tensor concat_223x = const()[name = string("concat_223x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_47 = reshape(shape = concat_223x, x = hidden_states_295)[name = string("key_states_47")]; - tensor var_2147_shape = shape(x = var_2128)[name = string("op_2147_shape")]; + tensor key_states_47_cast_fp16 = reshape(shape = concat_223x, x = hidden_states_341_cast_fp16)[name = string("key_states_47_cast_fp16")]; + tensor var_2216_shape_cast_fp16 = shape(x = var_2197_cast_fp16)[name = string("op_2216_shape_cast_fp16")]; int32 gather_215 = const()[name = string("gather_215"), val = int32(1)]; int32 gather_216 = const()[name = string("gather_216"), val = int32(8)]; int32 gather_217_axis_0 = const()[name = string("gather_217_axis_0"), val = int32(0)]; int32 gather_217_batch_dims_0 = const()[name = string("gather_217_batch_dims_0"), val = int32(0)]; bool gather_217_validate_indices_0 = const()[name = string("gather_217_validate_indices_0"), val = bool(false)]; - string var_2147_shape_to_uint16_dtype_0 = const()[name = string("op_2147_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2216_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2216_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_217_to_uint16 = const()[name = string("select_217_to_uint16"), val = uint16(2)]; - tensor var_2147_shape_to_uint16 = cast(dtype = var_2147_shape_to_uint16_dtype_0, x = var_2147_shape)[name = string("cast_36")]; - uint16 gather_217_cast_uint16 = gather(axis = gather_217_axis_0, batch_dims = gather_217_batch_dims_0, indices = select_217_to_uint16, validate_indices = gather_217_validate_indices_0, x = var_2147_shape_to_uint16)[name = string("gather_217_cast_uint16")]; + tensor var_2216_shape_cast_fp16_to_uint16 = cast(dtype = var_2216_shape_cast_fp16_to_uint16_dtype_0, x = var_2216_shape_cast_fp16)[name = string("cast_36")]; + uint16 gather_217_cast_uint16 = gather(axis = gather_217_axis_0, batch_dims = gather_217_batch_dims_0, indices = select_217_to_uint16, validate_indices = gather_217_validate_indices_0, x = var_2216_shape_cast_fp16_to_uint16)[name = string("gather_217_cast_uint16")]; string gather_217_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_217_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_218 = const()[name = string("gather_218"), val = int32(64)]; - tensor var_2154_axes_0 = const()[name = string("op_2154_axes_0"), val = tensor([2])]; - tensor var_2154 = expand_dims(axes = var_2154_axes_0, x = var_2128)[name = string("op_2154")]; - tensor shape_242 = shape(x = var_2154)[name = string("shape_242")]; + tensor var_2223_axes_0 = const()[name = string("op_2223_axes_0"), val = tensor([2])]; + tensor var_2223_cast_fp16 = expand_dims(axes = var_2223_axes_0, x = var_2197_cast_fp16)[name = string("op_2223_cast_fp16")]; + tensor shape_242_cast_fp16 = shape(x = var_2223_cast_fp16)[name = string("shape_242_cast_fp16")]; int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; int32 gather_217_cast_uint16_to_int32 = cast(dtype = gather_217_cast_uint16_to_int32_dtype_0, x = gather_217_cast_uint16)[name = string("cast_35")]; - tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (gather_215, gather_216, var_60, gather_217_cast_uint16_to_int32, gather_218))[name = string("concat_224")]; - tensor real_div_23 = real_div(x = concat_224, y = shape_242)[name = string("real_div_23")]; - tensor hidden_states_299 = tile(reps = real_div_23, x = var_2154)[name = string("hidden_states_299")]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (gather_215, gather_216, var_59, gather_217_cast_uint16_to_int32, gather_218))[name = string("concat_224")]; + tensor real_div_23 = real_div(x = concat_224, y = shape_242_cast_fp16)[name = string("real_div_23")]; + tensor hidden_states_345_cast_fp16 = tile(reps = real_div_23, x = var_2223_cast_fp16)[name = string("hidden_states_345_cast_fp16")]; tensor concat_225x = const()[name = string("concat_225x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_47 = reshape(shape = concat_225x, x = hidden_states_299)[name = string("value_states_47")]; - tensor var_2164_shape = shape(x = key_states_47)[name = string("op_2164_shape")]; + tensor value_states_47_cast_fp16 = reshape(shape = concat_225x, x = hidden_states_345_cast_fp16)[name = string("value_states_47_cast_fp16")]; + tensor var_2233_shape_cast_fp16 = shape(x = key_states_47_cast_fp16)[name = string("op_2233_shape_cast_fp16")]; int32 gather_219_axis_0 = const()[name = string("gather_219_axis_0"), val = int32(0)]; int32 gather_219_batch_dims_0 = const()[name = string("gather_219_batch_dims_0"), val = int32(0)]; bool gather_219_validate_indices_0 = const()[name = string("gather_219_validate_indices_0"), val = bool(false)]; - string var_2164_shape_to_uint16_dtype_0 = const()[name = string("op_2164_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2233_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2233_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_219_to_uint16 = const()[name = string("select_219_to_uint16"), val = uint16(2)]; - tensor var_2164_shape_to_uint16 = cast(dtype = var_2164_shape_to_uint16_dtype_0, x = var_2164_shape)[name = string("cast_34")]; - uint16 gather_219_cast_uint16 = gather(axis = gather_219_axis_0, batch_dims = gather_219_batch_dims_0, indices = select_219_to_uint16, validate_indices = gather_219_validate_indices_0, x = var_2164_shape_to_uint16)[name = string("gather_219_cast_uint16")]; + tensor var_2233_shape_cast_fp16_to_uint16 = cast(dtype = var_2233_shape_cast_fp16_to_uint16_dtype_0, x = var_2233_shape_cast_fp16)[name = string("cast_34")]; + uint16 gather_219_cast_uint16 = gather(axis = gather_219_axis_0, batch_dims = gather_219_batch_dims_0, indices = select_219_to_uint16, validate_indices = gather_219_validate_indices_0, x = var_2233_shape_cast_fp16_to_uint16)[name = string("gather_219_cast_uint16")]; string gather_219_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_219_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_226_values0_0 = const()[name = string("concat_226_values0_0"), val = int32(1)]; int32 concat_226_values1_0 = const()[name = string("concat_226_values1_0"), val = int32(1)]; @@ -2554,98 +2512,107 @@ program(1.3) tensor causal_mask_25_begin_0 = const()[name = string("causal_mask_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_25_end_mask_0 = const()[name = string("causal_mask_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_25_cast_fp16 = slice_by_index(begin = causal_mask_25_begin_0, end = concat_226, end_mask = causal_mask_25_end_mask_0, x = causal_mask)[name = string("causal_mask_25_cast_fp16")]; - tensor attn_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_25_cast_fp16, key = key_states_47, query = query_states_47, value = value_states_47)[name = string("attn_output_45_cast_fp16")]; - tensor var_2170_perm_0 = const()[name = string("op_2170_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_25_cast_fp16, key = key_states_47_cast_fp16, query = query_states_47_cast_fp16, value = value_states_47_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_2239_perm_0 = const()[name = string("op_2239_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_227_axis_0 = const()[name = string("concat_227_axis_0"), val = int32(0)]; bool concat_227_interleave_0 = const()[name = string("concat_227_interleave_0"), val = bool(false)]; int32 gather_203_cast_uint16_to_int32 = cast(dtype = gather_203_cast_uint16_to_int32_dtype_0, x = gather_203_cast_uint16)[name = string("cast_32")]; tensor concat_227 = concat(axis = concat_227_axis_0, interleave = concat_227_interleave_0, values = (gather_202, gather_203_cast_uint16_to_int32, var_48))[name = string("concat_227")]; - tensor var_2170 = transpose(perm = var_2170_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_16")]; - tensor input_89 = reshape(shape = concat_227, x = var_2170)[name = string("input_89")]; - tensor linear_80 = linear(bias = linear_0_bias_0, weight = model_model_layers_11_self_attn_o_proj_weight_quantized, x = input_89)[name = string("linear_80")]; - tensor hidden_states_303 = add(x = hidden_states_285, y = linear_80)[name = string("hidden_states_303")]; - fp16 var_55_promoted_23_to_fp16 = const()[name = string("op_55_promoted_23_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2179_cast_fp16 = pow(x = hidden_states_303, y = var_55_promoted_23_to_fp16)[name = string("op_2179_cast_fp16")]; + tensor var_2239_cast_fp16 = transpose(perm = var_2239_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_16")]; + tensor input_89_cast_fp16 = reshape(shape = concat_227, x = var_2239_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(527725504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(529822720))))[name = string("model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_80_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor hidden_states_349_cast_fp16 = add(x = hidden_states_329_cast_fp16, y = linear_80_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; + fp16 var_54_promoted_23_to_fp16 = const()[name = string("op_54_promoted_23_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2248_cast_fp16 = pow(x = hidden_states_349_cast_fp16, y = var_54_promoted_23_to_fp16)[name = string("op_2248_cast_fp16")]; tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; - tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_2179_cast_fp16)[name = string("variance_47_cast_fp16")]; - fp16 var_2182_to_fp16 = const()[name = string("op_2182_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2183_cast_fp16 = add(x = variance_47_cast_fp16, y = var_2182_to_fp16)[name = string("op_2183_cast_fp16")]; - fp32 var_2184_epsilon_0 = const()[name = string("op_2184_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2184_cast_fp16 = rsqrt(epsilon = var_2184_epsilon_0, x = var_2183_cast_fp16)[name = string("op_2184_cast_fp16")]; - tensor hidden_states_307_cast_fp16 = mul(x = hidden_states_303, y = var_2184_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; - tensor input_91 = mul(x = model_model_layers_11_post_attention_layernorm_weight, y = hidden_states_307_cast_fp16)[name = string("input_91")]; - tensor linear_81 = linear(bias = linear_4_bias_0, weight = model_model_layers_11_mlp_gate_proj_weight_quantized, x = input_91)[name = string("linear_81")]; - tensor var_2193 = silu(x = linear_81)[name = string("op_2193")]; - tensor linear_82 = linear(bias = linear_4_bias_0, weight = model_model_layers_11_mlp_up_proj_weight_quantized, x = input_91)[name = string("linear_82")]; - tensor input_95 = mul(x = var_2193, y = linear_82)[name = string("input_95")]; - tensor linear_83 = linear(bias = linear_0_bias_0, weight = model_model_layers_11_mlp_down_proj_weight_quantized, x = input_95)[name = string("linear_83")]; - tensor hidden_states_311 = add(x = hidden_states_303, y = linear_83)[name = string("hidden_states_311")]; - fp16 var_55_promoted_24_to_fp16 = const()[name = string("op_55_promoted_24_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2206_cast_fp16 = pow(x = hidden_states_311, y = var_55_promoted_24_to_fp16)[name = string("op_2206_cast_fp16")]; + tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_2248_cast_fp16)[name = string("variance_47_cast_fp16")]; + fp16 var_2251_to_fp16 = const()[name = string("op_2251_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2252_cast_fp16 = add(x = variance_47_cast_fp16, y = var_2251_to_fp16)[name = string("op_2252_cast_fp16")]; + fp32 var_2253_epsilon_0 = const()[name = string("op_2253_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2253_cast_fp16 = rsqrt(epsilon = var_2253_epsilon_0, x = var_2252_cast_fp16)[name = string("op_2253_cast_fp16")]; + tensor hidden_states_353_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = var_2253_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; + tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530084928)))]; + tensor input_91_cast_fp16 = mul(x = model_model_layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_353_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(530089088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(538477760))))[name = string("model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_2265_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2265_cast_fp16")]; + tensor model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539526400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(547915072))))[name = string("model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor input_95_cast_fp16 = mul(x = var_2265_cast_fp16, y = linear_82_cast_fp16)[name = string("input_95_cast_fp16")]; + tensor model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(548963712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(557352384))))[name = string("model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_83_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor hidden_states_359_cast_fp16 = add(x = hidden_states_349_cast_fp16, y = linear_83_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; + fp16 var_54_promoted_24_to_fp16 = const()[name = string("op_54_promoted_24_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2278_cast_fp16 = pow(x = hidden_states_359_cast_fp16, y = var_54_promoted_24_to_fp16)[name = string("op_2278_cast_fp16")]; tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; - tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_2206_cast_fp16)[name = string("variance_49_cast_fp16")]; - fp16 var_2209_to_fp16 = const()[name = string("op_2209_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2210_cast_fp16 = add(x = variance_49_cast_fp16, y = var_2209_to_fp16)[name = string("op_2210_cast_fp16")]; - fp32 var_2211_epsilon_0 = const()[name = string("op_2211_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2211_cast_fp16 = rsqrt(epsilon = var_2211_epsilon_0, x = var_2210_cast_fp16)[name = string("op_2211_cast_fp16")]; - tensor hidden_states_315_cast_fp16 = mul(x = hidden_states_311, y = var_2211_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; - tensor hidden_states_317 = mul(x = model_model_layers_12_input_layernorm_weight, y = hidden_states_315_cast_fp16)[name = string("hidden_states_317")]; - tensor var_2219_shape = shape(x = hidden_states_317)[name = string("op_2219_shape")]; + tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_2278_cast_fp16)[name = string("variance_49_cast_fp16")]; + fp16 var_2281_to_fp16 = const()[name = string("op_2281_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2282_cast_fp16 = add(x = variance_49_cast_fp16, y = var_2281_to_fp16)[name = string("op_2282_cast_fp16")]; + fp32 var_2283_epsilon_0 = const()[name = string("op_2283_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2283_cast_fp16 = rsqrt(epsilon = var_2283_epsilon_0, x = var_2282_cast_fp16)[name = string("op_2283_cast_fp16")]; + tensor hidden_states_363_cast_fp16 = mul(x = hidden_states_359_cast_fp16, y = var_2283_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; + tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558401024)))]; + tensor hidden_states_367_cast_fp16 = mul(x = model_model_layers_12_input_layernorm_weight_to_fp16, y = hidden_states_363_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + tensor var_2294_shape_cast_fp16 = shape(x = hidden_states_367_cast_fp16)[name = string("op_2294_shape_cast_fp16")]; int32 gather_220 = const()[name = string("gather_220"), val = int32(1)]; int32 gather_221_axis_0 = const()[name = string("gather_221_axis_0"), val = int32(0)]; int32 gather_221_batch_dims_0 = const()[name = string("gather_221_batch_dims_0"), val = int32(0)]; bool gather_221_validate_indices_0 = const()[name = string("gather_221_validate_indices_0"), val = bool(false)]; - string var_2219_shape_to_uint16_dtype_0 = const()[name = string("op_2219_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2294_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2294_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_221_to_uint16 = const()[name = string("select_221_to_uint16"), val = uint16(1)]; - tensor var_2219_shape_to_uint16 = cast(dtype = var_2219_shape_to_uint16_dtype_0, x = var_2219_shape)[name = string("cast_31")]; - uint16 gather_221_cast_uint16 = gather(axis = gather_221_axis_0, batch_dims = gather_221_batch_dims_0, indices = select_221_to_uint16, validate_indices = gather_221_validate_indices_0, x = var_2219_shape_to_uint16)[name = string("gather_221_cast_uint16")]; + tensor var_2294_shape_cast_fp16_to_uint16 = cast(dtype = var_2294_shape_cast_fp16_to_uint16_dtype_0, x = var_2294_shape_cast_fp16)[name = string("cast_31")]; + uint16 gather_221_cast_uint16 = gather(axis = gather_221_axis_0, batch_dims = gather_221_batch_dims_0, indices = select_221_to_uint16, validate_indices = gather_221_validate_indices_0, x = var_2294_shape_cast_fp16_to_uint16)[name = string("gather_221_cast_uint16")]; string gather_221_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_221_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_84 = linear(bias = linear_0_bias_0, weight = model_model_layers_12_self_attn_q_proj_weight_quantized, x = hidden_states_317)[name = string("linear_84")]; - tensor linear_85 = linear(bias = linear_1_bias_0, weight = model_model_layers_12_self_attn_k_proj_weight_quantized, x = hidden_states_317)[name = string("linear_85")]; - tensor linear_86 = linear(bias = linear_1_bias_0, weight = model_model_layers_12_self_attn_v_proj_weight_quantized, x = hidden_states_317)[name = string("linear_86")]; + tensor model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(558405184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560502400))))[name = string("model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(560764608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561288960))))[name = string("model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561354560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561878912))))[name = string("model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_86_cast_fp16")]; tensor concat_228x = const()[name = string("concat_228x"), val = tensor([1, -1, 32, 64])]; - tensor var_2228 = reshape(shape = concat_228x, x = linear_84)[name = string("op_2228")]; + tensor var_2303_cast_fp16 = reshape(shape = concat_228x, x = linear_84_cast_fp16)[name = string("op_2303_cast_fp16")]; tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_229x = const()[name = string("concat_229x"), val = tensor([1, -1, 8, 64])]; - tensor var_2231 = reshape(shape = concat_229x, x = linear_85)[name = string("op_2231")]; + tensor var_2306_cast_fp16 = reshape(shape = concat_229x, x = linear_85_cast_fp16)[name = string("op_2306_cast_fp16")]; tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_230x = const()[name = string("concat_230x"), val = tensor([1, -1, 8, 64])]; - tensor var_2234 = reshape(shape = concat_230x, x = linear_86)[name = string("op_2234")]; + tensor var_2309_cast_fp16 = reshape(shape = concat_230x, x = linear_86_cast_fp16)[name = string("op_2309_cast_fp16")]; tensor v_state_25_perm_0 = const()[name = string("v_state_25_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_25 = transpose(perm = q_25_perm_0, x = var_2228)[name = string("transpose_15")]; - tensor var_2238 = mul(x = q_25, y = cos_7)[name = string("op_2238")]; + tensor q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_2303_cast_fp16)[name = string("transpose_15")]; + tensor var_2313_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2313_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_49 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25)[name = string("x1_49")]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_49 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25)[name = string("x2_49")]; - fp16 const_27_promoted = const()[name = string("const_27_promoted"), val = fp16(-0x1p+0)]; - tensor var_2249 = mul(x = x2_49, y = const_27_promoted)[name = string("op_2249")]; - bool var_2251_interleave_0 = const()[name = string("op_2251_interleave_0"), val = bool(false)]; - tensor var_2251 = concat(axis = var_48, interleave = var_2251_interleave_0, values = (var_2249, x1_49))[name = string("op_2251")]; - tensor var_2252 = mul(x = var_2251, y = sin_7)[name = string("op_2252")]; - tensor query_states_51 = add(x = var_2238, y = var_2252)[name = string("query_states_51")]; - tensor k_25 = transpose(perm = k_25_perm_0, x = var_2231)[name = string("transpose_14")]; - tensor var_2254 = mul(x = k_25, y = cos_7)[name = string("op_2254")]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2324_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_2324_cast_fp16")]; + bool var_2326_interleave_0 = const()[name = string("op_2326_interleave_0"), val = bool(false)]; + tensor var_2326_cast_fp16 = concat(axis = var_48, interleave = var_2326_interleave_0, values = (var_2324_cast_fp16, x1_49_cast_fp16))[name = string("op_2326_cast_fp16")]; + tensor var_2327_cast_fp16 = mul(x = var_2326_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2327_cast_fp16")]; + tensor query_states_51_cast_fp16 = add(x = var_2313_cast_fp16, y = var_2327_cast_fp16)[name = string("query_states_51_cast_fp16")]; + tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_2306_cast_fp16)[name = string("transpose_14")]; + tensor var_2329_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2329_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_51 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25)[name = string("x1_51")]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_51 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25)[name = string("x2_51")]; - fp16 const_28_promoted = const()[name = string("const_28_promoted"), val = fp16(-0x1p+0)]; - tensor var_2265 = mul(x = x2_51, y = const_28_promoted)[name = string("op_2265")]; - bool var_2267_interleave_0 = const()[name = string("op_2267_interleave_0"), val = bool(false)]; - tensor var_2267 = concat(axis = var_48, interleave = var_2267_interleave_0, values = (var_2265, x1_51))[name = string("op_2267")]; - tensor var_2268 = mul(x = var_2267, y = sin_7)[name = string("op_2268")]; - tensor k_state_25 = add(x = var_2254, y = var_2268)[name = string("k_state_25")]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2340_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_2340_cast_fp16")]; + bool var_2342_interleave_0 = const()[name = string("op_2342_interleave_0"), val = bool(false)]; + tensor var_2342_cast_fp16 = concat(axis = var_48, interleave = var_2342_interleave_0, values = (var_2340_cast_fp16, x1_51_cast_fp16))[name = string("op_2342_cast_fp16")]; + tensor var_2343_cast_fp16 = mul(x = var_2342_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2343_cast_fp16")]; + tensor k_state_25_cast_fp16 = add(x = var_2329_cast_fp16, y = var_2343_cast_fp16)[name = string("k_state_25_cast_fp16")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; @@ -2657,87 +2624,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_13 = slice_update(begin = concat_233, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_13")]; - write_state(data = key_cache_internal_tensor_assign_13, input = key_cache)[name = string("coreml_update_state_56_write_state")]; + tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_56")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_25 = transpose(perm = v_state_25_perm_0, x = var_2234)[name = string("transpose_13")]; - tensor value_cache_internal_tensor_assign_13 = slice_update(begin = concat_233, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_13")]; - write_state(data = value_cache_internal_tensor_assign_13, input = value_cache)[name = string("coreml_update_state_57_write_state")]; + tensor v_state_25_cast_fp16 = transpose(perm = v_state_25_perm_0, x = var_2309_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_57")]; - tensor var_2291_begin_0 = const()[name = string("op_2291_begin_0"), val = tensor([12, 0, 0, 0, 0])]; - tensor var_2291_end_0 = const()[name = string("op_2291_end_0"), val = tensor([13, 1, 8, 2048, 64])]; - tensor var_2291_end_mask_0 = const()[name = string("op_2291_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2291_squeeze_mask_0 = const()[name = string("op_2291_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2291 = slice_by_index(begin = var_2291_begin_0, end = var_2291_end_0, end_mask = var_2291_end_mask_0, squeeze_mask = var_2291_squeeze_mask_0, x = coreml_update_state_56)[name = string("op_2291")]; - tensor var_2294_begin_0 = const()[name = string("op_2294_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2294_end_mask_0 = const()[name = string("op_2294_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2294 = slice_by_index(begin = var_2294_begin_0, end = concat_11, end_mask = var_2294_end_mask_0, x = var_2291)[name = string("op_2294")]; - tensor var_2296_begin_0 = const()[name = string("op_2296_begin_0"), val = tensor([12, 0, 0, 0, 0])]; - tensor var_2296_end_0 = const()[name = string("op_2296_end_0"), val = tensor([13, 1, 8, 2048, 64])]; - tensor var_2296_end_mask_0 = const()[name = string("op_2296_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2296_squeeze_mask_0 = const()[name = string("op_2296_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2296 = slice_by_index(begin = var_2296_begin_0, end = var_2296_end_0, end_mask = var_2296_end_mask_0, squeeze_mask = var_2296_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_2296")]; - tensor var_2299_begin_0 = const()[name = string("op_2299_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2299_end_mask_0 = const()[name = string("op_2299_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2299 = slice_by_index(begin = var_2299_begin_0, end = concat_11, end_mask = var_2299_end_mask_0, x = var_2296)[name = string("op_2299")]; - tensor var_2301_shape = shape(x = var_2294)[name = string("op_2301_shape")]; + tensor var_2366_begin_0 = const()[name = string("op_2366_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_2366_end_0 = const()[name = string("op_2366_end_0"), val = tensor([13, 1, 8, 2048, 64])]; + tensor var_2366_end_mask_0 = const()[name = string("op_2366_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2366_squeeze_mask_0 = const()[name = string("op_2366_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2366_cast_fp16 = slice_by_index(begin = var_2366_begin_0, end = var_2366_end_0, end_mask = var_2366_end_mask_0, squeeze_mask = var_2366_squeeze_mask_0, x = coreml_update_state_56)[name = string("op_2366_cast_fp16")]; + tensor var_2369_begin_0 = const()[name = string("op_2369_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2369_end_mask_0 = const()[name = string("op_2369_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2369_cast_fp16 = slice_by_index(begin = var_2369_begin_0, end = concat_11, end_mask = var_2369_end_mask_0, x = var_2366_cast_fp16)[name = string("op_2369_cast_fp16")]; + tensor var_2371_begin_0 = const()[name = string("op_2371_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_2371_end_0 = const()[name = string("op_2371_end_0"), val = tensor([13, 1, 8, 2048, 64])]; + tensor var_2371_end_mask_0 = const()[name = string("op_2371_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2371_squeeze_mask_0 = const()[name = string("op_2371_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2371_cast_fp16 = slice_by_index(begin = var_2371_begin_0, end = var_2371_end_0, end_mask = var_2371_end_mask_0, squeeze_mask = var_2371_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_2371_cast_fp16")]; + tensor var_2374_begin_0 = const()[name = string("op_2374_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2374_end_mask_0 = const()[name = string("op_2374_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = concat_11, end_mask = var_2374_end_mask_0, x = var_2371_cast_fp16)[name = string("op_2374_cast_fp16")]; + tensor var_2376_shape_cast_fp16 = shape(x = var_2369_cast_fp16)[name = string("op_2376_shape_cast_fp16")]; int32 gather_229 = const()[name = string("gather_229"), val = int32(1)]; int32 gather_230 = const()[name = string("gather_230"), val = int32(8)]; int32 gather_231_axis_0 = const()[name = string("gather_231_axis_0"), val = int32(0)]; int32 gather_231_batch_dims_0 = const()[name = string("gather_231_batch_dims_0"), val = int32(0)]; bool gather_231_validate_indices_0 = const()[name = string("gather_231_validate_indices_0"), val = bool(false)]; - string var_2301_shape_to_uint16_dtype_0 = const()[name = string("op_2301_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2376_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2376_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_231_to_uint16 = const()[name = string("select_231_to_uint16"), val = uint16(2)]; - tensor var_2301_shape_to_uint16 = cast(dtype = var_2301_shape_to_uint16_dtype_0, x = var_2301_shape)[name = string("cast_30")]; - uint16 gather_231_cast_uint16 = gather(axis = gather_231_axis_0, batch_dims = gather_231_batch_dims_0, indices = select_231_to_uint16, validate_indices = gather_231_validate_indices_0, x = var_2301_shape_to_uint16)[name = string("gather_231_cast_uint16")]; + tensor var_2376_shape_cast_fp16_to_uint16 = cast(dtype = var_2376_shape_cast_fp16_to_uint16_dtype_0, x = var_2376_shape_cast_fp16)[name = string("cast_30")]; + uint16 gather_231_cast_uint16 = gather(axis = gather_231_axis_0, batch_dims = gather_231_batch_dims_0, indices = select_231_to_uint16, validate_indices = gather_231_validate_indices_0, x = var_2376_shape_cast_fp16_to_uint16)[name = string("gather_231_cast_uint16")]; string gather_231_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_231_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_232 = const()[name = string("gather_232"), val = int32(64)]; - tensor var_2308_axes_0 = const()[name = string("op_2308_axes_0"), val = tensor([2])]; - tensor var_2308 = expand_dims(axes = var_2308_axes_0, x = var_2294)[name = string("op_2308")]; - tensor shape_257 = shape(x = var_2308)[name = string("shape_257")]; + tensor var_2383_axes_0 = const()[name = string("op_2383_axes_0"), val = tensor([2])]; + tensor var_2383_cast_fp16 = expand_dims(axes = var_2383_axes_0, x = var_2369_cast_fp16)[name = string("op_2383_cast_fp16")]; + tensor shape_257_cast_fp16 = shape(x = var_2383_cast_fp16)[name = string("shape_257_cast_fp16")]; int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; int32 gather_231_cast_uint16_to_int32 = cast(dtype = gather_231_cast_uint16_to_int32_dtype_0, x = gather_231_cast_uint16)[name = string("cast_29")]; - tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (gather_229, gather_230, var_60, gather_231_cast_uint16_to_int32, gather_232))[name = string("concat_241")]; - tensor real_div_24 = real_div(x = concat_241, y = shape_257)[name = string("real_div_24")]; - tensor hidden_states_321 = tile(reps = real_div_24, x = var_2308)[name = string("hidden_states_321")]; + tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (gather_229, gather_230, var_59, gather_231_cast_uint16_to_int32, gather_232))[name = string("concat_241")]; + tensor real_div_24 = real_div(x = concat_241, y = shape_257_cast_fp16)[name = string("real_div_24")]; + tensor hidden_states_371_cast_fp16 = tile(reps = real_div_24, x = var_2383_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_51 = reshape(shape = concat_242x, x = hidden_states_321)[name = string("key_states_51")]; - tensor var_2318_shape = shape(x = var_2299)[name = string("op_2318_shape")]; + tensor key_states_51_cast_fp16 = reshape(shape = concat_242x, x = hidden_states_371_cast_fp16)[name = string("key_states_51_cast_fp16")]; + tensor var_2393_shape_cast_fp16 = shape(x = var_2374_cast_fp16)[name = string("op_2393_shape_cast_fp16")]; int32 gather_233 = const()[name = string("gather_233"), val = int32(1)]; int32 gather_234 = const()[name = string("gather_234"), val = int32(8)]; int32 gather_235_axis_0 = const()[name = string("gather_235_axis_0"), val = int32(0)]; int32 gather_235_batch_dims_0 = const()[name = string("gather_235_batch_dims_0"), val = int32(0)]; bool gather_235_validate_indices_0 = const()[name = string("gather_235_validate_indices_0"), val = bool(false)]; - string var_2318_shape_to_uint16_dtype_0 = const()[name = string("op_2318_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2393_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2393_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_235_to_uint16 = const()[name = string("select_235_to_uint16"), val = uint16(2)]; - tensor var_2318_shape_to_uint16 = cast(dtype = var_2318_shape_to_uint16_dtype_0, x = var_2318_shape)[name = string("cast_28")]; - uint16 gather_235_cast_uint16 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235_to_uint16, validate_indices = gather_235_validate_indices_0, x = var_2318_shape_to_uint16)[name = string("gather_235_cast_uint16")]; + tensor var_2393_shape_cast_fp16_to_uint16 = cast(dtype = var_2393_shape_cast_fp16_to_uint16_dtype_0, x = var_2393_shape_cast_fp16)[name = string("cast_28")]; + uint16 gather_235_cast_uint16 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235_to_uint16, validate_indices = gather_235_validate_indices_0, x = var_2393_shape_cast_fp16_to_uint16)[name = string("gather_235_cast_uint16")]; string gather_235_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_235_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_236 = const()[name = string("gather_236"), val = int32(64)]; - tensor var_2325_axes_0 = const()[name = string("op_2325_axes_0"), val = tensor([2])]; - tensor var_2325 = expand_dims(axes = var_2325_axes_0, x = var_2299)[name = string("op_2325")]; - tensor shape_262 = shape(x = var_2325)[name = string("shape_262")]; + tensor var_2400_axes_0 = const()[name = string("op_2400_axes_0"), val = tensor([2])]; + tensor var_2400_cast_fp16 = expand_dims(axes = var_2400_axes_0, x = var_2374_cast_fp16)[name = string("op_2400_cast_fp16")]; + tensor shape_262_cast_fp16 = shape(x = var_2400_cast_fp16)[name = string("shape_262_cast_fp16")]; int32 concat_243_axis_0 = const()[name = string("concat_243_axis_0"), val = int32(0)]; bool concat_243_interleave_0 = const()[name = string("concat_243_interleave_0"), val = bool(false)]; int32 gather_235_cast_uint16_to_int32 = cast(dtype = gather_235_cast_uint16_to_int32_dtype_0, x = gather_235_cast_uint16)[name = string("cast_27")]; - tensor concat_243 = concat(axis = concat_243_axis_0, interleave = concat_243_interleave_0, values = (gather_233, gather_234, var_60, gather_235_cast_uint16_to_int32, gather_236))[name = string("concat_243")]; - tensor real_div_25 = real_div(x = concat_243, y = shape_262)[name = string("real_div_25")]; - tensor hidden_states_325 = tile(reps = real_div_25, x = var_2325)[name = string("hidden_states_325")]; + tensor concat_243 = concat(axis = concat_243_axis_0, interleave = concat_243_interleave_0, values = (gather_233, gather_234, var_59, gather_235_cast_uint16_to_int32, gather_236))[name = string("concat_243")]; + tensor real_div_25 = real_div(x = concat_243, y = shape_262_cast_fp16)[name = string("real_div_25")]; + tensor hidden_states_375_cast_fp16 = tile(reps = real_div_25, x = var_2400_cast_fp16)[name = string("hidden_states_375_cast_fp16")]; tensor concat_244x = const()[name = string("concat_244x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_51 = reshape(shape = concat_244x, x = hidden_states_325)[name = string("value_states_51")]; - tensor var_2335_shape = shape(x = key_states_51)[name = string("op_2335_shape")]; + tensor value_states_51_cast_fp16 = reshape(shape = concat_244x, x = hidden_states_375_cast_fp16)[name = string("value_states_51_cast_fp16")]; + tensor var_2410_shape_cast_fp16 = shape(x = key_states_51_cast_fp16)[name = string("op_2410_shape_cast_fp16")]; int32 gather_237_axis_0 = const()[name = string("gather_237_axis_0"), val = int32(0)]; int32 gather_237_batch_dims_0 = const()[name = string("gather_237_batch_dims_0"), val = int32(0)]; bool gather_237_validate_indices_0 = const()[name = string("gather_237_validate_indices_0"), val = bool(false)]; - string var_2335_shape_to_uint16_dtype_0 = const()[name = string("op_2335_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2410_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2410_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_237_to_uint16 = const()[name = string("select_237_to_uint16"), val = uint16(2)]; - tensor var_2335_shape_to_uint16 = cast(dtype = var_2335_shape_to_uint16_dtype_0, x = var_2335_shape)[name = string("cast_26")]; - uint16 gather_237_cast_uint16 = gather(axis = gather_237_axis_0, batch_dims = gather_237_batch_dims_0, indices = select_237_to_uint16, validate_indices = gather_237_validate_indices_0, x = var_2335_shape_to_uint16)[name = string("gather_237_cast_uint16")]; + tensor var_2410_shape_cast_fp16_to_uint16 = cast(dtype = var_2410_shape_cast_fp16_to_uint16_dtype_0, x = var_2410_shape_cast_fp16)[name = string("cast_26")]; + uint16 gather_237_cast_uint16 = gather(axis = gather_237_axis_0, batch_dims = gather_237_batch_dims_0, indices = select_237_to_uint16, validate_indices = gather_237_validate_indices_0, x = var_2410_shape_cast_fp16_to_uint16)[name = string("gather_237_cast_uint16")]; string gather_237_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_237_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_245_values0_0 = const()[name = string("concat_245_values0_0"), val = int32(1)]; int32 concat_245_values1_0 = const()[name = string("concat_245_values1_0"), val = int32(1)]; @@ -2749,98 +2716,107 @@ program(1.3) tensor causal_mask_27_begin_0 = const()[name = string("causal_mask_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_27_end_mask_0 = const()[name = string("causal_mask_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_27_cast_fp16 = slice_by_index(begin = causal_mask_27_begin_0, end = concat_245, end_mask = causal_mask_27_end_mask_0, x = causal_mask)[name = string("causal_mask_27_cast_fp16")]; - tensor attn_output_49_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_27_cast_fp16, key = key_states_51, query = query_states_51, value = value_states_51)[name = string("attn_output_49_cast_fp16")]; - tensor var_2341_perm_0 = const()[name = string("op_2341_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_49_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_27_cast_fp16, key = key_states_51_cast_fp16, query = query_states_51_cast_fp16, value = value_states_51_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_2416_perm_0 = const()[name = string("op_2416_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; int32 gather_221_cast_uint16_to_int32 = cast(dtype = gather_221_cast_uint16_to_int32_dtype_0, x = gather_221_cast_uint16)[name = string("cast_24")]; tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (gather_220, gather_221_cast_uint16_to_int32, var_48))[name = string("concat_246")]; - tensor var_2341 = transpose(perm = var_2341_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_12")]; - tensor input_97 = reshape(shape = concat_246, x = var_2341)[name = string("input_97")]; - tensor linear_87 = linear(bias = linear_0_bias_0, weight = model_model_layers_12_self_attn_o_proj_weight_quantized, x = input_97)[name = string("linear_87")]; - tensor hidden_states_329 = add(x = hidden_states_311, y = linear_87)[name = string("hidden_states_329")]; - fp16 var_55_promoted_25_to_fp16 = const()[name = string("op_55_promoted_25_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2350_cast_fp16 = pow(x = hidden_states_329, y = var_55_promoted_25_to_fp16)[name = string("op_2350_cast_fp16")]; + tensor var_2416_cast_fp16 = transpose(perm = var_2416_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_12")]; + tensor input_97_cast_fp16 = reshape(shape = concat_246, x = var_2416_cast_fp16)[name = string("input_97_cast_fp16")]; + tensor model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(561944512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564041728))))[name = string("model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_87_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor hidden_states_379_cast_fp16 = add(x = hidden_states_359_cast_fp16, y = linear_87_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; + fp16 var_54_promoted_25_to_fp16 = const()[name = string("op_54_promoted_25_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2425_cast_fp16 = pow(x = hidden_states_379_cast_fp16, y = var_54_promoted_25_to_fp16)[name = string("op_2425_cast_fp16")]; tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; - tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_2350_cast_fp16)[name = string("variance_51_cast_fp16")]; - fp16 var_2353_to_fp16 = const()[name = string("op_2353_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2354_cast_fp16 = add(x = variance_51_cast_fp16, y = var_2353_to_fp16)[name = string("op_2354_cast_fp16")]; - fp32 var_2355_epsilon_0 = const()[name = string("op_2355_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2355_cast_fp16 = rsqrt(epsilon = var_2355_epsilon_0, x = var_2354_cast_fp16)[name = string("op_2355_cast_fp16")]; - tensor hidden_states_333_cast_fp16 = mul(x = hidden_states_329, y = var_2355_cast_fp16)[name = string("hidden_states_333_cast_fp16")]; - tensor input_99 = mul(x = model_model_layers_12_post_attention_layernorm_weight, y = hidden_states_333_cast_fp16)[name = string("input_99")]; - tensor linear_88 = linear(bias = linear_4_bias_0, weight = model_model_layers_12_mlp_gate_proj_weight_quantized, x = input_99)[name = string("linear_88")]; - tensor var_2364 = silu(x = linear_88)[name = string("op_2364")]; - tensor linear_89 = linear(bias = linear_4_bias_0, weight = model_model_layers_12_mlp_up_proj_weight_quantized, x = input_99)[name = string("linear_89")]; - tensor input_103 = mul(x = var_2364, y = linear_89)[name = string("input_103")]; - tensor linear_90 = linear(bias = linear_0_bias_0, weight = model_model_layers_12_mlp_down_proj_weight_quantized, x = input_103)[name = string("linear_90")]; - tensor hidden_states_337 = add(x = hidden_states_329, y = linear_90)[name = string("hidden_states_337")]; - fp16 var_55_promoted_26_to_fp16 = const()[name = string("op_55_promoted_26_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2377_cast_fp16 = pow(x = hidden_states_337, y = var_55_promoted_26_to_fp16)[name = string("op_2377_cast_fp16")]; + tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_2425_cast_fp16)[name = string("variance_51_cast_fp16")]; + fp16 var_2428_to_fp16 = const()[name = string("op_2428_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2429_cast_fp16 = add(x = variance_51_cast_fp16, y = var_2428_to_fp16)[name = string("op_2429_cast_fp16")]; + fp32 var_2430_epsilon_0 = const()[name = string("op_2430_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2430_cast_fp16 = rsqrt(epsilon = var_2430_epsilon_0, x = var_2429_cast_fp16)[name = string("op_2430_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = var_2430_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564303936)))]; + tensor input_99_cast_fp16 = mul(x = model_model_layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_383_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(564308096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(572696768))))[name = string("model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2442_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2442_cast_fp16")]; + tensor model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(573745408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(582134080))))[name = string("model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor input_103_cast_fp16 = mul(x = var_2442_cast_fp16, y = linear_89_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(583182720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(591571392))))[name = string("model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_90_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = linear_90_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + fp16 var_54_promoted_26_to_fp16 = const()[name = string("op_54_promoted_26_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2455_cast_fp16 = pow(x = hidden_states_389_cast_fp16, y = var_54_promoted_26_to_fp16)[name = string("op_2455_cast_fp16")]; tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; - tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_2377_cast_fp16)[name = string("variance_53_cast_fp16")]; - fp16 var_2380_to_fp16 = const()[name = string("op_2380_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2381_cast_fp16 = add(x = variance_53_cast_fp16, y = var_2380_to_fp16)[name = string("op_2381_cast_fp16")]; - fp32 var_2382_epsilon_0 = const()[name = string("op_2382_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2382_cast_fp16 = rsqrt(epsilon = var_2382_epsilon_0, x = var_2381_cast_fp16)[name = string("op_2382_cast_fp16")]; - tensor hidden_states_341_cast_fp16 = mul(x = hidden_states_337, y = var_2382_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; - tensor hidden_states_343 = mul(x = model_model_layers_13_input_layernorm_weight, y = hidden_states_341_cast_fp16)[name = string("hidden_states_343")]; - tensor var_2390_shape = shape(x = hidden_states_343)[name = string("op_2390_shape")]; + tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_2455_cast_fp16)[name = string("variance_53_cast_fp16")]; + fp16 var_2458_to_fp16 = const()[name = string("op_2458_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2459_cast_fp16 = add(x = variance_53_cast_fp16, y = var_2458_to_fp16)[name = string("op_2459_cast_fp16")]; + fp32 var_2460_epsilon_0 = const()[name = string("op_2460_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2460_cast_fp16 = rsqrt(epsilon = var_2460_epsilon_0, x = var_2459_cast_fp16)[name = string("op_2460_cast_fp16")]; + tensor hidden_states_393_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = var_2460_cast_fp16)[name = string("hidden_states_393_cast_fp16")]; + tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592620032)))]; + tensor hidden_states_397_cast_fp16 = mul(x = model_model_layers_13_input_layernorm_weight_to_fp16, y = hidden_states_393_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; + tensor var_2471_shape_cast_fp16 = shape(x = hidden_states_397_cast_fp16)[name = string("op_2471_shape_cast_fp16")]; int32 gather_238 = const()[name = string("gather_238"), val = int32(1)]; int32 gather_239_axis_0 = const()[name = string("gather_239_axis_0"), val = int32(0)]; int32 gather_239_batch_dims_0 = const()[name = string("gather_239_batch_dims_0"), val = int32(0)]; bool gather_239_validate_indices_0 = const()[name = string("gather_239_validate_indices_0"), val = bool(false)]; - string var_2390_shape_to_uint16_dtype_0 = const()[name = string("op_2390_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2471_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2471_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_239_to_uint16 = const()[name = string("select_239_to_uint16"), val = uint16(1)]; - tensor var_2390_shape_to_uint16 = cast(dtype = var_2390_shape_to_uint16_dtype_0, x = var_2390_shape)[name = string("cast_23")]; - uint16 gather_239_cast_uint16 = gather(axis = gather_239_axis_0, batch_dims = gather_239_batch_dims_0, indices = select_239_to_uint16, validate_indices = gather_239_validate_indices_0, x = var_2390_shape_to_uint16)[name = string("gather_239_cast_uint16")]; + tensor var_2471_shape_cast_fp16_to_uint16 = cast(dtype = var_2471_shape_cast_fp16_to_uint16_dtype_0, x = var_2471_shape_cast_fp16)[name = string("cast_23")]; + uint16 gather_239_cast_uint16 = gather(axis = gather_239_axis_0, batch_dims = gather_239_batch_dims_0, indices = select_239_to_uint16, validate_indices = gather_239_validate_indices_0, x = var_2471_shape_cast_fp16_to_uint16)[name = string("gather_239_cast_uint16")]; string gather_239_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_239_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_91 = linear(bias = linear_0_bias_0, weight = model_model_layers_13_self_attn_q_proj_weight_quantized, x = hidden_states_343)[name = string("linear_91")]; - tensor linear_92 = linear(bias = linear_1_bias_0, weight = model_model_layers_13_self_attn_k_proj_weight_quantized, x = hidden_states_343)[name = string("linear_92")]; - tensor linear_93 = linear(bias = linear_1_bias_0, weight = model_model_layers_13_self_attn_v_proj_weight_quantized, x = hidden_states_343)[name = string("linear_93")]; + tensor model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(592624192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594721408))))[name = string("model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594983616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595507968))))[name = string("model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(595573568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596097920))))[name = string("model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor concat_247x = const()[name = string("concat_247x"), val = tensor([1, -1, 32, 64])]; - tensor var_2399 = reshape(shape = concat_247x, x = linear_91)[name = string("op_2399")]; + tensor var_2480_cast_fp16 = reshape(shape = concat_247x, x = linear_91_cast_fp16)[name = string("op_2480_cast_fp16")]; tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_248x = const()[name = string("concat_248x"), val = tensor([1, -1, 8, 64])]; - tensor var_2402 = reshape(shape = concat_248x, x = linear_92)[name = string("op_2402")]; + tensor var_2483_cast_fp16 = reshape(shape = concat_248x, x = linear_92_cast_fp16)[name = string("op_2483_cast_fp16")]; tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_249x = const()[name = string("concat_249x"), val = tensor([1, -1, 8, 64])]; - tensor var_2405 = reshape(shape = concat_249x, x = linear_93)[name = string("op_2405")]; + tensor var_2486_cast_fp16 = reshape(shape = concat_249x, x = linear_93_cast_fp16)[name = string("op_2486_cast_fp16")]; tensor v_state_27_perm_0 = const()[name = string("v_state_27_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_27 = transpose(perm = q_27_perm_0, x = var_2399)[name = string("transpose_11")]; - tensor var_2409 = mul(x = q_27, y = cos_7)[name = string("op_2409")]; + tensor q_27_cast_fp16 = transpose(perm = q_27_perm_0, x = var_2480_cast_fp16)[name = string("transpose_11")]; + tensor var_2490_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2490_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_53 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27)[name = string("x1_53")]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_53 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27)[name = string("x2_53")]; - fp16 const_29_promoted = const()[name = string("const_29_promoted"), val = fp16(-0x1p+0)]; - tensor var_2420 = mul(x = x2_53, y = const_29_promoted)[name = string("op_2420")]; - bool var_2422_interleave_0 = const()[name = string("op_2422_interleave_0"), val = bool(false)]; - tensor var_2422 = concat(axis = var_48, interleave = var_2422_interleave_0, values = (var_2420, x1_53))[name = string("op_2422")]; - tensor var_2423 = mul(x = var_2422, y = sin_7)[name = string("op_2423")]; - tensor query_states_55 = add(x = var_2409, y = var_2423)[name = string("query_states_55")]; - tensor k_27 = transpose(perm = k_27_perm_0, x = var_2402)[name = string("transpose_10")]; - tensor var_2425 = mul(x = k_27, y = cos_7)[name = string("op_2425")]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2501_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2501_cast_fp16")]; + bool var_2503_interleave_0 = const()[name = string("op_2503_interleave_0"), val = bool(false)]; + tensor var_2503_cast_fp16 = concat(axis = var_48, interleave = var_2503_interleave_0, values = (var_2501_cast_fp16, x1_53_cast_fp16))[name = string("op_2503_cast_fp16")]; + tensor var_2504_cast_fp16 = mul(x = var_2503_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2504_cast_fp16")]; + tensor query_states_55_cast_fp16 = add(x = var_2490_cast_fp16, y = var_2504_cast_fp16)[name = string("query_states_55_cast_fp16")]; + tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = var_2483_cast_fp16)[name = string("transpose_10")]; + tensor var_2506_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2506_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_55 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27)[name = string("x1_55")]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_55 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27)[name = string("x2_55")]; - fp16 const_30_promoted = const()[name = string("const_30_promoted"), val = fp16(-0x1p+0)]; - tensor var_2436 = mul(x = x2_55, y = const_30_promoted)[name = string("op_2436")]; - bool var_2438_interleave_0 = const()[name = string("op_2438_interleave_0"), val = bool(false)]; - tensor var_2438 = concat(axis = var_48, interleave = var_2438_interleave_0, values = (var_2436, x1_55))[name = string("op_2438")]; - tensor var_2439 = mul(x = var_2438, y = sin_7)[name = string("op_2439")]; - tensor k_state_27 = add(x = var_2425, y = var_2439)[name = string("k_state_27")]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2517_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2517_cast_fp16")]; + bool var_2519_interleave_0 = const()[name = string("op_2519_interleave_0"), val = bool(false)]; + tensor var_2519_cast_fp16 = concat(axis = var_48, interleave = var_2519_interleave_0, values = (var_2517_cast_fp16, x1_55_cast_fp16))[name = string("op_2519_cast_fp16")]; + tensor var_2520_cast_fp16 = mul(x = var_2519_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2520_cast_fp16")]; + tensor k_state_27_cast_fp16 = add(x = var_2506_cast_fp16, y = var_2520_cast_fp16)[name = string("k_state_27_cast_fp16")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([0])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; @@ -2852,87 +2828,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_14 = slice_update(begin = concat_252, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_14")]; - write_state(data = key_cache_internal_tensor_assign_14, input = key_cache)[name = string("coreml_update_state_58_write_state")]; + tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_58")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_27 = transpose(perm = v_state_27_perm_0, x = var_2405)[name = string("transpose_9")]; - tensor value_cache_internal_tensor_assign_14 = slice_update(begin = concat_252, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_14")]; - write_state(data = value_cache_internal_tensor_assign_14, input = value_cache)[name = string("coreml_update_state_59_write_state")]; + tensor v_state_27_cast_fp16 = transpose(perm = v_state_27_perm_0, x = var_2486_cast_fp16)[name = string("transpose_9")]; + tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_59")]; - tensor var_2462_begin_0 = const()[name = string("op_2462_begin_0"), val = tensor([13, 0, 0, 0, 0])]; - tensor var_2462_end_0 = const()[name = string("op_2462_end_0"), val = tensor([14, 1, 8, 2048, 64])]; - tensor var_2462_end_mask_0 = const()[name = string("op_2462_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2462_squeeze_mask_0 = const()[name = string("op_2462_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2462 = slice_by_index(begin = var_2462_begin_0, end = var_2462_end_0, end_mask = var_2462_end_mask_0, squeeze_mask = var_2462_squeeze_mask_0, x = coreml_update_state_58)[name = string("op_2462")]; - tensor var_2465_begin_0 = const()[name = string("op_2465_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2465_end_mask_0 = const()[name = string("op_2465_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2465 = slice_by_index(begin = var_2465_begin_0, end = concat_11, end_mask = var_2465_end_mask_0, x = var_2462)[name = string("op_2465")]; - tensor var_2467_begin_0 = const()[name = string("op_2467_begin_0"), val = tensor([13, 0, 0, 0, 0])]; - tensor var_2467_end_0 = const()[name = string("op_2467_end_0"), val = tensor([14, 1, 8, 2048, 64])]; - tensor var_2467_end_mask_0 = const()[name = string("op_2467_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2467_squeeze_mask_0 = const()[name = string("op_2467_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2467 = slice_by_index(begin = var_2467_begin_0, end = var_2467_end_0, end_mask = var_2467_end_mask_0, squeeze_mask = var_2467_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_2467")]; - tensor var_2470_begin_0 = const()[name = string("op_2470_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2470_end_mask_0 = const()[name = string("op_2470_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2470 = slice_by_index(begin = var_2470_begin_0, end = concat_11, end_mask = var_2470_end_mask_0, x = var_2467)[name = string("op_2470")]; - tensor var_2472_shape = shape(x = var_2465)[name = string("op_2472_shape")]; + tensor var_2543_begin_0 = const()[name = string("op_2543_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2543_end_0 = const()[name = string("op_2543_end_0"), val = tensor([14, 1, 8, 2048, 64])]; + tensor var_2543_end_mask_0 = const()[name = string("op_2543_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2543_squeeze_mask_0 = const()[name = string("op_2543_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2543_cast_fp16 = slice_by_index(begin = var_2543_begin_0, end = var_2543_end_0, end_mask = var_2543_end_mask_0, squeeze_mask = var_2543_squeeze_mask_0, x = coreml_update_state_58)[name = string("op_2543_cast_fp16")]; + tensor var_2546_begin_0 = const()[name = string("op_2546_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2546_end_mask_0 = const()[name = string("op_2546_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2546_cast_fp16 = slice_by_index(begin = var_2546_begin_0, end = concat_11, end_mask = var_2546_end_mask_0, x = var_2543_cast_fp16)[name = string("op_2546_cast_fp16")]; + tensor var_2548_begin_0 = const()[name = string("op_2548_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2548_end_0 = const()[name = string("op_2548_end_0"), val = tensor([14, 1, 8, 2048, 64])]; + tensor var_2548_end_mask_0 = const()[name = string("op_2548_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2548_squeeze_mask_0 = const()[name = string("op_2548_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2548_cast_fp16 = slice_by_index(begin = var_2548_begin_0, end = var_2548_end_0, end_mask = var_2548_end_mask_0, squeeze_mask = var_2548_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_2548_cast_fp16")]; + tensor var_2551_begin_0 = const()[name = string("op_2551_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2551_end_mask_0 = const()[name = string("op_2551_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2551_cast_fp16 = slice_by_index(begin = var_2551_begin_0, end = concat_11, end_mask = var_2551_end_mask_0, x = var_2548_cast_fp16)[name = string("op_2551_cast_fp16")]; + tensor var_2553_shape_cast_fp16 = shape(x = var_2546_cast_fp16)[name = string("op_2553_shape_cast_fp16")]; int32 gather_247 = const()[name = string("gather_247"), val = int32(1)]; int32 gather_248 = const()[name = string("gather_248"), val = int32(8)]; int32 gather_249_axis_0 = const()[name = string("gather_249_axis_0"), val = int32(0)]; int32 gather_249_batch_dims_0 = const()[name = string("gather_249_batch_dims_0"), val = int32(0)]; bool gather_249_validate_indices_0 = const()[name = string("gather_249_validate_indices_0"), val = bool(false)]; - string var_2472_shape_to_uint16_dtype_0 = const()[name = string("op_2472_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2553_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2553_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_249_to_uint16 = const()[name = string("select_249_to_uint16"), val = uint16(2)]; - tensor var_2472_shape_to_uint16 = cast(dtype = var_2472_shape_to_uint16_dtype_0, x = var_2472_shape)[name = string("cast_22")]; - uint16 gather_249_cast_uint16 = gather(axis = gather_249_axis_0, batch_dims = gather_249_batch_dims_0, indices = select_249_to_uint16, validate_indices = gather_249_validate_indices_0, x = var_2472_shape_to_uint16)[name = string("gather_249_cast_uint16")]; + tensor var_2553_shape_cast_fp16_to_uint16 = cast(dtype = var_2553_shape_cast_fp16_to_uint16_dtype_0, x = var_2553_shape_cast_fp16)[name = string("cast_22")]; + uint16 gather_249_cast_uint16 = gather(axis = gather_249_axis_0, batch_dims = gather_249_batch_dims_0, indices = select_249_to_uint16, validate_indices = gather_249_validate_indices_0, x = var_2553_shape_cast_fp16_to_uint16)[name = string("gather_249_cast_uint16")]; string gather_249_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_249_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_250 = const()[name = string("gather_250"), val = int32(64)]; - tensor var_2479_axes_0 = const()[name = string("op_2479_axes_0"), val = tensor([2])]; - tensor var_2479 = expand_dims(axes = var_2479_axes_0, x = var_2465)[name = string("op_2479")]; - tensor shape_277 = shape(x = var_2479)[name = string("shape_277")]; + tensor var_2560_axes_0 = const()[name = string("op_2560_axes_0"), val = tensor([2])]; + tensor var_2560_cast_fp16 = expand_dims(axes = var_2560_axes_0, x = var_2546_cast_fp16)[name = string("op_2560_cast_fp16")]; + tensor shape_277_cast_fp16 = shape(x = var_2560_cast_fp16)[name = string("shape_277_cast_fp16")]; int32 concat_260_axis_0 = const()[name = string("concat_260_axis_0"), val = int32(0)]; bool concat_260_interleave_0 = const()[name = string("concat_260_interleave_0"), val = bool(false)]; int32 gather_249_cast_uint16_to_int32 = cast(dtype = gather_249_cast_uint16_to_int32_dtype_0, x = gather_249_cast_uint16)[name = string("cast_21")]; - tensor concat_260 = concat(axis = concat_260_axis_0, interleave = concat_260_interleave_0, values = (gather_247, gather_248, var_60, gather_249_cast_uint16_to_int32, gather_250))[name = string("concat_260")]; - tensor real_div_26 = real_div(x = concat_260, y = shape_277)[name = string("real_div_26")]; - tensor hidden_states_347 = tile(reps = real_div_26, x = var_2479)[name = string("hidden_states_347")]; + tensor concat_260 = concat(axis = concat_260_axis_0, interleave = concat_260_interleave_0, values = (gather_247, gather_248, var_59, gather_249_cast_uint16_to_int32, gather_250))[name = string("concat_260")]; + tensor real_div_26 = real_div(x = concat_260, y = shape_277_cast_fp16)[name = string("real_div_26")]; + tensor hidden_states_401_cast_fp16 = tile(reps = real_div_26, x = var_2560_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; tensor concat_261x = const()[name = string("concat_261x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_55 = reshape(shape = concat_261x, x = hidden_states_347)[name = string("key_states_55")]; - tensor var_2489_shape = shape(x = var_2470)[name = string("op_2489_shape")]; + tensor key_states_55_cast_fp16 = reshape(shape = concat_261x, x = hidden_states_401_cast_fp16)[name = string("key_states_55_cast_fp16")]; + tensor var_2570_shape_cast_fp16 = shape(x = var_2551_cast_fp16)[name = string("op_2570_shape_cast_fp16")]; int32 gather_251 = const()[name = string("gather_251"), val = int32(1)]; int32 gather_252 = const()[name = string("gather_252"), val = int32(8)]; int32 gather_253_axis_0 = const()[name = string("gather_253_axis_0"), val = int32(0)]; int32 gather_253_batch_dims_0 = const()[name = string("gather_253_batch_dims_0"), val = int32(0)]; bool gather_253_validate_indices_0 = const()[name = string("gather_253_validate_indices_0"), val = bool(false)]; - string var_2489_shape_to_uint16_dtype_0 = const()[name = string("op_2489_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2570_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2570_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_253_to_uint16 = const()[name = string("select_253_to_uint16"), val = uint16(2)]; - tensor var_2489_shape_to_uint16 = cast(dtype = var_2489_shape_to_uint16_dtype_0, x = var_2489_shape)[name = string("cast_20")]; - uint16 gather_253_cast_uint16 = gather(axis = gather_253_axis_0, batch_dims = gather_253_batch_dims_0, indices = select_253_to_uint16, validate_indices = gather_253_validate_indices_0, x = var_2489_shape_to_uint16)[name = string("gather_253_cast_uint16")]; + tensor var_2570_shape_cast_fp16_to_uint16 = cast(dtype = var_2570_shape_cast_fp16_to_uint16_dtype_0, x = var_2570_shape_cast_fp16)[name = string("cast_20")]; + uint16 gather_253_cast_uint16 = gather(axis = gather_253_axis_0, batch_dims = gather_253_batch_dims_0, indices = select_253_to_uint16, validate_indices = gather_253_validate_indices_0, x = var_2570_shape_cast_fp16_to_uint16)[name = string("gather_253_cast_uint16")]; string gather_253_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_253_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_254 = const()[name = string("gather_254"), val = int32(64)]; - tensor var_2496_axes_0 = const()[name = string("op_2496_axes_0"), val = tensor([2])]; - tensor var_2496 = expand_dims(axes = var_2496_axes_0, x = var_2470)[name = string("op_2496")]; - tensor shape_282 = shape(x = var_2496)[name = string("shape_282")]; + tensor var_2577_axes_0 = const()[name = string("op_2577_axes_0"), val = tensor([2])]; + tensor var_2577_cast_fp16 = expand_dims(axes = var_2577_axes_0, x = var_2551_cast_fp16)[name = string("op_2577_cast_fp16")]; + tensor shape_282_cast_fp16 = shape(x = var_2577_cast_fp16)[name = string("shape_282_cast_fp16")]; int32 concat_262_axis_0 = const()[name = string("concat_262_axis_0"), val = int32(0)]; bool concat_262_interleave_0 = const()[name = string("concat_262_interleave_0"), val = bool(false)]; int32 gather_253_cast_uint16_to_int32 = cast(dtype = gather_253_cast_uint16_to_int32_dtype_0, x = gather_253_cast_uint16)[name = string("cast_19")]; - tensor concat_262 = concat(axis = concat_262_axis_0, interleave = concat_262_interleave_0, values = (gather_251, gather_252, var_60, gather_253_cast_uint16_to_int32, gather_254))[name = string("concat_262")]; - tensor real_div_27 = real_div(x = concat_262, y = shape_282)[name = string("real_div_27")]; - tensor hidden_states_351 = tile(reps = real_div_27, x = var_2496)[name = string("hidden_states_351")]; + tensor concat_262 = concat(axis = concat_262_axis_0, interleave = concat_262_interleave_0, values = (gather_251, gather_252, var_59, gather_253_cast_uint16_to_int32, gather_254))[name = string("concat_262")]; + tensor real_div_27 = real_div(x = concat_262, y = shape_282_cast_fp16)[name = string("real_div_27")]; + tensor hidden_states_405_cast_fp16 = tile(reps = real_div_27, x = var_2577_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; tensor concat_263x = const()[name = string("concat_263x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_55 = reshape(shape = concat_263x, x = hidden_states_351)[name = string("value_states_55")]; - tensor var_2506_shape = shape(x = key_states_55)[name = string("op_2506_shape")]; + tensor value_states_55_cast_fp16 = reshape(shape = concat_263x, x = hidden_states_405_cast_fp16)[name = string("value_states_55_cast_fp16")]; + tensor var_2587_shape_cast_fp16 = shape(x = key_states_55_cast_fp16)[name = string("op_2587_shape_cast_fp16")]; int32 gather_255_axis_0 = const()[name = string("gather_255_axis_0"), val = int32(0)]; int32 gather_255_batch_dims_0 = const()[name = string("gather_255_batch_dims_0"), val = int32(0)]; bool gather_255_validate_indices_0 = const()[name = string("gather_255_validate_indices_0"), val = bool(false)]; - string var_2506_shape_to_uint16_dtype_0 = const()[name = string("op_2506_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2587_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2587_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_255_to_uint16 = const()[name = string("select_255_to_uint16"), val = uint16(2)]; - tensor var_2506_shape_to_uint16 = cast(dtype = var_2506_shape_to_uint16_dtype_0, x = var_2506_shape)[name = string("cast_18")]; - uint16 gather_255_cast_uint16 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = select_255_to_uint16, validate_indices = gather_255_validate_indices_0, x = var_2506_shape_to_uint16)[name = string("gather_255_cast_uint16")]; + tensor var_2587_shape_cast_fp16_to_uint16 = cast(dtype = var_2587_shape_cast_fp16_to_uint16_dtype_0, x = var_2587_shape_cast_fp16)[name = string("cast_18")]; + uint16 gather_255_cast_uint16 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = select_255_to_uint16, validate_indices = gather_255_validate_indices_0, x = var_2587_shape_cast_fp16_to_uint16)[name = string("gather_255_cast_uint16")]; string gather_255_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_255_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_264_values0_0 = const()[name = string("concat_264_values0_0"), val = int32(1)]; int32 concat_264_values1_0 = const()[name = string("concat_264_values1_0"), val = int32(1)]; @@ -2944,98 +2920,107 @@ program(1.3) tensor causal_mask_29_begin_0 = const()[name = string("causal_mask_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_29_end_mask_0 = const()[name = string("causal_mask_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_29_cast_fp16 = slice_by_index(begin = causal_mask_29_begin_0, end = concat_264, end_mask = causal_mask_29_end_mask_0, x = causal_mask)[name = string("causal_mask_29_cast_fp16")]; - tensor attn_output_53_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_29_cast_fp16, key = key_states_55, query = query_states_55, value = value_states_55)[name = string("attn_output_53_cast_fp16")]; - tensor var_2512_perm_0 = const()[name = string("op_2512_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_53_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_29_cast_fp16, key = key_states_55_cast_fp16, query = query_states_55_cast_fp16, value = value_states_55_cast_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor var_2593_perm_0 = const()[name = string("op_2593_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_265_axis_0 = const()[name = string("concat_265_axis_0"), val = int32(0)]; bool concat_265_interleave_0 = const()[name = string("concat_265_interleave_0"), val = bool(false)]; int32 gather_239_cast_uint16_to_int32 = cast(dtype = gather_239_cast_uint16_to_int32_dtype_0, x = gather_239_cast_uint16)[name = string("cast_16")]; tensor concat_265 = concat(axis = concat_265_axis_0, interleave = concat_265_interleave_0, values = (gather_238, gather_239_cast_uint16_to_int32, var_48))[name = string("concat_265")]; - tensor var_2512 = transpose(perm = var_2512_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_8")]; - tensor input_105 = reshape(shape = concat_265, x = var_2512)[name = string("input_105")]; - tensor linear_94 = linear(bias = linear_0_bias_0, weight = model_model_layers_13_self_attn_o_proj_weight_quantized, x = input_105)[name = string("linear_94")]; - tensor hidden_states_355 = add(x = hidden_states_337, y = linear_94)[name = string("hidden_states_355")]; - fp16 var_55_promoted_27_to_fp16 = const()[name = string("op_55_promoted_27_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2521_cast_fp16 = pow(x = hidden_states_355, y = var_55_promoted_27_to_fp16)[name = string("op_2521_cast_fp16")]; + tensor var_2593_cast_fp16 = transpose(perm = var_2593_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_8")]; + tensor input_105_cast_fp16 = reshape(shape = concat_265, x = var_2593_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(596163520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598260736))))[name = string("model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_94_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = string("linear_94_cast_fp16")]; + tensor hidden_states_409_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = linear_94_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; + fp16 var_54_promoted_27_to_fp16 = const()[name = string("op_54_promoted_27_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2602_cast_fp16 = pow(x = hidden_states_409_cast_fp16, y = var_54_promoted_27_to_fp16)[name = string("op_2602_cast_fp16")]; tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; - tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_2521_cast_fp16)[name = string("variance_55_cast_fp16")]; - fp16 var_2524_to_fp16 = const()[name = string("op_2524_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2525_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2524_to_fp16)[name = string("op_2525_cast_fp16")]; - fp32 var_2526_epsilon_0 = const()[name = string("op_2526_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2526_cast_fp16 = rsqrt(epsilon = var_2526_epsilon_0, x = var_2525_cast_fp16)[name = string("op_2526_cast_fp16")]; - tensor hidden_states_359_cast_fp16 = mul(x = hidden_states_355, y = var_2526_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; - tensor input_107 = mul(x = model_model_layers_13_post_attention_layernorm_weight, y = hidden_states_359_cast_fp16)[name = string("input_107")]; - tensor linear_95 = linear(bias = linear_4_bias_0, weight = model_model_layers_13_mlp_gate_proj_weight_quantized, x = input_107)[name = string("linear_95")]; - tensor var_2535 = silu(x = linear_95)[name = string("op_2535")]; - tensor linear_96 = linear(bias = linear_4_bias_0, weight = model_model_layers_13_mlp_up_proj_weight_quantized, x = input_107)[name = string("linear_96")]; - tensor input_111 = mul(x = var_2535, y = linear_96)[name = string("input_111")]; - tensor linear_97 = linear(bias = linear_0_bias_0, weight = model_model_layers_13_mlp_down_proj_weight_quantized, x = input_111)[name = string("linear_97")]; - tensor hidden_states_363 = add(x = hidden_states_355, y = linear_97)[name = string("hidden_states_363")]; - fp16 var_55_promoted_28_to_fp16 = const()[name = string("op_55_promoted_28_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2548_cast_fp16 = pow(x = hidden_states_363, y = var_55_promoted_28_to_fp16)[name = string("op_2548_cast_fp16")]; + tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_2602_cast_fp16)[name = string("variance_55_cast_fp16")]; + fp16 var_2605_to_fp16 = const()[name = string("op_2605_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2606_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2605_to_fp16)[name = string("op_2606_cast_fp16")]; + fp32 var_2607_epsilon_0 = const()[name = string("op_2607_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2607_cast_fp16 = rsqrt(epsilon = var_2607_epsilon_0, x = var_2606_cast_fp16)[name = string("op_2607_cast_fp16")]; + tensor hidden_states_413_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = var_2607_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; + tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598522944)))]; + tensor input_107_cast_fp16 = mul(x = model_model_layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_413_cast_fp16)[name = string("input_107_cast_fp16")]; + tensor model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598527104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606915776))))[name = string("model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor var_2619_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2619_cast_fp16")]; + tensor model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(607964416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(616353088))))[name = string("model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor input_111_cast_fp16 = mul(x = var_2619_cast_fp16, y = linear_96_cast_fp16)[name = string("input_111_cast_fp16")]; + tensor model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(617401728))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625790400))))[name = string("model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_97_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor hidden_states_419_cast_fp16 = add(x = hidden_states_409_cast_fp16, y = linear_97_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; + fp16 var_54_promoted_28_to_fp16 = const()[name = string("op_54_promoted_28_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2632_cast_fp16 = pow(x = hidden_states_419_cast_fp16, y = var_54_promoted_28_to_fp16)[name = string("op_2632_cast_fp16")]; tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; - tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_2548_cast_fp16)[name = string("variance_57_cast_fp16")]; - fp16 var_2551_to_fp16 = const()[name = string("op_2551_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2552_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2551_to_fp16)[name = string("op_2552_cast_fp16")]; - fp32 var_2553_epsilon_0 = const()[name = string("op_2553_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2553_cast_fp16 = rsqrt(epsilon = var_2553_epsilon_0, x = var_2552_cast_fp16)[name = string("op_2553_cast_fp16")]; - tensor hidden_states_367_cast_fp16 = mul(x = hidden_states_363, y = var_2553_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; - tensor hidden_states_369 = mul(x = model_model_layers_14_input_layernorm_weight, y = hidden_states_367_cast_fp16)[name = string("hidden_states_369")]; - tensor var_2561_shape = shape(x = hidden_states_369)[name = string("op_2561_shape")]; + tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_2632_cast_fp16)[name = string("variance_57_cast_fp16")]; + fp16 var_2635_to_fp16 = const()[name = string("op_2635_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2636_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2635_to_fp16)[name = string("op_2636_cast_fp16")]; + fp32 var_2637_epsilon_0 = const()[name = string("op_2637_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2637_cast_fp16 = rsqrt(epsilon = var_2637_epsilon_0, x = var_2636_cast_fp16)[name = string("op_2637_cast_fp16")]; + tensor hidden_states_423_cast_fp16 = mul(x = hidden_states_419_cast_fp16, y = var_2637_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; + tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626839040)))]; + tensor hidden_states_427_cast_fp16 = mul(x = model_model_layers_14_input_layernorm_weight_to_fp16, y = hidden_states_423_cast_fp16)[name = string("hidden_states_427_cast_fp16")]; + tensor var_2648_shape_cast_fp16 = shape(x = hidden_states_427_cast_fp16)[name = string("op_2648_shape_cast_fp16")]; int32 gather_256 = const()[name = string("gather_256"), val = int32(1)]; int32 gather_257_axis_0 = const()[name = string("gather_257_axis_0"), val = int32(0)]; int32 gather_257_batch_dims_0 = const()[name = string("gather_257_batch_dims_0"), val = int32(0)]; bool gather_257_validate_indices_0 = const()[name = string("gather_257_validate_indices_0"), val = bool(false)]; - string var_2561_shape_to_uint16_dtype_0 = const()[name = string("op_2561_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2648_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2648_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_257_to_uint16 = const()[name = string("select_257_to_uint16"), val = uint16(1)]; - tensor var_2561_shape_to_uint16 = cast(dtype = var_2561_shape_to_uint16_dtype_0, x = var_2561_shape)[name = string("cast_15")]; - uint16 gather_257_cast_uint16 = gather(axis = gather_257_axis_0, batch_dims = gather_257_batch_dims_0, indices = select_257_to_uint16, validate_indices = gather_257_validate_indices_0, x = var_2561_shape_to_uint16)[name = string("gather_257_cast_uint16")]; + tensor var_2648_shape_cast_fp16_to_uint16 = cast(dtype = var_2648_shape_cast_fp16_to_uint16_dtype_0, x = var_2648_shape_cast_fp16)[name = string("cast_15")]; + uint16 gather_257_cast_uint16 = gather(axis = gather_257_axis_0, batch_dims = gather_257_batch_dims_0, indices = select_257_to_uint16, validate_indices = gather_257_validate_indices_0, x = var_2648_shape_cast_fp16_to_uint16)[name = string("gather_257_cast_uint16")]; string gather_257_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_257_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_98 = linear(bias = linear_0_bias_0, weight = model_model_layers_14_self_attn_q_proj_weight_quantized, x = hidden_states_369)[name = string("linear_98")]; - tensor linear_99 = linear(bias = linear_1_bias_0, weight = model_model_layers_14_self_attn_k_proj_weight_quantized, x = hidden_states_369)[name = string("linear_99")]; - tensor linear_100 = linear(bias = linear_1_bias_0, weight = model_model_layers_14_self_attn_v_proj_weight_quantized, x = hidden_states_369)[name = string("linear_100")]; + tensor model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(626843200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(628940416))))[name = string("model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629202624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629726976))))[name = string("model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(629792576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630316928))))[name = string("model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor concat_266x = const()[name = string("concat_266x"), val = tensor([1, -1, 32, 64])]; - tensor var_2570 = reshape(shape = concat_266x, x = linear_98)[name = string("op_2570")]; + tensor var_2657_cast_fp16 = reshape(shape = concat_266x, x = linear_98_cast_fp16)[name = string("op_2657_cast_fp16")]; tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_267x = const()[name = string("concat_267x"), val = tensor([1, -1, 8, 64])]; - tensor var_2573 = reshape(shape = concat_267x, x = linear_99)[name = string("op_2573")]; + tensor var_2660_cast_fp16 = reshape(shape = concat_267x, x = linear_99_cast_fp16)[name = string("op_2660_cast_fp16")]; tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_268x = const()[name = string("concat_268x"), val = tensor([1, -1, 8, 64])]; - tensor var_2576 = reshape(shape = concat_268x, x = linear_100)[name = string("op_2576")]; + tensor var_2663_cast_fp16 = reshape(shape = concat_268x, x = linear_100_cast_fp16)[name = string("op_2663_cast_fp16")]; tensor v_state_29_perm_0 = const()[name = string("v_state_29_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q_29 = transpose(perm = q_29_perm_0, x = var_2570)[name = string("transpose_7")]; - tensor var_2580 = mul(x = q_29, y = cos_7)[name = string("op_2580")]; + tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = var_2657_cast_fp16)[name = string("transpose_7")]; + tensor var_2667_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2667_cast_fp16")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_57 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29)[name = string("x1_57")]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_57 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29)[name = string("x2_57")]; - fp16 const_31_promoted = const()[name = string("const_31_promoted"), val = fp16(-0x1p+0)]; - tensor var_2591 = mul(x = x2_57, y = const_31_promoted)[name = string("op_2591")]; - bool var_2593_interleave_0 = const()[name = string("op_2593_interleave_0"), val = bool(false)]; - tensor var_2593 = concat(axis = var_48, interleave = var_2593_interleave_0, values = (var_2591, x1_57))[name = string("op_2593")]; - tensor var_2594 = mul(x = var_2593, y = sin_7)[name = string("op_2594")]; - tensor query_states_59 = add(x = var_2580, y = var_2594)[name = string("query_states_59")]; - tensor k_29 = transpose(perm = k_29_perm_0, x = var_2573)[name = string("transpose_6")]; - tensor var_2596 = mul(x = k_29, y = cos_7)[name = string("op_2596")]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2678_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2678_cast_fp16")]; + bool var_2680_interleave_0 = const()[name = string("op_2680_interleave_0"), val = bool(false)]; + tensor var_2680_cast_fp16 = concat(axis = var_48, interleave = var_2680_interleave_0, values = (var_2678_cast_fp16, x1_57_cast_fp16))[name = string("op_2680_cast_fp16")]; + tensor var_2681_cast_fp16 = mul(x = var_2680_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2681_cast_fp16")]; + tensor query_states_59_cast_fp16 = add(x = var_2667_cast_fp16, y = var_2681_cast_fp16)[name = string("query_states_59_cast_fp16")]; + tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = var_2660_cast_fp16)[name = string("transpose_6")]; + tensor var_2683_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2683_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_59 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29)[name = string("x1_59")]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_59 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29)[name = string("x2_59")]; - fp16 const_32_promoted = const()[name = string("const_32_promoted"), val = fp16(-0x1p+0)]; - tensor var_2607 = mul(x = x2_59, y = const_32_promoted)[name = string("op_2607")]; - bool var_2609_interleave_0 = const()[name = string("op_2609_interleave_0"), val = bool(false)]; - tensor var_2609 = concat(axis = var_48, interleave = var_2609_interleave_0, values = (var_2607, x1_59))[name = string("op_2609")]; - tensor var_2610 = mul(x = var_2609, y = sin_7)[name = string("op_2610")]; - tensor k_state_29 = add(x = var_2596, y = var_2610)[name = string("k_state_29")]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2694_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2694_cast_fp16")]; + bool var_2696_interleave_0 = const()[name = string("op_2696_interleave_0"), val = bool(false)]; + tensor var_2696_cast_fp16 = concat(axis = var_48, interleave = var_2696_interleave_0, values = (var_2694_cast_fp16, x1_59_cast_fp16))[name = string("op_2696_cast_fp16")]; + tensor var_2697_cast_fp16 = mul(x = var_2696_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2697_cast_fp16")]; + tensor k_state_29_cast_fp16 = add(x = var_2683_cast_fp16, y = var_2697_cast_fp16)[name = string("k_state_29_cast_fp16")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([0])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; @@ -3047,87 +3032,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_15 = slice_update(begin = concat_271, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_15")]; - write_state(data = key_cache_internal_tensor_assign_15, input = key_cache)[name = string("coreml_update_state_60_write_state")]; + tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_60")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state_29 = transpose(perm = v_state_29_perm_0, x = var_2576)[name = string("transpose_5")]; - tensor value_cache_internal_tensor_assign_15 = slice_update(begin = concat_271, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_15")]; - write_state(data = value_cache_internal_tensor_assign_15, input = value_cache)[name = string("coreml_update_state_61_write_state")]; + tensor v_state_29_cast_fp16 = transpose(perm = v_state_29_perm_0, x = var_2663_cast_fp16)[name = string("transpose_5")]; + tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_61")]; - tensor var_2633_begin_0 = const()[name = string("op_2633_begin_0"), val = tensor([14, 0, 0, 0, 0])]; - tensor var_2633_end_0 = const()[name = string("op_2633_end_0"), val = tensor([15, 1, 8, 2048, 64])]; - tensor var_2633_end_mask_0 = const()[name = string("op_2633_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2633_squeeze_mask_0 = const()[name = string("op_2633_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2633 = slice_by_index(begin = var_2633_begin_0, end = var_2633_end_0, end_mask = var_2633_end_mask_0, squeeze_mask = var_2633_squeeze_mask_0, x = coreml_update_state_60)[name = string("op_2633")]; - tensor var_2636_begin_0 = const()[name = string("op_2636_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2636_end_mask_0 = const()[name = string("op_2636_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2636 = slice_by_index(begin = var_2636_begin_0, end = concat_11, end_mask = var_2636_end_mask_0, x = var_2633)[name = string("op_2636")]; - tensor var_2638_begin_0 = const()[name = string("op_2638_begin_0"), val = tensor([14, 0, 0, 0, 0])]; - tensor var_2638_end_0 = const()[name = string("op_2638_end_0"), val = tensor([15, 1, 8, 2048, 64])]; - tensor var_2638_end_mask_0 = const()[name = string("op_2638_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2638_squeeze_mask_0 = const()[name = string("op_2638_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2638 = slice_by_index(begin = var_2638_begin_0, end = var_2638_end_0, end_mask = var_2638_end_mask_0, squeeze_mask = var_2638_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_2638")]; - tensor var_2641_begin_0 = const()[name = string("op_2641_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2641_end_mask_0 = const()[name = string("op_2641_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2641 = slice_by_index(begin = var_2641_begin_0, end = concat_11, end_mask = var_2641_end_mask_0, x = var_2638)[name = string("op_2641")]; - tensor var_2643_shape = shape(x = var_2636)[name = string("op_2643_shape")]; + tensor var_2720_begin_0 = const()[name = string("op_2720_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2720_end_0 = const()[name = string("op_2720_end_0"), val = tensor([15, 1, 8, 2048, 64])]; + tensor var_2720_end_mask_0 = const()[name = string("op_2720_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2720_squeeze_mask_0 = const()[name = string("op_2720_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2720_cast_fp16 = slice_by_index(begin = var_2720_begin_0, end = var_2720_end_0, end_mask = var_2720_end_mask_0, squeeze_mask = var_2720_squeeze_mask_0, x = coreml_update_state_60)[name = string("op_2720_cast_fp16")]; + tensor var_2723_begin_0 = const()[name = string("op_2723_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2723_end_mask_0 = const()[name = string("op_2723_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2723_cast_fp16 = slice_by_index(begin = var_2723_begin_0, end = concat_11, end_mask = var_2723_end_mask_0, x = var_2720_cast_fp16)[name = string("op_2723_cast_fp16")]; + tensor var_2725_begin_0 = const()[name = string("op_2725_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2725_end_0 = const()[name = string("op_2725_end_0"), val = tensor([15, 1, 8, 2048, 64])]; + tensor var_2725_end_mask_0 = const()[name = string("op_2725_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2725_squeeze_mask_0 = const()[name = string("op_2725_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2725_cast_fp16 = slice_by_index(begin = var_2725_begin_0, end = var_2725_end_0, end_mask = var_2725_end_mask_0, squeeze_mask = var_2725_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_2725_cast_fp16")]; + tensor var_2728_begin_0 = const()[name = string("op_2728_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2728_end_mask_0 = const()[name = string("op_2728_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2728_cast_fp16 = slice_by_index(begin = var_2728_begin_0, end = concat_11, end_mask = var_2728_end_mask_0, x = var_2725_cast_fp16)[name = string("op_2728_cast_fp16")]; + tensor var_2730_shape_cast_fp16 = shape(x = var_2723_cast_fp16)[name = string("op_2730_shape_cast_fp16")]; int32 gather_265 = const()[name = string("gather_265"), val = int32(1)]; int32 gather_266 = const()[name = string("gather_266"), val = int32(8)]; int32 gather_267_axis_0 = const()[name = string("gather_267_axis_0"), val = int32(0)]; int32 gather_267_batch_dims_0 = const()[name = string("gather_267_batch_dims_0"), val = int32(0)]; bool gather_267_validate_indices_0 = const()[name = string("gather_267_validate_indices_0"), val = bool(false)]; - string var_2643_shape_to_uint16_dtype_0 = const()[name = string("op_2643_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2730_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2730_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_267_to_uint16 = const()[name = string("select_267_to_uint16"), val = uint16(2)]; - tensor var_2643_shape_to_uint16 = cast(dtype = var_2643_shape_to_uint16_dtype_0, x = var_2643_shape)[name = string("cast_14")]; - uint16 gather_267_cast_uint16 = gather(axis = gather_267_axis_0, batch_dims = gather_267_batch_dims_0, indices = select_267_to_uint16, validate_indices = gather_267_validate_indices_0, x = var_2643_shape_to_uint16)[name = string("gather_267_cast_uint16")]; + tensor var_2730_shape_cast_fp16_to_uint16 = cast(dtype = var_2730_shape_cast_fp16_to_uint16_dtype_0, x = var_2730_shape_cast_fp16)[name = string("cast_14")]; + uint16 gather_267_cast_uint16 = gather(axis = gather_267_axis_0, batch_dims = gather_267_batch_dims_0, indices = select_267_to_uint16, validate_indices = gather_267_validate_indices_0, x = var_2730_shape_cast_fp16_to_uint16)[name = string("gather_267_cast_uint16")]; string gather_267_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_267_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_268 = const()[name = string("gather_268"), val = int32(64)]; - tensor var_2650_axes_0 = const()[name = string("op_2650_axes_0"), val = tensor([2])]; - tensor var_2650 = expand_dims(axes = var_2650_axes_0, x = var_2636)[name = string("op_2650")]; - tensor shape_297 = shape(x = var_2650)[name = string("shape_297")]; + tensor var_2737_axes_0 = const()[name = string("op_2737_axes_0"), val = tensor([2])]; + tensor var_2737_cast_fp16 = expand_dims(axes = var_2737_axes_0, x = var_2723_cast_fp16)[name = string("op_2737_cast_fp16")]; + tensor shape_297_cast_fp16 = shape(x = var_2737_cast_fp16)[name = string("shape_297_cast_fp16")]; int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; int32 gather_267_cast_uint16_to_int32 = cast(dtype = gather_267_cast_uint16_to_int32_dtype_0, x = gather_267_cast_uint16)[name = string("cast_13")]; - tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_265, gather_266, var_60, gather_267_cast_uint16_to_int32, gather_268))[name = string("concat_279")]; - tensor real_div_28 = real_div(x = concat_279, y = shape_297)[name = string("real_div_28")]; - tensor hidden_states_373 = tile(reps = real_div_28, x = var_2650)[name = string("hidden_states_373")]; + tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_265, gather_266, var_59, gather_267_cast_uint16_to_int32, gather_268))[name = string("concat_279")]; + tensor real_div_28 = real_div(x = concat_279, y = shape_297_cast_fp16)[name = string("real_div_28")]; + tensor hidden_states_431_cast_fp16 = tile(reps = real_div_28, x = var_2737_cast_fp16)[name = string("hidden_states_431_cast_fp16")]; tensor concat_280x = const()[name = string("concat_280x"), val = tensor([1, 32, -1, 64])]; - tensor key_states_59 = reshape(shape = concat_280x, x = hidden_states_373)[name = string("key_states_59")]; - tensor var_2660_shape = shape(x = var_2641)[name = string("op_2660_shape")]; + tensor key_states_59_cast_fp16 = reshape(shape = concat_280x, x = hidden_states_431_cast_fp16)[name = string("key_states_59_cast_fp16")]; + tensor var_2747_shape_cast_fp16 = shape(x = var_2728_cast_fp16)[name = string("op_2747_shape_cast_fp16")]; int32 gather_269 = const()[name = string("gather_269"), val = int32(1)]; int32 gather_270 = const()[name = string("gather_270"), val = int32(8)]; int32 gather_271_axis_0 = const()[name = string("gather_271_axis_0"), val = int32(0)]; int32 gather_271_batch_dims_0 = const()[name = string("gather_271_batch_dims_0"), val = int32(0)]; bool gather_271_validate_indices_0 = const()[name = string("gather_271_validate_indices_0"), val = bool(false)]; - string var_2660_shape_to_uint16_dtype_0 = const()[name = string("op_2660_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2747_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2747_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_271_to_uint16 = const()[name = string("select_271_to_uint16"), val = uint16(2)]; - tensor var_2660_shape_to_uint16 = cast(dtype = var_2660_shape_to_uint16_dtype_0, x = var_2660_shape)[name = string("cast_12")]; - uint16 gather_271_cast_uint16 = gather(axis = gather_271_axis_0, batch_dims = gather_271_batch_dims_0, indices = select_271_to_uint16, validate_indices = gather_271_validate_indices_0, x = var_2660_shape_to_uint16)[name = string("gather_271_cast_uint16")]; + tensor var_2747_shape_cast_fp16_to_uint16 = cast(dtype = var_2747_shape_cast_fp16_to_uint16_dtype_0, x = var_2747_shape_cast_fp16)[name = string("cast_12")]; + uint16 gather_271_cast_uint16 = gather(axis = gather_271_axis_0, batch_dims = gather_271_batch_dims_0, indices = select_271_to_uint16, validate_indices = gather_271_validate_indices_0, x = var_2747_shape_cast_fp16_to_uint16)[name = string("gather_271_cast_uint16")]; string gather_271_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_271_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_272 = const()[name = string("gather_272"), val = int32(64)]; - tensor var_2667_axes_0 = const()[name = string("op_2667_axes_0"), val = tensor([2])]; - tensor var_2667 = expand_dims(axes = var_2667_axes_0, x = var_2641)[name = string("op_2667")]; - tensor shape_302 = shape(x = var_2667)[name = string("shape_302")]; + tensor var_2754_axes_0 = const()[name = string("op_2754_axes_0"), val = tensor([2])]; + tensor var_2754_cast_fp16 = expand_dims(axes = var_2754_axes_0, x = var_2728_cast_fp16)[name = string("op_2754_cast_fp16")]; + tensor shape_302_cast_fp16 = shape(x = var_2754_cast_fp16)[name = string("shape_302_cast_fp16")]; int32 concat_281_axis_0 = const()[name = string("concat_281_axis_0"), val = int32(0)]; bool concat_281_interleave_0 = const()[name = string("concat_281_interleave_0"), val = bool(false)]; int32 gather_271_cast_uint16_to_int32 = cast(dtype = gather_271_cast_uint16_to_int32_dtype_0, x = gather_271_cast_uint16)[name = string("cast_11")]; - tensor concat_281 = concat(axis = concat_281_axis_0, interleave = concat_281_interleave_0, values = (gather_269, gather_270, var_60, gather_271_cast_uint16_to_int32, gather_272))[name = string("concat_281")]; - tensor real_div_29 = real_div(x = concat_281, y = shape_302)[name = string("real_div_29")]; - tensor hidden_states_377 = tile(reps = real_div_29, x = var_2667)[name = string("hidden_states_377")]; + tensor concat_281 = concat(axis = concat_281_axis_0, interleave = concat_281_interleave_0, values = (gather_269, gather_270, var_59, gather_271_cast_uint16_to_int32, gather_272))[name = string("concat_281")]; + tensor real_div_29 = real_div(x = concat_281, y = shape_302_cast_fp16)[name = string("real_div_29")]; + tensor hidden_states_435_cast_fp16 = tile(reps = real_div_29, x = var_2754_cast_fp16)[name = string("hidden_states_435_cast_fp16")]; tensor concat_282x = const()[name = string("concat_282x"), val = tensor([1, 32, -1, 64])]; - tensor value_states_59 = reshape(shape = concat_282x, x = hidden_states_377)[name = string("value_states_59")]; - tensor var_2677_shape = shape(x = key_states_59)[name = string("op_2677_shape")]; + tensor value_states_59_cast_fp16 = reshape(shape = concat_282x, x = hidden_states_435_cast_fp16)[name = string("value_states_59_cast_fp16")]; + tensor var_2764_shape_cast_fp16 = shape(x = key_states_59_cast_fp16)[name = string("op_2764_shape_cast_fp16")]; int32 gather_273_axis_0 = const()[name = string("gather_273_axis_0"), val = int32(0)]; int32 gather_273_batch_dims_0 = const()[name = string("gather_273_batch_dims_0"), val = int32(0)]; bool gather_273_validate_indices_0 = const()[name = string("gather_273_validate_indices_0"), val = bool(false)]; - string var_2677_shape_to_uint16_dtype_0 = const()[name = string("op_2677_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2764_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2764_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_273_to_uint16 = const()[name = string("select_273_to_uint16"), val = uint16(2)]; - tensor var_2677_shape_to_uint16 = cast(dtype = var_2677_shape_to_uint16_dtype_0, x = var_2677_shape)[name = string("cast_10")]; - uint16 gather_273_cast_uint16 = gather(axis = gather_273_axis_0, batch_dims = gather_273_batch_dims_0, indices = select_273_to_uint16, validate_indices = gather_273_validate_indices_0, x = var_2677_shape_to_uint16)[name = string("gather_273_cast_uint16")]; + tensor var_2764_shape_cast_fp16_to_uint16 = cast(dtype = var_2764_shape_cast_fp16_to_uint16_dtype_0, x = var_2764_shape_cast_fp16)[name = string("cast_10")]; + uint16 gather_273_cast_uint16 = gather(axis = gather_273_axis_0, batch_dims = gather_273_batch_dims_0, indices = select_273_to_uint16, validate_indices = gather_273_validate_indices_0, x = var_2764_shape_cast_fp16_to_uint16)[name = string("gather_273_cast_uint16")]; string gather_273_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_273_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_283_values0_0 = const()[name = string("concat_283_values0_0"), val = int32(1)]; int32 concat_283_values1_0 = const()[name = string("concat_283_values1_0"), val = int32(1)]; @@ -3139,98 +3124,107 @@ program(1.3) tensor causal_mask_31_begin_0 = const()[name = string("causal_mask_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_31_end_mask_0 = const()[name = string("causal_mask_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_31_cast_fp16 = slice_by_index(begin = causal_mask_31_begin_0, end = concat_283, end_mask = causal_mask_31_end_mask_0, x = causal_mask)[name = string("causal_mask_31_cast_fp16")]; - tensor attn_output_57_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_31_cast_fp16, key = key_states_59, query = query_states_59, value = value_states_59)[name = string("attn_output_57_cast_fp16")]; - tensor var_2683_perm_0 = const()[name = string("op_2683_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_57_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_31_cast_fp16, key = key_states_59_cast_fp16, query = query_states_59_cast_fp16, value = value_states_59_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_2770_perm_0 = const()[name = string("op_2770_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_284_axis_0 = const()[name = string("concat_284_axis_0"), val = int32(0)]; bool concat_284_interleave_0 = const()[name = string("concat_284_interleave_0"), val = bool(false)]; int32 gather_257_cast_uint16_to_int32 = cast(dtype = gather_257_cast_uint16_to_int32_dtype_0, x = gather_257_cast_uint16)[name = string("cast_8")]; tensor concat_284 = concat(axis = concat_284_axis_0, interleave = concat_284_interleave_0, values = (gather_256, gather_257_cast_uint16_to_int32, var_48))[name = string("concat_284")]; - tensor var_2683 = transpose(perm = var_2683_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_4")]; - tensor input_113 = reshape(shape = concat_284, x = var_2683)[name = string("input_113")]; - tensor linear_101 = linear(bias = linear_0_bias_0, weight = model_model_layers_14_self_attn_o_proj_weight_quantized, x = input_113)[name = string("linear_101")]; - tensor hidden_states_381 = add(x = hidden_states_363, y = linear_101)[name = string("hidden_states_381")]; - fp16 var_55_promoted_29_to_fp16 = const()[name = string("op_55_promoted_29_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2692_cast_fp16 = pow(x = hidden_states_381, y = var_55_promoted_29_to_fp16)[name = string("op_2692_cast_fp16")]; + tensor var_2770_cast_fp16 = transpose(perm = var_2770_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_4")]; + tensor input_113_cast_fp16 = reshape(shape = concat_284, x = var_2770_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630382528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632479744))))[name = string("model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_101_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor hidden_states_439_cast_fp16 = add(x = hidden_states_419_cast_fp16, y = linear_101_cast_fp16)[name = string("hidden_states_439_cast_fp16")]; + fp16 var_54_promoted_29_to_fp16 = const()[name = string("op_54_promoted_29_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2779_cast_fp16 = pow(x = hidden_states_439_cast_fp16, y = var_54_promoted_29_to_fp16)[name = string("op_2779_cast_fp16")]; tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; - tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_2692_cast_fp16)[name = string("variance_59_cast_fp16")]; - fp16 var_2695_to_fp16 = const()[name = string("op_2695_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2696_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2695_to_fp16)[name = string("op_2696_cast_fp16")]; - fp32 var_2697_epsilon_0 = const()[name = string("op_2697_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2697_cast_fp16 = rsqrt(epsilon = var_2697_epsilon_0, x = var_2696_cast_fp16)[name = string("op_2697_cast_fp16")]; - tensor hidden_states_385_cast_fp16 = mul(x = hidden_states_381, y = var_2697_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; - tensor input_115 = mul(x = model_model_layers_14_post_attention_layernorm_weight, y = hidden_states_385_cast_fp16)[name = string("input_115")]; - tensor linear_102 = linear(bias = linear_4_bias_0, weight = model_model_layers_14_mlp_gate_proj_weight_quantized, x = input_115)[name = string("linear_102")]; - tensor var_2706 = silu(x = linear_102)[name = string("op_2706")]; - tensor linear_103 = linear(bias = linear_4_bias_0, weight = model_model_layers_14_mlp_up_proj_weight_quantized, x = input_115)[name = string("linear_103")]; - tensor input_119 = mul(x = var_2706, y = linear_103)[name = string("input_119")]; - tensor linear_104 = linear(bias = linear_0_bias_0, weight = model_model_layers_14_mlp_down_proj_weight_quantized, x = input_119)[name = string("linear_104")]; - tensor hidden_states_389 = add(x = hidden_states_381, y = linear_104)[name = string("hidden_states_389")]; - fp16 var_55_promoted_30_to_fp16 = const()[name = string("op_55_promoted_30_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2719_cast_fp16 = pow(x = hidden_states_389, y = var_55_promoted_30_to_fp16)[name = string("op_2719_cast_fp16")]; + tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_2779_cast_fp16)[name = string("variance_59_cast_fp16")]; + fp16 var_2782_to_fp16 = const()[name = string("op_2782_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2783_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2782_to_fp16)[name = string("op_2783_cast_fp16")]; + fp32 var_2784_epsilon_0 = const()[name = string("op_2784_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2784_cast_fp16 = rsqrt(epsilon = var_2784_epsilon_0, x = var_2783_cast_fp16)[name = string("op_2784_cast_fp16")]; + tensor hidden_states_443_cast_fp16 = mul(x = hidden_states_439_cast_fp16, y = var_2784_cast_fp16)[name = string("hidden_states_443_cast_fp16")]; + tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632741952)))]; + tensor input_115_cast_fp16 = mul(x = model_model_layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_443_cast_fp16)[name = string("input_115_cast_fp16")]; + tensor model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632746112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(641134784))))[name = string("model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_102_cast_fp16")]; + tensor var_2796_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_2796_cast_fp16")]; + tensor model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(642183424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650572096))))[name = string("model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor input_119_cast_fp16 = mul(x = var_2796_cast_fp16, y = linear_103_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(651620736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(660009408))))[name = string("model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_104_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor hidden_states_449_cast_fp16 = add(x = hidden_states_439_cast_fp16, y = linear_104_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; + fp16 var_54_promoted_30_to_fp16 = const()[name = string("op_54_promoted_30_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2809_cast_fp16 = pow(x = hidden_states_449_cast_fp16, y = var_54_promoted_30_to_fp16)[name = string("op_2809_cast_fp16")]; tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; - tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_2719_cast_fp16)[name = string("variance_61_cast_fp16")]; - fp16 var_2722_to_fp16 = const()[name = string("op_2722_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2723_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2722_to_fp16)[name = string("op_2723_cast_fp16")]; - fp32 var_2724_epsilon_0 = const()[name = string("op_2724_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2724_cast_fp16 = rsqrt(epsilon = var_2724_epsilon_0, x = var_2723_cast_fp16)[name = string("op_2724_cast_fp16")]; - tensor hidden_states_393_cast_fp16 = mul(x = hidden_states_389, y = var_2724_cast_fp16)[name = string("hidden_states_393_cast_fp16")]; - tensor hidden_states_395 = mul(x = model_model_layers_15_input_layernorm_weight, y = hidden_states_393_cast_fp16)[name = string("hidden_states_395")]; - tensor var_2732_shape = shape(x = hidden_states_395)[name = string("op_2732_shape")]; + tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_2809_cast_fp16)[name = string("variance_61_cast_fp16")]; + fp16 var_2812_to_fp16 = const()[name = string("op_2812_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2813_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2812_to_fp16)[name = string("op_2813_cast_fp16")]; + fp32 var_2814_epsilon_0 = const()[name = string("op_2814_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2814_cast_fp16 = rsqrt(epsilon = var_2814_epsilon_0, x = var_2813_cast_fp16)[name = string("op_2814_cast_fp16")]; + tensor hidden_states_453_cast_fp16 = mul(x = hidden_states_449_cast_fp16, y = var_2814_cast_fp16)[name = string("hidden_states_453_cast_fp16")]; + tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(661058048)))]; + tensor hidden_states_457_cast_fp16 = mul(x = model_model_layers_15_input_layernorm_weight_to_fp16, y = hidden_states_453_cast_fp16)[name = string("hidden_states_457_cast_fp16")]; + tensor var_2825_shape_cast_fp16 = shape(x = hidden_states_457_cast_fp16)[name = string("op_2825_shape_cast_fp16")]; int32 gather_274 = const()[name = string("gather_274"), val = int32(1)]; int32 gather_275_axis_0 = const()[name = string("gather_275_axis_0"), val = int32(0)]; int32 gather_275_batch_dims_0 = const()[name = string("gather_275_batch_dims_0"), val = int32(0)]; bool gather_275_validate_indices_0 = const()[name = string("gather_275_validate_indices_0"), val = bool(false)]; - string var_2732_shape_to_uint16_dtype_0 = const()[name = string("op_2732_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2825_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2825_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_275_to_uint16 = const()[name = string("select_275_to_uint16"), val = uint16(1)]; - tensor var_2732_shape_to_uint16 = cast(dtype = var_2732_shape_to_uint16_dtype_0, x = var_2732_shape)[name = string("cast_7")]; - uint16 gather_275_cast_uint16 = gather(axis = gather_275_axis_0, batch_dims = gather_275_batch_dims_0, indices = select_275_to_uint16, validate_indices = gather_275_validate_indices_0, x = var_2732_shape_to_uint16)[name = string("gather_275_cast_uint16")]; + tensor var_2825_shape_cast_fp16_to_uint16 = cast(dtype = var_2825_shape_cast_fp16_to_uint16_dtype_0, x = var_2825_shape_cast_fp16)[name = string("cast_7")]; + uint16 gather_275_cast_uint16 = gather(axis = gather_275_axis_0, batch_dims = gather_275_batch_dims_0, indices = select_275_to_uint16, validate_indices = gather_275_validate_indices_0, x = var_2825_shape_cast_fp16_to_uint16)[name = string("gather_275_cast_uint16")]; string gather_275_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_275_cast_uint16_to_int32_dtype_0"), val = string("int32")]; - tensor linear_105 = linear(bias = linear_0_bias_0, weight = model_model_layers_15_self_attn_q_proj_weight_quantized, x = hidden_states_395)[name = string("linear_105")]; - tensor linear_106 = linear(bias = linear_1_bias_0, weight = model_model_layers_15_self_attn_k_proj_weight_quantized, x = hidden_states_395)[name = string("linear_106")]; - tensor linear_107 = linear(bias = linear_1_bias_0, weight = model_model_layers_15_self_attn_v_proj_weight_quantized, x = hidden_states_395)[name = string("linear_107")]; + tensor model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(661062208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663159424))))[name = string("model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663421632))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663945984))))[name = string("model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_106_cast_fp16")]; + tensor model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(664011584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(664535936))))[name = string("model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor concat_285x = const()[name = string("concat_285x"), val = tensor([1, -1, 32, 64])]; - tensor var_2741 = reshape(shape = concat_285x, x = linear_105)[name = string("op_2741")]; + tensor var_2834_cast_fp16 = reshape(shape = concat_285x, x = linear_105_cast_fp16)[name = string("op_2834_cast_fp16")]; tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 8, 64])]; - tensor var_2744 = reshape(shape = concat_286x, x = linear_106)[name = string("op_2744")]; + tensor var_2837_cast_fp16 = reshape(shape = concat_286x, x = linear_106_cast_fp16)[name = string("op_2837_cast_fp16")]; tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 8, 64])]; - tensor var_2747 = reshape(shape = concat_287x, x = linear_107)[name = string("op_2747")]; + tensor var_2840_cast_fp16 = reshape(shape = concat_287x, x = linear_107_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor v_state_perm_0 = const()[name = string("v_state_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor q = transpose(perm = q_perm_0, x = var_2741)[name = string("transpose_3")]; - tensor var_2751 = mul(x = q, y = cos_7)[name = string("op_2751")]; + tensor q_cast_fp16 = transpose(perm = q_perm_0, x = var_2834_cast_fp16)[name = string("transpose_3")]; + tensor var_2844_cast_fp16 = mul(x = q_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 32, 0, 32])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1_61 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q)[name = string("x1_61")]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_cast_fp16)[name = string("x1_61_cast_fp16")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 32, 0, 64])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2_61 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q)[name = string("x2_61")]; - fp16 const_33_promoted = const()[name = string("const_33_promoted"), val = fp16(-0x1p+0)]; - tensor var_2762 = mul(x = x2_61, y = const_33_promoted)[name = string("op_2762")]; - bool var_2764_interleave_0 = const()[name = string("op_2764_interleave_0"), val = bool(false)]; - tensor var_2764 = concat(axis = var_48, interleave = var_2764_interleave_0, values = (var_2762, x1_61))[name = string("op_2764")]; - tensor var_2765 = mul(x = var_2764, y = sin_7)[name = string("op_2765")]; - tensor query_states = add(x = var_2751, y = var_2765)[name = string("query_states")]; - tensor k = transpose(perm = k_perm_0, x = var_2744)[name = string("transpose_2")]; - tensor var_2767 = mul(x = k, y = cos_7)[name = string("op_2767")]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2855_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2855_cast_fp16")]; + bool var_2857_interleave_0 = const()[name = string("op_2857_interleave_0"), val = bool(false)]; + tensor var_2857_cast_fp16 = concat(axis = var_48, interleave = var_2857_interleave_0, values = (var_2855_cast_fp16, x1_61_cast_fp16))[name = string("op_2857_cast_fp16")]; + tensor var_2858_cast_fp16 = mul(x = var_2857_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2858_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_2844_cast_fp16, y = var_2858_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor k_cast_fp16 = transpose(perm = k_perm_0, x = var_2837_cast_fp16)[name = string("transpose_2")]; + tensor var_2860_cast_fp16 = mul(x = k_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2860_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 0, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; - tensor x1 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k)[name = string("x1")]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 0, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; - tensor x2 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k)[name = string("x2")]; - fp16 const_34_promoted = const()[name = string("const_34_promoted"), val = fp16(-0x1p+0)]; - tensor var_2778 = mul(x = x2, y = const_34_promoted)[name = string("op_2778")]; - bool var_2780_interleave_0 = const()[name = string("op_2780_interleave_0"), val = bool(false)]; - tensor var_2780 = concat(axis = var_48, interleave = var_2780_interleave_0, values = (var_2778, x1))[name = string("op_2780")]; - tensor var_2781 = mul(x = var_2780, y = sin_7)[name = string("op_2781")]; - tensor k_state = add(x = var_2767, y = var_2781)[name = string("k_state")]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2871_cast_fp16 = mul(x = x2_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2871_cast_fp16")]; + bool var_2873_interleave_0 = const()[name = string("op_2873_interleave_0"), val = bool(false)]; + tensor var_2873_cast_fp16 = concat(axis = var_48, interleave = var_2873_interleave_0, values = (var_2871_cast_fp16, x1_cast_fp16))[name = string("op_2873_cast_fp16")]; + tensor var_2874_cast_fp16 = mul(x = var_2873_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2874_cast_fp16")]; + tensor k_state_cast_fp16 = add(x = var_2860_cast_fp16, y = var_2874_cast_fp16)[name = string("k_state_cast_fp16")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([0])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; @@ -3242,87 +3236,87 @@ program(1.3) tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor key_cache_internal_tensor_assign_16 = slice_update(begin = concat_290, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_16")]; - write_state(data = key_cache_internal_tensor_assign_16, input = key_cache)[name = string("coreml_update_state_62_write_state")]; + tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_62")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor v_state = transpose(perm = v_state_perm_0, x = var_2747)[name = string("transpose_1")]; - tensor value_cache_internal_tensor_assign_16 = slice_update(begin = concat_290, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_16")]; - write_state(data = value_cache_internal_tensor_assign_16, input = value_cache)[name = string("coreml_update_state_63_write_state")]; + tensor v_state_cast_fp16 = transpose(perm = v_state_perm_0, x = var_2840_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_63")]; - tensor var_2804_begin_0 = const()[name = string("op_2804_begin_0"), val = tensor([15, 0, 0, 0, 0])]; - tensor var_2804_end_0 = const()[name = string("op_2804_end_0"), val = tensor([16, 1, 8, 2048, 64])]; - tensor var_2804_end_mask_0 = const()[name = string("op_2804_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2804_squeeze_mask_0 = const()[name = string("op_2804_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2804 = slice_by_index(begin = var_2804_begin_0, end = var_2804_end_0, end_mask = var_2804_end_mask_0, squeeze_mask = var_2804_squeeze_mask_0, x = coreml_update_state_62)[name = string("op_2804")]; - tensor var_2807_begin_0 = const()[name = string("op_2807_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2807_end_mask_0 = const()[name = string("op_2807_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2807 = slice_by_index(begin = var_2807_begin_0, end = concat_11, end_mask = var_2807_end_mask_0, x = var_2804)[name = string("op_2807")]; - tensor var_2809_begin_0 = const()[name = string("op_2809_begin_0"), val = tensor([15, 0, 0, 0, 0])]; - tensor var_2809_end_0 = const()[name = string("op_2809_end_0"), val = tensor([16, 1, 8, 2048, 64])]; - tensor var_2809_end_mask_0 = const()[name = string("op_2809_end_mask_0"), val = tensor([false, true, true, true, true])]; - tensor var_2809_squeeze_mask_0 = const()[name = string("op_2809_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; - tensor var_2809 = slice_by_index(begin = var_2809_begin_0, end = var_2809_end_0, end_mask = var_2809_end_mask_0, squeeze_mask = var_2809_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_2809")]; - tensor var_2812_begin_0 = const()[name = string("op_2812_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2812_end_mask_0 = const()[name = string("op_2812_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_2812 = slice_by_index(begin = var_2812_begin_0, end = concat_11, end_mask = var_2812_end_mask_0, x = var_2809)[name = string("op_2812")]; - tensor var_2814_shape = shape(x = var_2807)[name = string("op_2814_shape")]; + tensor var_2897_begin_0 = const()[name = string("op_2897_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2897_end_0 = const()[name = string("op_2897_end_0"), val = tensor([16, 1, 8, 2048, 64])]; + tensor var_2897_end_mask_0 = const()[name = string("op_2897_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2897_squeeze_mask_0 = const()[name = string("op_2897_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2897_cast_fp16 = slice_by_index(begin = var_2897_begin_0, end = var_2897_end_0, end_mask = var_2897_end_mask_0, squeeze_mask = var_2897_squeeze_mask_0, x = coreml_update_state_62)[name = string("op_2897_cast_fp16")]; + tensor var_2900_begin_0 = const()[name = string("op_2900_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2900_end_mask_0 = const()[name = string("op_2900_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2900_cast_fp16 = slice_by_index(begin = var_2900_begin_0, end = concat_11, end_mask = var_2900_end_mask_0, x = var_2897_cast_fp16)[name = string("op_2900_cast_fp16")]; + tensor var_2902_begin_0 = const()[name = string("op_2902_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2902_end_0 = const()[name = string("op_2902_end_0"), val = tensor([16, 1, 8, 2048, 64])]; + tensor var_2902_end_mask_0 = const()[name = string("op_2902_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2902_squeeze_mask_0 = const()[name = string("op_2902_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2902_cast_fp16 = slice_by_index(begin = var_2902_begin_0, end = var_2902_end_0, end_mask = var_2902_end_mask_0, squeeze_mask = var_2902_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_2902_cast_fp16")]; + tensor var_2905_begin_0 = const()[name = string("op_2905_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2905_end_mask_0 = const()[name = string("op_2905_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2905_cast_fp16 = slice_by_index(begin = var_2905_begin_0, end = concat_11, end_mask = var_2905_end_mask_0, x = var_2902_cast_fp16)[name = string("op_2905_cast_fp16")]; + tensor var_2907_shape_cast_fp16 = shape(x = var_2900_cast_fp16)[name = string("op_2907_shape_cast_fp16")]; int32 gather_283 = const()[name = string("gather_283"), val = int32(1)]; int32 gather_284 = const()[name = string("gather_284"), val = int32(8)]; int32 gather_285_axis_0 = const()[name = string("gather_285_axis_0"), val = int32(0)]; int32 gather_285_batch_dims_0 = const()[name = string("gather_285_batch_dims_0"), val = int32(0)]; bool gather_285_validate_indices_0 = const()[name = string("gather_285_validate_indices_0"), val = bool(false)]; - string var_2814_shape_to_uint16_dtype_0 = const()[name = string("op_2814_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2907_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2907_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_285_to_uint16 = const()[name = string("select_285_to_uint16"), val = uint16(2)]; - tensor var_2814_shape_to_uint16 = cast(dtype = var_2814_shape_to_uint16_dtype_0, x = var_2814_shape)[name = string("cast_6")]; - uint16 gather_285_cast_uint16 = gather(axis = gather_285_axis_0, batch_dims = gather_285_batch_dims_0, indices = select_285_to_uint16, validate_indices = gather_285_validate_indices_0, x = var_2814_shape_to_uint16)[name = string("gather_285_cast_uint16")]; + tensor var_2907_shape_cast_fp16_to_uint16 = cast(dtype = var_2907_shape_cast_fp16_to_uint16_dtype_0, x = var_2907_shape_cast_fp16)[name = string("cast_6")]; + uint16 gather_285_cast_uint16 = gather(axis = gather_285_axis_0, batch_dims = gather_285_batch_dims_0, indices = select_285_to_uint16, validate_indices = gather_285_validate_indices_0, x = var_2907_shape_cast_fp16_to_uint16)[name = string("gather_285_cast_uint16")]; string gather_285_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_285_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_286 = const()[name = string("gather_286"), val = int32(64)]; - tensor var_2821_axes_0 = const()[name = string("op_2821_axes_0"), val = tensor([2])]; - tensor var_2821 = expand_dims(axes = var_2821_axes_0, x = var_2807)[name = string("op_2821")]; - tensor shape_317 = shape(x = var_2821)[name = string("shape_317")]; + tensor var_2914_axes_0 = const()[name = string("op_2914_axes_0"), val = tensor([2])]; + tensor var_2914_cast_fp16 = expand_dims(axes = var_2914_axes_0, x = var_2900_cast_fp16)[name = string("op_2914_cast_fp16")]; + tensor shape_317_cast_fp16 = shape(x = var_2914_cast_fp16)[name = string("shape_317_cast_fp16")]; int32 concat_298_axis_0 = const()[name = string("concat_298_axis_0"), val = int32(0)]; bool concat_298_interleave_0 = const()[name = string("concat_298_interleave_0"), val = bool(false)]; int32 gather_285_cast_uint16_to_int32 = cast(dtype = gather_285_cast_uint16_to_int32_dtype_0, x = gather_285_cast_uint16)[name = string("cast_5")]; - tensor concat_298 = concat(axis = concat_298_axis_0, interleave = concat_298_interleave_0, values = (gather_283, gather_284, var_60, gather_285_cast_uint16_to_int32, gather_286))[name = string("concat_298")]; - tensor real_div_30 = real_div(x = concat_298, y = shape_317)[name = string("real_div_30")]; - tensor hidden_states_399 = tile(reps = real_div_30, x = var_2821)[name = string("hidden_states_399")]; + tensor concat_298 = concat(axis = concat_298_axis_0, interleave = concat_298_interleave_0, values = (gather_283, gather_284, var_59, gather_285_cast_uint16_to_int32, gather_286))[name = string("concat_298")]; + tensor real_div_30 = real_div(x = concat_298, y = shape_317_cast_fp16)[name = string("real_div_30")]; + tensor hidden_states_461_cast_fp16 = tile(reps = real_div_30, x = var_2914_cast_fp16)[name = string("hidden_states_461_cast_fp16")]; tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, 32, -1, 64])]; - tensor key_states = reshape(shape = concat_299x, x = hidden_states_399)[name = string("key_states")]; - tensor var_2831_shape = shape(x = var_2812)[name = string("op_2831_shape")]; + tensor key_states_cast_fp16 = reshape(shape = concat_299x, x = hidden_states_461_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor var_2924_shape_cast_fp16 = shape(x = var_2905_cast_fp16)[name = string("op_2924_shape_cast_fp16")]; int32 gather_287 = const()[name = string("gather_287"), val = int32(1)]; int32 gather_288 = const()[name = string("gather_288"), val = int32(8)]; int32 gather_289_axis_0 = const()[name = string("gather_289_axis_0"), val = int32(0)]; int32 gather_289_batch_dims_0 = const()[name = string("gather_289_batch_dims_0"), val = int32(0)]; bool gather_289_validate_indices_0 = const()[name = string("gather_289_validate_indices_0"), val = bool(false)]; - string var_2831_shape_to_uint16_dtype_0 = const()[name = string("op_2831_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2924_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2924_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_289_to_uint16 = const()[name = string("select_289_to_uint16"), val = uint16(2)]; - tensor var_2831_shape_to_uint16 = cast(dtype = var_2831_shape_to_uint16_dtype_0, x = var_2831_shape)[name = string("cast_4")]; - uint16 gather_289_cast_uint16 = gather(axis = gather_289_axis_0, batch_dims = gather_289_batch_dims_0, indices = select_289_to_uint16, validate_indices = gather_289_validate_indices_0, x = var_2831_shape_to_uint16)[name = string("gather_289_cast_uint16")]; + tensor var_2924_shape_cast_fp16_to_uint16 = cast(dtype = var_2924_shape_cast_fp16_to_uint16_dtype_0, x = var_2924_shape_cast_fp16)[name = string("cast_4")]; + uint16 gather_289_cast_uint16 = gather(axis = gather_289_axis_0, batch_dims = gather_289_batch_dims_0, indices = select_289_to_uint16, validate_indices = gather_289_validate_indices_0, x = var_2924_shape_cast_fp16_to_uint16)[name = string("gather_289_cast_uint16")]; string gather_289_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_289_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_290 = const()[name = string("gather_290"), val = int32(64)]; - tensor var_2838_axes_0 = const()[name = string("op_2838_axes_0"), val = tensor([2])]; - tensor var_2838 = expand_dims(axes = var_2838_axes_0, x = var_2812)[name = string("op_2838")]; - tensor shape_322 = shape(x = var_2838)[name = string("shape_322")]; + tensor var_2931_axes_0 = const()[name = string("op_2931_axes_0"), val = tensor([2])]; + tensor var_2931_cast_fp16 = expand_dims(axes = var_2931_axes_0, x = var_2905_cast_fp16)[name = string("op_2931_cast_fp16")]; + tensor shape_322_cast_fp16 = shape(x = var_2931_cast_fp16)[name = string("shape_322_cast_fp16")]; int32 concat_300_axis_0 = const()[name = string("concat_300_axis_0"), val = int32(0)]; bool concat_300_interleave_0 = const()[name = string("concat_300_interleave_0"), val = bool(false)]; int32 gather_289_cast_uint16_to_int32 = cast(dtype = gather_289_cast_uint16_to_int32_dtype_0, x = gather_289_cast_uint16)[name = string("cast_3")]; - tensor concat_300 = concat(axis = concat_300_axis_0, interleave = concat_300_interleave_0, values = (gather_287, gather_288, var_60, gather_289_cast_uint16_to_int32, gather_290))[name = string("concat_300")]; - tensor real_div_31 = real_div(x = concat_300, y = shape_322)[name = string("real_div_31")]; - tensor hidden_states_403 = tile(reps = real_div_31, x = var_2838)[name = string("hidden_states_403")]; + tensor concat_300 = concat(axis = concat_300_axis_0, interleave = concat_300_interleave_0, values = (gather_287, gather_288, var_59, gather_289_cast_uint16_to_int32, gather_290))[name = string("concat_300")]; + tensor real_div_31 = real_div(x = concat_300, y = shape_322_cast_fp16)[name = string("real_div_31")]; + tensor hidden_states_465_cast_fp16 = tile(reps = real_div_31, x = var_2931_cast_fp16)[name = string("hidden_states_465_cast_fp16")]; tensor concat_301x = const()[name = string("concat_301x"), val = tensor([1, 32, -1, 64])]; - tensor value_states = reshape(shape = concat_301x, x = hidden_states_403)[name = string("value_states")]; - tensor var_2848_shape = shape(x = key_states)[name = string("op_2848_shape")]; + tensor value_states_cast_fp16 = reshape(shape = concat_301x, x = hidden_states_465_cast_fp16)[name = string("value_states_cast_fp16")]; + tensor var_2941_shape_cast_fp16 = shape(x = key_states_cast_fp16)[name = string("op_2941_shape_cast_fp16")]; int32 gather_291_axis_0 = const()[name = string("gather_291_axis_0"), val = int32(0)]; int32 gather_291_batch_dims_0 = const()[name = string("gather_291_batch_dims_0"), val = int32(0)]; bool gather_291_validate_indices_0 = const()[name = string("gather_291_validate_indices_0"), val = bool(false)]; - string var_2848_shape_to_uint16_dtype_0 = const()[name = string("op_2848_shape_to_uint16_dtype_0"), val = string("uint16")]; + string var_2941_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2941_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_291_to_uint16 = const()[name = string("select_291_to_uint16"), val = uint16(2)]; - tensor var_2848_shape_to_uint16 = cast(dtype = var_2848_shape_to_uint16_dtype_0, x = var_2848_shape)[name = string("cast_2")]; - uint16 gather_291_cast_uint16 = gather(axis = gather_291_axis_0, batch_dims = gather_291_batch_dims_0, indices = select_291_to_uint16, validate_indices = gather_291_validate_indices_0, x = var_2848_shape_to_uint16)[name = string("gather_291_cast_uint16")]; + tensor var_2941_shape_cast_fp16_to_uint16 = cast(dtype = var_2941_shape_cast_fp16_to_uint16_dtype_0, x = var_2941_shape_cast_fp16)[name = string("cast_2")]; + uint16 gather_291_cast_uint16 = gather(axis = gather_291_axis_0, batch_dims = gather_291_batch_dims_0, indices = select_291_to_uint16, validate_indices = gather_291_validate_indices_0, x = var_2941_shape_cast_fp16_to_uint16)[name = string("gather_291_cast_uint16")]; string gather_291_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_291_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(1)]; int32 concat_302_values1_0 = const()[name = string("concat_302_values1_0"), val = int32(1)]; @@ -3334,45 +3328,51 @@ program(1.3) tensor causal_mask_begin_0 = const()[name = string("causal_mask_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_end_mask_0 = const()[name = string("causal_mask_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_cast_fp16 = slice_by_index(begin = causal_mask_begin_0, end = concat_302, end_mask = causal_mask_end_mask_0, x = causal_mask)[name = string("causal_mask_cast_fp16")]; - tensor attn_output_61_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_cast_fp16, key = key_states, query = query_states, value = value_states)[name = string("attn_output_61_cast_fp16")]; - tensor var_2854_perm_0 = const()[name = string("op_2854_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor attn_output_61_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_cast_fp16, key = key_states_cast_fp16, query = query_states_cast_fp16, value = value_states_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_2947_perm_0 = const()[name = string("op_2947_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_303_axis_0 = const()[name = string("concat_303_axis_0"), val = int32(0)]; bool concat_303_interleave_0 = const()[name = string("concat_303_interleave_0"), val = bool(false)]; int32 gather_275_cast_uint16_to_int32 = cast(dtype = gather_275_cast_uint16_to_int32_dtype_0, x = gather_275_cast_uint16)[name = string("cast_0")]; tensor concat_303 = concat(axis = concat_303_axis_0, interleave = concat_303_interleave_0, values = (gather_274, gather_275_cast_uint16_to_int32, var_48))[name = string("concat_303")]; - tensor var_2854 = transpose(perm = var_2854_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_0")]; - tensor input_121 = reshape(shape = concat_303, x = var_2854)[name = string("input_121")]; - tensor linear_108 = linear(bias = linear_0_bias_0, weight = model_model_layers_15_self_attn_o_proj_weight_quantized, x = input_121)[name = string("linear_108")]; - tensor hidden_states_407 = add(x = hidden_states_389, y = linear_108)[name = string("hidden_states_407")]; - fp16 var_55_promoted_31_to_fp16 = const()[name = string("op_55_promoted_31_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2863_cast_fp16 = pow(x = hidden_states_407, y = var_55_promoted_31_to_fp16)[name = string("op_2863_cast_fp16")]; + tensor var_2947_cast_fp16 = transpose(perm = var_2947_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_0")]; + tensor input_121_cast_fp16 = reshape(shape = concat_303, x = var_2947_cast_fp16)[name = string("input_121_cast_fp16")]; + tensor model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(664601536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666698752))))[name = string("model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_108_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized, x = input_121_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor hidden_states_469_cast_fp16 = add(x = hidden_states_449_cast_fp16, y = linear_108_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; + fp16 var_54_promoted_31_to_fp16 = const()[name = string("op_54_promoted_31_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2956_cast_fp16 = pow(x = hidden_states_469_cast_fp16, y = var_54_promoted_31_to_fp16)[name = string("op_2956_cast_fp16")]; tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; - tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_2863_cast_fp16)[name = string("variance_63_cast_fp16")]; - fp16 var_2866_to_fp16 = const()[name = string("op_2866_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2867_cast_fp16 = add(x = variance_63_cast_fp16, y = var_2866_to_fp16)[name = string("op_2867_cast_fp16")]; - fp32 var_2868_epsilon_0 = const()[name = string("op_2868_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2868_cast_fp16 = rsqrt(epsilon = var_2868_epsilon_0, x = var_2867_cast_fp16)[name = string("op_2868_cast_fp16")]; - tensor hidden_states_411_cast_fp16 = mul(x = hidden_states_407, y = var_2868_cast_fp16)[name = string("hidden_states_411_cast_fp16")]; - tensor input_123 = mul(x = model_model_layers_15_post_attention_layernorm_weight, y = hidden_states_411_cast_fp16)[name = string("input_123")]; - tensor linear_109 = linear(bias = linear_4_bias_0, weight = model_model_layers_15_mlp_gate_proj_weight_quantized, x = input_123)[name = string("linear_109")]; - tensor var_2877 = silu(x = linear_109)[name = string("op_2877")]; - tensor linear_110 = linear(bias = linear_4_bias_0, weight = model_model_layers_15_mlp_up_proj_weight_quantized, x = input_123)[name = string("linear_110")]; - tensor input_127 = mul(x = var_2877, y = linear_110)[name = string("input_127")]; - tensor linear_111 = linear(bias = linear_0_bias_0, weight = model_model_layers_15_mlp_down_proj_weight_quantized, x = input_127)[name = string("linear_111")]; - tensor hidden_states_415 = add(x = hidden_states_407, y = linear_111)[name = string("hidden_states_415")]; - fp16 var_55_promoted_32_to_fp16 = const()[name = string("op_55_promoted_32_to_fp16"), val = fp16(0x1p+1)]; - tensor var_2886_cast_fp16 = pow(x = hidden_states_415, y = var_55_promoted_32_to_fp16)[name = string("op_2886_cast_fp16")]; + tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_2956_cast_fp16)[name = string("variance_63_cast_fp16")]; + fp16 var_2959_to_fp16 = const()[name = string("op_2959_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2960_cast_fp16 = add(x = variance_63_cast_fp16, y = var_2959_to_fp16)[name = string("op_2960_cast_fp16")]; + fp32 var_2961_epsilon_0 = const()[name = string("op_2961_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2961_cast_fp16 = rsqrt(epsilon = var_2961_epsilon_0, x = var_2960_cast_fp16)[name = string("op_2961_cast_fp16")]; + tensor hidden_states_473_cast_fp16 = mul(x = hidden_states_469_cast_fp16, y = var_2961_cast_fp16)[name = string("hidden_states_473_cast_fp16")]; + tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666960960)))]; + tensor input_123_cast_fp16 = mul(x = model_model_layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_473_cast_fp16)[name = string("input_123_cast_fp16")]; + tensor model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(666965120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(675353792))))[name = string("model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor var_2973_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_2973_cast_fp16")]; + tensor model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676402432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(684791104))))[name = string("model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_110_cast_fp16")]; + tensor input_127_cast_fp16 = mul(x = var_2973_cast_fp16, y = linear_110_cast_fp16)[name = string("input_127_cast_fp16")]; + tensor model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(685839744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(694228416))))[name = string("model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_111_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized, x = input_127_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor hidden_states_479_cast_fp16 = add(x = hidden_states_469_cast_fp16, y = linear_111_cast_fp16)[name = string("hidden_states_479_cast_fp16")]; + fp16 var_54_promoted_32_to_fp16 = const()[name = string("op_54_promoted_32_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2982_cast_fp16 = pow(x = hidden_states_479_cast_fp16, y = var_54_promoted_32_to_fp16)[name = string("op_2982_cast_fp16")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; - tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_2886_cast_fp16)[name = string("variance_cast_fp16")]; - fp16 var_2889_to_fp16 = const()[name = string("op_2889_to_fp16"), val = fp16(0x1.5p-17)]; - tensor var_2890_cast_fp16 = add(x = variance_cast_fp16, y = var_2889_to_fp16)[name = string("op_2890_cast_fp16")]; - fp32 var_2891_epsilon_0 = const()[name = string("op_2891_epsilon_0"), val = fp32(0x1.197998p-40)]; - tensor var_2891_cast_fp16 = rsqrt(epsilon = var_2891_epsilon_0, x = var_2890_cast_fp16)[name = string("op_2891_cast_fp16")]; - tensor hidden_states_419_cast_fp16 = mul(x = hidden_states_415, y = var_2891_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; - tensor hidden_states = mul(x = model_model_norm_weight, y = hidden_states_419_cast_fp16)[name = string("hidden_states")]; - tensor linear_112_bias_0 = const()[name = string("linear_112_bias_0"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695281216)))]; - tensor logits = linear(bias = linear_112_bias_0, weight = model_model_embed_tokens_weight_quantized, x = hidden_states)[name = string("linear_112")]; + tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_2982_cast_fp16)[name = string("variance_cast_fp16")]; + fp16 var_2985_to_fp16 = const()[name = string("op_2985_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2986_cast_fp16 = add(x = variance_cast_fp16, y = var_2985_to_fp16)[name = string("op_2986_cast_fp16")]; + fp32 var_2987_epsilon_0 = const()[name = string("op_2987_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2987_cast_fp16 = rsqrt(epsilon = var_2987_epsilon_0, x = var_2986_cast_fp16)[name = string("op_2987_cast_fp16")]; + tensor hidden_states_483_cast_fp16 = mul(x = hidden_states_479_cast_fp16, y = var_2987_cast_fp16)[name = string("hidden_states_483_cast_fp16")]; + tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695277056)))]; + tensor hidden_states_cast_fp16 = mul(x = model_model_norm_weight_to_fp16, y = hidden_states_483_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor linear_112_bias_0_to_fp16 = const()[name = string("linear_112_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695281216)))]; + tensor logits = linear(bias = linear_112_bias_0_to_fp16, weight = model_model_embed_tokens_weight_to_fp16_quantized, x = hidden_states_cast_fp16)[name = string("linear_112_cast_fp16")]; } -> (logits); } \ No newline at end of file