diff --git "a/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil" "b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-small_216MB/TextDecoder.mlmodelc/model.mil" @@ -0,0 +1,2869 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3404.16.1"}, {"coremlc-version", "3404.23.1"}})] +{ + func main(tensor cache_length, tensor decoder_key_padding_mask, tensor encoder_output_embeds, tensor input_ids, tensor key_cache, tensor kv_cache_update_mask, tensor value_cache) { + tensor var_43_axis_0 = const()[name = tensor("op_43_axis_0"), val = tensor(0)]; + tensor var_43_batch_dims_0 = const()[name = tensor("op_43_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_inlier_module_weight_to_fp16 = const()[name = tensor("embed_positions_inlier_module_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_43_cast_fp16 = gather(axis = var_43_axis_0, batch_dims = var_43_batch_dims_0, indices = cache_length, x = embed_positions_inlier_module_weight_to_fp16)[name = tensor("op_43_cast_fp16")]; + tensor var_45_axis_0 = const()[name = tensor("op_45_axis_0"), val = tensor(0)]; + tensor var_45_batch_dims_0 = const()[name = tensor("op_45_batch_dims_0"), val = tensor(0)]; + tensor embed_positions_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(697216))), name = tensor("embed_positions_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(688256))), shape = tensor([448, 768])]; + tensor var_45_cast_fp16 = gather(axis = var_45_axis_0, batch_dims = var_45_batch_dims_0, indices = cache_length, x = embed_positions_outlier_module_weight_to_fp16_sparsified)[name = tensor("op_45_cast_fp16")]; + tensor embed_positions_1_cast_fp16 = add(x = var_43_cast_fp16, y = var_45_cast_fp16)[name = tensor("embed_positions_1_cast_fp16")]; + tensor var_50_axis_0 = const()[name = tensor("op_50_axis_0"), val = tensor(0)]; + tensor var_50_batch_dims_0 = const()[name = tensor("op_50_batch_dims_0"), val = tensor(0)]; + tensor embed_tokens_weight_to_fp16 = const()[name = tensor("embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(740288)))]; + tensor var_50_cast_fp16 = gather(axis = var_50_axis_0, batch_dims = var_50_batch_dims_0, indices = input_ids, x = embed_tokens_weight_to_fp16)[name = tensor("op_50_cast_fp16")]; + tensor hidden_states_1_cast_fp16 = add(x = var_50_cast_fp16, y = embed_positions_1_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_64_axes_0 = const()[name = tensor("op_64_axes_0"), val = tensor([2])]; + tensor var_64_cast_fp16 = expand_dims(axes = var_64_axes_0, x = hidden_states_1_cast_fp16)[name = tensor("op_64_cast_fp16")]; + tensor inputs_1_axes_0 = const()[name = tensor("inputs_1_axes_0"), val = tensor([3])]; + tensor inputs_1_cast_fp16 = expand_dims(axes = inputs_1_axes_0, x = var_64_cast_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor tile_0 = const()[name = tensor("tile_0"), val = tensor([768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])]; + tensor var_69_axis_0 = const()[name = tensor("op_69_axis_0"), val = tensor(1)]; + tensor var_69_cast_fp16_0, tensor var_69_cast_fp16_1, tensor var_69_cast_fp16_2, tensor var_69_cast_fp16_3, tensor var_69_cast_fp16_4, tensor var_69_cast_fp16_5, tensor var_69_cast_fp16_6, tensor var_69_cast_fp16_7, tensor var_69_cast_fp16_8, tensor var_69_cast_fp16_9, tensor var_69_cast_fp16_10, tensor var_69_cast_fp16_11 = split(axis = var_69_axis_0, split_sizes = tile_0, x = key_cache)[name = tensor("op_69_cast_fp16")]; + tensor tile_1 = const()[name = tensor("tile_1"), val = tensor([768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768, 768])]; + tensor var_84_axis_0 = const()[name = tensor("op_84_axis_0"), val = tensor(1)]; + tensor var_84_cast_fp16_0, tensor var_84_cast_fp16_1, tensor var_84_cast_fp16_2, tensor var_84_cast_fp16_3, tensor var_84_cast_fp16_4, tensor var_84_cast_fp16_5, tensor var_84_cast_fp16_6, tensor var_84_cast_fp16_7, tensor var_84_cast_fp16_8, tensor var_84_cast_fp16_9, tensor var_84_cast_fp16_10, tensor var_84_cast_fp16_11 = split(axis = var_84_axis_0, split_sizes = tile_1, x = value_cache)[name = tensor("op_84_cast_fp16")]; + tensor var_102 = const()[name = tensor("op_102"), val = tensor(3)]; + tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; + tensor var_127_to_fp16 = const()[name = tensor("op_127_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_127_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80404992)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80406592)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80408192)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80409792)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_149_pad_type_0 = const()[name = tensor("op_149_pad_type_0"), val = tensor("valid")]; + tensor var_149_strides_0 = const()[name = tensor("op_149_strides_0"), val = tensor([1, 1])]; + tensor var_149_pad_0 = const()[name = tensor("op_149_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_149_dilations_0 = const()[name = tensor("op_149_dilations_0"), val = tensor([1, 1])]; + tensor var_149_groups_0 = const()[name = tensor("op_149_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80411392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80706368))), name = tensor("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80706496)))]; + tensor var_149_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_149_dilations_0, groups = var_149_groups_0, pad = var_149_pad_0, pad_type = var_149_pad_type_0, strides = var_149_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_149_cast_fp16")]; + tensor var_155_pad_type_0 = const()[name = tensor("op_155_pad_type_0"), val = tensor("valid")]; + tensor var_155_strides_0 = const()[name = tensor("op_155_strides_0"), val = tensor([1, 1])]; + tensor var_155_pad_0 = const()[name = tensor("op_155_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_155_dilations_0 = const()[name = tensor("op_155_dilations_0"), val = tensor([1, 1])]; + tensor var_155_groups_0 = const()[name = tensor("op_155_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80719552))), name = tensor("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80708096))), shape = tensor([768, 768, 1, 1])]; + tensor var_155_cast_fp16 = conv(dilations = var_155_dilations_0, groups = var_155_groups_0, pad = var_155_pad_0, pad_type = var_155_pad_type_0, strides = var_155_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_155_cast_fp16")]; + tensor query_1_cast_fp16 = add(x = var_149_cast_fp16, y = var_155_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_164_pad_type_0 = const()[name = tensor("op_164_pad_type_0"), val = tensor("valid")]; + tensor var_164_strides_0 = const()[name = tensor("op_164_strides_0"), val = tensor([1, 1])]; + tensor var_164_pad_0 = const()[name = tensor("op_164_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_164_dilations_0 = const()[name = tensor("op_164_dilations_0"), val = tensor([1, 1])]; + tensor var_164_groups_0 = const()[name = tensor("op_164_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80793344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81088320))), name = tensor("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_164_cast_fp16 = conv(dilations = var_164_dilations_0, groups = var_164_groups_0, pad = var_164_pad_0, pad_type = var_164_pad_type_0, strides = var_164_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor var_170_pad_type_0 = const()[name = tensor("op_170_pad_type_0"), val = tensor("valid")]; + tensor var_170_strides_0 = const()[name = tensor("op_170_strides_0"), val = tensor([1, 1])]; + tensor var_170_pad_0 = const()[name = tensor("op_170_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_170_dilations_0 = const()[name = tensor("op_170_dilations_0"), val = tensor([1, 1])]; + tensor var_170_groups_0 = const()[name = tensor("op_170_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81100544))), name = tensor("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81088448))), shape = tensor([768, 768, 1, 1])]; + tensor var_170_cast_fp16 = conv(dilations = var_170_dilations_0, groups = var_170_groups_0, pad = var_170_pad_0, pad_type = var_170_pad_type_0, strides = var_170_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_170_cast_fp16")]; + tensor current_key_1_cast_fp16 = add(x = var_164_cast_fp16, y = var_170_cast_fp16)[name = tensor("current_key_1_cast_fp16")]; + tensor var_180_pad_type_0 = const()[name = tensor("op_180_pad_type_0"), val = tensor("valid")]; + tensor var_180_strides_0 = const()[name = tensor("op_180_strides_0"), val = tensor([1, 1])]; + tensor var_180_pad_0 = const()[name = tensor("op_180_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_180_dilations_0 = const()[name = tensor("op_180_dilations_0"), val = tensor([1, 1])]; + tensor var_180_groups_0 = const()[name = tensor("op_180_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81174336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81469312))), name = tensor("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81469440)))]; + tensor var_180_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_180_dilations_0, groups = var_180_groups_0, pad = var_180_pad_0, pad_type = var_180_pad_type_0, strides = var_180_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_180_cast_fp16")]; + tensor var_186_pad_type_0 = const()[name = tensor("op_186_pad_type_0"), val = tensor("valid")]; + tensor var_186_strides_0 = const()[name = tensor("op_186_strides_0"), val = tensor([1, 1])]; + tensor var_186_pad_0 = const()[name = tensor("op_186_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_186_dilations_0 = const()[name = tensor("op_186_dilations_0"), val = tensor([1, 1])]; + tensor var_186_groups_0 = const()[name = tensor("op_186_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81477824))), name = tensor("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81471040))), shape = tensor([768, 768, 1, 1])]; + tensor var_186_cast_fp16 = conv(dilations = var_186_dilations_0, groups = var_186_groups_0, pad = var_186_pad_0, pad_type = var_186_pad_type_0, strides = var_186_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_186_cast_fp16")]; + tensor current_value_1_cast_fp16 = add(x = var_180_cast_fp16, y = var_186_cast_fp16)[name = tensor("current_value_1_cast_fp16")]; + tensor var_189_axes_0 = const()[name = tensor("op_189_axes_0"), val = tensor([1])]; + tensor var_189_cast_fp16 = expand_dims(axes = var_189_axes_0, x = kv_cache_update_mask)[name = tensor("op_189_cast_fp16")]; + tensor var_190_axes_0 = const()[name = tensor("op_190_axes_0"), val = tensor([2])]; + tensor var_190_cast_fp16 = expand_dims(axes = var_190_axes_0, x = var_189_cast_fp16)[name = tensor("op_190_cast_fp16")]; + tensor var_103_to_fp16 = const()[name = tensor("op_103_to_fp16"), val = tensor(0x1p+0)]; + tensor var_192_cast_fp16 = sub(x = var_103_to_fp16, y = var_190_cast_fp16)[name = tensor("op_192_cast_fp16")]; + tensor var_193_cast_fp16 = mul(x = var_69_cast_fp16_0, y = var_192_cast_fp16)[name = tensor("op_193_cast_fp16")]; + tensor var_194_cast_fp16 = mul(x = current_key_1_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_194_cast_fp16")]; + tensor key_1_cast_fp16 = add(x = var_193_cast_fp16, y = var_194_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_197_cast_fp16 = mul(x = var_84_cast_fp16_0, y = var_192_cast_fp16)[name = tensor("op_197_cast_fp16")]; + tensor var_198_cast_fp16 = mul(x = current_value_1_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_198_cast_fp16")]; + tensor value_1_cast_fp16 = add(x = var_197_cast_fp16, y = var_198_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_202 = const()[name = tensor("op_202"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_1_cast_fp16 = reshape(shape = var_202, x = query_1_cast_fp16)[name = tensor("mh_q_1_cast_fp16")]; + tensor var_204_to_fp16 = const()[name = tensor("op_204_to_fp16"), val = tensor(0x1p-3)]; + tensor var_205_cast_fp16 = mul(x = mh_q_1_cast_fp16, y = var_204_to_fp16)[name = tensor("op_205_cast_fp16")]; + tensor var_208 = const()[name = tensor("op_208"), val = tensor([1, 12, 64, 448])]; + tensor var_209_cast_fp16 = reshape(shape = var_208, x = key_1_cast_fp16)[name = tensor("op_209_cast_fp16")]; + tensor mh_w_1_transpose_x_0 = const()[name = tensor("mh_w_1_transpose_x_0"), val = tensor(true)]; + tensor mh_w_1_transpose_y_0 = const()[name = tensor("mh_w_1_transpose_y_0"), val = tensor(false)]; + tensor mh_w_1_cast_fp16 = matmul(transpose_x = mh_w_1_transpose_x_0, transpose_y = mh_w_1_transpose_y_0, x = var_205_cast_fp16, y = var_209_cast_fp16)[name = tensor("mh_w_1_cast_fp16")]; + tensor var_213_axes_0 = const()[name = tensor("op_213_axes_0"), val = tensor([1])]; + tensor var_213_cast_fp16 = expand_dims(axes = var_213_axes_0, x = decoder_key_padding_mask)[name = tensor("op_213_cast_fp16")]; + tensor var_214_axes_0 = const()[name = tensor("op_214_axes_0"), val = tensor([2])]; + tensor var_214_cast_fp16 = expand_dims(axes = var_214_axes_0, x = var_213_cast_fp16)[name = tensor("op_214_cast_fp16")]; + tensor mh_w_3_cast_fp16 = add(x = mh_w_1_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_3_cast_fp16")]; + tensor var_217_cast_fp16 = softmax(axis = var_102, x = mh_w_3_cast_fp16)[name = tensor("op_217_cast_fp16")]; + tensor var_218 = const()[name = tensor("op_218"), val = tensor([1, 12, 64, 448])]; + tensor var_219_cast_fp16 = reshape(shape = var_218, x = value_1_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor attn_1_transpose_x_0 = const()[name = tensor("attn_1_transpose_x_0"), val = tensor(false)]; + tensor attn_1_transpose_y_0 = const()[name = tensor("attn_1_transpose_y_0"), val = tensor(true)]; + tensor attn_1_cast_fp16 = matmul(transpose_x = attn_1_transpose_x_0, transpose_y = attn_1_transpose_y_0, x = var_219_cast_fp16, y = var_217_cast_fp16)[name = tensor("attn_1_cast_fp16")]; + tensor var_222 = const()[name = tensor("op_222"), val = tensor([1, 768, 1, 1])]; + tensor input_1_cast_fp16 = reshape(shape = var_222, x = attn_1_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_232_pad_type_0 = const()[name = tensor("op_232_pad_type_0"), val = tensor("valid")]; + tensor var_232_strides_0 = const()[name = tensor("op_232_strides_0"), val = tensor([1, 1])]; + tensor var_232_pad_0 = const()[name = tensor("op_232_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_232_dilations_0 = const()[name = tensor("op_232_dilations_0"), val = tensor([1, 1])]; + tensor var_232_groups_0 = const()[name = tensor("op_232_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81551616))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81846592))), name = tensor("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81846720)))]; + tensor var_232_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_232_dilations_0, groups = var_232_groups_0, pad = var_232_pad_0, pad_type = var_232_pad_type_0, strides = var_232_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = tensor("op_232_cast_fp16")]; + tensor var_238_pad_type_0 = const()[name = tensor("op_238_pad_type_0"), val = tensor("valid")]; + tensor var_238_strides_0 = const()[name = tensor("op_238_strides_0"), val = tensor([1, 1])]; + tensor var_238_pad_0 = const()[name = tensor("op_238_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_238_dilations_0 = const()[name = tensor("op_238_dilations_0"), val = tensor([1, 1])]; + tensor var_238_groups_0 = const()[name = tensor("op_238_groups_0"), val = tensor(1)]; + tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81857088))), name = tensor("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81848320))), shape = tensor([768, 768, 1, 1])]; + tensor var_238_cast_fp16 = conv(dilations = var_238_dilations_0, groups = var_238_groups_0, pad = var_238_pad_0, pad_type = var_238_pad_type_0, strides = var_238_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor obj_7_cast_fp16 = add(x = var_232_cast_fp16, y = var_238_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; + tensor var_253_to_fp16 = const()[name = tensor("op_253_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_253_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81930880)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81932480)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_275_pad_type_0 = const()[name = tensor("op_275_pad_type_0"), val = tensor("valid")]; + tensor var_275_strides_0 = const()[name = tensor("op_275_strides_0"), val = tensor([1, 1])]; + tensor var_275_pad_0 = const()[name = tensor("op_275_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_275_dilations_0 = const()[name = tensor("op_275_dilations_0"), val = tensor([1, 1])]; + tensor var_275_groups_0 = const()[name = tensor("op_275_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81934080))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82229056))), name = tensor("layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82229184)))]; + tensor var_275_cast_fp16 = conv(bias = layers_0_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_275_dilations_0, groups = var_275_groups_0, pad = var_275_pad_0, pad_type = var_275_pad_type_0, strides = var_275_strides_0, weight = layers_0_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_281_pad_type_0 = const()[name = tensor("op_281_pad_type_0"), val = tensor("valid")]; + tensor var_281_strides_0 = const()[name = tensor("op_281_strides_0"), val = tensor([1, 1])]; + tensor var_281_pad_0 = const()[name = tensor("op_281_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_281_dilations_0 = const()[name = tensor("op_281_dilations_0"), val = tensor([1, 1])]; + tensor var_281_groups_0 = const()[name = tensor("op_281_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82242752))), name = tensor("layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82230784))), shape = tensor([768, 768, 1, 1])]; + tensor var_281_cast_fp16 = conv(dilations = var_281_dilations_0, groups = var_281_groups_0, pad = var_281_pad_0, pad_type = var_281_pad_type_0, strides = var_281_strides_0, weight = layers_0_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_281_cast_fp16")]; + tensor query_3_cast_fp16 = add(x = var_275_cast_fp16, y = var_281_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_290_pad_type_0 = const()[name = tensor("op_290_pad_type_0"), val = tensor("valid")]; + tensor var_290_strides_0 = const()[name = tensor("op_290_strides_0"), val = tensor([1, 1])]; + tensor var_290_pad_0 = const()[name = tensor("op_290_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_290_dilations_0 = const()[name = tensor("op_290_dilations_0"), val = tensor([1, 1])]; + tensor var_290_groups_0 = const()[name = tensor("op_290_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82316544))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82611520))), name = tensor("layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_290_cast_fp16 = conv(dilations = var_290_dilations_0, groups = var_290_groups_0, pad = var_290_pad_0, pad_type = var_290_pad_type_0, strides = var_290_strides_0, weight = layers_0_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_290_cast_fp16")]; + tensor var_296_pad_type_0 = const()[name = tensor("op_296_pad_type_0"), val = tensor("valid")]; + tensor var_296_strides_0 = const()[name = tensor("op_296_strides_0"), val = tensor([1, 1])]; + tensor var_296_pad_0 = const()[name = tensor("op_296_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_296_dilations_0 = const()[name = tensor("op_296_dilations_0"), val = tensor([1, 1])]; + tensor var_296_groups_0 = const()[name = tensor("op_296_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82623232))), name = tensor("layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82611648))), shape = tensor([768, 768, 1, 1])]; + tensor var_296_cast_fp16 = conv(dilations = var_296_dilations_0, groups = var_296_groups_0, pad = var_296_pad_0, pad_type = var_296_pad_type_0, strides = var_296_strides_0, weight = layers_0_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_296_cast_fp16")]; + tensor key_3_cast_fp16 = add(x = var_290_cast_fp16, y = var_296_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_306_pad_type_0 = const()[name = tensor("op_306_pad_type_0"), val = tensor("valid")]; + tensor var_306_strides_0 = const()[name = tensor("op_306_strides_0"), val = tensor([1, 1])]; + tensor var_306_pad_0 = const()[name = tensor("op_306_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_306_dilations_0 = const()[name = tensor("op_306_dilations_0"), val = tensor([1, 1])]; + tensor var_306_groups_0 = const()[name = tensor("op_306_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82697024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82992000))), name = tensor("layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82992128)))]; + tensor var_306_cast_fp16 = conv(bias = layers_0_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_306_dilations_0, groups = var_306_groups_0, pad = var_306_pad_0, pad_type = var_306_pad_type_0, strides = var_306_strides_0, weight = layers_0_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_306_cast_fp16")]; + tensor var_312_pad_type_0 = const()[name = tensor("op_312_pad_type_0"), val = tensor("valid")]; + tensor var_312_strides_0 = const()[name = tensor("op_312_strides_0"), val = tensor([1, 1])]; + tensor var_312_pad_0 = const()[name = tensor("op_312_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312_dilations_0 = const()[name = tensor("op_312_dilations_0"), val = tensor([1, 1])]; + tensor var_312_groups_0 = const()[name = tensor("op_312_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82999232))), name = tensor("layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82993728))), shape = tensor([768, 768, 1, 1])]; + tensor var_312_cast_fp16 = conv(dilations = var_312_dilations_0, groups = var_312_groups_0, pad = var_312_pad_0, pad_type = var_312_pad_type_0, strides = var_312_strides_0, weight = layers_0_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_312_cast_fp16")]; + tensor value_3_cast_fp16 = add(x = var_306_cast_fp16, y = var_312_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_316 = const()[name = tensor("op_316"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_3_cast_fp16 = reshape(shape = var_316, x = query_3_cast_fp16)[name = tensor("mh_q_3_cast_fp16")]; + tensor var_318_to_fp16 = const()[name = tensor("op_318_to_fp16"), val = tensor(0x1p-3)]; + tensor var_319_cast_fp16 = mul(x = mh_q_3_cast_fp16, y = var_318_to_fp16)[name = tensor("op_319_cast_fp16")]; + tensor var_322 = const()[name = tensor("op_322"), val = tensor([1, 12, 64, 1500])]; + tensor var_323_cast_fp16 = reshape(shape = var_322, x = key_3_cast_fp16)[name = tensor("op_323_cast_fp16")]; + tensor mh_w_5_transpose_x_0 = const()[name = tensor("mh_w_5_transpose_x_0"), val = tensor(true)]; + tensor mh_w_5_transpose_y_0 = const()[name = tensor("mh_w_5_transpose_y_0"), val = tensor(false)]; + tensor mh_w_5_cast_fp16 = matmul(transpose_x = mh_w_5_transpose_x_0, transpose_y = mh_w_5_transpose_y_0, x = var_319_cast_fp16, y = var_323_cast_fp16)[name = tensor("mh_w_5_cast_fp16")]; + tensor obj_13_cast_fp16 = softmax(axis = var_102, x = mh_w_5_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_327 = const()[name = tensor("op_327"), val = tensor([1, 12, 64, 1500])]; + tensor var_328_cast_fp16 = reshape(shape = var_327, x = value_3_cast_fp16)[name = tensor("op_328_cast_fp16")]; + tensor attn_3_transpose_x_0 = const()[name = tensor("attn_3_transpose_x_0"), val = tensor(false)]; + tensor attn_3_transpose_y_0 = const()[name = tensor("attn_3_transpose_y_0"), val = tensor(true)]; + tensor attn_3_cast_fp16 = matmul(transpose_x = attn_3_transpose_x_0, transpose_y = attn_3_transpose_y_0, x = var_328_cast_fp16, y = obj_13_cast_fp16)[name = tensor("attn_3_cast_fp16")]; + tensor var_331 = const()[name = tensor("op_331"), val = tensor([1, 768, 1, 1])]; + tensor input_3_cast_fp16 = reshape(shape = var_331, x = attn_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_341_pad_type_0 = const()[name = tensor("op_341_pad_type_0"), val = tensor("valid")]; + tensor var_341_strides_0 = const()[name = tensor("op_341_strides_0"), val = tensor([1, 1])]; + tensor var_341_pad_0 = const()[name = tensor("op_341_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_341_dilations_0 = const()[name = tensor("op_341_dilations_0"), val = tensor([1, 1])]; + tensor var_341_groups_0 = const()[name = tensor("op_341_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83073024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83368000))), name = tensor("layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83368128)))]; + tensor var_341_cast_fp16 = conv(bias = layers_0_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_341_dilations_0, groups = var_341_groups_0, pad = var_341_pad_0, pad_type = var_341_pad_type_0, strides = var_341_strides_0, weight = layers_0_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = tensor("op_341_cast_fp16")]; + tensor var_347_pad_type_0 = const()[name = tensor("op_347_pad_type_0"), val = tensor("valid")]; + tensor var_347_strides_0 = const()[name = tensor("op_347_strides_0"), val = tensor([1, 1])]; + tensor var_347_pad_0 = const()[name = tensor("op_347_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_347_dilations_0 = const()[name = tensor("op_347_dilations_0"), val = tensor([1, 1])]; + tensor var_347_groups_0 = const()[name = tensor("op_347_groups_0"), val = tensor(1)]; + tensor layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83375744))), name = tensor("layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83369728))), shape = tensor([768, 768, 1, 1])]; + tensor var_347_cast_fp16 = conv(dilations = var_347_dilations_0, groups = var_347_groups_0, pad = var_347_pad_0, pad_type = var_347_pad_type_0, strides = var_347_strides_0, weight = layers_0_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = tensor("op_347_cast_fp16")]; + tensor obj_11_cast_fp16 = add(x = var_341_cast_fp16, y = var_347_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; + tensor var_358_to_fp16 = const()[name = tensor("op_358_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_358_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor input_5_gamma_0_to_fp16 = const()[name = tensor("input_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83449536)))]; + tensor input_5_beta_0_to_fp16 = const()[name = tensor("input_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83451136)))]; + tensor input_5_epsilon_0_to_fp16 = const()[name = tensor("input_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_5_cast_fp16 = batch_norm(beta = input_5_beta_0_to_fp16, epsilon = input_5_epsilon_0_to_fp16, gamma = input_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_376_pad_type_0 = const()[name = tensor("op_376_pad_type_0"), val = tensor("valid")]; + tensor var_376_strides_0 = const()[name = tensor("op_376_strides_0"), val = tensor([1, 1])]; + tensor var_376_pad_0 = const()[name = tensor("op_376_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_376_dilations_0 = const()[name = tensor("op_376_dilations_0"), val = tensor([1, 1])]; + tensor var_376_groups_0 = const()[name = tensor("op_376_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83452736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84632448))), name = tensor("layers_0_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84632576)))]; + tensor var_376_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_376_dilations_0, groups = var_376_groups_0, pad = var_376_pad_0, pad_type = var_376_pad_type_0, strides = var_376_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_5_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_382_pad_type_0 = const()[name = tensor("op_382_pad_type_0"), val = tensor("valid")]; + tensor var_382_strides_0 = const()[name = tensor("op_382_strides_0"), val = tensor([1, 1])]; + tensor var_382_pad_0 = const()[name = tensor("op_382_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_382_dilations_0 = const()[name = tensor("op_382_dilations_0"), val = tensor([1, 1])]; + tensor var_382_groups_0 = const()[name = tensor("op_382_groups_0"), val = tensor(1)]; + tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84675072))), name = tensor("layers_0_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84638784))), shape = tensor([3072, 768, 1, 1])]; + tensor var_382_cast_fp16 = conv(dilations = var_382_dilations_0, groups = var_382_groups_0, pad = var_382_pad_0, pad_type = var_382_pad_type_0, strides = var_382_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_5_cast_fp16)[name = tensor("op_382_cast_fp16")]; + tensor input_7_cast_fp16 = add(x = var_376_cast_fp16, y = var_382_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor input_9_mode_0 = const()[name = tensor("input_9_mode_0"), val = tensor("EXACT")]; + tensor input_9_cast_fp16 = gelu(mode = input_9_mode_0, x = input_7_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_393_pad_type_0 = const()[name = tensor("op_393_pad_type_0"), val = tensor("valid")]; + tensor var_393_strides_0 = const()[name = tensor("op_393_strides_0"), val = tensor([1, 1])]; + tensor var_393_pad_0 = const()[name = tensor("op_393_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_393_dilations_0 = const()[name = tensor("op_393_dilations_0"), val = tensor([1, 1])]; + tensor var_393_groups_0 = const()[name = tensor("op_393_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84970048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86149760))), name = tensor("layers_0_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86149888)))]; + tensor var_393_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_393_dilations_0, groups = var_393_groups_0, pad = var_393_pad_0, pad_type = var_393_pad_type_0, strides = var_393_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("op_393_cast_fp16")]; + tensor var_399_pad_type_0 = const()[name = tensor("op_399_pad_type_0"), val = tensor("valid")]; + tensor var_399_strides_0 = const()[name = tensor("op_399_strides_0"), val = tensor([1, 1])]; + tensor var_399_pad_0 = const()[name = tensor("op_399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_399_dilations_0 = const()[name = tensor("op_399_dilations_0"), val = tensor([1, 1])]; + tensor var_399_groups_0 = const()[name = tensor("op_399_groups_0"), val = tensor(1)]; + tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86176448))), name = tensor("layers_0_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86151488))), shape = tensor([768, 3072, 1, 1])]; + tensor var_399_cast_fp16 = conv(dilations = var_399_dilations_0, groups = var_399_groups_0, pad = var_399_pad_0, pad_type = var_399_pad_type_0, strides = var_399_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = tensor("op_399_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = add(x = var_393_cast_fp16, y = var_399_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = hidden_states_3_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_411 = const()[name = tensor("op_411"), val = tensor(3)]; + tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; + tensor var_436_to_fp16 = const()[name = tensor("op_436_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_436_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor obj_15_gamma_0_to_fp16 = const()[name = tensor("obj_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86471424)))]; + tensor obj_15_beta_0_to_fp16 = const()[name = tensor("obj_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86473024)))]; + tensor obj_15_epsilon_0_to_fp16 = const()[name = tensor("obj_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_15_cast_fp16 = batch_norm(beta = obj_15_beta_0_to_fp16, epsilon = obj_15_epsilon_0_to_fp16, gamma = obj_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor var_458_pad_type_0 = const()[name = tensor("op_458_pad_type_0"), val = tensor("valid")]; + tensor var_458_strides_0 = const()[name = tensor("op_458_strides_0"), val = tensor([1, 1])]; + tensor var_458_pad_0 = const()[name = tensor("op_458_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_458_dilations_0 = const()[name = tensor("op_458_dilations_0"), val = tensor([1, 1])]; + tensor var_458_groups_0 = const()[name = tensor("op_458_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86474624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86769600))), name = tensor("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86769728)))]; + tensor var_458_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_458_dilations_0, groups = var_458_groups_0, pad = var_458_pad_0, pad_type = var_458_pad_type_0, strides = var_458_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_458_cast_fp16")]; + tensor var_464_pad_type_0 = const()[name = tensor("op_464_pad_type_0"), val = tensor("valid")]; + tensor var_464_strides_0 = const()[name = tensor("op_464_strides_0"), val = tensor([1, 1])]; + tensor var_464_pad_0 = const()[name = tensor("op_464_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_464_dilations_0 = const()[name = tensor("op_464_dilations_0"), val = tensor([1, 1])]; + tensor var_464_groups_0 = const()[name = tensor("op_464_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86788544))), name = tensor("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86771328))), shape = tensor([768, 768, 1, 1])]; + tensor var_464_cast_fp16 = conv(dilations = var_464_dilations_0, groups = var_464_groups_0, pad = var_464_pad_0, pad_type = var_464_pad_type_0, strides = var_464_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_464_cast_fp16")]; + tensor query_5_cast_fp16 = add(x = var_458_cast_fp16, y = var_464_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_473_pad_type_0 = const()[name = tensor("op_473_pad_type_0"), val = tensor("valid")]; + tensor var_473_strides_0 = const()[name = tensor("op_473_strides_0"), val = tensor([1, 1])]; + tensor var_473_pad_0 = const()[name = tensor("op_473_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_473_dilations_0 = const()[name = tensor("op_473_dilations_0"), val = tensor([1, 1])]; + tensor var_473_groups_0 = const()[name = tensor("op_473_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86862336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87157312))), name = tensor("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_473_cast_fp16 = conv(dilations = var_473_dilations_0, groups = var_473_groups_0, pad = var_473_pad_0, pad_type = var_473_pad_type_0, strides = var_473_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_473_cast_fp16")]; + tensor var_479_pad_type_0 = const()[name = tensor("op_479_pad_type_0"), val = tensor("valid")]; + tensor var_479_strides_0 = const()[name = tensor("op_479_strides_0"), val = tensor([1, 1])]; + tensor var_479_pad_0 = const()[name = tensor("op_479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_479_dilations_0 = const()[name = tensor("op_479_dilations_0"), val = tensor([1, 1])]; + tensor var_479_groups_0 = const()[name = tensor("op_479_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87171392))), name = tensor("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87157440))), shape = tensor([768, 768, 1, 1])]; + tensor var_479_cast_fp16 = conv(dilations = var_479_dilations_0, groups = var_479_groups_0, pad = var_479_pad_0, pad_type = var_479_pad_type_0, strides = var_479_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_479_cast_fp16")]; + tensor current_key_3_cast_fp16 = add(x = var_473_cast_fp16, y = var_479_cast_fp16)[name = tensor("current_key_3_cast_fp16")]; + tensor var_489_pad_type_0 = const()[name = tensor("op_489_pad_type_0"), val = tensor("valid")]; + tensor var_489_strides_0 = const()[name = tensor("op_489_strides_0"), val = tensor([1, 1])]; + tensor var_489_pad_0 = const()[name = tensor("op_489_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_489_dilations_0 = const()[name = tensor("op_489_dilations_0"), val = tensor([1, 1])]; + tensor var_489_groups_0 = const()[name = tensor("op_489_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87245184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87540160))), name = tensor("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87540288)))]; + tensor var_489_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_489_dilations_0, groups = var_489_groups_0, pad = var_489_pad_0, pad_type = var_489_pad_type_0, strides = var_489_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_15_cast_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_495_pad_type_0 = const()[name = tensor("op_495_pad_type_0"), val = tensor("valid")]; + tensor var_495_strides_0 = const()[name = tensor("op_495_strides_0"), val = tensor([1, 1])]; + tensor var_495_pad_0 = const()[name = tensor("op_495_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_495_dilations_0 = const()[name = tensor("op_495_dilations_0"), val = tensor([1, 1])]; + tensor var_495_groups_0 = const()[name = tensor("op_495_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87558720))), name = tensor("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87541888))), shape = tensor([768, 768, 1, 1])]; + tensor var_495_cast_fp16 = conv(dilations = var_495_dilations_0, groups = var_495_groups_0, pad = var_495_pad_0, pad_type = var_495_pad_type_0, strides = var_495_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_15_cast_fp16)[name = tensor("op_495_cast_fp16")]; + tensor current_value_3_cast_fp16 = add(x = var_489_cast_fp16, y = var_495_cast_fp16)[name = tensor("current_value_3_cast_fp16")]; + tensor var_502_cast_fp16 = mul(x = var_69_cast_fp16_1, y = var_192_cast_fp16)[name = tensor("op_502_cast_fp16")]; + tensor var_503_cast_fp16 = mul(x = current_key_3_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_503_cast_fp16")]; + tensor key_5_cast_fp16 = add(x = var_502_cast_fp16, y = var_503_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_506_cast_fp16 = mul(x = var_84_cast_fp16_1, y = var_192_cast_fp16)[name = tensor("op_506_cast_fp16")]; + tensor var_507_cast_fp16 = mul(x = current_value_3_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_507_cast_fp16")]; + tensor value_5_cast_fp16 = add(x = var_506_cast_fp16, y = var_507_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_511 = const()[name = tensor("op_511"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_5_cast_fp16 = reshape(shape = var_511, x = query_5_cast_fp16)[name = tensor("mh_q_5_cast_fp16")]; + tensor var_513_to_fp16 = const()[name = tensor("op_513_to_fp16"), val = tensor(0x1p-3)]; + tensor var_514_cast_fp16 = mul(x = mh_q_5_cast_fp16, y = var_513_to_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_517 = const()[name = tensor("op_517"), val = tensor([1, 12, 64, 448])]; + tensor var_518_cast_fp16 = reshape(shape = var_517, x = key_5_cast_fp16)[name = tensor("op_518_cast_fp16")]; + tensor mh_w_7_transpose_x_0 = const()[name = tensor("mh_w_7_transpose_x_0"), val = tensor(true)]; + tensor mh_w_7_transpose_y_0 = const()[name = tensor("mh_w_7_transpose_y_0"), val = tensor(false)]; + tensor mh_w_7_cast_fp16 = matmul(transpose_x = mh_w_7_transpose_x_0, transpose_y = mh_w_7_transpose_y_0, x = var_514_cast_fp16, y = var_518_cast_fp16)[name = tensor("mh_w_7_cast_fp16")]; + tensor mh_w_9_cast_fp16 = add(x = mh_w_7_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_9_cast_fp16")]; + tensor var_526_cast_fp16 = softmax(axis = var_411, x = mh_w_9_cast_fp16)[name = tensor("op_526_cast_fp16")]; + tensor var_527 = const()[name = tensor("op_527"), val = tensor([1, 12, 64, 448])]; + tensor var_528_cast_fp16 = reshape(shape = var_527, x = value_5_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor attn_5_transpose_x_0 = const()[name = tensor("attn_5_transpose_x_0"), val = tensor(false)]; + tensor attn_5_transpose_y_0 = const()[name = tensor("attn_5_transpose_y_0"), val = tensor(true)]; + tensor attn_5_cast_fp16 = matmul(transpose_x = attn_5_transpose_x_0, transpose_y = attn_5_transpose_y_0, x = var_528_cast_fp16, y = var_526_cast_fp16)[name = tensor("attn_5_cast_fp16")]; + tensor var_531 = const()[name = tensor("op_531"), val = tensor([1, 768, 1, 1])]; + tensor input_11_cast_fp16 = reshape(shape = var_531, x = attn_5_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_541_pad_type_0 = const()[name = tensor("op_541_pad_type_0"), val = tensor("valid")]; + tensor var_541_strides_0 = const()[name = tensor("op_541_strides_0"), val = tensor([1, 1])]; + tensor var_541_pad_0 = const()[name = tensor("op_541_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_541_dilations_0 = const()[name = tensor("op_541_dilations_0"), val = tensor([1, 1])]; + tensor var_541_groups_0 = const()[name = tensor("op_541_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87632512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87927488))), name = tensor("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87927616)))]; + tensor var_541_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_541_dilations_0, groups = var_541_groups_0, pad = var_541_pad_0, pad_type = var_541_pad_type_0, strides = var_541_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_547_pad_type_0 = const()[name = tensor("op_547_pad_type_0"), val = tensor("valid")]; + tensor var_547_strides_0 = const()[name = tensor("op_547_strides_0"), val = tensor([1, 1])]; + tensor var_547_pad_0 = const()[name = tensor("op_547_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_547_dilations_0 = const()[name = tensor("op_547_dilations_0"), val = tensor([1, 1])]; + tensor var_547_groups_0 = const()[name = tensor("op_547_groups_0"), val = tensor(1)]; + tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87946176))), name = tensor("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(87929216))), shape = tensor([768, 768, 1, 1])]; + tensor var_547_cast_fp16 = conv(dilations = var_547_dilations_0, groups = var_547_groups_0, pad = var_547_pad_0, pad_type = var_547_pad_type_0, strides = var_547_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = tensor("op_547_cast_fp16")]; + tensor obj_21_cast_fp16 = add(x = var_541_cast_fp16, y = var_547_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = obj_21_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; + tensor var_562_to_fp16 = const()[name = tensor("op_562_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_562_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_23_gamma_0_to_fp16 = const()[name = tensor("obj_23_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88019968)))]; + tensor obj_23_beta_0_to_fp16 = const()[name = tensor("obj_23_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88021568)))]; + tensor obj_23_epsilon_0_to_fp16 = const()[name = tensor("obj_23_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_23_cast_fp16 = batch_norm(beta = obj_23_beta_0_to_fp16, epsilon = obj_23_epsilon_0_to_fp16, gamma = obj_23_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor var_584_pad_type_0 = const()[name = tensor("op_584_pad_type_0"), val = tensor("valid")]; + tensor var_584_strides_0 = const()[name = tensor("op_584_strides_0"), val = tensor([1, 1])]; + tensor var_584_pad_0 = const()[name = tensor("op_584_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_584_dilations_0 = const()[name = tensor("op_584_dilations_0"), val = tensor([1, 1])]; + tensor var_584_groups_0 = const()[name = tensor("op_584_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88023168))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88318144))), name = tensor("layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88318272)))]; + tensor var_584_cast_fp16 = conv(bias = layers_1_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_584_dilations_0, groups = var_584_groups_0, pad = var_584_pad_0, pad_type = var_584_pad_type_0, strides = var_584_strides_0, weight = layers_1_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_23_cast_fp16)[name = tensor("op_584_cast_fp16")]; + tensor var_590_pad_type_0 = const()[name = tensor("op_590_pad_type_0"), val = tensor("valid")]; + tensor var_590_strides_0 = const()[name = tensor("op_590_strides_0"), val = tensor([1, 1])]; + tensor var_590_pad_0 = const()[name = tensor("op_590_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_590_dilations_0 = const()[name = tensor("op_590_dilations_0"), val = tensor([1, 1])]; + tensor var_590_groups_0 = const()[name = tensor("op_590_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88326784))), name = tensor("layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88319872))), shape = tensor([768, 768, 1, 1])]; + tensor var_590_cast_fp16 = conv(dilations = var_590_dilations_0, groups = var_590_groups_0, pad = var_590_pad_0, pad_type = var_590_pad_type_0, strides = var_590_strides_0, weight = layers_1_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_23_cast_fp16)[name = tensor("op_590_cast_fp16")]; + tensor query_7_cast_fp16 = add(x = var_584_cast_fp16, y = var_590_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_599_pad_type_0 = const()[name = tensor("op_599_pad_type_0"), val = tensor("valid")]; + tensor var_599_strides_0 = const()[name = tensor("op_599_strides_0"), val = tensor([1, 1])]; + tensor var_599_pad_0 = const()[name = tensor("op_599_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_599_dilations_0 = const()[name = tensor("op_599_dilations_0"), val = tensor([1, 1])]; + tensor var_599_groups_0 = const()[name = tensor("op_599_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88400576))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88695552))), name = tensor("layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_599_cast_fp16 = conv(dilations = var_599_dilations_0, groups = var_599_groups_0, pad = var_599_pad_0, pad_type = var_599_pad_type_0, strides = var_599_strides_0, weight = layers_1_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_599_cast_fp16")]; + tensor var_605_pad_type_0 = const()[name = tensor("op_605_pad_type_0"), val = tensor("valid")]; + tensor var_605_strides_0 = const()[name = tensor("op_605_strides_0"), val = tensor([1, 1])]; + tensor var_605_pad_0 = const()[name = tensor("op_605_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_605_dilations_0 = const()[name = tensor("op_605_dilations_0"), val = tensor([1, 1])]; + tensor var_605_groups_0 = const()[name = tensor("op_605_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88703104))), name = tensor("layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88695680))), shape = tensor([768, 768, 1, 1])]; + tensor var_605_cast_fp16 = conv(dilations = var_605_dilations_0, groups = var_605_groups_0, pad = var_605_pad_0, pad_type = var_605_pad_type_0, strides = var_605_strides_0, weight = layers_1_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_605_cast_fp16")]; + tensor key_7_cast_fp16 = add(x = var_599_cast_fp16, y = var_605_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_615_pad_type_0 = const()[name = tensor("op_615_pad_type_0"), val = tensor("valid")]; + tensor var_615_strides_0 = const()[name = tensor("op_615_strides_0"), val = tensor([1, 1])]; + tensor var_615_pad_0 = const()[name = tensor("op_615_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_615_dilations_0 = const()[name = tensor("op_615_dilations_0"), val = tensor([1, 1])]; + tensor var_615_groups_0 = const()[name = tensor("op_615_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(88776896))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89071872))), name = tensor("layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89072000)))]; + tensor var_615_cast_fp16 = conv(bias = layers_1_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_615_dilations_0, groups = var_615_groups_0, pad = var_615_pad_0, pad_type = var_615_pad_type_0, strides = var_615_strides_0, weight = layers_1_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_615_cast_fp16")]; + tensor var_621_pad_type_0 = const()[name = tensor("op_621_pad_type_0"), val = tensor("valid")]; + tensor var_621_strides_0 = const()[name = tensor("op_621_strides_0"), val = tensor([1, 1])]; + tensor var_621_pad_0 = const()[name = tensor("op_621_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_621_dilations_0 = const()[name = tensor("op_621_dilations_0"), val = tensor([1, 1])]; + tensor var_621_groups_0 = const()[name = tensor("op_621_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89079680))), name = tensor("layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89073600))), shape = tensor([768, 768, 1, 1])]; + tensor var_621_cast_fp16 = conv(dilations = var_621_dilations_0, groups = var_621_groups_0, pad = var_621_pad_0, pad_type = var_621_pad_type_0, strides = var_621_strides_0, weight = layers_1_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_621_cast_fp16")]; + tensor value_7_cast_fp16 = add(x = var_615_cast_fp16, y = var_621_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_625 = const()[name = tensor("op_625"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_7_cast_fp16 = reshape(shape = var_625, x = query_7_cast_fp16)[name = tensor("mh_q_7_cast_fp16")]; + tensor var_627_to_fp16 = const()[name = tensor("op_627_to_fp16"), val = tensor(0x1p-3)]; + tensor var_628_cast_fp16 = mul(x = mh_q_7_cast_fp16, y = var_627_to_fp16)[name = tensor("op_628_cast_fp16")]; + tensor var_631 = const()[name = tensor("op_631"), val = tensor([1, 12, 64, 1500])]; + tensor var_632_cast_fp16 = reshape(shape = var_631, x = key_7_cast_fp16)[name = tensor("op_632_cast_fp16")]; + tensor mh_w_11_transpose_x_0 = const()[name = tensor("mh_w_11_transpose_x_0"), val = tensor(true)]; + tensor mh_w_11_transpose_y_0 = const()[name = tensor("mh_w_11_transpose_y_0"), val = tensor(false)]; + tensor mh_w_11_cast_fp16 = matmul(transpose_x = mh_w_11_transpose_x_0, transpose_y = mh_w_11_transpose_y_0, x = var_628_cast_fp16, y = var_632_cast_fp16)[name = tensor("mh_w_11_cast_fp16")]; + tensor obj_27_cast_fp16 = softmax(axis = var_411, x = mh_w_11_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor var_636 = const()[name = tensor("op_636"), val = tensor([1, 12, 64, 1500])]; + tensor var_637_cast_fp16 = reshape(shape = var_636, x = value_7_cast_fp16)[name = tensor("op_637_cast_fp16")]; + tensor attn_7_transpose_x_0 = const()[name = tensor("attn_7_transpose_x_0"), val = tensor(false)]; + tensor attn_7_transpose_y_0 = const()[name = tensor("attn_7_transpose_y_0"), val = tensor(true)]; + tensor attn_7_cast_fp16 = matmul(transpose_x = attn_7_transpose_x_0, transpose_y = attn_7_transpose_y_0, x = var_637_cast_fp16, y = obj_27_cast_fp16)[name = tensor("attn_7_cast_fp16")]; + tensor var_640 = const()[name = tensor("op_640"), val = tensor([1, 768, 1, 1])]; + tensor input_13_cast_fp16 = reshape(shape = var_640, x = attn_7_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_650_pad_type_0 = const()[name = tensor("op_650_pad_type_0"), val = tensor("valid")]; + tensor var_650_strides_0 = const()[name = tensor("op_650_strides_0"), val = tensor([1, 1])]; + tensor var_650_pad_0 = const()[name = tensor("op_650_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_650_dilations_0 = const()[name = tensor("op_650_dilations_0"), val = tensor([1, 1])]; + tensor var_650_groups_0 = const()[name = tensor("op_650_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89153472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89448448))), name = tensor("layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89448576)))]; + tensor var_650_cast_fp16 = conv(bias = layers_1_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_650_dilations_0, groups = var_650_groups_0, pad = var_650_pad_0, pad_type = var_650_pad_type_0, strides = var_650_strides_0, weight = layers_1_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_13_cast_fp16)[name = tensor("op_650_cast_fp16")]; + tensor var_656_pad_type_0 = const()[name = tensor("op_656_pad_type_0"), val = tensor("valid")]; + tensor var_656_strides_0 = const()[name = tensor("op_656_strides_0"), val = tensor([1, 1])]; + tensor var_656_pad_0 = const()[name = tensor("op_656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_656_dilations_0 = const()[name = tensor("op_656_dilations_0"), val = tensor([1, 1])]; + tensor var_656_groups_0 = const()[name = tensor("op_656_groups_0"), val = tensor(1)]; + tensor layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89456896))), name = tensor("layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89450176))), shape = tensor([768, 768, 1, 1])]; + tensor var_656_cast_fp16 = conv(dilations = var_656_dilations_0, groups = var_656_groups_0, pad = var_656_pad_0, pad_type = var_656_pad_type_0, strides = var_656_strides_0, weight = layers_1_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_13_cast_fp16)[name = tensor("op_656_cast_fp16")]; + tensor obj_25_cast_fp16 = add(x = var_650_cast_fp16, y = var_656_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_25_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; + tensor var_667_to_fp16 = const()[name = tensor("op_667_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_667_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_15_gamma_0_to_fp16 = const()[name = tensor("input_15_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89530688)))]; + tensor input_15_beta_0_to_fp16 = const()[name = tensor("input_15_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89532288)))]; + tensor input_15_epsilon_0_to_fp16 = const()[name = tensor("input_15_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_15_cast_fp16 = batch_norm(beta = input_15_beta_0_to_fp16, epsilon = input_15_epsilon_0_to_fp16, gamma = input_15_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_685_pad_type_0 = const()[name = tensor("op_685_pad_type_0"), val = tensor("valid")]; + tensor var_685_strides_0 = const()[name = tensor("op_685_strides_0"), val = tensor([1, 1])]; + tensor var_685_pad_0 = const()[name = tensor("op_685_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_685_dilations_0 = const()[name = tensor("op_685_dilations_0"), val = tensor([1, 1])]; + tensor var_685_groups_0 = const()[name = tensor("op_685_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89533888))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90713600))), name = tensor("layers_1_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90713728)))]; + tensor var_685_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_685_dilations_0, groups = var_685_groups_0, pad = var_685_pad_0, pad_type = var_685_pad_type_0, strides = var_685_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("op_685_cast_fp16")]; + tensor var_691_pad_type_0 = const()[name = tensor("op_691_pad_type_0"), val = tensor("valid")]; + tensor var_691_strides_0 = const()[name = tensor("op_691_strides_0"), val = tensor([1, 1])]; + tensor var_691_pad_0 = const()[name = tensor("op_691_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_691_dilations_0 = const()[name = tensor("op_691_dilations_0"), val = tensor([1, 1])]; + tensor var_691_groups_0 = const()[name = tensor("op_691_groups_0"), val = tensor(1)]; + tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90768512))), name = tensor("layers_1_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(90719936))), shape = tensor([3072, 768, 1, 1])]; + tensor var_691_cast_fp16 = conv(dilations = var_691_dilations_0, groups = var_691_groups_0, pad = var_691_pad_0, pad_type = var_691_pad_type_0, strides = var_691_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = tensor("op_691_cast_fp16")]; + tensor input_17_cast_fp16 = add(x = var_685_cast_fp16, y = var_691_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor input_19_mode_0 = const()[name = tensor("input_19_mode_0"), val = tensor("EXACT")]; + tensor input_19_cast_fp16 = gelu(mode = input_19_mode_0, x = input_17_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_702_pad_type_0 = const()[name = tensor("op_702_pad_type_0"), val = tensor("valid")]; + tensor var_702_strides_0 = const()[name = tensor("op_702_strides_0"), val = tensor([1, 1])]; + tensor var_702_pad_0 = const()[name = tensor("op_702_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_702_dilations_0 = const()[name = tensor("op_702_dilations_0"), val = tensor([1, 1])]; + tensor var_702_groups_0 = const()[name = tensor("op_702_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91063488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92243200))), name = tensor("layers_1_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92243328)))]; + tensor var_702_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_702_dilations_0, groups = var_702_groups_0, pad = var_702_pad_0, pad_type = var_702_pad_type_0, strides = var_702_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("op_702_cast_fp16")]; + tensor var_708_pad_type_0 = const()[name = tensor("op_708_pad_type_0"), val = tensor("valid")]; + tensor var_708_strides_0 = const()[name = tensor("op_708_strides_0"), val = tensor([1, 1])]; + tensor var_708_pad_0 = const()[name = tensor("op_708_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_708_dilations_0 = const()[name = tensor("op_708_dilations_0"), val = tensor([1, 1])]; + tensor var_708_groups_0 = const()[name = tensor("op_708_groups_0"), val = tensor(1)]; + tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92277440))), name = tensor("layers_1_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92244928))), shape = tensor([768, 3072, 1, 1])]; + tensor var_708_cast_fp16 = conv(dilations = var_708_dilations_0, groups = var_708_groups_0, pad = var_708_pad_0, pad_type = var_708_pad_type_0, strides = var_708_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = tensor("op_708_cast_fp16")]; + tensor hidden_states_5_cast_fp16 = add(x = var_702_cast_fp16, y = var_708_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_720 = const()[name = tensor("op_720"), val = tensor(3)]; + tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; + tensor var_745_to_fp16 = const()[name = tensor("op_745_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_745_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92572416)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92574016)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_767_pad_type_0 = const()[name = tensor("op_767_pad_type_0"), val = tensor("valid")]; + tensor var_767_strides_0 = const()[name = tensor("op_767_strides_0"), val = tensor([1, 1])]; + tensor var_767_pad_0 = const()[name = tensor("op_767_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_767_dilations_0 = const()[name = tensor("op_767_dilations_0"), val = tensor([1, 1])]; + tensor var_767_groups_0 = const()[name = tensor("op_767_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92575616))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92870592))), name = tensor("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92870720)))]; + tensor var_767_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_767_dilations_0, groups = var_767_groups_0, pad = var_767_pad_0, pad_type = var_767_pad_type_0, strides = var_767_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_767_cast_fp16")]; + tensor var_773_pad_type_0 = const()[name = tensor("op_773_pad_type_0"), val = tensor("valid")]; + tensor var_773_strides_0 = const()[name = tensor("op_773_strides_0"), val = tensor([1, 1])]; + tensor var_773_pad_0 = const()[name = tensor("op_773_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_773_dilations_0 = const()[name = tensor("op_773_dilations_0"), val = tensor([1, 1])]; + tensor var_773_groups_0 = const()[name = tensor("op_773_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92899264))), name = tensor("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92872320))), shape = tensor([768, 768, 1, 1])]; + tensor var_773_cast_fp16 = conv(dilations = var_773_dilations_0, groups = var_773_groups_0, pad = var_773_pad_0, pad_type = var_773_pad_type_0, strides = var_773_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor query_9_cast_fp16 = add(x = var_767_cast_fp16, y = var_773_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_782_pad_type_0 = const()[name = tensor("op_782_pad_type_0"), val = tensor("valid")]; + tensor var_782_strides_0 = const()[name = tensor("op_782_strides_0"), val = tensor([1, 1])]; + tensor var_782_pad_0 = const()[name = tensor("op_782_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_782_dilations_0 = const()[name = tensor("op_782_dilations_0"), val = tensor([1, 1])]; + tensor var_782_groups_0 = const()[name = tensor("op_782_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92973056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93268032))), name = tensor("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_782_cast_fp16 = conv(dilations = var_782_dilations_0, groups = var_782_groups_0, pad = var_782_pad_0, pad_type = var_782_pad_type_0, strides = var_782_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_782_cast_fp16")]; + tensor var_788_pad_type_0 = const()[name = tensor("op_788_pad_type_0"), val = tensor("valid")]; + tensor var_788_strides_0 = const()[name = tensor("op_788_strides_0"), val = tensor([1, 1])]; + tensor var_788_pad_0 = const()[name = tensor("op_788_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_788_dilations_0 = const()[name = tensor("op_788_dilations_0"), val = tensor([1, 1])]; + tensor var_788_groups_0 = const()[name = tensor("op_788_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93292800))), name = tensor("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93268160))), shape = tensor([768, 768, 1, 1])]; + tensor var_788_cast_fp16 = conv(dilations = var_788_dilations_0, groups = var_788_groups_0, pad = var_788_pad_0, pad_type = var_788_pad_type_0, strides = var_788_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_788_cast_fp16")]; + tensor current_key_5_cast_fp16 = add(x = var_782_cast_fp16, y = var_788_cast_fp16)[name = tensor("current_key_5_cast_fp16")]; + tensor var_798_pad_type_0 = const()[name = tensor("op_798_pad_type_0"), val = tensor("valid")]; + tensor var_798_strides_0 = const()[name = tensor("op_798_strides_0"), val = tensor([1, 1])]; + tensor var_798_pad_0 = const()[name = tensor("op_798_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_798_dilations_0 = const()[name = tensor("op_798_dilations_0"), val = tensor([1, 1])]; + tensor var_798_groups_0 = const()[name = tensor("op_798_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93366592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93661568))), name = tensor("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93661696)))]; + tensor var_798_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_798_dilations_0, groups = var_798_groups_0, pad = var_798_pad_0, pad_type = var_798_pad_type_0, strides = var_798_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_798_cast_fp16")]; + tensor var_804_pad_type_0 = const()[name = tensor("op_804_pad_type_0"), val = tensor("valid")]; + tensor var_804_strides_0 = const()[name = tensor("op_804_strides_0"), val = tensor([1, 1])]; + tensor var_804_pad_0 = const()[name = tensor("op_804_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_804_dilations_0 = const()[name = tensor("op_804_dilations_0"), val = tensor([1, 1])]; + tensor var_804_groups_0 = const()[name = tensor("op_804_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93695552))), name = tensor("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93663296))), shape = tensor([768, 768, 1, 1])]; + tensor var_804_cast_fp16 = conv(dilations = var_804_dilations_0, groups = var_804_groups_0, pad = var_804_pad_0, pad_type = var_804_pad_type_0, strides = var_804_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_804_cast_fp16")]; + tensor current_value_5_cast_fp16 = add(x = var_798_cast_fp16, y = var_804_cast_fp16)[name = tensor("current_value_5_cast_fp16")]; + tensor var_811_cast_fp16 = mul(x = var_69_cast_fp16_2, y = var_192_cast_fp16)[name = tensor("op_811_cast_fp16")]; + tensor var_812_cast_fp16 = mul(x = current_key_5_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_812_cast_fp16")]; + tensor key_9_cast_fp16 = add(x = var_811_cast_fp16, y = var_812_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_815_cast_fp16 = mul(x = var_84_cast_fp16_2, y = var_192_cast_fp16)[name = tensor("op_815_cast_fp16")]; + tensor var_816_cast_fp16 = mul(x = current_value_5_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_816_cast_fp16")]; + tensor value_9_cast_fp16 = add(x = var_815_cast_fp16, y = var_816_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_820 = const()[name = tensor("op_820"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_9_cast_fp16 = reshape(shape = var_820, x = query_9_cast_fp16)[name = tensor("mh_q_9_cast_fp16")]; + tensor var_822_to_fp16 = const()[name = tensor("op_822_to_fp16"), val = tensor(0x1p-3)]; + tensor var_823_cast_fp16 = mul(x = mh_q_9_cast_fp16, y = var_822_to_fp16)[name = tensor("op_823_cast_fp16")]; + tensor var_826 = const()[name = tensor("op_826"), val = tensor([1, 12, 64, 448])]; + tensor var_827_cast_fp16 = reshape(shape = var_826, x = key_9_cast_fp16)[name = tensor("op_827_cast_fp16")]; + tensor mh_w_13_transpose_x_0 = const()[name = tensor("mh_w_13_transpose_x_0"), val = tensor(true)]; + tensor mh_w_13_transpose_y_0 = const()[name = tensor("mh_w_13_transpose_y_0"), val = tensor(false)]; + tensor mh_w_13_cast_fp16 = matmul(transpose_x = mh_w_13_transpose_x_0, transpose_y = mh_w_13_transpose_y_0, x = var_823_cast_fp16, y = var_827_cast_fp16)[name = tensor("mh_w_13_cast_fp16")]; + tensor mh_w_15_cast_fp16 = add(x = mh_w_13_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_15_cast_fp16")]; + tensor var_835_cast_fp16 = softmax(axis = var_720, x = mh_w_15_cast_fp16)[name = tensor("op_835_cast_fp16")]; + tensor var_836 = const()[name = tensor("op_836"), val = tensor([1, 12, 64, 448])]; + tensor var_837_cast_fp16 = reshape(shape = var_836, x = value_9_cast_fp16)[name = tensor("op_837_cast_fp16")]; + tensor attn_9_transpose_x_0 = const()[name = tensor("attn_9_transpose_x_0"), val = tensor(false)]; + tensor attn_9_transpose_y_0 = const()[name = tensor("attn_9_transpose_y_0"), val = tensor(true)]; + tensor attn_9_cast_fp16 = matmul(transpose_x = attn_9_transpose_x_0, transpose_y = attn_9_transpose_y_0, x = var_837_cast_fp16, y = var_835_cast_fp16)[name = tensor("attn_9_cast_fp16")]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 768, 1, 1])]; + tensor input_21_cast_fp16 = reshape(shape = var_840, x = attn_9_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_850_pad_type_0 = const()[name = tensor("op_850_pad_type_0"), val = tensor("valid")]; + tensor var_850_strides_0 = const()[name = tensor("op_850_strides_0"), val = tensor([1, 1])]; + tensor var_850_pad_0 = const()[name = tensor("op_850_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_850_dilations_0 = const()[name = tensor("op_850_dilations_0"), val = tensor([1, 1])]; + tensor var_850_groups_0 = const()[name = tensor("op_850_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93769344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94064320))), name = tensor("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94064448)))]; + tensor var_850_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_850_dilations_0, groups = var_850_groups_0, pad = var_850_pad_0, pad_type = var_850_pad_type_0, strides = var_850_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_21_cast_fp16)[name = tensor("op_850_cast_fp16")]; + tensor var_856_pad_type_0 = const()[name = tensor("op_856_pad_type_0"), val = tensor("valid")]; + tensor var_856_strides_0 = const()[name = tensor("op_856_strides_0"), val = tensor([1, 1])]; + tensor var_856_pad_0 = const()[name = tensor("op_856_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_856_dilations_0 = const()[name = tensor("op_856_dilations_0"), val = tensor([1, 1])]; + tensor var_856_groups_0 = const()[name = tensor("op_856_groups_0"), val = tensor(1)]; + tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94090240))), name = tensor("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94066048))), shape = tensor([768, 768, 1, 1])]; + tensor var_856_cast_fp16 = conv(dilations = var_856_dilations_0, groups = var_856_groups_0, pad = var_856_pad_0, pad_type = var_856_pad_type_0, strides = var_856_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_21_cast_fp16)[name = tensor("op_856_cast_fp16")]; + tensor obj_35_cast_fp16 = add(x = var_850_cast_fp16, y = var_856_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; + tensor var_871_to_fp16 = const()[name = tensor("op_871_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_871_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94164032)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94165632)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_893_pad_type_0 = const()[name = tensor("op_893_pad_type_0"), val = tensor("valid")]; + tensor var_893_strides_0 = const()[name = tensor("op_893_strides_0"), val = tensor([1, 1])]; + tensor var_893_pad_0 = const()[name = tensor("op_893_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_893_dilations_0 = const()[name = tensor("op_893_dilations_0"), val = tensor([1, 1])]; + tensor var_893_groups_0 = const()[name = tensor("op_893_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94167232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94462208))), name = tensor("layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94462336)))]; + tensor var_893_cast_fp16 = conv(bias = layers_2_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_893_dilations_0, groups = var_893_groups_0, pad = var_893_pad_0, pad_type = var_893_pad_type_0, strides = var_893_strides_0, weight = layers_2_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_893_cast_fp16")]; + tensor var_899_pad_type_0 = const()[name = tensor("op_899_pad_type_0"), val = tensor("valid")]; + tensor var_899_strides_0 = const()[name = tensor("op_899_strides_0"), val = tensor([1, 1])]; + tensor var_899_pad_0 = const()[name = tensor("op_899_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_899_dilations_0 = const()[name = tensor("op_899_dilations_0"), val = tensor([1, 1])]; + tensor var_899_groups_0 = const()[name = tensor("op_899_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94474432))), name = tensor("layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94463936))), shape = tensor([768, 768, 1, 1])]; + tensor var_899_cast_fp16 = conv(dilations = var_899_dilations_0, groups = var_899_groups_0, pad = var_899_pad_0, pad_type = var_899_pad_type_0, strides = var_899_strides_0, weight = layers_2_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_899_cast_fp16")]; + tensor query_11_cast_fp16 = add(x = var_893_cast_fp16, y = var_899_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_908_pad_type_0 = const()[name = tensor("op_908_pad_type_0"), val = tensor("valid")]; + tensor var_908_strides_0 = const()[name = tensor("op_908_strides_0"), val = tensor([1, 1])]; + tensor var_908_pad_0 = const()[name = tensor("op_908_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_908_dilations_0 = const()[name = tensor("op_908_dilations_0"), val = tensor([1, 1])]; + tensor var_908_groups_0 = const()[name = tensor("op_908_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94548224))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94843200))), name = tensor("layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_908_cast_fp16 = conv(dilations = var_908_dilations_0, groups = var_908_groups_0, pad = var_908_pad_0, pad_type = var_908_pad_type_0, strides = var_908_strides_0, weight = layers_2_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_908_cast_fp16")]; + tensor var_914_pad_type_0 = const()[name = tensor("op_914_pad_type_0"), val = tensor("valid")]; + tensor var_914_strides_0 = const()[name = tensor("op_914_strides_0"), val = tensor([1, 1])]; + tensor var_914_pad_0 = const()[name = tensor("op_914_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_914_dilations_0 = const()[name = tensor("op_914_dilations_0"), val = tensor([1, 1])]; + tensor var_914_groups_0 = const()[name = tensor("op_914_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94855232))), name = tensor("layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94843328))), shape = tensor([768, 768, 1, 1])]; + tensor var_914_cast_fp16 = conv(dilations = var_914_dilations_0, groups = var_914_groups_0, pad = var_914_pad_0, pad_type = var_914_pad_type_0, strides = var_914_strides_0, weight = layers_2_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_914_cast_fp16")]; + tensor key_11_cast_fp16 = add(x = var_908_cast_fp16, y = var_914_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_924_pad_type_0 = const()[name = tensor("op_924_pad_type_0"), val = tensor("valid")]; + tensor var_924_strides_0 = const()[name = tensor("op_924_strides_0"), val = tensor([1, 1])]; + tensor var_924_pad_0 = const()[name = tensor("op_924_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_924_dilations_0 = const()[name = tensor("op_924_dilations_0"), val = tensor([1, 1])]; + tensor var_924_groups_0 = const()[name = tensor("op_924_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94929024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95224000))), name = tensor("layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95224128)))]; + tensor var_924_cast_fp16 = conv(bias = layers_2_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_924_dilations_0, groups = var_924_groups_0, pad = var_924_pad_0, pad_type = var_924_pad_type_0, strides = var_924_strides_0, weight = layers_2_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_924_cast_fp16")]; + tensor var_930_pad_type_0 = const()[name = tensor("op_930_pad_type_0"), val = tensor("valid")]; + tensor var_930_strides_0 = const()[name = tensor("op_930_strides_0"), val = tensor([1, 1])]; + tensor var_930_pad_0 = const()[name = tensor("op_930_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_930_dilations_0 = const()[name = tensor("op_930_dilations_0"), val = tensor([1, 1])]; + tensor var_930_groups_0 = const()[name = tensor("op_930_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95235648))), name = tensor("layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95225728))), shape = tensor([768, 768, 1, 1])]; + tensor var_930_cast_fp16 = conv(dilations = var_930_dilations_0, groups = var_930_groups_0, pad = var_930_pad_0, pad_type = var_930_pad_type_0, strides = var_930_strides_0, weight = layers_2_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_930_cast_fp16")]; + tensor value_11_cast_fp16 = add(x = var_924_cast_fp16, y = var_930_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_934 = const()[name = tensor("op_934"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_11_cast_fp16 = reshape(shape = var_934, x = query_11_cast_fp16)[name = tensor("mh_q_11_cast_fp16")]; + tensor var_936_to_fp16 = const()[name = tensor("op_936_to_fp16"), val = tensor(0x1p-3)]; + tensor var_937_cast_fp16 = mul(x = mh_q_11_cast_fp16, y = var_936_to_fp16)[name = tensor("op_937_cast_fp16")]; + tensor var_940 = const()[name = tensor("op_940"), val = tensor([1, 12, 64, 1500])]; + tensor var_941_cast_fp16 = reshape(shape = var_940, x = key_11_cast_fp16)[name = tensor("op_941_cast_fp16")]; + tensor mh_w_17_transpose_x_0 = const()[name = tensor("mh_w_17_transpose_x_0"), val = tensor(true)]; + tensor mh_w_17_transpose_y_0 = const()[name = tensor("mh_w_17_transpose_y_0"), val = tensor(false)]; + tensor mh_w_17_cast_fp16 = matmul(transpose_x = mh_w_17_transpose_x_0, transpose_y = mh_w_17_transpose_y_0, x = var_937_cast_fp16, y = var_941_cast_fp16)[name = tensor("mh_w_17_cast_fp16")]; + tensor obj_41_cast_fp16 = softmax(axis = var_720, x = mh_w_17_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_945 = const()[name = tensor("op_945"), val = tensor([1, 12, 64, 1500])]; + tensor var_946_cast_fp16 = reshape(shape = var_945, x = value_11_cast_fp16)[name = tensor("op_946_cast_fp16")]; + tensor attn_11_transpose_x_0 = const()[name = tensor("attn_11_transpose_x_0"), val = tensor(false)]; + tensor attn_11_transpose_y_0 = const()[name = tensor("attn_11_transpose_y_0"), val = tensor(true)]; + tensor attn_11_cast_fp16 = matmul(transpose_x = attn_11_transpose_x_0, transpose_y = attn_11_transpose_y_0, x = var_946_cast_fp16, y = obj_41_cast_fp16)[name = tensor("attn_11_cast_fp16")]; + tensor var_949 = const()[name = tensor("op_949"), val = tensor([1, 768, 1, 1])]; + tensor input_23_cast_fp16 = reshape(shape = var_949, x = attn_11_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_959_pad_type_0 = const()[name = tensor("op_959_pad_type_0"), val = tensor("valid")]; + tensor var_959_strides_0 = const()[name = tensor("op_959_strides_0"), val = tensor([1, 1])]; + tensor var_959_pad_0 = const()[name = tensor("op_959_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_959_dilations_0 = const()[name = tensor("op_959_dilations_0"), val = tensor([1, 1])]; + tensor var_959_groups_0 = const()[name = tensor("op_959_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95309440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95604416))), name = tensor("layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95604544)))]; + tensor var_959_cast_fp16 = conv(bias = layers_2_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_959_dilations_0, groups = var_959_groups_0, pad = var_959_pad_0, pad_type = var_959_pad_type_0, strides = var_959_strides_0, weight = layers_2_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = tensor("op_959_cast_fp16")]; + tensor var_965_pad_type_0 = const()[name = tensor("op_965_pad_type_0"), val = tensor("valid")]; + tensor var_965_strides_0 = const()[name = tensor("op_965_strides_0"), val = tensor([1, 1])]; + tensor var_965_pad_0 = const()[name = tensor("op_965_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_965_dilations_0 = const()[name = tensor("op_965_dilations_0"), val = tensor([1, 1])]; + tensor var_965_groups_0 = const()[name = tensor("op_965_groups_0"), val = tensor(1)]; + tensor layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95616384))), name = tensor("layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95606144))), shape = tensor([768, 768, 1, 1])]; + tensor var_965_cast_fp16 = conv(dilations = var_965_dilations_0, groups = var_965_groups_0, pad = var_965_pad_0, pad_type = var_965_pad_type_0, strides = var_965_strides_0, weight = layers_2_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = tensor("op_965_cast_fp16")]; + tensor obj_39_cast_fp16 = add(x = var_959_cast_fp16, y = var_965_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; + tensor var_976_to_fp16 = const()[name = tensor("op_976_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_976_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor input_25_gamma_0_to_fp16 = const()[name = tensor("input_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95690176)))]; + tensor input_25_beta_0_to_fp16 = const()[name = tensor("input_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95691776)))]; + tensor input_25_epsilon_0_to_fp16 = const()[name = tensor("input_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_25_cast_fp16 = batch_norm(beta = input_25_beta_0_to_fp16, epsilon = input_25_epsilon_0_to_fp16, gamma = input_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_994_pad_type_0 = const()[name = tensor("op_994_pad_type_0"), val = tensor("valid")]; + tensor var_994_strides_0 = const()[name = tensor("op_994_strides_0"), val = tensor([1, 1])]; + tensor var_994_pad_0 = const()[name = tensor("op_994_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_994_dilations_0 = const()[name = tensor("op_994_dilations_0"), val = tensor([1, 1])]; + tensor var_994_groups_0 = const()[name = tensor("op_994_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95693376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96873088))), name = tensor("layers_2_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96873216)))]; + tensor var_994_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_994_dilations_0, groups = var_994_groups_0, pad = var_994_pad_0, pad_type = var_994_pad_type_0, strides = var_994_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("op_994_cast_fp16")]; + tensor var_1000_pad_type_0 = const()[name = tensor("op_1000_pad_type_0"), val = tensor("valid")]; + tensor var_1000_strides_0 = const()[name = tensor("op_1000_strides_0"), val = tensor([1, 1])]; + tensor var_1000_pad_0 = const()[name = tensor("op_1000_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1000_dilations_0 = const()[name = tensor("op_1000_dilations_0"), val = tensor([1, 1])]; + tensor var_1000_groups_0 = const()[name = tensor("op_1000_groups_0"), val = tensor(1)]; + tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96933504))), name = tensor("layers_2_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96879424))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1000_cast_fp16 = conv(dilations = var_1000_dilations_0, groups = var_1000_groups_0, pad = var_1000_pad_0, pad_type = var_1000_pad_type_0, strides = var_1000_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor input_27_cast_fp16 = add(x = var_994_cast_fp16, y = var_1000_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor input_29_mode_0 = const()[name = tensor("input_29_mode_0"), val = tensor("EXACT")]; + tensor input_29_cast_fp16 = gelu(mode = input_29_mode_0, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor var_1011_pad_type_0 = const()[name = tensor("op_1011_pad_type_0"), val = tensor("valid")]; + tensor var_1011_strides_0 = const()[name = tensor("op_1011_strides_0"), val = tensor([1, 1])]; + tensor var_1011_pad_0 = const()[name = tensor("op_1011_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1011_dilations_0 = const()[name = tensor("op_1011_dilations_0"), val = tensor([1, 1])]; + tensor var_1011_groups_0 = const()[name = tensor("op_1011_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97228480))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98408192))), name = tensor("layers_2_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98408320)))]; + tensor var_1011_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_1011_dilations_0, groups = var_1011_groups_0, pad = var_1011_pad_0, pad_type = var_1011_pad_type_0, strides = var_1011_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_29_cast_fp16)[name = tensor("op_1011_cast_fp16")]; + tensor var_1017_pad_type_0 = const()[name = tensor("op_1017_pad_type_0"), val = tensor("valid")]; + tensor var_1017_strides_0 = const()[name = tensor("op_1017_strides_0"), val = tensor([1, 1])]; + tensor var_1017_pad_0 = const()[name = tensor("op_1017_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1017_dilations_0 = const()[name = tensor("op_1017_dilations_0"), val = tensor([1, 1])]; + tensor var_1017_groups_0 = const()[name = tensor("op_1017_groups_0"), val = tensor(1)]; + tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98439488))), name = tensor("layers_2_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98409920))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1017_cast_fp16 = conv(dilations = var_1017_dilations_0, groups = var_1017_groups_0, pad = var_1017_pad_0, pad_type = var_1017_pad_type_0, strides = var_1017_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_29_cast_fp16)[name = tensor("op_1017_cast_fp16")]; + tensor hidden_states_7_cast_fp16 = add(x = var_1011_cast_fp16, y = var_1017_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_1029 = const()[name = tensor("op_1029"), val = tensor(3)]; + tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; + tensor var_1054_to_fp16 = const()[name = tensor("op_1054_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_1054_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor obj_43_gamma_0_to_fp16 = const()[name = tensor("obj_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98734464)))]; + tensor obj_43_beta_0_to_fp16 = const()[name = tensor("obj_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98736064)))]; + tensor obj_43_epsilon_0_to_fp16 = const()[name = tensor("obj_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_43_cast_fp16 = batch_norm(beta = obj_43_beta_0_to_fp16, epsilon = obj_43_epsilon_0_to_fp16, gamma = obj_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor var_1076_pad_type_0 = const()[name = tensor("op_1076_pad_type_0"), val = tensor("valid")]; + tensor var_1076_strides_0 = const()[name = tensor("op_1076_strides_0"), val = tensor([1, 1])]; + tensor var_1076_pad_0 = const()[name = tensor("op_1076_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1076_dilations_0 = const()[name = tensor("op_1076_dilations_0"), val = tensor([1, 1])]; + tensor var_1076_groups_0 = const()[name = tensor("op_1076_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98737664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99032640))), name = tensor("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99032768)))]; + tensor var_1076_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1076_dilations_0, groups = var_1076_groups_0, pad = var_1076_pad_0, pad_type = var_1076_pad_type_0, strides = var_1076_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1076_cast_fp16")]; + tensor var_1082_pad_type_0 = const()[name = tensor("op_1082_pad_type_0"), val = tensor("valid")]; + tensor var_1082_strides_0 = const()[name = tensor("op_1082_strides_0"), val = tensor([1, 1])]; + tensor var_1082_pad_0 = const()[name = tensor("op_1082_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1082_dilations_0 = const()[name = tensor("op_1082_dilations_0"), val = tensor([1, 1])]; + tensor var_1082_groups_0 = const()[name = tensor("op_1082_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99049088))), name = tensor("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99034368))), shape = tensor([768, 768, 1, 1])]; + tensor var_1082_cast_fp16 = conv(dilations = var_1082_dilations_0, groups = var_1082_groups_0, pad = var_1082_pad_0, pad_type = var_1082_pad_type_0, strides = var_1082_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1082_cast_fp16")]; + tensor query_13_cast_fp16 = add(x = var_1076_cast_fp16, y = var_1082_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_1091_pad_type_0 = const()[name = tensor("op_1091_pad_type_0"), val = tensor("valid")]; + tensor var_1091_strides_0 = const()[name = tensor("op_1091_strides_0"), val = tensor([1, 1])]; + tensor var_1091_pad_0 = const()[name = tensor("op_1091_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1091_dilations_0 = const()[name = tensor("op_1091_dilations_0"), val = tensor([1, 1])]; + tensor var_1091_groups_0 = const()[name = tensor("op_1091_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99122880))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99417856))), name = tensor("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1091_cast_fp16 = conv(dilations = var_1091_dilations_0, groups = var_1091_groups_0, pad = var_1091_pad_0, pad_type = var_1091_pad_type_0, strides = var_1091_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1091_cast_fp16")]; + tensor var_1097_pad_type_0 = const()[name = tensor("op_1097_pad_type_0"), val = tensor("valid")]; + tensor var_1097_strides_0 = const()[name = tensor("op_1097_strides_0"), val = tensor([1, 1])]; + tensor var_1097_pad_0 = const()[name = tensor("op_1097_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1097_dilations_0 = const()[name = tensor("op_1097_dilations_0"), val = tensor([1, 1])]; + tensor var_1097_groups_0 = const()[name = tensor("op_1097_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99433792))), name = tensor("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99417984))), shape = tensor([768, 768, 1, 1])]; + tensor var_1097_cast_fp16 = conv(dilations = var_1097_dilations_0, groups = var_1097_groups_0, pad = var_1097_pad_0, pad_type = var_1097_pad_type_0, strides = var_1097_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1097_cast_fp16")]; + tensor current_key_7_cast_fp16 = add(x = var_1091_cast_fp16, y = var_1097_cast_fp16)[name = tensor("current_key_7_cast_fp16")]; + tensor var_1107_pad_type_0 = const()[name = tensor("op_1107_pad_type_0"), val = tensor("valid")]; + tensor var_1107_strides_0 = const()[name = tensor("op_1107_strides_0"), val = tensor([1, 1])]; + tensor var_1107_pad_0 = const()[name = tensor("op_1107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1107_dilations_0 = const()[name = tensor("op_1107_dilations_0"), val = tensor([1, 1])]; + tensor var_1107_groups_0 = const()[name = tensor("op_1107_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99507584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99802560))), name = tensor("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99802688)))]; + tensor var_1107_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1107_dilations_0, groups = var_1107_groups_0, pad = var_1107_pad_0, pad_type = var_1107_pad_type_0, strides = var_1107_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_43_cast_fp16)[name = tensor("op_1107_cast_fp16")]; + tensor var_1113_pad_type_0 = const()[name = tensor("op_1113_pad_type_0"), val = tensor("valid")]; + tensor var_1113_strides_0 = const()[name = tensor("op_1113_strides_0"), val = tensor([1, 1])]; + tensor var_1113_pad_0 = const()[name = tensor("op_1113_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1113_dilations_0 = const()[name = tensor("op_1113_dilations_0"), val = tensor([1, 1])]; + tensor var_1113_groups_0 = const()[name = tensor("op_1113_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99812160))), name = tensor("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99804288))), shape = tensor([768, 768, 1, 1])]; + tensor var_1113_cast_fp16 = conv(dilations = var_1113_dilations_0, groups = var_1113_groups_0, pad = var_1113_pad_0, pad_type = var_1113_pad_type_0, strides = var_1113_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_43_cast_fp16)[name = tensor("op_1113_cast_fp16")]; + tensor current_value_7_cast_fp16 = add(x = var_1107_cast_fp16, y = var_1113_cast_fp16)[name = tensor("current_value_7_cast_fp16")]; + tensor var_1120_cast_fp16 = mul(x = var_69_cast_fp16_3, y = var_192_cast_fp16)[name = tensor("op_1120_cast_fp16")]; + tensor var_1121_cast_fp16 = mul(x = current_key_7_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1121_cast_fp16")]; + tensor key_13_cast_fp16 = add(x = var_1120_cast_fp16, y = var_1121_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_1124_cast_fp16 = mul(x = var_84_cast_fp16_3, y = var_192_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1125_cast_fp16 = mul(x = current_value_7_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1125_cast_fp16")]; + tensor value_13_cast_fp16 = add(x = var_1124_cast_fp16, y = var_1125_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_1129 = const()[name = tensor("op_1129"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_13_cast_fp16 = reshape(shape = var_1129, x = query_13_cast_fp16)[name = tensor("mh_q_13_cast_fp16")]; + tensor var_1131_to_fp16 = const()[name = tensor("op_1131_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1132_cast_fp16 = mul(x = mh_q_13_cast_fp16, y = var_1131_to_fp16)[name = tensor("op_1132_cast_fp16")]; + tensor var_1135 = const()[name = tensor("op_1135"), val = tensor([1, 12, 64, 448])]; + tensor var_1136_cast_fp16 = reshape(shape = var_1135, x = key_13_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor mh_w_19_transpose_x_0 = const()[name = tensor("mh_w_19_transpose_x_0"), val = tensor(true)]; + tensor mh_w_19_transpose_y_0 = const()[name = tensor("mh_w_19_transpose_y_0"), val = tensor(false)]; + tensor mh_w_19_cast_fp16 = matmul(transpose_x = mh_w_19_transpose_x_0, transpose_y = mh_w_19_transpose_y_0, x = var_1132_cast_fp16, y = var_1136_cast_fp16)[name = tensor("mh_w_19_cast_fp16")]; + tensor mh_w_21_cast_fp16 = add(x = mh_w_19_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_21_cast_fp16")]; + tensor var_1144_cast_fp16 = softmax(axis = var_1029, x = mh_w_21_cast_fp16)[name = tensor("op_1144_cast_fp16")]; + tensor var_1145 = const()[name = tensor("op_1145"), val = tensor([1, 12, 64, 448])]; + tensor var_1146_cast_fp16 = reshape(shape = var_1145, x = value_13_cast_fp16)[name = tensor("op_1146_cast_fp16")]; + tensor attn_13_transpose_x_0 = const()[name = tensor("attn_13_transpose_x_0"), val = tensor(false)]; + tensor attn_13_transpose_y_0 = const()[name = tensor("attn_13_transpose_y_0"), val = tensor(true)]; + tensor attn_13_cast_fp16 = matmul(transpose_x = attn_13_transpose_x_0, transpose_y = attn_13_transpose_y_0, x = var_1146_cast_fp16, y = var_1144_cast_fp16)[name = tensor("attn_13_cast_fp16")]; + tensor var_1149 = const()[name = tensor("op_1149"), val = tensor([1, 768, 1, 1])]; + tensor input_31_cast_fp16 = reshape(shape = var_1149, x = attn_13_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_1159_pad_type_0 = const()[name = tensor("op_1159_pad_type_0"), val = tensor("valid")]; + tensor var_1159_strides_0 = const()[name = tensor("op_1159_strides_0"), val = tensor([1, 1])]; + tensor var_1159_pad_0 = const()[name = tensor("op_1159_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1159_dilations_0 = const()[name = tensor("op_1159_dilations_0"), val = tensor([1, 1])]; + tensor var_1159_groups_0 = const()[name = tensor("op_1159_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99885952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100180928))), name = tensor("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100181056)))]; + tensor var_1159_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1159_dilations_0, groups = var_1159_groups_0, pad = var_1159_pad_0, pad_type = var_1159_pad_type_0, strides = var_1159_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = tensor("op_1159_cast_fp16")]; + tensor var_1165_pad_type_0 = const()[name = tensor("op_1165_pad_type_0"), val = tensor("valid")]; + tensor var_1165_strides_0 = const()[name = tensor("op_1165_strides_0"), val = tensor([1, 1])]; + tensor var_1165_pad_0 = const()[name = tensor("op_1165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1165_dilations_0 = const()[name = tensor("op_1165_dilations_0"), val = tensor([1, 1])]; + tensor var_1165_groups_0 = const()[name = tensor("op_1165_groups_0"), val = tensor(1)]; + tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100193920))), name = tensor("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100182656))), shape = tensor([768, 768, 1, 1])]; + tensor var_1165_cast_fp16 = conv(dilations = var_1165_dilations_0, groups = var_1165_groups_0, pad = var_1165_pad_0, pad_type = var_1165_pad_type_0, strides = var_1165_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = tensor("op_1165_cast_fp16")]; + tensor obj_49_cast_fp16 = add(x = var_1159_cast_fp16, y = var_1165_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = obj_49_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; + tensor var_1180_to_fp16 = const()[name = tensor("op_1180_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_1180_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_51_gamma_0_to_fp16 = const()[name = tensor("obj_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100267712)))]; + tensor obj_51_beta_0_to_fp16 = const()[name = tensor("obj_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100269312)))]; + tensor obj_51_epsilon_0_to_fp16 = const()[name = tensor("obj_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_51_cast_fp16 = batch_norm(beta = obj_51_beta_0_to_fp16, epsilon = obj_51_epsilon_0_to_fp16, gamma = obj_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor var_1202_pad_type_0 = const()[name = tensor("op_1202_pad_type_0"), val = tensor("valid")]; + tensor var_1202_strides_0 = const()[name = tensor("op_1202_strides_0"), val = tensor([1, 1])]; + tensor var_1202_pad_0 = const()[name = tensor("op_1202_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1202_dilations_0 = const()[name = tensor("op_1202_dilations_0"), val = tensor([1, 1])]; + tensor var_1202_groups_0 = const()[name = tensor("op_1202_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100270912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100565888))), name = tensor("layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100566016)))]; + tensor var_1202_cast_fp16 = conv(bias = layers_3_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1202_dilations_0, groups = var_1202_groups_0, pad = var_1202_pad_0, pad_type = var_1202_pad_type_0, strides = var_1202_strides_0, weight = layers_3_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_51_cast_fp16)[name = tensor("op_1202_cast_fp16")]; + tensor var_1208_pad_type_0 = const()[name = tensor("op_1208_pad_type_0"), val = tensor("valid")]; + tensor var_1208_strides_0 = const()[name = tensor("op_1208_strides_0"), val = tensor([1, 1])]; + tensor var_1208_pad_0 = const()[name = tensor("op_1208_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1208_dilations_0 = const()[name = tensor("op_1208_dilations_0"), val = tensor([1, 1])]; + tensor var_1208_groups_0 = const()[name = tensor("op_1208_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100578048))), name = tensor("layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100567616))), shape = tensor([768, 768, 1, 1])]; + tensor var_1208_cast_fp16 = conv(dilations = var_1208_dilations_0, groups = var_1208_groups_0, pad = var_1208_pad_0, pad_type = var_1208_pad_type_0, strides = var_1208_strides_0, weight = layers_3_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_51_cast_fp16)[name = tensor("op_1208_cast_fp16")]; + tensor query_15_cast_fp16 = add(x = var_1202_cast_fp16, y = var_1208_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_1217_pad_type_0 = const()[name = tensor("op_1217_pad_type_0"), val = tensor("valid")]; + tensor var_1217_strides_0 = const()[name = tensor("op_1217_strides_0"), val = tensor([1, 1])]; + tensor var_1217_pad_0 = const()[name = tensor("op_1217_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1217_dilations_0 = const()[name = tensor("op_1217_dilations_0"), val = tensor([1, 1])]; + tensor var_1217_groups_0 = const()[name = tensor("op_1217_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100651840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100946816))), name = tensor("layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1217_cast_fp16 = conv(dilations = var_1217_dilations_0, groups = var_1217_groups_0, pad = var_1217_pad_0, pad_type = var_1217_pad_type_0, strides = var_1217_strides_0, weight = layers_3_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1217_cast_fp16")]; + tensor var_1223_pad_type_0 = const()[name = tensor("op_1223_pad_type_0"), val = tensor("valid")]; + tensor var_1223_strides_0 = const()[name = tensor("op_1223_strides_0"), val = tensor([1, 1])]; + tensor var_1223_pad_0 = const()[name = tensor("op_1223_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1223_dilations_0 = const()[name = tensor("op_1223_dilations_0"), val = tensor([1, 1])]; + tensor var_1223_groups_0 = const()[name = tensor("op_1223_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100958400))), name = tensor("layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100946944))), shape = tensor([768, 768, 1, 1])]; + tensor var_1223_cast_fp16 = conv(dilations = var_1223_dilations_0, groups = var_1223_groups_0, pad = var_1223_pad_0, pad_type = var_1223_pad_type_0, strides = var_1223_strides_0, weight = layers_3_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1223_cast_fp16")]; + tensor key_15_cast_fp16 = add(x = var_1217_cast_fp16, y = var_1223_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_1233_pad_type_0 = const()[name = tensor("op_1233_pad_type_0"), val = tensor("valid")]; + tensor var_1233_strides_0 = const()[name = tensor("op_1233_strides_0"), val = tensor([1, 1])]; + tensor var_1233_pad_0 = const()[name = tensor("op_1233_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1233_dilations_0 = const()[name = tensor("op_1233_dilations_0"), val = tensor([1, 1])]; + tensor var_1233_groups_0 = const()[name = tensor("op_1233_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101032192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101327168))), name = tensor("layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101327296)))]; + tensor var_1233_cast_fp16 = conv(bias = layers_3_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1233_dilations_0, groups = var_1233_groups_0, pad = var_1233_pad_0, pad_type = var_1233_pad_type_0, strides = var_1233_strides_0, weight = layers_3_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1233_cast_fp16")]; + tensor var_1239_pad_type_0 = const()[name = tensor("op_1239_pad_type_0"), val = tensor("valid")]; + tensor var_1239_strides_0 = const()[name = tensor("op_1239_strides_0"), val = tensor([1, 1])]; + tensor var_1239_pad_0 = const()[name = tensor("op_1239_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1239_dilations_0 = const()[name = tensor("op_1239_dilations_0"), val = tensor([1, 1])]; + tensor var_1239_groups_0 = const()[name = tensor("op_1239_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101339136))), name = tensor("layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101328896))), shape = tensor([768, 768, 1, 1])]; + tensor var_1239_cast_fp16 = conv(dilations = var_1239_dilations_0, groups = var_1239_groups_0, pad = var_1239_pad_0, pad_type = var_1239_pad_type_0, strides = var_1239_strides_0, weight = layers_3_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1239_cast_fp16")]; + tensor value_15_cast_fp16 = add(x = var_1233_cast_fp16, y = var_1239_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_1243 = const()[name = tensor("op_1243"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_15_cast_fp16 = reshape(shape = var_1243, x = query_15_cast_fp16)[name = tensor("mh_q_15_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1246_cast_fp16 = mul(x = mh_q_15_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1249 = const()[name = tensor("op_1249"), val = tensor([1, 12, 64, 1500])]; + tensor var_1250_cast_fp16 = reshape(shape = var_1249, x = key_15_cast_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor mh_w_23_transpose_x_0 = const()[name = tensor("mh_w_23_transpose_x_0"), val = tensor(true)]; + tensor mh_w_23_transpose_y_0 = const()[name = tensor("mh_w_23_transpose_y_0"), val = tensor(false)]; + tensor mh_w_23_cast_fp16 = matmul(transpose_x = mh_w_23_transpose_x_0, transpose_y = mh_w_23_transpose_y_0, x = var_1246_cast_fp16, y = var_1250_cast_fp16)[name = tensor("mh_w_23_cast_fp16")]; + tensor obj_55_cast_fp16 = softmax(axis = var_1029, x = mh_w_23_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor var_1254 = const()[name = tensor("op_1254"), val = tensor([1, 12, 64, 1500])]; + tensor var_1255_cast_fp16 = reshape(shape = var_1254, x = value_15_cast_fp16)[name = tensor("op_1255_cast_fp16")]; + tensor attn_15_transpose_x_0 = const()[name = tensor("attn_15_transpose_x_0"), val = tensor(false)]; + tensor attn_15_transpose_y_0 = const()[name = tensor("attn_15_transpose_y_0"), val = tensor(true)]; + tensor attn_15_cast_fp16 = matmul(transpose_x = attn_15_transpose_x_0, transpose_y = attn_15_transpose_y_0, x = var_1255_cast_fp16, y = obj_55_cast_fp16)[name = tensor("attn_15_cast_fp16")]; + tensor var_1258 = const()[name = tensor("op_1258"), val = tensor([1, 768, 1, 1])]; + tensor input_33_cast_fp16 = reshape(shape = var_1258, x = attn_15_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor var_1268_pad_type_0 = const()[name = tensor("op_1268_pad_type_0"), val = tensor("valid")]; + tensor var_1268_strides_0 = const()[name = tensor("op_1268_strides_0"), val = tensor([1, 1])]; + tensor var_1268_pad_0 = const()[name = tensor("op_1268_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1268_dilations_0 = const()[name = tensor("op_1268_dilations_0"), val = tensor([1, 1])]; + tensor var_1268_groups_0 = const()[name = tensor("op_1268_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101412928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101707904))), name = tensor("layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101708032)))]; + tensor var_1268_cast_fp16 = conv(bias = layers_3_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1268_dilations_0, groups = var_1268_groups_0, pad = var_1268_pad_0, pad_type = var_1268_pad_type_0, strides = var_1268_strides_0, weight = layers_3_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("op_1268_cast_fp16")]; + tensor var_1274_pad_type_0 = const()[name = tensor("op_1274_pad_type_0"), val = tensor("valid")]; + tensor var_1274_strides_0 = const()[name = tensor("op_1274_strides_0"), val = tensor([1, 1])]; + tensor var_1274_pad_0 = const()[name = tensor("op_1274_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1274_dilations_0 = const()[name = tensor("op_1274_dilations_0"), val = tensor([1, 1])]; + tensor var_1274_groups_0 = const()[name = tensor("op_1274_groups_0"), val = tensor(1)]; + tensor layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101718592))), name = tensor("layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101709632))), shape = tensor([768, 768, 1, 1])]; + tensor var_1274_cast_fp16 = conv(dilations = var_1274_dilations_0, groups = var_1274_groups_0, pad = var_1274_pad_0, pad_type = var_1274_pad_type_0, strides = var_1274_strides_0, weight = layers_3_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor obj_53_cast_fp16 = add(x = var_1268_cast_fp16, y = var_1274_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_53_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_1285_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101792384)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101793984)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_1303_pad_type_0 = const()[name = tensor("op_1303_pad_type_0"), val = tensor("valid")]; + tensor var_1303_strides_0 = const()[name = tensor("op_1303_strides_0"), val = tensor([1, 1])]; + tensor var_1303_pad_0 = const()[name = tensor("op_1303_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1303_dilations_0 = const()[name = tensor("op_1303_dilations_0"), val = tensor([1, 1])]; + tensor var_1303_groups_0 = const()[name = tensor("op_1303_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101795584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102975296))), name = tensor("layers_3_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102975424)))]; + tensor var_1303_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_1303_dilations_0, groups = var_1303_groups_0, pad = var_1303_pad_0, pad_type = var_1303_pad_type_0, strides = var_1303_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = tensor("op_1303_cast_fp16")]; + tensor var_1309_pad_type_0 = const()[name = tensor("op_1309_pad_type_0"), val = tensor("valid")]; + tensor var_1309_strides_0 = const()[name = tensor("op_1309_strides_0"), val = tensor([1, 1])]; + tensor var_1309_pad_0 = const()[name = tensor("op_1309_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1309_dilations_0 = const()[name = tensor("op_1309_dilations_0"), val = tensor([1, 1])]; + tensor var_1309_groups_0 = const()[name = tensor("op_1309_groups_0"), val = tensor(1)]; + tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103009024))), name = tensor("layers_3_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102981632))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1309_cast_fp16 = conv(dilations = var_1309_dilations_0, groups = var_1309_groups_0, pad = var_1309_pad_0, pad_type = var_1309_pad_type_0, strides = var_1309_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = tensor("op_1309_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = var_1303_cast_fp16, y = var_1309_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_1320_pad_type_0 = const()[name = tensor("op_1320_pad_type_0"), val = tensor("valid")]; + tensor var_1320_strides_0 = const()[name = tensor("op_1320_strides_0"), val = tensor([1, 1])]; + tensor var_1320_pad_0 = const()[name = tensor("op_1320_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1320_dilations_0 = const()[name = tensor("op_1320_dilations_0"), val = tensor([1, 1])]; + tensor var_1320_groups_0 = const()[name = tensor("op_1320_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(103304000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104483712))), name = tensor("layers_3_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104483840)))]; + tensor var_1320_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_1320_dilations_0, groups = var_1320_groups_0, pad = var_1320_pad_0, pad_type = var_1320_pad_type_0, strides = var_1320_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = tensor("op_1320_cast_fp16")]; + tensor var_1326_pad_type_0 = const()[name = tensor("op_1326_pad_type_0"), val = tensor("valid")]; + tensor var_1326_strides_0 = const()[name = tensor("op_1326_strides_0"), val = tensor([1, 1])]; + tensor var_1326_pad_0 = const()[name = tensor("op_1326_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1326_dilations_0 = const()[name = tensor("op_1326_dilations_0"), val = tensor([1, 1])]; + tensor var_1326_groups_0 = const()[name = tensor("op_1326_groups_0"), val = tensor(1)]; + tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104509888))), name = tensor("layers_3_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104485440))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1326_cast_fp16 = conv(dilations = var_1326_dilations_0, groups = var_1326_groups_0, pad = var_1326_pad_0, pad_type = var_1326_pad_type_0, strides = var_1326_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = tensor("op_1326_cast_fp16")]; + tensor hidden_states_9_cast_fp16 = add(x = var_1320_cast_fp16, y = var_1326_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_1338 = const()[name = tensor("op_1338"), val = tensor(3)]; + tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; + tensor var_1363_to_fp16 = const()[name = tensor("op_1363_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_1363_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104804864)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104806464)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor var_1385_pad_type_0 = const()[name = tensor("op_1385_pad_type_0"), val = tensor("valid")]; + tensor var_1385_strides_0 = const()[name = tensor("op_1385_strides_0"), val = tensor([1, 1])]; + tensor var_1385_pad_0 = const()[name = tensor("op_1385_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1385_dilations_0 = const()[name = tensor("op_1385_dilations_0"), val = tensor([1, 1])]; + tensor var_1385_groups_0 = const()[name = tensor("op_1385_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(104808064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105103040))), name = tensor("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105103168)))]; + tensor var_1385_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1385_dilations_0, groups = var_1385_groups_0, pad = var_1385_pad_0, pad_type = var_1385_pad_type_0, strides = var_1385_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_1385_cast_fp16")]; + tensor var_1391_pad_type_0 = const()[name = tensor("op_1391_pad_type_0"), val = tensor("valid")]; + tensor var_1391_strides_0 = const()[name = tensor("op_1391_strides_0"), val = tensor([1, 1])]; + tensor var_1391_pad_0 = const()[name = tensor("op_1391_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1391_dilations_0 = const()[name = tensor("op_1391_dilations_0"), val = tensor([1, 1])]; + tensor var_1391_groups_0 = const()[name = tensor("op_1391_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105112960))), name = tensor("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105104768))), shape = tensor([768, 768, 1, 1])]; + tensor var_1391_cast_fp16 = conv(dilations = var_1391_dilations_0, groups = var_1391_groups_0, pad = var_1391_pad_0, pad_type = var_1391_pad_type_0, strides = var_1391_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_1391_cast_fp16")]; + tensor query_17_cast_fp16 = add(x = var_1385_cast_fp16, y = var_1391_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_1400_pad_type_0 = const()[name = tensor("op_1400_pad_type_0"), val = tensor("valid")]; + tensor var_1400_strides_0 = const()[name = tensor("op_1400_strides_0"), val = tensor([1, 1])]; + tensor var_1400_pad_0 = const()[name = tensor("op_1400_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1400_dilations_0 = const()[name = tensor("op_1400_dilations_0"), val = tensor([1, 1])]; + tensor var_1400_groups_0 = const()[name = tensor("op_1400_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105186752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105481728))), name = tensor("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1400_cast_fp16 = conv(dilations = var_1400_dilations_0, groups = var_1400_groups_0, pad = var_1400_pad_0, pad_type = var_1400_pad_type_0, strides = var_1400_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_1400_cast_fp16")]; + tensor var_1406_pad_type_0 = const()[name = tensor("op_1406_pad_type_0"), val = tensor("valid")]; + tensor var_1406_strides_0 = const()[name = tensor("op_1406_strides_0"), val = tensor([1, 1])]; + tensor var_1406_pad_0 = const()[name = tensor("op_1406_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1406_dilations_0 = const()[name = tensor("op_1406_dilations_0"), val = tensor([1, 1])]; + tensor var_1406_groups_0 = const()[name = tensor("op_1406_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105491008))), name = tensor("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105481856))), shape = tensor([768, 768, 1, 1])]; + tensor var_1406_cast_fp16 = conv(dilations = var_1406_dilations_0, groups = var_1406_groups_0, pad = var_1406_pad_0, pad_type = var_1406_pad_type_0, strides = var_1406_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor current_key_9_cast_fp16 = add(x = var_1400_cast_fp16, y = var_1406_cast_fp16)[name = tensor("current_key_9_cast_fp16")]; + tensor var_1416_pad_type_0 = const()[name = tensor("op_1416_pad_type_0"), val = tensor("valid")]; + tensor var_1416_strides_0 = const()[name = tensor("op_1416_strides_0"), val = tensor([1, 1])]; + tensor var_1416_pad_0 = const()[name = tensor("op_1416_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1416_dilations_0 = const()[name = tensor("op_1416_dilations_0"), val = tensor([1, 1])]; + tensor var_1416_groups_0 = const()[name = tensor("op_1416_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105564800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105859776))), name = tensor("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105859904)))]; + tensor var_1416_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1416_dilations_0, groups = var_1416_groups_0, pad = var_1416_pad_0, pad_type = var_1416_pad_type_0, strides = var_1416_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_1416_cast_fp16")]; + tensor var_1422_pad_type_0 = const()[name = tensor("op_1422_pad_type_0"), val = tensor("valid")]; + tensor var_1422_strides_0 = const()[name = tensor("op_1422_strides_0"), val = tensor([1, 1])]; + tensor var_1422_pad_0 = const()[name = tensor("op_1422_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1422_dilations_0 = const()[name = tensor("op_1422_dilations_0"), val = tensor([1, 1])]; + tensor var_1422_groups_0 = const()[name = tensor("op_1422_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105868224))), name = tensor("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105861504))), shape = tensor([768, 768, 1, 1])]; + tensor var_1422_cast_fp16 = conv(dilations = var_1422_dilations_0, groups = var_1422_groups_0, pad = var_1422_pad_0, pad_type = var_1422_pad_type_0, strides = var_1422_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor current_value_9_cast_fp16 = add(x = var_1416_cast_fp16, y = var_1422_cast_fp16)[name = tensor("current_value_9_cast_fp16")]; + tensor var_1429_cast_fp16 = mul(x = var_69_cast_fp16_4, y = var_192_cast_fp16)[name = tensor("op_1429_cast_fp16")]; + tensor var_1430_cast_fp16 = mul(x = current_key_9_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor key_17_cast_fp16 = add(x = var_1429_cast_fp16, y = var_1430_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_1433_cast_fp16 = mul(x = var_84_cast_fp16_4, y = var_192_cast_fp16)[name = tensor("op_1433_cast_fp16")]; + tensor var_1434_cast_fp16 = mul(x = current_value_9_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor value_17_cast_fp16 = add(x = var_1433_cast_fp16, y = var_1434_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_1438 = const()[name = tensor("op_1438"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_17_cast_fp16 = reshape(shape = var_1438, x = query_17_cast_fp16)[name = tensor("mh_q_17_cast_fp16")]; + tensor var_1440_to_fp16 = const()[name = tensor("op_1440_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1441_cast_fp16 = mul(x = mh_q_17_cast_fp16, y = var_1440_to_fp16)[name = tensor("op_1441_cast_fp16")]; + tensor var_1444 = const()[name = tensor("op_1444"), val = tensor([1, 12, 64, 448])]; + tensor var_1445_cast_fp16 = reshape(shape = var_1444, x = key_17_cast_fp16)[name = tensor("op_1445_cast_fp16")]; + tensor mh_w_25_transpose_x_0 = const()[name = tensor("mh_w_25_transpose_x_0"), val = tensor(true)]; + tensor mh_w_25_transpose_y_0 = const()[name = tensor("mh_w_25_transpose_y_0"), val = tensor(false)]; + tensor mh_w_25_cast_fp16 = matmul(transpose_x = mh_w_25_transpose_x_0, transpose_y = mh_w_25_transpose_y_0, x = var_1441_cast_fp16, y = var_1445_cast_fp16)[name = tensor("mh_w_25_cast_fp16")]; + tensor mh_w_27_cast_fp16 = add(x = mh_w_25_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_27_cast_fp16")]; + tensor var_1453_cast_fp16 = softmax(axis = var_1338, x = mh_w_27_cast_fp16)[name = tensor("op_1453_cast_fp16")]; + tensor var_1454 = const()[name = tensor("op_1454"), val = tensor([1, 12, 64, 448])]; + tensor var_1455_cast_fp16 = reshape(shape = var_1454, x = value_17_cast_fp16)[name = tensor("op_1455_cast_fp16")]; + tensor attn_17_transpose_x_0 = const()[name = tensor("attn_17_transpose_x_0"), val = tensor(false)]; + tensor attn_17_transpose_y_0 = const()[name = tensor("attn_17_transpose_y_0"), val = tensor(true)]; + tensor attn_17_cast_fp16 = matmul(transpose_x = attn_17_transpose_x_0, transpose_y = attn_17_transpose_y_0, x = var_1455_cast_fp16, y = var_1453_cast_fp16)[name = tensor("attn_17_cast_fp16")]; + tensor var_1458 = const()[name = tensor("op_1458"), val = tensor([1, 768, 1, 1])]; + tensor input_41_cast_fp16 = reshape(shape = var_1458, x = attn_17_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_1468_pad_type_0 = const()[name = tensor("op_1468_pad_type_0"), val = tensor("valid")]; + tensor var_1468_strides_0 = const()[name = tensor("op_1468_strides_0"), val = tensor([1, 1])]; + tensor var_1468_pad_0 = const()[name = tensor("op_1468_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1468_dilations_0 = const()[name = tensor("op_1468_dilations_0"), val = tensor([1, 1])]; + tensor var_1468_groups_0 = const()[name = tensor("op_1468_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105942016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106236992))), name = tensor("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106237120)))]; + tensor var_1468_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1468_dilations_0, groups = var_1468_groups_0, pad = var_1468_pad_0, pad_type = var_1468_pad_type_0, strides = var_1468_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = tensor("op_1468_cast_fp16")]; + tensor var_1474_pad_type_0 = const()[name = tensor("op_1474_pad_type_0"), val = tensor("valid")]; + tensor var_1474_strides_0 = const()[name = tensor("op_1474_strides_0"), val = tensor([1, 1])]; + tensor var_1474_pad_0 = const()[name = tensor("op_1474_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1474_dilations_0 = const()[name = tensor("op_1474_dilations_0"), val = tensor([1, 1])]; + tensor var_1474_groups_0 = const()[name = tensor("op_1474_groups_0"), val = tensor(1)]; + tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106247936))), name = tensor("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106238720))), shape = tensor([768, 768, 1, 1])]; + tensor var_1474_cast_fp16 = conv(dilations = var_1474_dilations_0, groups = var_1474_groups_0, pad = var_1474_pad_0, pad_type = var_1474_pad_type_0, strides = var_1474_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = tensor("op_1474_cast_fp16")]; + tensor obj_63_cast_fp16 = add(x = var_1468_cast_fp16, y = var_1474_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; + tensor var_1489_to_fp16 = const()[name = tensor("op_1489_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_1489_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106321728)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106323328)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor var_1511_pad_type_0 = const()[name = tensor("op_1511_pad_type_0"), val = tensor("valid")]; + tensor var_1511_strides_0 = const()[name = tensor("op_1511_strides_0"), val = tensor([1, 1])]; + tensor var_1511_pad_0 = const()[name = tensor("op_1511_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1511_dilations_0 = const()[name = tensor("op_1511_dilations_0"), val = tensor([1, 1])]; + tensor var_1511_groups_0 = const()[name = tensor("op_1511_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106324928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106619904))), name = tensor("layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106620032)))]; + tensor var_1511_cast_fp16 = conv(bias = layers_4_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1511_dilations_0, groups = var_1511_groups_0, pad = var_1511_pad_0, pad_type = var_1511_pad_type_0, strides = var_1511_strides_0, weight = layers_4_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_1511_cast_fp16")]; + tensor var_1517_pad_type_0 = const()[name = tensor("op_1517_pad_type_0"), val = tensor("valid")]; + tensor var_1517_strides_0 = const()[name = tensor("op_1517_strides_0"), val = tensor([1, 1])]; + tensor var_1517_pad_0 = const()[name = tensor("op_1517_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1517_dilations_0 = const()[name = tensor("op_1517_dilations_0"), val = tensor([1, 1])]; + tensor var_1517_groups_0 = const()[name = tensor("op_1517_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106630144))), name = tensor("layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106621632))), shape = tensor([768, 768, 1, 1])]; + tensor var_1517_cast_fp16 = conv(dilations = var_1517_dilations_0, groups = var_1517_groups_0, pad = var_1517_pad_0, pad_type = var_1517_pad_type_0, strides = var_1517_strides_0, weight = layers_4_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_1517_cast_fp16")]; + tensor query_19_cast_fp16 = add(x = var_1511_cast_fp16, y = var_1517_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_1526_pad_type_0 = const()[name = tensor("op_1526_pad_type_0"), val = tensor("valid")]; + tensor var_1526_strides_0 = const()[name = tensor("op_1526_strides_0"), val = tensor([1, 1])]; + tensor var_1526_pad_0 = const()[name = tensor("op_1526_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1526_dilations_0 = const()[name = tensor("op_1526_dilations_0"), val = tensor([1, 1])]; + tensor var_1526_groups_0 = const()[name = tensor("op_1526_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106703936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106998912))), name = tensor("layers_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1526_cast_fp16 = conv(dilations = var_1526_dilations_0, groups = var_1526_groups_0, pad = var_1526_pad_0, pad_type = var_1526_pad_type_0, strides = var_1526_strides_0, weight = layers_4_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1526_cast_fp16")]; + tensor var_1532_pad_type_0 = const()[name = tensor("op_1532_pad_type_0"), val = tensor("valid")]; + tensor var_1532_strides_0 = const()[name = tensor("op_1532_strides_0"), val = tensor([1, 1])]; + tensor var_1532_pad_0 = const()[name = tensor("op_1532_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1532_dilations_0 = const()[name = tensor("op_1532_dilations_0"), val = tensor([1, 1])]; + tensor var_1532_groups_0 = const()[name = tensor("op_1532_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107007936))), name = tensor("layers_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106999040))), shape = tensor([768, 768, 1, 1])]; + tensor var_1532_cast_fp16 = conv(dilations = var_1532_dilations_0, groups = var_1532_groups_0, pad = var_1532_pad_0, pad_type = var_1532_pad_type_0, strides = var_1532_strides_0, weight = layers_4_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1532_cast_fp16")]; + tensor key_19_cast_fp16 = add(x = var_1526_cast_fp16, y = var_1532_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_1542_pad_type_0 = const()[name = tensor("op_1542_pad_type_0"), val = tensor("valid")]; + tensor var_1542_strides_0 = const()[name = tensor("op_1542_strides_0"), val = tensor([1, 1])]; + tensor var_1542_pad_0 = const()[name = tensor("op_1542_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1542_dilations_0 = const()[name = tensor("op_1542_dilations_0"), val = tensor([1, 1])]; + tensor var_1542_groups_0 = const()[name = tensor("op_1542_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107081728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107376704))), name = tensor("layers_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107376832)))]; + tensor var_1542_cast_fp16 = conv(bias = layers_4_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1542_dilations_0, groups = var_1542_groups_0, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1542_strides_0, weight = layers_4_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1542_cast_fp16")]; + tensor var_1548_pad_type_0 = const()[name = tensor("op_1548_pad_type_0"), val = tensor("valid")]; + tensor var_1548_strides_0 = const()[name = tensor("op_1548_strides_0"), val = tensor([1, 1])]; + tensor var_1548_pad_0 = const()[name = tensor("op_1548_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1548_dilations_0 = const()[name = tensor("op_1548_dilations_0"), val = tensor([1, 1])]; + tensor var_1548_groups_0 = const()[name = tensor("op_1548_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107387648))), name = tensor("layers_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107378432))), shape = tensor([768, 768, 1, 1])]; + tensor var_1548_cast_fp16 = conv(dilations = var_1548_dilations_0, groups = var_1548_groups_0, pad = var_1548_pad_0, pad_type = var_1548_pad_type_0, strides = var_1548_strides_0, weight = layers_4_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1548_cast_fp16")]; + tensor value_19_cast_fp16 = add(x = var_1542_cast_fp16, y = var_1548_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_1552 = const()[name = tensor("op_1552"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_19_cast_fp16 = reshape(shape = var_1552, x = query_19_cast_fp16)[name = tensor("mh_q_19_cast_fp16")]; + tensor var_1554_to_fp16 = const()[name = tensor("op_1554_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1555_cast_fp16 = mul(x = mh_q_19_cast_fp16, y = var_1554_to_fp16)[name = tensor("op_1555_cast_fp16")]; + tensor var_1558 = const()[name = tensor("op_1558"), val = tensor([1, 12, 64, 1500])]; + tensor var_1559_cast_fp16 = reshape(shape = var_1558, x = key_19_cast_fp16)[name = tensor("op_1559_cast_fp16")]; + tensor mh_w_29_transpose_x_0 = const()[name = tensor("mh_w_29_transpose_x_0"), val = tensor(true)]; + tensor mh_w_29_transpose_y_0 = const()[name = tensor("mh_w_29_transpose_y_0"), val = tensor(false)]; + tensor mh_w_29_cast_fp16 = matmul(transpose_x = mh_w_29_transpose_x_0, transpose_y = mh_w_29_transpose_y_0, x = var_1555_cast_fp16, y = var_1559_cast_fp16)[name = tensor("mh_w_29_cast_fp16")]; + tensor obj_69_cast_fp16 = softmax(axis = var_1338, x = mh_w_29_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor var_1563 = const()[name = tensor("op_1563"), val = tensor([1, 12, 64, 1500])]; + tensor var_1564_cast_fp16 = reshape(shape = var_1563, x = value_19_cast_fp16)[name = tensor("op_1564_cast_fp16")]; + tensor attn_19_transpose_x_0 = const()[name = tensor("attn_19_transpose_x_0"), val = tensor(false)]; + tensor attn_19_transpose_y_0 = const()[name = tensor("attn_19_transpose_y_0"), val = tensor(true)]; + tensor attn_19_cast_fp16 = matmul(transpose_x = attn_19_transpose_x_0, transpose_y = attn_19_transpose_y_0, x = var_1564_cast_fp16, y = obj_69_cast_fp16)[name = tensor("attn_19_cast_fp16")]; + tensor var_1567 = const()[name = tensor("op_1567"), val = tensor([1, 768, 1, 1])]; + tensor input_43_cast_fp16 = reshape(shape = var_1567, x = attn_19_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_1577_pad_type_0 = const()[name = tensor("op_1577_pad_type_0"), val = tensor("valid")]; + tensor var_1577_strides_0 = const()[name = tensor("op_1577_strides_0"), val = tensor([1, 1])]; + tensor var_1577_pad_0 = const()[name = tensor("op_1577_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1577_dilations_0 = const()[name = tensor("op_1577_dilations_0"), val = tensor([1, 1])]; + tensor var_1577_groups_0 = const()[name = tensor("op_1577_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107461440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107756416))), name = tensor("layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107756544)))]; + tensor var_1577_cast_fp16 = conv(bias = layers_4_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1577_dilations_0, groups = var_1577_groups_0, pad = var_1577_pad_0, pad_type = var_1577_pad_type_0, strides = var_1577_strides_0, weight = layers_4_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("op_1577_cast_fp16")]; + tensor var_1583_pad_type_0 = const()[name = tensor("op_1583_pad_type_0"), val = tensor("valid")]; + tensor var_1583_strides_0 = const()[name = tensor("op_1583_strides_0"), val = tensor([1, 1])]; + tensor var_1583_pad_0 = const()[name = tensor("op_1583_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1583_dilations_0 = const()[name = tensor("op_1583_dilations_0"), val = tensor([1, 1])]; + tensor var_1583_groups_0 = const()[name = tensor("op_1583_groups_0"), val = tensor(1)]; + tensor layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107765952))), name = tensor("layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107758144))), shape = tensor([768, 768, 1, 1])]; + tensor var_1583_cast_fp16 = conv(dilations = var_1583_dilations_0, groups = var_1583_groups_0, pad = var_1583_pad_0, pad_type = var_1583_pad_type_0, strides = var_1583_strides_0, weight = layers_4_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = tensor("op_1583_cast_fp16")]; + tensor obj_67_cast_fp16 = add(x = var_1577_cast_fp16, y = var_1583_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; + tensor var_1594_to_fp16 = const()[name = tensor("op_1594_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_1594_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor input_45_gamma_0_to_fp16 = const()[name = tensor("input_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107839744)))]; + tensor input_45_beta_0_to_fp16 = const()[name = tensor("input_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107841344)))]; + tensor input_45_epsilon_0_to_fp16 = const()[name = tensor("input_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_45_cast_fp16 = batch_norm(beta = input_45_beta_0_to_fp16, epsilon = input_45_epsilon_0_to_fp16, gamma = input_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor var_1612_pad_type_0 = const()[name = tensor("op_1612_pad_type_0"), val = tensor("valid")]; + tensor var_1612_strides_0 = const()[name = tensor("op_1612_strides_0"), val = tensor([1, 1])]; + tensor var_1612_pad_0 = const()[name = tensor("op_1612_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1612_dilations_0 = const()[name = tensor("op_1612_dilations_0"), val = tensor([1, 1])]; + tensor var_1612_groups_0 = const()[name = tensor("op_1612_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107842944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109022656))), name = tensor("layers_4_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109022784)))]; + tensor var_1612_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_1612_dilations_0, groups = var_1612_groups_0, pad = var_1612_pad_0, pad_type = var_1612_pad_type_0, strides = var_1612_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_45_cast_fp16)[name = tensor("op_1612_cast_fp16")]; + tensor var_1618_pad_type_0 = const()[name = tensor("op_1618_pad_type_0"), val = tensor("valid")]; + tensor var_1618_strides_0 = const()[name = tensor("op_1618_strides_0"), val = tensor([1, 1])]; + tensor var_1618_pad_0 = const()[name = tensor("op_1618_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1618_dilations_0 = const()[name = tensor("op_1618_dilations_0"), val = tensor([1, 1])]; + tensor var_1618_groups_0 = const()[name = tensor("op_1618_groups_0"), val = tensor(1)]; + tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109050304))), name = tensor("layers_4_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109028992))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1618_cast_fp16 = conv(dilations = var_1618_dilations_0, groups = var_1618_groups_0, pad = var_1618_pad_0, pad_type = var_1618_pad_type_0, strides = var_1618_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_45_cast_fp16)[name = tensor("op_1618_cast_fp16")]; + tensor input_47_cast_fp16 = add(x = var_1612_cast_fp16, y = var_1618_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor input_49_mode_0 = const()[name = tensor("input_49_mode_0"), val = tensor("EXACT")]; + tensor input_49_cast_fp16 = gelu(mode = input_49_mode_0, x = input_47_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_1629_pad_type_0 = const()[name = tensor("op_1629_pad_type_0"), val = tensor("valid")]; + tensor var_1629_strides_0 = const()[name = tensor("op_1629_strides_0"), val = tensor([1, 1])]; + tensor var_1629_pad_0 = const()[name = tensor("op_1629_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1629_dilations_0 = const()[name = tensor("op_1629_dilations_0"), val = tensor([1, 1])]; + tensor var_1629_groups_0 = const()[name = tensor("op_1629_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109345280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110524992))), name = tensor("layers_4_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110525120)))]; + tensor var_1629_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_1629_dilations_0, groups = var_1629_groups_0, pad = var_1629_pad_0, pad_type = var_1629_pad_type_0, strides = var_1629_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("op_1629_cast_fp16")]; + tensor var_1635_pad_type_0 = const()[name = tensor("op_1635_pad_type_0"), val = tensor("valid")]; + tensor var_1635_strides_0 = const()[name = tensor("op_1635_strides_0"), val = tensor([1, 1])]; + tensor var_1635_pad_0 = const()[name = tensor("op_1635_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1635_dilations_0 = const()[name = tensor("op_1635_dilations_0"), val = tensor([1, 1])]; + tensor var_1635_groups_0 = const()[name = tensor("op_1635_groups_0"), val = tensor(1)]; + tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110548096))), name = tensor("layers_4_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110526720))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1635_cast_fp16 = conv(dilations = var_1635_dilations_0, groups = var_1635_groups_0, pad = var_1635_pad_0, pad_type = var_1635_pad_type_0, strides = var_1635_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = tensor("op_1635_cast_fp16")]; + tensor hidden_states_11_cast_fp16 = add(x = var_1629_cast_fp16, y = var_1635_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor var_1647 = const()[name = tensor("op_1647"), val = tensor(3)]; + tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; + tensor var_1672_to_fp16 = const()[name = tensor("op_1672_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_1672_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor obj_71_gamma_0_to_fp16 = const()[name = tensor("obj_71_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110843072)))]; + tensor obj_71_beta_0_to_fp16 = const()[name = tensor("obj_71_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110844672)))]; + tensor obj_71_epsilon_0_to_fp16 = const()[name = tensor("obj_71_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_71_cast_fp16 = batch_norm(beta = obj_71_beta_0_to_fp16, epsilon = obj_71_epsilon_0_to_fp16, gamma = obj_71_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor var_1694_pad_type_0 = const()[name = tensor("op_1694_pad_type_0"), val = tensor("valid")]; + tensor var_1694_strides_0 = const()[name = tensor("op_1694_strides_0"), val = tensor([1, 1])]; + tensor var_1694_pad_0 = const()[name = tensor("op_1694_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1694_dilations_0 = const()[name = tensor("op_1694_dilations_0"), val = tensor([1, 1])]; + tensor var_1694_groups_0 = const()[name = tensor("op_1694_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110846272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111141248))), name = tensor("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111141376)))]; + tensor var_1694_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1694_dilations_0, groups = var_1694_groups_0, pad = var_1694_pad_0, pad_type = var_1694_pad_type_0, strides = var_1694_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_71_cast_fp16)[name = tensor("op_1694_cast_fp16")]; + tensor var_1700_pad_type_0 = const()[name = tensor("op_1700_pad_type_0"), val = tensor("valid")]; + tensor var_1700_strides_0 = const()[name = tensor("op_1700_strides_0"), val = tensor([1, 1])]; + tensor var_1700_pad_0 = const()[name = tensor("op_1700_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1700_dilations_0 = const()[name = tensor("op_1700_dilations_0"), val = tensor([1, 1])]; + tensor var_1700_groups_0 = const()[name = tensor("op_1700_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111151680))), name = tensor("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111142976))), shape = tensor([768, 768, 1, 1])]; + tensor var_1700_cast_fp16 = conv(dilations = var_1700_dilations_0, groups = var_1700_groups_0, pad = var_1700_pad_0, pad_type = var_1700_pad_type_0, strides = var_1700_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_71_cast_fp16)[name = tensor("op_1700_cast_fp16")]; + tensor query_21_cast_fp16 = add(x = var_1694_cast_fp16, y = var_1700_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_1709_pad_type_0 = const()[name = tensor("op_1709_pad_type_0"), val = tensor("valid")]; + tensor var_1709_strides_0 = const()[name = tensor("op_1709_strides_0"), val = tensor([1, 1])]; + tensor var_1709_pad_0 = const()[name = tensor("op_1709_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1709_dilations_0 = const()[name = tensor("op_1709_dilations_0"), val = tensor([1, 1])]; + tensor var_1709_groups_0 = const()[name = tensor("op_1709_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111225472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111520448))), name = tensor("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1709_cast_fp16 = conv(dilations = var_1709_dilations_0, groups = var_1709_groups_0, pad = var_1709_pad_0, pad_type = var_1709_pad_type_0, strides = var_1709_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_71_cast_fp16)[name = tensor("op_1709_cast_fp16")]; + tensor var_1715_pad_type_0 = const()[name = tensor("op_1715_pad_type_0"), val = tensor("valid")]; + tensor var_1715_strides_0 = const()[name = tensor("op_1715_strides_0"), val = tensor([1, 1])]; + tensor var_1715_pad_0 = const()[name = tensor("op_1715_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1715_dilations_0 = const()[name = tensor("op_1715_dilations_0"), val = tensor([1, 1])]; + tensor var_1715_groups_0 = const()[name = tensor("op_1715_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111529984))), name = tensor("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111520576))), shape = tensor([768, 768, 1, 1])]; + tensor var_1715_cast_fp16 = conv(dilations = var_1715_dilations_0, groups = var_1715_groups_0, pad = var_1715_pad_0, pad_type = var_1715_pad_type_0, strides = var_1715_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_71_cast_fp16)[name = tensor("op_1715_cast_fp16")]; + tensor current_key_11_cast_fp16 = add(x = var_1709_cast_fp16, y = var_1715_cast_fp16)[name = tensor("current_key_11_cast_fp16")]; + tensor var_1725_pad_type_0 = const()[name = tensor("op_1725_pad_type_0"), val = tensor("valid")]; + tensor var_1725_strides_0 = const()[name = tensor("op_1725_strides_0"), val = tensor([1, 1])]; + tensor var_1725_pad_0 = const()[name = tensor("op_1725_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1725_dilations_0 = const()[name = tensor("op_1725_dilations_0"), val = tensor([1, 1])]; + tensor var_1725_groups_0 = const()[name = tensor("op_1725_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111603776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111898752))), name = tensor("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111898880)))]; + tensor var_1725_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1725_dilations_0, groups = var_1725_groups_0, pad = var_1725_pad_0, pad_type = var_1725_pad_type_0, strides = var_1725_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_71_cast_fp16)[name = tensor("op_1725_cast_fp16")]; + tensor var_1731_pad_type_0 = const()[name = tensor("op_1731_pad_type_0"), val = tensor("valid")]; + tensor var_1731_strides_0 = const()[name = tensor("op_1731_strides_0"), val = tensor([1, 1])]; + tensor var_1731_pad_0 = const()[name = tensor("op_1731_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1731_dilations_0 = const()[name = tensor("op_1731_dilations_0"), val = tensor([1, 1])]; + tensor var_1731_groups_0 = const()[name = tensor("op_1731_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111906560))), name = tensor("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111900480))), shape = tensor([768, 768, 1, 1])]; + tensor var_1731_cast_fp16 = conv(dilations = var_1731_dilations_0, groups = var_1731_groups_0, pad = var_1731_pad_0, pad_type = var_1731_pad_type_0, strides = var_1731_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_71_cast_fp16)[name = tensor("op_1731_cast_fp16")]; + tensor current_value_11_cast_fp16 = add(x = var_1725_cast_fp16, y = var_1731_cast_fp16)[name = tensor("current_value_11_cast_fp16")]; + tensor var_1738_cast_fp16 = mul(x = var_69_cast_fp16_5, y = var_192_cast_fp16)[name = tensor("op_1738_cast_fp16")]; + tensor var_1739_cast_fp16 = mul(x = current_key_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1739_cast_fp16")]; + tensor key_21_cast_fp16 = add(x = var_1738_cast_fp16, y = var_1739_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_1742_cast_fp16 = mul(x = var_84_cast_fp16_5, y = var_192_cast_fp16)[name = tensor("op_1742_cast_fp16")]; + tensor var_1743_cast_fp16 = mul(x = current_value_11_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_1743_cast_fp16")]; + tensor value_21_cast_fp16 = add(x = var_1742_cast_fp16, y = var_1743_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_1747 = const()[name = tensor("op_1747"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_21_cast_fp16 = reshape(shape = var_1747, x = query_21_cast_fp16)[name = tensor("mh_q_21_cast_fp16")]; + tensor var_1749_to_fp16 = const()[name = tensor("op_1749_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1750_cast_fp16 = mul(x = mh_q_21_cast_fp16, y = var_1749_to_fp16)[name = tensor("op_1750_cast_fp16")]; + tensor var_1753 = const()[name = tensor("op_1753"), val = tensor([1, 12, 64, 448])]; + tensor var_1754_cast_fp16 = reshape(shape = var_1753, x = key_21_cast_fp16)[name = tensor("op_1754_cast_fp16")]; + tensor mh_w_31_transpose_x_0 = const()[name = tensor("mh_w_31_transpose_x_0"), val = tensor(true)]; + tensor mh_w_31_transpose_y_0 = const()[name = tensor("mh_w_31_transpose_y_0"), val = tensor(false)]; + tensor mh_w_31_cast_fp16 = matmul(transpose_x = mh_w_31_transpose_x_0, transpose_y = mh_w_31_transpose_y_0, x = var_1750_cast_fp16, y = var_1754_cast_fp16)[name = tensor("mh_w_31_cast_fp16")]; + tensor mh_w_33_cast_fp16 = add(x = mh_w_31_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_33_cast_fp16")]; + tensor var_1762_cast_fp16 = softmax(axis = var_1647, x = mh_w_33_cast_fp16)[name = tensor("op_1762_cast_fp16")]; + tensor var_1763 = const()[name = tensor("op_1763"), val = tensor([1, 12, 64, 448])]; + tensor var_1764_cast_fp16 = reshape(shape = var_1763, x = value_21_cast_fp16)[name = tensor("op_1764_cast_fp16")]; + tensor attn_21_transpose_x_0 = const()[name = tensor("attn_21_transpose_x_0"), val = tensor(false)]; + tensor attn_21_transpose_y_0 = const()[name = tensor("attn_21_transpose_y_0"), val = tensor(true)]; + tensor attn_21_cast_fp16 = matmul(transpose_x = attn_21_transpose_x_0, transpose_y = attn_21_transpose_y_0, x = var_1764_cast_fp16, y = var_1762_cast_fp16)[name = tensor("attn_21_cast_fp16")]; + tensor var_1767 = const()[name = tensor("op_1767"), val = tensor([1, 768, 1, 1])]; + tensor input_51_cast_fp16 = reshape(shape = var_1767, x = attn_21_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_1777_pad_type_0 = const()[name = tensor("op_1777_pad_type_0"), val = tensor("valid")]; + tensor var_1777_strides_0 = const()[name = tensor("op_1777_strides_0"), val = tensor([1, 1])]; + tensor var_1777_pad_0 = const()[name = tensor("op_1777_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1777_dilations_0 = const()[name = tensor("op_1777_dilations_0"), val = tensor([1, 1])]; + tensor var_1777_groups_0 = const()[name = tensor("op_1777_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111980352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112275328))), name = tensor("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112275456)))]; + tensor var_1777_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1777_dilations_0, groups = var_1777_groups_0, pad = var_1777_pad_0, pad_type = var_1777_pad_type_0, strides = var_1777_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = tensor("op_1777_cast_fp16")]; + tensor var_1783_pad_type_0 = const()[name = tensor("op_1783_pad_type_0"), val = tensor("valid")]; + tensor var_1783_strides_0 = const()[name = tensor("op_1783_strides_0"), val = tensor([1, 1])]; + tensor var_1783_pad_0 = const()[name = tensor("op_1783_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1783_dilations_0 = const()[name = tensor("op_1783_dilations_0"), val = tensor([1, 1])]; + tensor var_1783_groups_0 = const()[name = tensor("op_1783_groups_0"), val = tensor(1)]; + tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112284416))), name = tensor("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112277056))), shape = tensor([768, 768, 1, 1])]; + tensor var_1783_cast_fp16 = conv(dilations = var_1783_dilations_0, groups = var_1783_groups_0, pad = var_1783_pad_0, pad_type = var_1783_pad_type_0, strides = var_1783_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = tensor("op_1783_cast_fp16")]; + tensor obj_77_cast_fp16 = add(x = var_1777_cast_fp16, y = var_1783_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = obj_77_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; + tensor var_1798_to_fp16 = const()[name = tensor("op_1798_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_1798_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_79_gamma_0_to_fp16 = const()[name = tensor("obj_79_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112358208)))]; + tensor obj_79_beta_0_to_fp16 = const()[name = tensor("obj_79_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112359808)))]; + tensor obj_79_epsilon_0_to_fp16 = const()[name = tensor("obj_79_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_79_cast_fp16 = batch_norm(beta = obj_79_beta_0_to_fp16, epsilon = obj_79_epsilon_0_to_fp16, gamma = obj_79_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor var_1820_pad_type_0 = const()[name = tensor("op_1820_pad_type_0"), val = tensor("valid")]; + tensor var_1820_strides_0 = const()[name = tensor("op_1820_strides_0"), val = tensor([1, 1])]; + tensor var_1820_pad_0 = const()[name = tensor("op_1820_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1820_dilations_0 = const()[name = tensor("op_1820_dilations_0"), val = tensor([1, 1])]; + tensor var_1820_groups_0 = const()[name = tensor("op_1820_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112361408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112656384))), name = tensor("layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112656512)))]; + tensor var_1820_cast_fp16 = conv(bias = layers_5_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1820_dilations_0, groups = var_1820_groups_0, pad = var_1820_pad_0, pad_type = var_1820_pad_type_0, strides = var_1820_strides_0, weight = layers_5_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_79_cast_fp16)[name = tensor("op_1820_cast_fp16")]; + tensor var_1826_pad_type_0 = const()[name = tensor("op_1826_pad_type_0"), val = tensor("valid")]; + tensor var_1826_strides_0 = const()[name = tensor("op_1826_strides_0"), val = tensor([1, 1])]; + tensor var_1826_pad_0 = const()[name = tensor("op_1826_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1826_dilations_0 = const()[name = tensor("op_1826_dilations_0"), val = tensor([1, 1])]; + tensor var_1826_groups_0 = const()[name = tensor("op_1826_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112664576))), name = tensor("layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112658112))), shape = tensor([768, 768, 1, 1])]; + tensor var_1826_cast_fp16 = conv(dilations = var_1826_dilations_0, groups = var_1826_groups_0, pad = var_1826_pad_0, pad_type = var_1826_pad_type_0, strides = var_1826_strides_0, weight = layers_5_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_79_cast_fp16)[name = tensor("op_1826_cast_fp16")]; + tensor query_23_cast_fp16 = add(x = var_1820_cast_fp16, y = var_1826_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor var_1835_pad_type_0 = const()[name = tensor("op_1835_pad_type_0"), val = tensor("valid")]; + tensor var_1835_strides_0 = const()[name = tensor("op_1835_strides_0"), val = tensor([1, 1])]; + tensor var_1835_pad_0 = const()[name = tensor("op_1835_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1835_dilations_0 = const()[name = tensor("op_1835_dilations_0"), val = tensor([1, 1])]; + tensor var_1835_groups_0 = const()[name = tensor("op_1835_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(112738368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113033344))), name = tensor("layers_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_1835_cast_fp16 = conv(dilations = var_1835_dilations_0, groups = var_1835_groups_0, pad = var_1835_pad_0, pad_type = var_1835_pad_type_0, strides = var_1835_strides_0, weight = layers_5_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1835_cast_fp16")]; + tensor var_1841_pad_type_0 = const()[name = tensor("op_1841_pad_type_0"), val = tensor("valid")]; + tensor var_1841_strides_0 = const()[name = tensor("op_1841_strides_0"), val = tensor([1, 1])]; + tensor var_1841_pad_0 = const()[name = tensor("op_1841_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1841_dilations_0 = const()[name = tensor("op_1841_dilations_0"), val = tensor([1, 1])]; + tensor var_1841_groups_0 = const()[name = tensor("op_1841_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113040896))), name = tensor("layers_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113033472))), shape = tensor([768, 768, 1, 1])]; + tensor var_1841_cast_fp16 = conv(dilations = var_1841_dilations_0, groups = var_1841_groups_0, pad = var_1841_pad_0, pad_type = var_1841_pad_type_0, strides = var_1841_strides_0, weight = layers_5_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1841_cast_fp16")]; + tensor key_23_cast_fp16 = add(x = var_1835_cast_fp16, y = var_1841_cast_fp16)[name = tensor("key_23_cast_fp16")]; + tensor var_1851_pad_type_0 = const()[name = tensor("op_1851_pad_type_0"), val = tensor("valid")]; + tensor var_1851_strides_0 = const()[name = tensor("op_1851_strides_0"), val = tensor([1, 1])]; + tensor var_1851_pad_0 = const()[name = tensor("op_1851_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1851_dilations_0 = const()[name = tensor("op_1851_dilations_0"), val = tensor([1, 1])]; + tensor var_1851_groups_0 = const()[name = tensor("op_1851_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113114688))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113409664))), name = tensor("layers_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113409792)))]; + tensor var_1851_cast_fp16 = conv(bias = layers_5_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1851_dilations_0, groups = var_1851_groups_0, pad = var_1851_pad_0, pad_type = var_1851_pad_type_0, strides = var_1851_strides_0, weight = layers_5_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_1851_cast_fp16")]; + tensor var_1857_pad_type_0 = const()[name = tensor("op_1857_pad_type_0"), val = tensor("valid")]; + tensor var_1857_strides_0 = const()[name = tensor("op_1857_strides_0"), val = tensor([1, 1])]; + tensor var_1857_pad_0 = const()[name = tensor("op_1857_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1857_dilations_0 = const()[name = tensor("op_1857_dilations_0"), val = tensor([1, 1])]; + tensor var_1857_groups_0 = const()[name = tensor("op_1857_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113417472))), name = tensor("layers_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113411392))), shape = tensor([768, 768, 1, 1])]; + tensor var_1857_cast_fp16 = conv(dilations = var_1857_dilations_0, groups = var_1857_groups_0, pad = var_1857_pad_0, pad_type = var_1857_pad_type_0, strides = var_1857_strides_0, weight = layers_5_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_1857_cast_fp16")]; + tensor value_23_cast_fp16 = add(x = var_1851_cast_fp16, y = var_1857_cast_fp16)[name = tensor("value_23_cast_fp16")]; + tensor var_1861 = const()[name = tensor("op_1861"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_23_cast_fp16 = reshape(shape = var_1861, x = query_23_cast_fp16)[name = tensor("mh_q_23_cast_fp16")]; + tensor var_1863_to_fp16 = const()[name = tensor("op_1863_to_fp16"), val = tensor(0x1p-3)]; + tensor var_1864_cast_fp16 = mul(x = mh_q_23_cast_fp16, y = var_1863_to_fp16)[name = tensor("op_1864_cast_fp16")]; + tensor var_1867 = const()[name = tensor("op_1867"), val = tensor([1, 12, 64, 1500])]; + tensor var_1868_cast_fp16 = reshape(shape = var_1867, x = key_23_cast_fp16)[name = tensor("op_1868_cast_fp16")]; + tensor mh_w_35_transpose_x_0 = const()[name = tensor("mh_w_35_transpose_x_0"), val = tensor(true)]; + tensor mh_w_35_transpose_y_0 = const()[name = tensor("mh_w_35_transpose_y_0"), val = tensor(false)]; + tensor mh_w_35_cast_fp16 = matmul(transpose_x = mh_w_35_transpose_x_0, transpose_y = mh_w_35_transpose_y_0, x = var_1864_cast_fp16, y = var_1868_cast_fp16)[name = tensor("mh_w_35_cast_fp16")]; + tensor obj_83_cast_fp16 = softmax(axis = var_1647, x = mh_w_35_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor var_1872 = const()[name = tensor("op_1872"), val = tensor([1, 12, 64, 1500])]; + tensor var_1873_cast_fp16 = reshape(shape = var_1872, x = value_23_cast_fp16)[name = tensor("op_1873_cast_fp16")]; + tensor attn_23_transpose_x_0 = const()[name = tensor("attn_23_transpose_x_0"), val = tensor(false)]; + tensor attn_23_transpose_y_0 = const()[name = tensor("attn_23_transpose_y_0"), val = tensor(true)]; + tensor attn_23_cast_fp16 = matmul(transpose_x = attn_23_transpose_x_0, transpose_y = attn_23_transpose_y_0, x = var_1873_cast_fp16, y = obj_83_cast_fp16)[name = tensor("attn_23_cast_fp16")]; + tensor var_1876 = const()[name = tensor("op_1876"), val = tensor([1, 768, 1, 1])]; + tensor input_53_cast_fp16 = reshape(shape = var_1876, x = attn_23_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor var_1886_pad_type_0 = const()[name = tensor("op_1886_pad_type_0"), val = tensor("valid")]; + tensor var_1886_strides_0 = const()[name = tensor("op_1886_strides_0"), val = tensor([1, 1])]; + tensor var_1886_pad_0 = const()[name = tensor("op_1886_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1886_dilations_0 = const()[name = tensor("op_1886_dilations_0"), val = tensor([1, 1])]; + tensor var_1886_groups_0 = const()[name = tensor("op_1886_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113491264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113786240))), name = tensor("layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113786368)))]; + tensor var_1886_cast_fp16 = conv(bias = layers_5_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1886_dilations_0, groups = var_1886_groups_0, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1886_strides_0, weight = layers_5_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_53_cast_fp16)[name = tensor("op_1886_cast_fp16")]; + tensor var_1892_pad_type_0 = const()[name = tensor("op_1892_pad_type_0"), val = tensor("valid")]; + tensor var_1892_strides_0 = const()[name = tensor("op_1892_strides_0"), val = tensor([1, 1])]; + tensor var_1892_pad_0 = const()[name = tensor("op_1892_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1892_dilations_0 = const()[name = tensor("op_1892_dilations_0"), val = tensor([1, 1])]; + tensor var_1892_groups_0 = const()[name = tensor("op_1892_groups_0"), val = tensor(1)]; + tensor layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113793472))), name = tensor("layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113787968))), shape = tensor([768, 768, 1, 1])]; + tensor var_1892_cast_fp16 = conv(dilations = var_1892_dilations_0, groups = var_1892_groups_0, pad = var_1892_pad_0, pad_type = var_1892_pad_type_0, strides = var_1892_strides_0, weight = layers_5_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_53_cast_fp16)[name = tensor("op_1892_cast_fp16")]; + tensor obj_81_cast_fp16 = add(x = var_1886_cast_fp16, y = var_1892_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_81_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; + tensor var_1906_to_fp16 = const()[name = tensor("op_1906_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_1906_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_55_gamma_0_to_fp16 = const()[name = tensor("input_55_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113867264)))]; + tensor input_55_beta_0_to_fp16 = const()[name = tensor("input_55_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113868864)))]; + tensor input_55_epsilon_0_to_fp16 = const()[name = tensor("input_55_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_55_cast_fp16 = batch_norm(beta = input_55_beta_0_to_fp16, epsilon = input_55_epsilon_0_to_fp16, gamma = input_55_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_1924_pad_type_0 = const()[name = tensor("op_1924_pad_type_0"), val = tensor("valid")]; + tensor var_1924_strides_0 = const()[name = tensor("op_1924_strides_0"), val = tensor([1, 1])]; + tensor var_1924_pad_0 = const()[name = tensor("op_1924_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1924_dilations_0 = const()[name = tensor("op_1924_dilations_0"), val = tensor([1, 1])]; + tensor var_1924_groups_0 = const()[name = tensor("op_1924_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(113870464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115050176))), name = tensor("layers_5_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115050304)))]; + tensor var_1924_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_1924_dilations_0, groups = var_1924_groups_0, pad = var_1924_pad_0, pad_type = var_1924_pad_type_0, strides = var_1924_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("op_1924_cast_fp16")]; + tensor var_1930_pad_type_0 = const()[name = tensor("op_1930_pad_type_0"), val = tensor("valid")]; + tensor var_1930_strides_0 = const()[name = tensor("op_1930_strides_0"), val = tensor([1, 1])]; + tensor var_1930_pad_0 = const()[name = tensor("op_1930_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1930_dilations_0 = const()[name = tensor("op_1930_dilations_0"), val = tensor([1, 1])]; + tensor var_1930_groups_0 = const()[name = tensor("op_1930_groups_0"), val = tensor(1)]; + tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115075840))), name = tensor("layers_5_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115056512))), shape = tensor([3072, 768, 1, 1])]; + tensor var_1930_cast_fp16 = conv(dilations = var_1930_dilations_0, groups = var_1930_groups_0, pad = var_1930_pad_0, pad_type = var_1930_pad_type_0, strides = var_1930_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = tensor("op_1930_cast_fp16")]; + tensor input_57_cast_fp16 = add(x = var_1924_cast_fp16, y = var_1930_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor input_59_mode_0 = const()[name = tensor("input_59_mode_0"), val = tensor("EXACT")]; + tensor input_59_cast_fp16 = gelu(mode = input_59_mode_0, x = input_57_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_1941_pad_type_0 = const()[name = tensor("op_1941_pad_type_0"), val = tensor("valid")]; + tensor var_1941_strides_0 = const()[name = tensor("op_1941_strides_0"), val = tensor([1, 1])]; + tensor var_1941_pad_0 = const()[name = tensor("op_1941_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1941_dilations_0 = const()[name = tensor("op_1941_dilations_0"), val = tensor([1, 1])]; + tensor var_1941_groups_0 = const()[name = tensor("op_1941_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115370816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116550528))), name = tensor("layers_5_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116550656)))]; + tensor var_1941_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_1941_dilations_0, groups = var_1941_groups_0, pad = var_1941_pad_0, pad_type = var_1941_pad_type_0, strides = var_1941_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = tensor("op_1941_cast_fp16")]; + tensor var_1947_pad_type_0 = const()[name = tensor("op_1947_pad_type_0"), val = tensor("valid")]; + tensor var_1947_strides_0 = const()[name = tensor("op_1947_strides_0"), val = tensor([1, 1])]; + tensor var_1947_pad_0 = const()[name = tensor("op_1947_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1947_dilations_0 = const()[name = tensor("op_1947_dilations_0"), val = tensor([1, 1])]; + tensor var_1947_groups_0 = const()[name = tensor("op_1947_groups_0"), val = tensor(1)]; + tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116574848))), name = tensor("layers_5_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116552256))), shape = tensor([768, 3072, 1, 1])]; + tensor var_1947_cast_fp16 = conv(dilations = var_1947_dilations_0, groups = var_1947_groups_0, pad = var_1947_pad_0, pad_type = var_1947_pad_type_0, strides = var_1947_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = tensor("op_1947_cast_fp16")]; + tensor hidden_states_13_cast_fp16 = add(x = var_1941_cast_fp16, y = var_1947_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_1960 = const()[name = tensor("op_1960"), val = tensor(3)]; + tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; + tensor var_1985_to_fp16 = const()[name = tensor("op_1985_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_1985_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116869824)))]; + tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116871424)))]; + tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor var_2007_pad_type_0 = const()[name = tensor("op_2007_pad_type_0"), val = tensor("valid")]; + tensor var_2007_strides_0 = const()[name = tensor("op_2007_strides_0"), val = tensor([1, 1])]; + tensor var_2007_pad_0 = const()[name = tensor("op_2007_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2007_dilations_0 = const()[name = tensor("op_2007_dilations_0"), val = tensor([1, 1])]; + tensor var_2007_groups_0 = const()[name = tensor("op_2007_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(116873024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117168000))), name = tensor("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117168128)))]; + tensor var_2007_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2007_dilations_0, groups = var_2007_groups_0, pad = var_2007_pad_0, pad_type = var_2007_pad_type_0, strides = var_2007_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_2007_cast_fp16")]; + tensor var_2013_pad_type_0 = const()[name = tensor("op_2013_pad_type_0"), val = tensor("valid")]; + tensor var_2013_strides_0 = const()[name = tensor("op_2013_strides_0"), val = tensor([1, 1])]; + tensor var_2013_pad_0 = const()[name = tensor("op_2013_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2013_dilations_0 = const()[name = tensor("op_2013_dilations_0"), val = tensor([1, 1])]; + tensor var_2013_groups_0 = const()[name = tensor("op_2013_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117178048))), name = tensor("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117169728))), shape = tensor([768, 768, 1, 1])]; + tensor var_2013_cast_fp16 = conv(dilations = var_2013_dilations_0, groups = var_2013_groups_0, pad = var_2013_pad_0, pad_type = var_2013_pad_type_0, strides = var_2013_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_2013_cast_fp16")]; + tensor query_25_cast_fp16 = add(x = var_2007_cast_fp16, y = var_2013_cast_fp16)[name = tensor("query_25_cast_fp16")]; + tensor var_2022_pad_type_0 = const()[name = tensor("op_2022_pad_type_0"), val = tensor("valid")]; + tensor var_2022_strides_0 = const()[name = tensor("op_2022_strides_0"), val = tensor([1, 1])]; + tensor var_2022_pad_0 = const()[name = tensor("op_2022_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2022_dilations_0 = const()[name = tensor("op_2022_dilations_0"), val = tensor([1, 1])]; + tensor var_2022_groups_0 = const()[name = tensor("op_2022_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117251840))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117546816))), name = tensor("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2022_cast_fp16 = conv(dilations = var_2022_dilations_0, groups = var_2022_groups_0, pad = var_2022_pad_0, pad_type = var_2022_pad_type_0, strides = var_2022_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_2022_cast_fp16")]; + tensor var_2028_pad_type_0 = const()[name = tensor("op_2028_pad_type_0"), val = tensor("valid")]; + tensor var_2028_strides_0 = const()[name = tensor("op_2028_strides_0"), val = tensor([1, 1])]; + tensor var_2028_pad_0 = const()[name = tensor("op_2028_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2028_dilations_0 = const()[name = tensor("op_2028_dilations_0"), val = tensor([1, 1])]; + tensor var_2028_groups_0 = const()[name = tensor("op_2028_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117555456))), name = tensor("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117546944))), shape = tensor([768, 768, 1, 1])]; + tensor var_2028_cast_fp16 = conv(dilations = var_2028_dilations_0, groups = var_2028_groups_0, pad = var_2028_pad_0, pad_type = var_2028_pad_type_0, strides = var_2028_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_2028_cast_fp16")]; + tensor current_key_13_cast_fp16 = add(x = var_2022_cast_fp16, y = var_2028_cast_fp16)[name = tensor("current_key_13_cast_fp16")]; + tensor var_2038_pad_type_0 = const()[name = tensor("op_2038_pad_type_0"), val = tensor("valid")]; + tensor var_2038_strides_0 = const()[name = tensor("op_2038_strides_0"), val = tensor([1, 1])]; + tensor var_2038_pad_0 = const()[name = tensor("op_2038_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2038_dilations_0 = const()[name = tensor("op_2038_dilations_0"), val = tensor([1, 1])]; + tensor var_2038_groups_0 = const()[name = tensor("op_2038_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117629248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117924224))), name = tensor("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117924352)))]; + tensor var_2038_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2038_dilations_0, groups = var_2038_groups_0, pad = var_2038_pad_0, pad_type = var_2038_pad_type_0, strides = var_2038_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_2038_cast_fp16")]; + tensor var_2044_pad_type_0 = const()[name = tensor("op_2044_pad_type_0"), val = tensor("valid")]; + tensor var_2044_strides_0 = const()[name = tensor("op_2044_strides_0"), val = tensor([1, 1])]; + tensor var_2044_pad_0 = const()[name = tensor("op_2044_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2044_dilations_0 = const()[name = tensor("op_2044_dilations_0"), val = tensor([1, 1])]; + tensor var_2044_groups_0 = const()[name = tensor("op_2044_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117931648))), name = tensor("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(117925952))), shape = tensor([768, 768, 1, 1])]; + tensor var_2044_cast_fp16 = conv(dilations = var_2044_dilations_0, groups = var_2044_groups_0, pad = var_2044_pad_0, pad_type = var_2044_pad_type_0, strides = var_2044_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_2044_cast_fp16")]; + tensor current_value_13_cast_fp16 = add(x = var_2038_cast_fp16, y = var_2044_cast_fp16)[name = tensor("current_value_13_cast_fp16")]; + tensor var_2051_cast_fp16 = mul(x = var_69_cast_fp16_6, y = var_192_cast_fp16)[name = tensor("op_2051_cast_fp16")]; + tensor var_2052_cast_fp16 = mul(x = current_key_13_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2052_cast_fp16")]; + tensor key_25_cast_fp16 = add(x = var_2051_cast_fp16, y = var_2052_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor var_2055_cast_fp16 = mul(x = var_84_cast_fp16_6, y = var_192_cast_fp16)[name = tensor("op_2055_cast_fp16")]; + tensor var_2056_cast_fp16 = mul(x = current_value_13_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2056_cast_fp16")]; + tensor value_25_cast_fp16 = add(x = var_2055_cast_fp16, y = var_2056_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_2060 = const()[name = tensor("op_2060"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_25_cast_fp16 = reshape(shape = var_2060, x = query_25_cast_fp16)[name = tensor("mh_q_25_cast_fp16")]; + tensor var_2062_to_fp16 = const()[name = tensor("op_2062_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2063_cast_fp16 = mul(x = mh_q_25_cast_fp16, y = var_2062_to_fp16)[name = tensor("op_2063_cast_fp16")]; + tensor var_2066 = const()[name = tensor("op_2066"), val = tensor([1, 12, 64, 448])]; + tensor var_2067_cast_fp16 = reshape(shape = var_2066, x = key_25_cast_fp16)[name = tensor("op_2067_cast_fp16")]; + tensor mh_w_37_transpose_x_0 = const()[name = tensor("mh_w_37_transpose_x_0"), val = tensor(true)]; + tensor mh_w_37_transpose_y_0 = const()[name = tensor("mh_w_37_transpose_y_0"), val = tensor(false)]; + tensor mh_w_37_cast_fp16 = matmul(transpose_x = mh_w_37_transpose_x_0, transpose_y = mh_w_37_transpose_y_0, x = var_2063_cast_fp16, y = var_2067_cast_fp16)[name = tensor("mh_w_37_cast_fp16")]; + tensor mh_w_39_cast_fp16 = add(x = mh_w_37_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_39_cast_fp16")]; + tensor var_2075_cast_fp16 = softmax(axis = var_1960, x = mh_w_39_cast_fp16)[name = tensor("op_2075_cast_fp16")]; + tensor var_2076 = const()[name = tensor("op_2076"), val = tensor([1, 12, 64, 448])]; + tensor var_2077_cast_fp16 = reshape(shape = var_2076, x = value_25_cast_fp16)[name = tensor("op_2077_cast_fp16")]; + tensor attn_25_transpose_x_0 = const()[name = tensor("attn_25_transpose_x_0"), val = tensor(false)]; + tensor attn_25_transpose_y_0 = const()[name = tensor("attn_25_transpose_y_0"), val = tensor(true)]; + tensor attn_25_cast_fp16 = matmul(transpose_x = attn_25_transpose_x_0, transpose_y = attn_25_transpose_y_0, x = var_2077_cast_fp16, y = var_2075_cast_fp16)[name = tensor("attn_25_cast_fp16")]; + tensor var_2080 = const()[name = tensor("op_2080"), val = tensor([1, 768, 1, 1])]; + tensor input_61_cast_fp16 = reshape(shape = var_2080, x = attn_25_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor var_2090_pad_type_0 = const()[name = tensor("op_2090_pad_type_0"), val = tensor("valid")]; + tensor var_2090_strides_0 = const()[name = tensor("op_2090_strides_0"), val = tensor([1, 1])]; + tensor var_2090_pad_0 = const()[name = tensor("op_2090_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2090_dilations_0 = const()[name = tensor("op_2090_dilations_0"), val = tensor([1, 1])]; + tensor var_2090_groups_0 = const()[name = tensor("op_2090_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118005440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118300416))), name = tensor("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118300544)))]; + tensor var_2090_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2090_dilations_0, groups = var_2090_groups_0, pad = var_2090_pad_0, pad_type = var_2090_pad_type_0, strides = var_2090_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_61_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2096_pad_type_0 = const()[name = tensor("op_2096_pad_type_0"), val = tensor("valid")]; + tensor var_2096_strides_0 = const()[name = tensor("op_2096_strides_0"), val = tensor([1, 1])]; + tensor var_2096_pad_0 = const()[name = tensor("op_2096_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2096_dilations_0 = const()[name = tensor("op_2096_dilations_0"), val = tensor([1, 1])]; + tensor var_2096_groups_0 = const()[name = tensor("op_2096_groups_0"), val = tensor(1)]; + tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118308288))), name = tensor("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118302144))), shape = tensor([768, 768, 1, 1])]; + tensor var_2096_cast_fp16 = conv(dilations = var_2096_dilations_0, groups = var_2096_groups_0, pad = var_2096_pad_0, pad_type = var_2096_pad_type_0, strides = var_2096_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_61_cast_fp16)[name = tensor("op_2096_cast_fp16")]; + tensor obj_91_cast_fp16 = add(x = var_2090_cast_fp16, y = var_2096_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; + tensor var_2111_to_fp16 = const()[name = tensor("op_2111_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_2111_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor obj_93_gamma_0_to_fp16 = const()[name = tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118382080)))]; + tensor obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118383680)))]; + tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor var_2133_pad_type_0 = const()[name = tensor("op_2133_pad_type_0"), val = tensor("valid")]; + tensor var_2133_strides_0 = const()[name = tensor("op_2133_strides_0"), val = tensor([1, 1])]; + tensor var_2133_pad_0 = const()[name = tensor("op_2133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2133_dilations_0 = const()[name = tensor("op_2133_dilations_0"), val = tensor([1, 1])]; + tensor var_2133_groups_0 = const()[name = tensor("op_2133_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118385280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118680256))), name = tensor("layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118680384)))]; + tensor var_2133_cast_fp16 = conv(bias = layers_6_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2133_dilations_0, groups = var_2133_groups_0, pad = var_2133_pad_0, pad_type = var_2133_pad_type_0, strides = var_2133_strides_0, weight = layers_6_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_2133_cast_fp16")]; + tensor var_2139_pad_type_0 = const()[name = tensor("op_2139_pad_type_0"), val = tensor("valid")]; + tensor var_2139_strides_0 = const()[name = tensor("op_2139_strides_0"), val = tensor([1, 1])]; + tensor var_2139_pad_0 = const()[name = tensor("op_2139_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2139_dilations_0 = const()[name = tensor("op_2139_dilations_0"), val = tensor([1, 1])]; + tensor var_2139_groups_0 = const()[name = tensor("op_2139_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118686912))), name = tensor("layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118681984))), shape = tensor([768, 768, 1, 1])]; + tensor var_2139_cast_fp16 = conv(dilations = var_2139_dilations_0, groups = var_2139_groups_0, pad = var_2139_pad_0, pad_type = var_2139_pad_type_0, strides = var_2139_strides_0, weight = layers_6_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_2139_cast_fp16")]; + tensor query_27_cast_fp16 = add(x = var_2133_cast_fp16, y = var_2139_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor var_2148_pad_type_0 = const()[name = tensor("op_2148_pad_type_0"), val = tensor("valid")]; + tensor var_2148_strides_0 = const()[name = tensor("op_2148_strides_0"), val = tensor([1, 1])]; + tensor var_2148_pad_0 = const()[name = tensor("op_2148_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2148_dilations_0 = const()[name = tensor("op_2148_dilations_0"), val = tensor([1, 1])]; + tensor var_2148_groups_0 = const()[name = tensor("op_2148_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118760704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119055680))), name = tensor("layers_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2148_cast_fp16 = conv(dilations = var_2148_dilations_0, groups = var_2148_groups_0, pad = var_2148_pad_0, pad_type = var_2148_pad_type_0, strides = var_2148_strides_0, weight = layers_6_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2148_cast_fp16")]; + tensor var_2154_pad_type_0 = const()[name = tensor("op_2154_pad_type_0"), val = tensor("valid")]; + tensor var_2154_strides_0 = const()[name = tensor("op_2154_strides_0"), val = tensor([1, 1])]; + tensor var_2154_pad_0 = const()[name = tensor("op_2154_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2154_dilations_0 = const()[name = tensor("op_2154_dilations_0"), val = tensor([1, 1])]; + tensor var_2154_groups_0 = const()[name = tensor("op_2154_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119062144))), name = tensor("layers_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119055808))), shape = tensor([768, 768, 1, 1])]; + tensor var_2154_cast_fp16 = conv(dilations = var_2154_dilations_0, groups = var_2154_groups_0, pad = var_2154_pad_0, pad_type = var_2154_pad_type_0, strides = var_2154_strides_0, weight = layers_6_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2154_cast_fp16")]; + tensor key_27_cast_fp16 = add(x = var_2148_cast_fp16, y = var_2154_cast_fp16)[name = tensor("key_27_cast_fp16")]; + tensor var_2164_pad_type_0 = const()[name = tensor("op_2164_pad_type_0"), val = tensor("valid")]; + tensor var_2164_strides_0 = const()[name = tensor("op_2164_strides_0"), val = tensor([1, 1])]; + tensor var_2164_pad_0 = const()[name = tensor("op_2164_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2164_dilations_0 = const()[name = tensor("op_2164_dilations_0"), val = tensor([1, 1])]; + tensor var_2164_groups_0 = const()[name = tensor("op_2164_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119135936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119430912))), name = tensor("layers_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119431040)))]; + tensor var_2164_cast_fp16 = conv(bias = layers_6_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2164_dilations_0, groups = var_2164_groups_0, pad = var_2164_pad_0, pad_type = var_2164_pad_type_0, strides = var_2164_strides_0, weight = layers_6_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2164_cast_fp16")]; + tensor var_2170_pad_type_0 = const()[name = tensor("op_2170_pad_type_0"), val = tensor("valid")]; + tensor var_2170_strides_0 = const()[name = tensor("op_2170_strides_0"), val = tensor([1, 1])]; + tensor var_2170_pad_0 = const()[name = tensor("op_2170_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2170_dilations_0 = const()[name = tensor("op_2170_dilations_0"), val = tensor([1, 1])]; + tensor var_2170_groups_0 = const()[name = tensor("op_2170_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119441152))), name = tensor("layers_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119432640))), shape = tensor([768, 768, 1, 1])]; + tensor var_2170_cast_fp16 = conv(dilations = var_2170_dilations_0, groups = var_2170_groups_0, pad = var_2170_pad_0, pad_type = var_2170_pad_type_0, strides = var_2170_strides_0, weight = layers_6_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2170_cast_fp16")]; + tensor value_27_cast_fp16 = add(x = var_2164_cast_fp16, y = var_2170_cast_fp16)[name = tensor("value_27_cast_fp16")]; + tensor var_2174 = const()[name = tensor("op_2174"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_27_cast_fp16 = reshape(shape = var_2174, x = query_27_cast_fp16)[name = tensor("mh_q_27_cast_fp16")]; + tensor var_2176_to_fp16 = const()[name = tensor("op_2176_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2177_cast_fp16 = mul(x = mh_q_27_cast_fp16, y = var_2176_to_fp16)[name = tensor("op_2177_cast_fp16")]; + tensor var_2180 = const()[name = tensor("op_2180"), val = tensor([1, 12, 64, 1500])]; + tensor var_2181_cast_fp16 = reshape(shape = var_2180, x = key_27_cast_fp16)[name = tensor("op_2181_cast_fp16")]; + tensor mh_w_41_transpose_x_0 = const()[name = tensor("mh_w_41_transpose_x_0"), val = tensor(true)]; + tensor mh_w_41_transpose_y_0 = const()[name = tensor("mh_w_41_transpose_y_0"), val = tensor(false)]; + tensor mh_w_41_cast_fp16 = matmul(transpose_x = mh_w_41_transpose_x_0, transpose_y = mh_w_41_transpose_y_0, x = var_2177_cast_fp16, y = var_2181_cast_fp16)[name = tensor("mh_w_41_cast_fp16")]; + tensor obj_97_cast_fp16 = softmax(axis = var_1960, x = mh_w_41_cast_fp16)[name = tensor("obj_97_cast_fp16")]; + tensor var_2185 = const()[name = tensor("op_2185"), val = tensor([1, 12, 64, 1500])]; + tensor var_2186_cast_fp16 = reshape(shape = var_2185, x = value_27_cast_fp16)[name = tensor("op_2186_cast_fp16")]; + tensor attn_27_transpose_x_0 = const()[name = tensor("attn_27_transpose_x_0"), val = tensor(false)]; + tensor attn_27_transpose_y_0 = const()[name = tensor("attn_27_transpose_y_0"), val = tensor(true)]; + tensor attn_27_cast_fp16 = matmul(transpose_x = attn_27_transpose_x_0, transpose_y = attn_27_transpose_y_0, x = var_2186_cast_fp16, y = obj_97_cast_fp16)[name = tensor("attn_27_cast_fp16")]; + tensor var_2189 = const()[name = tensor("op_2189"), val = tensor([1, 768, 1, 1])]; + tensor input_63_cast_fp16 = reshape(shape = var_2189, x = attn_27_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_2199_pad_type_0 = const()[name = tensor("op_2199_pad_type_0"), val = tensor("valid")]; + tensor var_2199_strides_0 = const()[name = tensor("op_2199_strides_0"), val = tensor([1, 1])]; + tensor var_2199_pad_0 = const()[name = tensor("op_2199_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2199_dilations_0 = const()[name = tensor("op_2199_dilations_0"), val = tensor([1, 1])]; + tensor var_2199_groups_0 = const()[name = tensor("op_2199_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119514944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119809920))), name = tensor("layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119810048)))]; + tensor var_2199_cast_fp16 = conv(bias = layers_6_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2199_dilations_0, groups = var_2199_groups_0, pad = var_2199_pad_0, pad_type = var_2199_pad_type_0, strides = var_2199_strides_0, weight = layers_6_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = tensor("op_2199_cast_fp16")]; + tensor var_2205_pad_type_0 = const()[name = tensor("op_2205_pad_type_0"), val = tensor("valid")]; + tensor var_2205_strides_0 = const()[name = tensor("op_2205_strides_0"), val = tensor([1, 1])]; + tensor var_2205_pad_0 = const()[name = tensor("op_2205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2205_dilations_0 = const()[name = tensor("op_2205_dilations_0"), val = tensor([1, 1])]; + tensor var_2205_groups_0 = const()[name = tensor("op_2205_groups_0"), val = tensor(1)]; + tensor layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119817920))), name = tensor("layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119811648))), shape = tensor([768, 768, 1, 1])]; + tensor var_2205_cast_fp16 = conv(dilations = var_2205_dilations_0, groups = var_2205_groups_0, pad = var_2205_pad_0, pad_type = var_2205_pad_type_0, strides = var_2205_strides_0, weight = layers_6_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = tensor("op_2205_cast_fp16")]; + tensor obj_95_cast_fp16 = add(x = var_2199_cast_fp16, y = var_2205_cast_fp16)[name = tensor("obj_95_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = obj_95_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; + tensor var_2216_to_fp16 = const()[name = tensor("op_2216_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_2216_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor input_65_gamma_0_to_fp16 = const()[name = tensor("input_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119891712)))]; + tensor input_65_beta_0_to_fp16 = const()[name = tensor("input_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119893312)))]; + tensor input_65_epsilon_0_to_fp16 = const()[name = tensor("input_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_65_cast_fp16 = batch_norm(beta = input_65_beta_0_to_fp16, epsilon = input_65_epsilon_0_to_fp16, gamma = input_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor var_2234_pad_type_0 = const()[name = tensor("op_2234_pad_type_0"), val = tensor("valid")]; + tensor var_2234_strides_0 = const()[name = tensor("op_2234_strides_0"), val = tensor([1, 1])]; + tensor var_2234_pad_0 = const()[name = tensor("op_2234_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2234_dilations_0 = const()[name = tensor("op_2234_dilations_0"), val = tensor([1, 1])]; + tensor var_2234_groups_0 = const()[name = tensor("op_2234_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119894912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121074624))), name = tensor("layers_6_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121074752)))]; + tensor var_2234_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_2234_dilations_0, groups = var_2234_groups_0, pad = var_2234_pad_0, pad_type = var_2234_pad_type_0, strides = var_2234_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = tensor("op_2234_cast_fp16")]; + tensor var_2240_pad_type_0 = const()[name = tensor("op_2240_pad_type_0"), val = tensor("valid")]; + tensor var_2240_strides_0 = const()[name = tensor("op_2240_strides_0"), val = tensor([1, 1])]; + tensor var_2240_pad_0 = const()[name = tensor("op_2240_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2240_dilations_0 = const()[name = tensor("op_2240_dilations_0"), val = tensor([1, 1])]; + tensor var_2240_groups_0 = const()[name = tensor("op_2240_groups_0"), val = tensor(1)]; + tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121100416))), name = tensor("layers_6_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121080960))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2240_cast_fp16 = conv(dilations = var_2240_dilations_0, groups = var_2240_groups_0, pad = var_2240_pad_0, pad_type = var_2240_pad_type_0, strides = var_2240_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = tensor("op_2240_cast_fp16")]; + tensor input_67_cast_fp16 = add(x = var_2234_cast_fp16, y = var_2240_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor input_69_mode_0 = const()[name = tensor("input_69_mode_0"), val = tensor("EXACT")]; + tensor input_69_cast_fp16 = gelu(mode = input_69_mode_0, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor var_2251_pad_type_0 = const()[name = tensor("op_2251_pad_type_0"), val = tensor("valid")]; + tensor var_2251_strides_0 = const()[name = tensor("op_2251_strides_0"), val = tensor([1, 1])]; + tensor var_2251_pad_0 = const()[name = tensor("op_2251_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2251_dilations_0 = const()[name = tensor("op_2251_dilations_0"), val = tensor([1, 1])]; + tensor var_2251_groups_0 = const()[name = tensor("op_2251_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121395392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122575104))), name = tensor("layers_6_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122575232)))]; + tensor var_2251_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_2251_dilations_0, groups = var_2251_groups_0, pad = var_2251_pad_0, pad_type = var_2251_pad_type_0, strides = var_2251_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_69_cast_fp16)[name = tensor("op_2251_cast_fp16")]; + tensor var_2257_pad_type_0 = const()[name = tensor("op_2257_pad_type_0"), val = tensor("valid")]; + tensor var_2257_strides_0 = const()[name = tensor("op_2257_strides_0"), val = tensor([1, 1])]; + tensor var_2257_pad_0 = const()[name = tensor("op_2257_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2257_dilations_0 = const()[name = tensor("op_2257_dilations_0"), val = tensor([1, 1])]; + tensor var_2257_groups_0 = const()[name = tensor("op_2257_groups_0"), val = tensor(1)]; + tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122601024))), name = tensor("layers_6_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122576832))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2257_cast_fp16 = conv(dilations = var_2257_dilations_0, groups = var_2257_groups_0, pad = var_2257_pad_0, pad_type = var_2257_pad_type_0, strides = var_2257_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_69_cast_fp16)[name = tensor("op_2257_cast_fp16")]; + tensor hidden_states_15_cast_fp16 = add(x = var_2251_cast_fp16, y = var_2257_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor var_2269 = const()[name = tensor("op_2269"), val = tensor(3)]; + tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; + tensor var_2294_to_fp16 = const()[name = tensor("op_2294_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_2294_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor obj_99_gamma_0_to_fp16 = const()[name = tensor("obj_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122896000)))]; + tensor obj_99_beta_0_to_fp16 = const()[name = tensor("obj_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122897600)))]; + tensor obj_99_epsilon_0_to_fp16 = const()[name = tensor("obj_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_99_cast_fp16 = batch_norm(beta = obj_99_beta_0_to_fp16, epsilon = obj_99_epsilon_0_to_fp16, gamma = obj_99_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("obj_99_cast_fp16")]; + tensor var_2316_pad_type_0 = const()[name = tensor("op_2316_pad_type_0"), val = tensor("valid")]; + tensor var_2316_strides_0 = const()[name = tensor("op_2316_strides_0"), val = tensor([1, 1])]; + tensor var_2316_pad_0 = const()[name = tensor("op_2316_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2316_dilations_0 = const()[name = tensor("op_2316_dilations_0"), val = tensor([1, 1])]; + tensor var_2316_groups_0 = const()[name = tensor("op_2316_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122899200))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123194176))), name = tensor("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123194304)))]; + tensor var_2316_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2316_dilations_0, groups = var_2316_groups_0, pad = var_2316_pad_0, pad_type = var_2316_pad_type_0, strides = var_2316_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_99_cast_fp16)[name = tensor("op_2316_cast_fp16")]; + tensor var_2322_pad_type_0 = const()[name = tensor("op_2322_pad_type_0"), val = tensor("valid")]; + tensor var_2322_strides_0 = const()[name = tensor("op_2322_strides_0"), val = tensor([1, 1])]; + tensor var_2322_pad_0 = const()[name = tensor("op_2322_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2322_dilations_0 = const()[name = tensor("op_2322_dilations_0"), val = tensor([1, 1])]; + tensor var_2322_groups_0 = const()[name = tensor("op_2322_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123202880))), name = tensor("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123195904))), shape = tensor([768, 768, 1, 1])]; + tensor var_2322_cast_fp16 = conv(dilations = var_2322_dilations_0, groups = var_2322_groups_0, pad = var_2322_pad_0, pad_type = var_2322_pad_type_0, strides = var_2322_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_99_cast_fp16)[name = tensor("op_2322_cast_fp16")]; + tensor query_29_cast_fp16 = add(x = var_2316_cast_fp16, y = var_2322_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor var_2331_pad_type_0 = const()[name = tensor("op_2331_pad_type_0"), val = tensor("valid")]; + tensor var_2331_strides_0 = const()[name = tensor("op_2331_strides_0"), val = tensor([1, 1])]; + tensor var_2331_pad_0 = const()[name = tensor("op_2331_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2331_dilations_0 = const()[name = tensor("op_2331_dilations_0"), val = tensor([1, 1])]; + tensor var_2331_groups_0 = const()[name = tensor("op_2331_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123276672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123571648))), name = tensor("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2331_cast_fp16 = conv(dilations = var_2331_dilations_0, groups = var_2331_groups_0, pad = var_2331_pad_0, pad_type = var_2331_pad_type_0, strides = var_2331_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_99_cast_fp16)[name = tensor("op_2331_cast_fp16")]; + tensor var_2337_pad_type_0 = const()[name = tensor("op_2337_pad_type_0"), val = tensor("valid")]; + tensor var_2337_strides_0 = const()[name = tensor("op_2337_strides_0"), val = tensor([1, 1])]; + tensor var_2337_pad_0 = const()[name = tensor("op_2337_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2337_dilations_0 = const()[name = tensor("op_2337_dilations_0"), val = tensor([1, 1])]; + tensor var_2337_groups_0 = const()[name = tensor("op_2337_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123579712))), name = tensor("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123571776))), shape = tensor([768, 768, 1, 1])]; + tensor var_2337_cast_fp16 = conv(dilations = var_2337_dilations_0, groups = var_2337_groups_0, pad = var_2337_pad_0, pad_type = var_2337_pad_type_0, strides = var_2337_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_99_cast_fp16)[name = tensor("op_2337_cast_fp16")]; + tensor current_key_15_cast_fp16 = add(x = var_2331_cast_fp16, y = var_2337_cast_fp16)[name = tensor("current_key_15_cast_fp16")]; + tensor var_2347_pad_type_0 = const()[name = tensor("op_2347_pad_type_0"), val = tensor("valid")]; + tensor var_2347_strides_0 = const()[name = tensor("op_2347_strides_0"), val = tensor([1, 1])]; + tensor var_2347_pad_0 = const()[name = tensor("op_2347_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2347_dilations_0 = const()[name = tensor("op_2347_dilations_0"), val = tensor([1, 1])]; + tensor var_2347_groups_0 = const()[name = tensor("op_2347_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123653504))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123948480))), name = tensor("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123948608)))]; + tensor var_2347_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2347_dilations_0, groups = var_2347_groups_0, pad = var_2347_pad_0, pad_type = var_2347_pad_type_0, strides = var_2347_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_99_cast_fp16)[name = tensor("op_2347_cast_fp16")]; + tensor var_2353_pad_type_0 = const()[name = tensor("op_2353_pad_type_0"), val = tensor("valid")]; + tensor var_2353_strides_0 = const()[name = tensor("op_2353_strides_0"), val = tensor([1, 1])]; + tensor var_2353_pad_0 = const()[name = tensor("op_2353_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2353_dilations_0 = const()[name = tensor("op_2353_dilations_0"), val = tensor([1, 1])]; + tensor var_2353_groups_0 = const()[name = tensor("op_2353_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123956672))), name = tensor("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123950208))), shape = tensor([768, 768, 1, 1])]; + tensor var_2353_cast_fp16 = conv(dilations = var_2353_dilations_0, groups = var_2353_groups_0, pad = var_2353_pad_0, pad_type = var_2353_pad_type_0, strides = var_2353_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_99_cast_fp16)[name = tensor("op_2353_cast_fp16")]; + tensor current_value_15_cast_fp16 = add(x = var_2347_cast_fp16, y = var_2353_cast_fp16)[name = tensor("current_value_15_cast_fp16")]; + tensor var_2360_cast_fp16 = mul(x = var_69_cast_fp16_7, y = var_192_cast_fp16)[name = tensor("op_2360_cast_fp16")]; + tensor var_2361_cast_fp16 = mul(x = current_key_15_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2361_cast_fp16")]; + tensor key_29_cast_fp16 = add(x = var_2360_cast_fp16, y = var_2361_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor var_2364_cast_fp16 = mul(x = var_84_cast_fp16_7, y = var_192_cast_fp16)[name = tensor("op_2364_cast_fp16")]; + tensor var_2365_cast_fp16 = mul(x = current_value_15_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2365_cast_fp16")]; + tensor value_29_cast_fp16 = add(x = var_2364_cast_fp16, y = var_2365_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_2369 = const()[name = tensor("op_2369"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_29_cast_fp16 = reshape(shape = var_2369, x = query_29_cast_fp16)[name = tensor("mh_q_29_cast_fp16")]; + tensor var_2371_to_fp16 = const()[name = tensor("op_2371_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2372_cast_fp16 = mul(x = mh_q_29_cast_fp16, y = var_2371_to_fp16)[name = tensor("op_2372_cast_fp16")]; + tensor var_2375 = const()[name = tensor("op_2375"), val = tensor([1, 12, 64, 448])]; + tensor var_2376_cast_fp16 = reshape(shape = var_2375, x = key_29_cast_fp16)[name = tensor("op_2376_cast_fp16")]; + tensor mh_w_43_transpose_x_0 = const()[name = tensor("mh_w_43_transpose_x_0"), val = tensor(true)]; + tensor mh_w_43_transpose_y_0 = const()[name = tensor("mh_w_43_transpose_y_0"), val = tensor(false)]; + tensor mh_w_43_cast_fp16 = matmul(transpose_x = mh_w_43_transpose_x_0, transpose_y = mh_w_43_transpose_y_0, x = var_2372_cast_fp16, y = var_2376_cast_fp16)[name = tensor("mh_w_43_cast_fp16")]; + tensor mh_w_45_cast_fp16 = add(x = mh_w_43_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_45_cast_fp16")]; + tensor var_2384_cast_fp16 = softmax(axis = var_2269, x = mh_w_45_cast_fp16)[name = tensor("op_2384_cast_fp16")]; + tensor var_2385 = const()[name = tensor("op_2385"), val = tensor([1, 12, 64, 448])]; + tensor var_2386_cast_fp16 = reshape(shape = var_2385, x = value_29_cast_fp16)[name = tensor("op_2386_cast_fp16")]; + tensor attn_29_transpose_x_0 = const()[name = tensor("attn_29_transpose_x_0"), val = tensor(false)]; + tensor attn_29_transpose_y_0 = const()[name = tensor("attn_29_transpose_y_0"), val = tensor(true)]; + tensor attn_29_cast_fp16 = matmul(transpose_x = attn_29_transpose_x_0, transpose_y = attn_29_transpose_y_0, x = var_2386_cast_fp16, y = var_2384_cast_fp16)[name = tensor("attn_29_cast_fp16")]; + tensor var_2389 = const()[name = tensor("op_2389"), val = tensor([1, 768, 1, 1])]; + tensor input_71_cast_fp16 = reshape(shape = var_2389, x = attn_29_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_2399_pad_type_0 = const()[name = tensor("op_2399_pad_type_0"), val = tensor("valid")]; + tensor var_2399_strides_0 = const()[name = tensor("op_2399_strides_0"), val = tensor([1, 1])]; + tensor var_2399_pad_0 = const()[name = tensor("op_2399_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2399_dilations_0 = const()[name = tensor("op_2399_dilations_0"), val = tensor([1, 1])]; + tensor var_2399_groups_0 = const()[name = tensor("op_2399_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124030464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124325440))), name = tensor("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124325568)))]; + tensor var_2399_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2399_dilations_0, groups = var_2399_groups_0, pad = var_2399_pad_0, pad_type = var_2399_pad_type_0, strides = var_2399_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("op_2399_cast_fp16")]; + tensor var_2405_pad_type_0 = const()[name = tensor("op_2405_pad_type_0"), val = tensor("valid")]; + tensor var_2405_strides_0 = const()[name = tensor("op_2405_strides_0"), val = tensor([1, 1])]; + tensor var_2405_pad_0 = const()[name = tensor("op_2405_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2405_dilations_0 = const()[name = tensor("op_2405_dilations_0"), val = tensor([1, 1])]; + tensor var_2405_groups_0 = const()[name = tensor("op_2405_groups_0"), val = tensor(1)]; + tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124332992))), name = tensor("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124327168))), shape = tensor([768, 768, 1, 1])]; + tensor var_2405_cast_fp16 = conv(dilations = var_2405_dilations_0, groups = var_2405_groups_0, pad = var_2405_pad_0, pad_type = var_2405_pad_type_0, strides = var_2405_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = tensor("op_2405_cast_fp16")]; + tensor obj_105_cast_fp16 = add(x = var_2399_cast_fp16, y = var_2405_cast_fp16)[name = tensor("obj_105_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = obj_105_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; + tensor var_2420_to_fp16 = const()[name = tensor("op_2420_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_2420_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_107_gamma_0_to_fp16 = const()[name = tensor("obj_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124406784)))]; + tensor obj_107_beta_0_to_fp16 = const()[name = tensor("obj_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124408384)))]; + tensor obj_107_epsilon_0_to_fp16 = const()[name = tensor("obj_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_107_cast_fp16 = batch_norm(beta = obj_107_beta_0_to_fp16, epsilon = obj_107_epsilon_0_to_fp16, gamma = obj_107_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_107_cast_fp16")]; + tensor var_2442_pad_type_0 = const()[name = tensor("op_2442_pad_type_0"), val = tensor("valid")]; + tensor var_2442_strides_0 = const()[name = tensor("op_2442_strides_0"), val = tensor([1, 1])]; + tensor var_2442_pad_0 = const()[name = tensor("op_2442_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2442_dilations_0 = const()[name = tensor("op_2442_dilations_0"), val = tensor([1, 1])]; + tensor var_2442_groups_0 = const()[name = tensor("op_2442_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124409984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124704960))), name = tensor("layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124705088)))]; + tensor var_2442_cast_fp16 = conv(bias = layers_7_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2442_dilations_0, groups = var_2442_groups_0, pad = var_2442_pad_0, pad_type = var_2442_pad_type_0, strides = var_2442_strides_0, weight = layers_7_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_107_cast_fp16)[name = tensor("op_2442_cast_fp16")]; + tensor var_2448_pad_type_0 = const()[name = tensor("op_2448_pad_type_0"), val = tensor("valid")]; + tensor var_2448_strides_0 = const()[name = tensor("op_2448_strides_0"), val = tensor([1, 1])]; + tensor var_2448_pad_0 = const()[name = tensor("op_2448_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2448_dilations_0 = const()[name = tensor("op_2448_dilations_0"), val = tensor([1, 1])]; + tensor var_2448_groups_0 = const()[name = tensor("op_2448_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124713152))), name = tensor("layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124706688))), shape = tensor([768, 768, 1, 1])]; + tensor var_2448_cast_fp16 = conv(dilations = var_2448_dilations_0, groups = var_2448_groups_0, pad = var_2448_pad_0, pad_type = var_2448_pad_type_0, strides = var_2448_strides_0, weight = layers_7_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_107_cast_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor query_31_cast_fp16 = add(x = var_2442_cast_fp16, y = var_2448_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor var_2457_pad_type_0 = const()[name = tensor("op_2457_pad_type_0"), val = tensor("valid")]; + tensor var_2457_strides_0 = const()[name = tensor("op_2457_strides_0"), val = tensor([1, 1])]; + tensor var_2457_pad_0 = const()[name = tensor("op_2457_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2457_dilations_0 = const()[name = tensor("op_2457_dilations_0"), val = tensor([1, 1])]; + tensor var_2457_groups_0 = const()[name = tensor("op_2457_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124786944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125081920))), name = tensor("layers_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2457_cast_fp16 = conv(dilations = var_2457_dilations_0, groups = var_2457_groups_0, pad = var_2457_pad_0, pad_type = var_2457_pad_type_0, strides = var_2457_strides_0, weight = layers_7_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2457_cast_fp16")]; + tensor var_2463_pad_type_0 = const()[name = tensor("op_2463_pad_type_0"), val = tensor("valid")]; + tensor var_2463_strides_0 = const()[name = tensor("op_2463_strides_0"), val = tensor([1, 1])]; + tensor var_2463_pad_0 = const()[name = tensor("op_2463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2463_dilations_0 = const()[name = tensor("op_2463_dilations_0"), val = tensor([1, 1])]; + tensor var_2463_groups_0 = const()[name = tensor("op_2463_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125089408))), name = tensor("layers_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125082048))), shape = tensor([768, 768, 1, 1])]; + tensor var_2463_cast_fp16 = conv(dilations = var_2463_dilations_0, groups = var_2463_groups_0, pad = var_2463_pad_0, pad_type = var_2463_pad_type_0, strides = var_2463_strides_0, weight = layers_7_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2463_cast_fp16")]; + tensor key_31_cast_fp16 = add(x = var_2457_cast_fp16, y = var_2463_cast_fp16)[name = tensor("key_31_cast_fp16")]; + tensor var_2473_pad_type_0 = const()[name = tensor("op_2473_pad_type_0"), val = tensor("valid")]; + tensor var_2473_strides_0 = const()[name = tensor("op_2473_strides_0"), val = tensor([1, 1])]; + tensor var_2473_pad_0 = const()[name = tensor("op_2473_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2473_dilations_0 = const()[name = tensor("op_2473_dilations_0"), val = tensor([1, 1])]; + tensor var_2473_groups_0 = const()[name = tensor("op_2473_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125163200))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125458176))), name = tensor("layers_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125458304)))]; + tensor var_2473_cast_fp16 = conv(bias = layers_7_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2473_dilations_0, groups = var_2473_groups_0, pad = var_2473_pad_0, pad_type = var_2473_pad_type_0, strides = var_2473_strides_0, weight = layers_7_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2473_cast_fp16")]; + tensor var_2479_pad_type_0 = const()[name = tensor("op_2479_pad_type_0"), val = tensor("valid")]; + tensor var_2479_strides_0 = const()[name = tensor("op_2479_strides_0"), val = tensor([1, 1])]; + tensor var_2479_pad_0 = const()[name = tensor("op_2479_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2479_dilations_0 = const()[name = tensor("op_2479_dilations_0"), val = tensor([1, 1])]; + tensor var_2479_groups_0 = const()[name = tensor("op_2479_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125467008))), name = tensor("layers_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125459904))), shape = tensor([768, 768, 1, 1])]; + tensor var_2479_cast_fp16 = conv(dilations = var_2479_dilations_0, groups = var_2479_groups_0, pad = var_2479_pad_0, pad_type = var_2479_pad_type_0, strides = var_2479_strides_0, weight = layers_7_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2479_cast_fp16")]; + tensor value_31_cast_fp16 = add(x = var_2473_cast_fp16, y = var_2479_cast_fp16)[name = tensor("value_31_cast_fp16")]; + tensor var_2483 = const()[name = tensor("op_2483"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_31_cast_fp16 = reshape(shape = var_2483, x = query_31_cast_fp16)[name = tensor("mh_q_31_cast_fp16")]; + tensor var_2485_to_fp16 = const()[name = tensor("op_2485_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2486_cast_fp16 = mul(x = mh_q_31_cast_fp16, y = var_2485_to_fp16)[name = tensor("op_2486_cast_fp16")]; + tensor var_2489 = const()[name = tensor("op_2489"), val = tensor([1, 12, 64, 1500])]; + tensor var_2490_cast_fp16 = reshape(shape = var_2489, x = key_31_cast_fp16)[name = tensor("op_2490_cast_fp16")]; + tensor mh_w_47_transpose_x_0 = const()[name = tensor("mh_w_47_transpose_x_0"), val = tensor(true)]; + tensor mh_w_47_transpose_y_0 = const()[name = tensor("mh_w_47_transpose_y_0"), val = tensor(false)]; + tensor mh_w_47_cast_fp16 = matmul(transpose_x = mh_w_47_transpose_x_0, transpose_y = mh_w_47_transpose_y_0, x = var_2486_cast_fp16, y = var_2490_cast_fp16)[name = tensor("mh_w_47_cast_fp16")]; + tensor obj_111_cast_fp16 = softmax(axis = var_2269, x = mh_w_47_cast_fp16)[name = tensor("obj_111_cast_fp16")]; + tensor var_2494 = const()[name = tensor("op_2494"), val = tensor([1, 12, 64, 1500])]; + tensor var_2495_cast_fp16 = reshape(shape = var_2494, x = value_31_cast_fp16)[name = tensor("op_2495_cast_fp16")]; + tensor attn_31_transpose_x_0 = const()[name = tensor("attn_31_transpose_x_0"), val = tensor(false)]; + tensor attn_31_transpose_y_0 = const()[name = tensor("attn_31_transpose_y_0"), val = tensor(true)]; + tensor attn_31_cast_fp16 = matmul(transpose_x = attn_31_transpose_x_0, transpose_y = attn_31_transpose_y_0, x = var_2495_cast_fp16, y = obj_111_cast_fp16)[name = tensor("attn_31_cast_fp16")]; + tensor var_2498 = const()[name = tensor("op_2498"), val = tensor([1, 768, 1, 1])]; + tensor input_73_cast_fp16 = reshape(shape = var_2498, x = attn_31_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_2508_pad_type_0 = const()[name = tensor("op_2508_pad_type_0"), val = tensor("valid")]; + tensor var_2508_strides_0 = const()[name = tensor("op_2508_strides_0"), val = tensor([1, 1])]; + tensor var_2508_pad_0 = const()[name = tensor("op_2508_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2508_dilations_0 = const()[name = tensor("op_2508_dilations_0"), val = tensor([1, 1])]; + tensor var_2508_groups_0 = const()[name = tensor("op_2508_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125540800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125835776))), name = tensor("layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125835904)))]; + tensor var_2508_cast_fp16 = conv(bias = layers_7_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2508_dilations_0, groups = var_2508_groups_0, pad = var_2508_pad_0, pad_type = var_2508_pad_type_0, strides = var_2508_strides_0, weight = layers_7_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2514_pad_type_0 = const()[name = tensor("op_2514_pad_type_0"), val = tensor("valid")]; + tensor var_2514_strides_0 = const()[name = tensor("op_2514_strides_0"), val = tensor([1, 1])]; + tensor var_2514_pad_0 = const()[name = tensor("op_2514_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2514_dilations_0 = const()[name = tensor("op_2514_dilations_0"), val = tensor([1, 1])]; + tensor var_2514_groups_0 = const()[name = tensor("op_2514_groups_0"), val = tensor(1)]; + tensor layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125844160))), name = tensor("layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125837504))), shape = tensor([768, 768, 1, 1])]; + tensor var_2514_cast_fp16 = conv(dilations = var_2514_dilations_0, groups = var_2514_groups_0, pad = var_2514_pad_0, pad_type = var_2514_pad_type_0, strides = var_2514_strides_0, weight = layers_7_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = tensor("op_2514_cast_fp16")]; + tensor obj_109_cast_fp16 = add(x = var_2508_cast_fp16, y = var_2514_cast_fp16)[name = tensor("obj_109_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_109_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; + tensor var_2525_to_fp16 = const()[name = tensor("op_2525_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_2525_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125917952)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125919552)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_2543_pad_type_0 = const()[name = tensor("op_2543_pad_type_0"), val = tensor("valid")]; + tensor var_2543_strides_0 = const()[name = tensor("op_2543_strides_0"), val = tensor([1, 1])]; + tensor var_2543_pad_0 = const()[name = tensor("op_2543_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2543_dilations_0 = const()[name = tensor("op_2543_dilations_0"), val = tensor([1, 1])]; + tensor var_2543_groups_0 = const()[name = tensor("op_2543_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(125921152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127100864))), name = tensor("layers_7_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127100992)))]; + tensor var_2543_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_2543_dilations_0, groups = var_2543_groups_0, pad = var_2543_pad_0, pad_type = var_2543_pad_type_0, strides = var_2543_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = tensor("op_2543_cast_fp16")]; + tensor var_2549_pad_type_0 = const()[name = tensor("op_2549_pad_type_0"), val = tensor("valid")]; + tensor var_2549_strides_0 = const()[name = tensor("op_2549_strides_0"), val = tensor([1, 1])]; + tensor var_2549_pad_0 = const()[name = tensor("op_2549_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2549_dilations_0 = const()[name = tensor("op_2549_dilations_0"), val = tensor([1, 1])]; + tensor var_2549_groups_0 = const()[name = tensor("op_2549_groups_0"), val = tensor(1)]; + tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127126144))), name = tensor("layers_7_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127107200))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2549_cast_fp16 = conv(dilations = var_2549_dilations_0, groups = var_2549_groups_0, pad = var_2549_pad_0, pad_type = var_2549_pad_type_0, strides = var_2549_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = tensor("op_2549_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = var_2543_cast_fp16, y = var_2549_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_2560_pad_type_0 = const()[name = tensor("op_2560_pad_type_0"), val = tensor("valid")]; + tensor var_2560_strides_0 = const()[name = tensor("op_2560_strides_0"), val = tensor([1, 1])]; + tensor var_2560_pad_0 = const()[name = tensor("op_2560_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2560_dilations_0 = const()[name = tensor("op_2560_dilations_0"), val = tensor([1, 1])]; + tensor var_2560_groups_0 = const()[name = tensor("op_2560_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127421120))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128600832))), name = tensor("layers_7_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128600960)))]; + tensor var_2560_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_2560_dilations_0, groups = var_2560_groups_0, pad = var_2560_pad_0, pad_type = var_2560_pad_type_0, strides = var_2560_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = tensor("op_2560_cast_fp16")]; + tensor var_2566_pad_type_0 = const()[name = tensor("op_2566_pad_type_0"), val = tensor("valid")]; + tensor var_2566_strides_0 = const()[name = tensor("op_2566_strides_0"), val = tensor([1, 1])]; + tensor var_2566_pad_0 = const()[name = tensor("op_2566_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2566_dilations_0 = const()[name = tensor("op_2566_dilations_0"), val = tensor([1, 1])]; + tensor var_2566_groups_0 = const()[name = tensor("op_2566_groups_0"), val = tensor(1)]; + tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128626624))), name = tensor("layers_7_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128602560))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2566_cast_fp16 = conv(dilations = var_2566_dilations_0, groups = var_2566_groups_0, pad = var_2566_pad_0, pad_type = var_2566_pad_type_0, strides = var_2566_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = tensor("op_2566_cast_fp16")]; + tensor hidden_states_17_cast_fp16 = add(x = var_2560_cast_fp16, y = var_2566_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_2578 = const()[name = tensor("op_2578"), val = tensor(3)]; + tensor out_49_axes_0 = const()[name = tensor("out_49_axes_0"), val = tensor([1])]; + tensor var_2603_to_fp16 = const()[name = tensor("op_2603_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_2603_to_fp16, x = inputs_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; + tensor obj_113_gamma_0_to_fp16 = const()[name = tensor("obj_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128921600)))]; + tensor obj_113_beta_0_to_fp16 = const()[name = tensor("obj_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128923200)))]; + tensor obj_113_epsilon_0_to_fp16 = const()[name = tensor("obj_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_113_cast_fp16")]; + tensor var_2625_pad_type_0 = const()[name = tensor("op_2625_pad_type_0"), val = tensor("valid")]; + tensor var_2625_strides_0 = const()[name = tensor("op_2625_strides_0"), val = tensor([1, 1])]; + tensor var_2625_pad_0 = const()[name = tensor("op_2625_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2625_dilations_0 = const()[name = tensor("op_2625_dilations_0"), val = tensor([1, 1])]; + tensor var_2625_groups_0 = const()[name = tensor("op_2625_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128924800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129219776))), name = tensor("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129219904)))]; + tensor var_2625_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2625_dilations_0, groups = var_2625_groups_0, pad = var_2625_pad_0, pad_type = var_2625_pad_type_0, strides = var_2625_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_2625_cast_fp16")]; + tensor var_2631_pad_type_0 = const()[name = tensor("op_2631_pad_type_0"), val = tensor("valid")]; + tensor var_2631_strides_0 = const()[name = tensor("op_2631_strides_0"), val = tensor([1, 1])]; + tensor var_2631_pad_0 = const()[name = tensor("op_2631_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2631_dilations_0 = const()[name = tensor("op_2631_dilations_0"), val = tensor([1, 1])]; + tensor var_2631_groups_0 = const()[name = tensor("op_2631_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129226816))), name = tensor("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129221504))), shape = tensor([768, 768, 1, 1])]; + tensor var_2631_cast_fp16 = conv(dilations = var_2631_dilations_0, groups = var_2631_groups_0, pad = var_2631_pad_0, pad_type = var_2631_pad_type_0, strides = var_2631_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_2631_cast_fp16")]; + tensor query_33_cast_fp16 = add(x = var_2625_cast_fp16, y = var_2631_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor var_2640_pad_type_0 = const()[name = tensor("op_2640_pad_type_0"), val = tensor("valid")]; + tensor var_2640_strides_0 = const()[name = tensor("op_2640_strides_0"), val = tensor([1, 1])]; + tensor var_2640_pad_0 = const()[name = tensor("op_2640_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2640_dilations_0 = const()[name = tensor("op_2640_dilations_0"), val = tensor([1, 1])]; + tensor var_2640_groups_0 = const()[name = tensor("op_2640_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129300608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129595584))), name = tensor("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2640_cast_fp16 = conv(dilations = var_2640_dilations_0, groups = var_2640_groups_0, pad = var_2640_pad_0, pad_type = var_2640_pad_type_0, strides = var_2640_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_2640_cast_fp16")]; + tensor var_2646_pad_type_0 = const()[name = tensor("op_2646_pad_type_0"), val = tensor("valid")]; + tensor var_2646_strides_0 = const()[name = tensor("op_2646_strides_0"), val = tensor([1, 1])]; + tensor var_2646_pad_0 = const()[name = tensor("op_2646_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2646_dilations_0 = const()[name = tensor("op_2646_dilations_0"), val = tensor([1, 1])]; + tensor var_2646_groups_0 = const()[name = tensor("op_2646_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129600960))), name = tensor("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129595712))), shape = tensor([768, 768, 1, 1])]; + tensor var_2646_cast_fp16 = conv(dilations = var_2646_dilations_0, groups = var_2646_groups_0, pad = var_2646_pad_0, pad_type = var_2646_pad_type_0, strides = var_2646_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_2646_cast_fp16")]; + tensor current_key_17_cast_fp16 = add(x = var_2640_cast_fp16, y = var_2646_cast_fp16)[name = tensor("current_key_17_cast_fp16")]; + tensor var_2656_pad_type_0 = const()[name = tensor("op_2656_pad_type_0"), val = tensor("valid")]; + tensor var_2656_strides_0 = const()[name = tensor("op_2656_strides_0"), val = tensor([1, 1])]; + tensor var_2656_pad_0 = const()[name = tensor("op_2656_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2656_dilations_0 = const()[name = tensor("op_2656_dilations_0"), val = tensor([1, 1])]; + tensor var_2656_groups_0 = const()[name = tensor("op_2656_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129674752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129969728))), name = tensor("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129969856)))]; + tensor var_2656_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2656_dilations_0, groups = var_2656_groups_0, pad = var_2656_pad_0, pad_type = var_2656_pad_type_0, strides = var_2656_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_2656_cast_fp16")]; + tensor var_2662_pad_type_0 = const()[name = tensor("op_2662_pad_type_0"), val = tensor("valid")]; + tensor var_2662_strides_0 = const()[name = tensor("op_2662_strides_0"), val = tensor([1, 1])]; + tensor var_2662_pad_0 = const()[name = tensor("op_2662_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2662_dilations_0 = const()[name = tensor("op_2662_dilations_0"), val = tensor([1, 1])]; + tensor var_2662_groups_0 = const()[name = tensor("op_2662_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129976576))), name = tensor("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129971456))), shape = tensor([768, 768, 1, 1])]; + tensor var_2662_cast_fp16 = conv(dilations = var_2662_dilations_0, groups = var_2662_groups_0, pad = var_2662_pad_0, pad_type = var_2662_pad_type_0, strides = var_2662_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_2662_cast_fp16")]; + tensor current_value_17_cast_fp16 = add(x = var_2656_cast_fp16, y = var_2662_cast_fp16)[name = tensor("current_value_17_cast_fp16")]; + tensor var_2669_cast_fp16 = mul(x = var_69_cast_fp16_8, y = var_192_cast_fp16)[name = tensor("op_2669_cast_fp16")]; + tensor var_2670_cast_fp16 = mul(x = current_key_17_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2670_cast_fp16")]; + tensor key_33_cast_fp16 = add(x = var_2669_cast_fp16, y = var_2670_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor var_2673_cast_fp16 = mul(x = var_84_cast_fp16_8, y = var_192_cast_fp16)[name = tensor("op_2673_cast_fp16")]; + tensor var_2674_cast_fp16 = mul(x = current_value_17_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2674_cast_fp16")]; + tensor value_33_cast_fp16 = add(x = var_2673_cast_fp16, y = var_2674_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_2678 = const()[name = tensor("op_2678"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_33_cast_fp16 = reshape(shape = var_2678, x = query_33_cast_fp16)[name = tensor("mh_q_33_cast_fp16")]; + tensor var_2680_to_fp16 = const()[name = tensor("op_2680_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2681_cast_fp16 = mul(x = mh_q_33_cast_fp16, y = var_2680_to_fp16)[name = tensor("op_2681_cast_fp16")]; + tensor var_2684 = const()[name = tensor("op_2684"), val = tensor([1, 12, 64, 448])]; + tensor var_2685_cast_fp16 = reshape(shape = var_2684, x = key_33_cast_fp16)[name = tensor("op_2685_cast_fp16")]; + tensor mh_w_49_transpose_x_0 = const()[name = tensor("mh_w_49_transpose_x_0"), val = tensor(true)]; + tensor mh_w_49_transpose_y_0 = const()[name = tensor("mh_w_49_transpose_y_0"), val = tensor(false)]; + tensor mh_w_49_cast_fp16 = matmul(transpose_x = mh_w_49_transpose_x_0, transpose_y = mh_w_49_transpose_y_0, x = var_2681_cast_fp16, y = var_2685_cast_fp16)[name = tensor("mh_w_49_cast_fp16")]; + tensor mh_w_51_cast_fp16 = add(x = mh_w_49_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_51_cast_fp16")]; + tensor var_2693_cast_fp16 = softmax(axis = var_2578, x = mh_w_51_cast_fp16)[name = tensor("op_2693_cast_fp16")]; + tensor var_2694 = const()[name = tensor("op_2694"), val = tensor([1, 12, 64, 448])]; + tensor var_2695_cast_fp16 = reshape(shape = var_2694, x = value_33_cast_fp16)[name = tensor("op_2695_cast_fp16")]; + tensor attn_33_transpose_x_0 = const()[name = tensor("attn_33_transpose_x_0"), val = tensor(false)]; + tensor attn_33_transpose_y_0 = const()[name = tensor("attn_33_transpose_y_0"), val = tensor(true)]; + tensor attn_33_cast_fp16 = matmul(transpose_x = attn_33_transpose_x_0, transpose_y = attn_33_transpose_y_0, x = var_2695_cast_fp16, y = var_2693_cast_fp16)[name = tensor("attn_33_cast_fp16")]; + tensor var_2698 = const()[name = tensor("op_2698"), val = tensor([1, 768, 1, 1])]; + tensor input_81_cast_fp16 = reshape(shape = var_2698, x = attn_33_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_2708_pad_type_0 = const()[name = tensor("op_2708_pad_type_0"), val = tensor("valid")]; + tensor var_2708_strides_0 = const()[name = tensor("op_2708_strides_0"), val = tensor([1, 1])]; + tensor var_2708_pad_0 = const()[name = tensor("op_2708_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2708_dilations_0 = const()[name = tensor("op_2708_dilations_0"), val = tensor([1, 1])]; + tensor var_2708_groups_0 = const()[name = tensor("op_2708_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130050368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130345344))), name = tensor("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130345472)))]; + tensor var_2708_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2708_dilations_0, groups = var_2708_groups_0, pad = var_2708_pad_0, pad_type = var_2708_pad_type_0, strides = var_2708_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("op_2708_cast_fp16")]; + tensor var_2714_pad_type_0 = const()[name = tensor("op_2714_pad_type_0"), val = tensor("valid")]; + tensor var_2714_strides_0 = const()[name = tensor("op_2714_strides_0"), val = tensor([1, 1])]; + tensor var_2714_pad_0 = const()[name = tensor("op_2714_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2714_dilations_0 = const()[name = tensor("op_2714_dilations_0"), val = tensor([1, 1])]; + tensor var_2714_groups_0 = const()[name = tensor("op_2714_groups_0"), val = tensor(1)]; + tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130353024))), name = tensor("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130347072))), shape = tensor([768, 768, 1, 1])]; + tensor var_2714_cast_fp16 = conv(dilations = var_2714_dilations_0, groups = var_2714_groups_0, pad = var_2714_pad_0, pad_type = var_2714_pad_type_0, strides = var_2714_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = tensor("op_2714_cast_fp16")]; + tensor obj_119_cast_fp16 = add(x = var_2708_cast_fp16, y = var_2714_cast_fp16)[name = tensor("obj_119_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_119_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor out_51_axes_0 = const()[name = tensor("out_51_axes_0"), val = tensor([1])]; + tensor var_2729_to_fp16 = const()[name = tensor("op_2729_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_2729_to_fp16, x = inputs_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; + tensor obj_121_gamma_0_to_fp16 = const()[name = tensor("obj_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130426816)))]; + tensor obj_121_beta_0_to_fp16 = const()[name = tensor("obj_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130428416)))]; + tensor obj_121_epsilon_0_to_fp16 = const()[name = tensor("obj_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor("obj_121_cast_fp16")]; + tensor var_2751_pad_type_0 = const()[name = tensor("op_2751_pad_type_0"), val = tensor("valid")]; + tensor var_2751_strides_0 = const()[name = tensor("op_2751_strides_0"), val = tensor([1, 1])]; + tensor var_2751_pad_0 = const()[name = tensor("op_2751_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2751_dilations_0 = const()[name = tensor("op_2751_dilations_0"), val = tensor([1, 1])]; + tensor var_2751_groups_0 = const()[name = tensor("op_2751_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130430016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130724992))), name = tensor("layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130725120)))]; + tensor var_2751_cast_fp16 = conv(bias = layers_8_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2751_dilations_0, groups = var_2751_groups_0, pad = var_2751_pad_0, pad_type = var_2751_pad_type_0, strides = var_2751_strides_0, weight = layers_8_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_2751_cast_fp16")]; + tensor var_2757_pad_type_0 = const()[name = tensor("op_2757_pad_type_0"), val = tensor("valid")]; + tensor var_2757_strides_0 = const()[name = tensor("op_2757_strides_0"), val = tensor([1, 1])]; + tensor var_2757_pad_0 = const()[name = tensor("op_2757_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2757_dilations_0 = const()[name = tensor("op_2757_dilations_0"), val = tensor([1, 1])]; + tensor var_2757_groups_0 = const()[name = tensor("op_2757_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130732224))), name = tensor("layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130726720))), shape = tensor([768, 768, 1, 1])]; + tensor var_2757_cast_fp16 = conv(dilations = var_2757_dilations_0, groups = var_2757_groups_0, pad = var_2757_pad_0, pad_type = var_2757_pad_type_0, strides = var_2757_strides_0, weight = layers_8_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_2757_cast_fp16")]; + tensor query_35_cast_fp16 = add(x = var_2751_cast_fp16, y = var_2757_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor var_2766_pad_type_0 = const()[name = tensor("op_2766_pad_type_0"), val = tensor("valid")]; + tensor var_2766_strides_0 = const()[name = tensor("op_2766_strides_0"), val = tensor([1, 1])]; + tensor var_2766_pad_0 = const()[name = tensor("op_2766_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2766_dilations_0 = const()[name = tensor("op_2766_dilations_0"), val = tensor([1, 1])]; + tensor var_2766_groups_0 = const()[name = tensor("op_2766_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(130806016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131100992))), name = tensor("layers_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2766_cast_fp16 = conv(dilations = var_2766_dilations_0, groups = var_2766_groups_0, pad = var_2766_pad_0, pad_type = var_2766_pad_type_0, strides = var_2766_strides_0, weight = layers_8_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2766_cast_fp16")]; + tensor var_2772_pad_type_0 = const()[name = tensor("op_2772_pad_type_0"), val = tensor("valid")]; + tensor var_2772_strides_0 = const()[name = tensor("op_2772_strides_0"), val = tensor([1, 1])]; + tensor var_2772_pad_0 = const()[name = tensor("op_2772_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2772_dilations_0 = const()[name = tensor("op_2772_dilations_0"), val = tensor([1, 1])]; + tensor var_2772_groups_0 = const()[name = tensor("op_2772_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131108416))), name = tensor("layers_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131101120))), shape = tensor([768, 768, 1, 1])]; + tensor var_2772_cast_fp16 = conv(dilations = var_2772_dilations_0, groups = var_2772_groups_0, pad = var_2772_pad_0, pad_type = var_2772_pad_type_0, strides = var_2772_strides_0, weight = layers_8_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2772_cast_fp16")]; + tensor key_35_cast_fp16 = add(x = var_2766_cast_fp16, y = var_2772_cast_fp16)[name = tensor("key_35_cast_fp16")]; + tensor var_2782_pad_type_0 = const()[name = tensor("op_2782_pad_type_0"), val = tensor("valid")]; + tensor var_2782_strides_0 = const()[name = tensor("op_2782_strides_0"), val = tensor([1, 1])]; + tensor var_2782_pad_0 = const()[name = tensor("op_2782_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2782_dilations_0 = const()[name = tensor("op_2782_dilations_0"), val = tensor([1, 1])]; + tensor var_2782_groups_0 = const()[name = tensor("op_2782_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131182208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131477184))), name = tensor("layers_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131477312)))]; + tensor var_2782_cast_fp16 = conv(bias = layers_8_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2782_dilations_0, groups = var_2782_groups_0, pad = var_2782_pad_0, pad_type = var_2782_pad_type_0, strides = var_2782_strides_0, weight = layers_8_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_2782_cast_fp16")]; + tensor var_2788_pad_type_0 = const()[name = tensor("op_2788_pad_type_0"), val = tensor("valid")]; + tensor var_2788_strides_0 = const()[name = tensor("op_2788_strides_0"), val = tensor([1, 1])]; + tensor var_2788_pad_0 = const()[name = tensor("op_2788_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2788_dilations_0 = const()[name = tensor("op_2788_dilations_0"), val = tensor([1, 1])]; + tensor var_2788_groups_0 = const()[name = tensor("op_2788_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131484416))), name = tensor("layers_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131478912))), shape = tensor([768, 768, 1, 1])]; + tensor var_2788_cast_fp16 = conv(dilations = var_2788_dilations_0, groups = var_2788_groups_0, pad = var_2788_pad_0, pad_type = var_2788_pad_type_0, strides = var_2788_strides_0, weight = layers_8_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_2788_cast_fp16")]; + tensor value_35_cast_fp16 = add(x = var_2782_cast_fp16, y = var_2788_cast_fp16)[name = tensor("value_35_cast_fp16")]; + tensor var_2792 = const()[name = tensor("op_2792"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_35_cast_fp16 = reshape(shape = var_2792, x = query_35_cast_fp16)[name = tensor("mh_q_35_cast_fp16")]; + tensor var_2794_to_fp16 = const()[name = tensor("op_2794_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2795_cast_fp16 = mul(x = mh_q_35_cast_fp16, y = var_2794_to_fp16)[name = tensor("op_2795_cast_fp16")]; + tensor var_2798 = const()[name = tensor("op_2798"), val = tensor([1, 12, 64, 1500])]; + tensor var_2799_cast_fp16 = reshape(shape = var_2798, x = key_35_cast_fp16)[name = tensor("op_2799_cast_fp16")]; + tensor mh_w_53_transpose_x_0 = const()[name = tensor("mh_w_53_transpose_x_0"), val = tensor(true)]; + tensor mh_w_53_transpose_y_0 = const()[name = tensor("mh_w_53_transpose_y_0"), val = tensor(false)]; + tensor mh_w_53_cast_fp16 = matmul(transpose_x = mh_w_53_transpose_x_0, transpose_y = mh_w_53_transpose_y_0, x = var_2795_cast_fp16, y = var_2799_cast_fp16)[name = tensor("mh_w_53_cast_fp16")]; + tensor obj_125_cast_fp16 = softmax(axis = var_2578, x = mh_w_53_cast_fp16)[name = tensor("obj_125_cast_fp16")]; + tensor var_2803 = const()[name = tensor("op_2803"), val = tensor([1, 12, 64, 1500])]; + tensor var_2804_cast_fp16 = reshape(shape = var_2803, x = value_35_cast_fp16)[name = tensor("op_2804_cast_fp16")]; + tensor attn_35_transpose_x_0 = const()[name = tensor("attn_35_transpose_x_0"), val = tensor(false)]; + tensor attn_35_transpose_y_0 = const()[name = tensor("attn_35_transpose_y_0"), val = tensor(true)]; + tensor attn_35_cast_fp16 = matmul(transpose_x = attn_35_transpose_x_0, transpose_y = attn_35_transpose_y_0, x = var_2804_cast_fp16, y = obj_125_cast_fp16)[name = tensor("attn_35_cast_fp16")]; + tensor var_2807 = const()[name = tensor("op_2807"), val = tensor([1, 768, 1, 1])]; + tensor input_83_cast_fp16 = reshape(shape = var_2807, x = attn_35_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_2817_pad_type_0 = const()[name = tensor("op_2817_pad_type_0"), val = tensor("valid")]; + tensor var_2817_strides_0 = const()[name = tensor("op_2817_strides_0"), val = tensor([1, 1])]; + tensor var_2817_pad_0 = const()[name = tensor("op_2817_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2817_dilations_0 = const()[name = tensor("op_2817_dilations_0"), val = tensor([1, 1])]; + tensor var_2817_groups_0 = const()[name = tensor("op_2817_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131558208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131853184))), name = tensor("layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131853312)))]; + tensor var_2817_cast_fp16 = conv(bias = layers_8_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_2817_dilations_0, groups = var_2817_groups_0, pad = var_2817_pad_0, pad_type = var_2817_pad_type_0, strides = var_2817_strides_0, weight = layers_8_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = tensor("op_2817_cast_fp16")]; + tensor var_2823_pad_type_0 = const()[name = tensor("op_2823_pad_type_0"), val = tensor("valid")]; + tensor var_2823_strides_0 = const()[name = tensor("op_2823_strides_0"), val = tensor([1, 1])]; + tensor var_2823_pad_0 = const()[name = tensor("op_2823_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2823_dilations_0 = const()[name = tensor("op_2823_dilations_0"), val = tensor([1, 1])]; + tensor var_2823_groups_0 = const()[name = tensor("op_2823_groups_0"), val = tensor(1)]; + tensor layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131859648))), name = tensor("layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131854912))), shape = tensor([768, 768, 1, 1])]; + tensor var_2823_cast_fp16 = conv(dilations = var_2823_dilations_0, groups = var_2823_groups_0, pad = var_2823_pad_0, pad_type = var_2823_pad_type_0, strides = var_2823_strides_0, weight = layers_8_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = tensor("op_2823_cast_fp16")]; + tensor obj_123_cast_fp16 = add(x = var_2817_cast_fp16, y = var_2823_cast_fp16)[name = tensor("obj_123_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = obj_123_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor out_53_axes_0 = const()[name = tensor("out_53_axes_0"), val = tensor([1])]; + tensor var_2837_to_fp16 = const()[name = tensor("op_2837_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_2837_to_fp16, x = inputs_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; + tensor input_85_gamma_0_to_fp16 = const()[name = tensor("input_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131933440)))]; + tensor input_85_beta_0_to_fp16 = const()[name = tensor("input_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131935040)))]; + tensor input_85_epsilon_0_to_fp16 = const()[name = tensor("input_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_85_cast_fp16 = batch_norm(beta = input_85_beta_0_to_fp16, epsilon = input_85_epsilon_0_to_fp16, gamma = input_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor var_2855_pad_type_0 = const()[name = tensor("op_2855_pad_type_0"), val = tensor("valid")]; + tensor var_2855_strides_0 = const()[name = tensor("op_2855_strides_0"), val = tensor([1, 1])]; + tensor var_2855_pad_0 = const()[name = tensor("op_2855_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2855_dilations_0 = const()[name = tensor("op_2855_dilations_0"), val = tensor([1, 1])]; + tensor var_2855_groups_0 = const()[name = tensor("op_2855_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131936640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133116352))), name = tensor("layers_8_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133116480)))]; + tensor var_2855_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_2855_dilations_0, groups = var_2855_groups_0, pad = var_2855_pad_0, pad_type = var_2855_pad_type_0, strides = var_2855_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_85_cast_fp16)[name = tensor("op_2855_cast_fp16")]; + tensor var_2861_pad_type_0 = const()[name = tensor("op_2861_pad_type_0"), val = tensor("valid")]; + tensor var_2861_strides_0 = const()[name = tensor("op_2861_strides_0"), val = tensor([1, 1])]; + tensor var_2861_pad_0 = const()[name = tensor("op_2861_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2861_dilations_0 = const()[name = tensor("op_2861_dilations_0"), val = tensor([1, 1])]; + tensor var_2861_groups_0 = const()[name = tensor("op_2861_groups_0"), val = tensor(1)]; + tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133141376))), name = tensor("layers_8_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133122688))), shape = tensor([3072, 768, 1, 1])]; + tensor var_2861_cast_fp16 = conv(dilations = var_2861_dilations_0, groups = var_2861_groups_0, pad = var_2861_pad_0, pad_type = var_2861_pad_type_0, strides = var_2861_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_85_cast_fp16)[name = tensor("op_2861_cast_fp16")]; + tensor input_87_cast_fp16 = add(x = var_2855_cast_fp16, y = var_2861_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor input_89_mode_0 = const()[name = tensor("input_89_mode_0"), val = tensor("EXACT")]; + tensor input_89_cast_fp16 = gelu(mode = input_89_mode_0, x = input_87_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_2872_pad_type_0 = const()[name = tensor("op_2872_pad_type_0"), val = tensor("valid")]; + tensor var_2872_strides_0 = const()[name = tensor("op_2872_strides_0"), val = tensor([1, 1])]; + tensor var_2872_pad_0 = const()[name = tensor("op_2872_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2872_dilations_0 = const()[name = tensor("op_2872_dilations_0"), val = tensor([1, 1])]; + tensor var_2872_groups_0 = const()[name = tensor("op_2872_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133436352))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134616064))), name = tensor("layers_8_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134616192)))]; + tensor var_2872_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_2872_dilations_0, groups = var_2872_groups_0, pad = var_2872_pad_0, pad_type = var_2872_pad_type_0, strides = var_2872_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = tensor("op_2872_cast_fp16")]; + tensor var_2878_pad_type_0 = const()[name = tensor("op_2878_pad_type_0"), val = tensor("valid")]; + tensor var_2878_strides_0 = const()[name = tensor("op_2878_strides_0"), val = tensor([1, 1])]; + tensor var_2878_pad_0 = const()[name = tensor("op_2878_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2878_dilations_0 = const()[name = tensor("op_2878_dilations_0"), val = tensor([1, 1])]; + tensor var_2878_groups_0 = const()[name = tensor("op_2878_groups_0"), val = tensor(1)]; + tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134642432))), name = tensor("layers_8_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134617792))), shape = tensor([768, 3072, 1, 1])]; + tensor var_2878_cast_fp16 = conv(dilations = var_2878_dilations_0, groups = var_2878_groups_0, pad = var_2878_pad_0, pad_type = var_2878_pad_type_0, strides = var_2878_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = tensor("op_2878_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = var_2872_cast_fp16, y = var_2878_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor var_2891 = const()[name = tensor("op_2891"), val = tensor(3)]; + tensor out_55_axes_0 = const()[name = tensor("out_55_axes_0"), val = tensor([1])]; + tensor var_2916_to_fp16 = const()[name = tensor("op_2916_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_2916_to_fp16, x = inputs_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; + tensor obj_127_gamma_0_to_fp16 = const()[name = tensor("obj_127_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134937408)))]; + tensor obj_127_beta_0_to_fp16 = const()[name = tensor("obj_127_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134939008)))]; + tensor obj_127_epsilon_0_to_fp16 = const()[name = tensor("obj_127_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_127_cast_fp16 = batch_norm(beta = obj_127_beta_0_to_fp16, epsilon = obj_127_epsilon_0_to_fp16, gamma = obj_127_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("obj_127_cast_fp16")]; + tensor var_2938_pad_type_0 = const()[name = tensor("op_2938_pad_type_0"), val = tensor("valid")]; + tensor var_2938_strides_0 = const()[name = tensor("op_2938_strides_0"), val = tensor([1, 1])]; + tensor var_2938_pad_0 = const()[name = tensor("op_2938_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2938_dilations_0 = const()[name = tensor("op_2938_dilations_0"), val = tensor([1, 1])]; + tensor var_2938_groups_0 = const()[name = tensor("op_2938_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134940608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135235584))), name = tensor("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135235712)))]; + tensor var_2938_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_2938_dilations_0, groups = var_2938_groups_0, pad = var_2938_pad_0, pad_type = var_2938_pad_type_0, strides = var_2938_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_127_cast_fp16)[name = tensor("op_2938_cast_fp16")]; + tensor var_2944_pad_type_0 = const()[name = tensor("op_2944_pad_type_0"), val = tensor("valid")]; + tensor var_2944_strides_0 = const()[name = tensor("op_2944_strides_0"), val = tensor([1, 1])]; + tensor var_2944_pad_0 = const()[name = tensor("op_2944_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2944_dilations_0 = const()[name = tensor("op_2944_dilations_0"), val = tensor([1, 1])]; + tensor var_2944_groups_0 = const()[name = tensor("op_2944_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135242880))), name = tensor("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135237312))), shape = tensor([768, 768, 1, 1])]; + tensor var_2944_cast_fp16 = conv(dilations = var_2944_dilations_0, groups = var_2944_groups_0, pad = var_2944_pad_0, pad_type = var_2944_pad_type_0, strides = var_2944_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_127_cast_fp16)[name = tensor("op_2944_cast_fp16")]; + tensor query_37_cast_fp16 = add(x = var_2938_cast_fp16, y = var_2944_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor var_2953_pad_type_0 = const()[name = tensor("op_2953_pad_type_0"), val = tensor("valid")]; + tensor var_2953_strides_0 = const()[name = tensor("op_2953_strides_0"), val = tensor([1, 1])]; + tensor var_2953_pad_0 = const()[name = tensor("op_2953_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2953_dilations_0 = const()[name = tensor("op_2953_dilations_0"), val = tensor([1, 1])]; + tensor var_2953_groups_0 = const()[name = tensor("op_2953_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135316672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135611648))), name = tensor("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_2953_cast_fp16 = conv(dilations = var_2953_dilations_0, groups = var_2953_groups_0, pad = var_2953_pad_0, pad_type = var_2953_pad_type_0, strides = var_2953_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_127_cast_fp16)[name = tensor("op_2953_cast_fp16")]; + tensor var_2959_pad_type_0 = const()[name = tensor("op_2959_pad_type_0"), val = tensor("valid")]; + tensor var_2959_strides_0 = const()[name = tensor("op_2959_strides_0"), val = tensor([1, 1])]; + tensor var_2959_pad_0 = const()[name = tensor("op_2959_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2959_dilations_0 = const()[name = tensor("op_2959_dilations_0"), val = tensor([1, 1])]; + tensor var_2959_groups_0 = const()[name = tensor("op_2959_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135617600))), name = tensor("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135611776))), shape = tensor([768, 768, 1, 1])]; + tensor var_2959_cast_fp16 = conv(dilations = var_2959_dilations_0, groups = var_2959_groups_0, pad = var_2959_pad_0, pad_type = var_2959_pad_type_0, strides = var_2959_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_127_cast_fp16)[name = tensor("op_2959_cast_fp16")]; + tensor current_key_19_cast_fp16 = add(x = var_2953_cast_fp16, y = var_2959_cast_fp16)[name = tensor("current_key_19_cast_fp16")]; + tensor var_2969_pad_type_0 = const()[name = tensor("op_2969_pad_type_0"), val = tensor("valid")]; + tensor var_2969_strides_0 = const()[name = tensor("op_2969_strides_0"), val = tensor([1, 1])]; + tensor var_2969_pad_0 = const()[name = tensor("op_2969_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2969_dilations_0 = const()[name = tensor("op_2969_dilations_0"), val = tensor([1, 1])]; + tensor var_2969_groups_0 = const()[name = tensor("op_2969_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135691392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135986368))), name = tensor("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135986496)))]; + tensor var_2969_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_2969_dilations_0, groups = var_2969_groups_0, pad = var_2969_pad_0, pad_type = var_2969_pad_type_0, strides = var_2969_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_127_cast_fp16)[name = tensor("op_2969_cast_fp16")]; + tensor var_2975_pad_type_0 = const()[name = tensor("op_2975_pad_type_0"), val = tensor("valid")]; + tensor var_2975_strides_0 = const()[name = tensor("op_2975_strides_0"), val = tensor([1, 1])]; + tensor var_2975_pad_0 = const()[name = tensor("op_2975_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2975_dilations_0 = const()[name = tensor("op_2975_dilations_0"), val = tensor([1, 1])]; + tensor var_2975_groups_0 = const()[name = tensor("op_2975_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135995776))), name = tensor("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135988096))), shape = tensor([768, 768, 1, 1])]; + tensor var_2975_cast_fp16 = conv(dilations = var_2975_dilations_0, groups = var_2975_groups_0, pad = var_2975_pad_0, pad_type = var_2975_pad_type_0, strides = var_2975_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_127_cast_fp16)[name = tensor("op_2975_cast_fp16")]; + tensor current_value_19_cast_fp16 = add(x = var_2969_cast_fp16, y = var_2975_cast_fp16)[name = tensor("current_value_19_cast_fp16")]; + tensor var_2982_cast_fp16 = mul(x = var_69_cast_fp16_9, y = var_192_cast_fp16)[name = tensor("op_2982_cast_fp16")]; + tensor var_2983_cast_fp16 = mul(x = current_key_19_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2983_cast_fp16")]; + tensor key_37_cast_fp16 = add(x = var_2982_cast_fp16, y = var_2983_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor var_2986_cast_fp16 = mul(x = var_84_cast_fp16_9, y = var_192_cast_fp16)[name = tensor("op_2986_cast_fp16")]; + tensor var_2987_cast_fp16 = mul(x = current_value_19_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_2987_cast_fp16")]; + tensor value_37_cast_fp16 = add(x = var_2986_cast_fp16, y = var_2987_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_2991 = const()[name = tensor("op_2991"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_37_cast_fp16 = reshape(shape = var_2991, x = query_37_cast_fp16)[name = tensor("mh_q_37_cast_fp16")]; + tensor var_2993_to_fp16 = const()[name = tensor("op_2993_to_fp16"), val = tensor(0x1p-3)]; + tensor var_2994_cast_fp16 = mul(x = mh_q_37_cast_fp16, y = var_2993_to_fp16)[name = tensor("op_2994_cast_fp16")]; + tensor var_2997 = const()[name = tensor("op_2997"), val = tensor([1, 12, 64, 448])]; + tensor var_2998_cast_fp16 = reshape(shape = var_2997, x = key_37_cast_fp16)[name = tensor("op_2998_cast_fp16")]; + tensor mh_w_55_transpose_x_0 = const()[name = tensor("mh_w_55_transpose_x_0"), val = tensor(true)]; + tensor mh_w_55_transpose_y_0 = const()[name = tensor("mh_w_55_transpose_y_0"), val = tensor(false)]; + tensor mh_w_55_cast_fp16 = matmul(transpose_x = mh_w_55_transpose_x_0, transpose_y = mh_w_55_transpose_y_0, x = var_2994_cast_fp16, y = var_2998_cast_fp16)[name = tensor("mh_w_55_cast_fp16")]; + tensor mh_w_57_cast_fp16 = add(x = mh_w_55_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_57_cast_fp16")]; + tensor var_3006_cast_fp16 = softmax(axis = var_2891, x = mh_w_57_cast_fp16)[name = tensor("op_3006_cast_fp16")]; + tensor var_3007 = const()[name = tensor("op_3007"), val = tensor([1, 12, 64, 448])]; + tensor var_3008_cast_fp16 = reshape(shape = var_3007, x = value_37_cast_fp16)[name = tensor("op_3008_cast_fp16")]; + tensor attn_37_transpose_x_0 = const()[name = tensor("attn_37_transpose_x_0"), val = tensor(false)]; + tensor attn_37_transpose_y_0 = const()[name = tensor("attn_37_transpose_y_0"), val = tensor(true)]; + tensor attn_37_cast_fp16 = matmul(transpose_x = attn_37_transpose_x_0, transpose_y = attn_37_transpose_y_0, x = var_3008_cast_fp16, y = var_3006_cast_fp16)[name = tensor("attn_37_cast_fp16")]; + tensor var_3011 = const()[name = tensor("op_3011"), val = tensor([1, 768, 1, 1])]; + tensor input_91_cast_fp16 = reshape(shape = var_3011, x = attn_37_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_3021_pad_type_0 = const()[name = tensor("op_3021_pad_type_0"), val = tensor("valid")]; + tensor var_3021_strides_0 = const()[name = tensor("op_3021_strides_0"), val = tensor([1, 1])]; + tensor var_3021_pad_0 = const()[name = tensor("op_3021_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3021_dilations_0 = const()[name = tensor("op_3021_dilations_0"), val = tensor([1, 1])]; + tensor var_3021_groups_0 = const()[name = tensor("op_3021_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136069568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136364544))), name = tensor("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136364672)))]; + tensor var_3021_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3021_dilations_0, groups = var_3021_groups_0, pad = var_3021_pad_0, pad_type = var_3021_pad_type_0, strides = var_3021_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = tensor("op_3021_cast_fp16")]; + tensor var_3027_pad_type_0 = const()[name = tensor("op_3027_pad_type_0"), val = tensor("valid")]; + tensor var_3027_strides_0 = const()[name = tensor("op_3027_strides_0"), val = tensor([1, 1])]; + tensor var_3027_pad_0 = const()[name = tensor("op_3027_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3027_dilations_0 = const()[name = tensor("op_3027_dilations_0"), val = tensor([1, 1])]; + tensor var_3027_groups_0 = const()[name = tensor("op_3027_groups_0"), val = tensor(1)]; + tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136374016))), name = tensor("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136366272))), shape = tensor([768, 768, 1, 1])]; + tensor var_3027_cast_fp16 = conv(dilations = var_3027_dilations_0, groups = var_3027_groups_0, pad = var_3027_pad_0, pad_type = var_3027_pad_type_0, strides = var_3027_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = tensor("op_3027_cast_fp16")]; + tensor obj_133_cast_fp16 = add(x = var_3021_cast_fp16, y = var_3027_cast_fp16)[name = tensor("obj_133_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = obj_133_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor out_57_axes_0 = const()[name = tensor("out_57_axes_0"), val = tensor([1])]; + tensor var_3042_to_fp16 = const()[name = tensor("op_3042_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_3042_to_fp16, x = inputs_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; + tensor obj_135_gamma_0_to_fp16 = const()[name = tensor("obj_135_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136447808)))]; + tensor obj_135_beta_0_to_fp16 = const()[name = tensor("obj_135_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136449408)))]; + tensor obj_135_epsilon_0_to_fp16 = const()[name = tensor("obj_135_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_135_cast_fp16 = batch_norm(beta = obj_135_beta_0_to_fp16, epsilon = obj_135_epsilon_0_to_fp16, gamma = obj_135_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_135_cast_fp16")]; + tensor var_3064_pad_type_0 = const()[name = tensor("op_3064_pad_type_0"), val = tensor("valid")]; + tensor var_3064_strides_0 = const()[name = tensor("op_3064_strides_0"), val = tensor([1, 1])]; + tensor var_3064_pad_0 = const()[name = tensor("op_3064_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3064_dilations_0 = const()[name = tensor("op_3064_dilations_0"), val = tensor([1, 1])]; + tensor var_3064_groups_0 = const()[name = tensor("op_3064_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136451008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136745984))), name = tensor("layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136746112)))]; + tensor var_3064_cast_fp16 = conv(bias = layers_9_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3064_dilations_0, groups = var_3064_groups_0, pad = var_3064_pad_0, pad_type = var_3064_pad_type_0, strides = var_3064_strides_0, weight = layers_9_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_135_cast_fp16)[name = tensor("op_3064_cast_fp16")]; + tensor var_3070_pad_type_0 = const()[name = tensor("op_3070_pad_type_0"), val = tensor("valid")]; + tensor var_3070_strides_0 = const()[name = tensor("op_3070_strides_0"), val = tensor([1, 1])]; + tensor var_3070_pad_0 = const()[name = tensor("op_3070_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3070_dilations_0 = const()[name = tensor("op_3070_dilations_0"), val = tensor([1, 1])]; + tensor var_3070_groups_0 = const()[name = tensor("op_3070_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136752896))), name = tensor("layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136747712))), shape = tensor([768, 768, 1, 1])]; + tensor var_3070_cast_fp16 = conv(dilations = var_3070_dilations_0, groups = var_3070_groups_0, pad = var_3070_pad_0, pad_type = var_3070_pad_type_0, strides = var_3070_strides_0, weight = layers_9_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_135_cast_fp16)[name = tensor("op_3070_cast_fp16")]; + tensor query_39_cast_fp16 = add(x = var_3064_cast_fp16, y = var_3070_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor var_3079_pad_type_0 = const()[name = tensor("op_3079_pad_type_0"), val = tensor("valid")]; + tensor var_3079_strides_0 = const()[name = tensor("op_3079_strides_0"), val = tensor([1, 1])]; + tensor var_3079_pad_0 = const()[name = tensor("op_3079_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3079_dilations_0 = const()[name = tensor("op_3079_dilations_0"), val = tensor([1, 1])]; + tensor var_3079_groups_0 = const()[name = tensor("op_3079_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136826688))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137121664))), name = tensor("layers_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3079_cast_fp16 = conv(dilations = var_3079_dilations_0, groups = var_3079_groups_0, pad = var_3079_pad_0, pad_type = var_3079_pad_type_0, strides = var_3079_strides_0, weight = layers_9_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3079_cast_fp16")]; + tensor var_3085_pad_type_0 = const()[name = tensor("op_3085_pad_type_0"), val = tensor("valid")]; + tensor var_3085_strides_0 = const()[name = tensor("op_3085_strides_0"), val = tensor([1, 1])]; + tensor var_3085_pad_0 = const()[name = tensor("op_3085_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3085_dilations_0 = const()[name = tensor("op_3085_dilations_0"), val = tensor([1, 1])]; + tensor var_3085_groups_0 = const()[name = tensor("op_3085_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137128640))), name = tensor("layers_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137121792))), shape = tensor([768, 768, 1, 1])]; + tensor var_3085_cast_fp16 = conv(dilations = var_3085_dilations_0, groups = var_3085_groups_0, pad = var_3085_pad_0, pad_type = var_3085_pad_type_0, strides = var_3085_strides_0, weight = layers_9_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3085_cast_fp16")]; + tensor key_39_cast_fp16 = add(x = var_3079_cast_fp16, y = var_3085_cast_fp16)[name = tensor("key_39_cast_fp16")]; + tensor var_3095_pad_type_0 = const()[name = tensor("op_3095_pad_type_0"), val = tensor("valid")]; + tensor var_3095_strides_0 = const()[name = tensor("op_3095_strides_0"), val = tensor([1, 1])]; + tensor var_3095_pad_0 = const()[name = tensor("op_3095_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3095_dilations_0 = const()[name = tensor("op_3095_dilations_0"), val = tensor([1, 1])]; + tensor var_3095_groups_0 = const()[name = tensor("op_3095_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137202432))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137497408))), name = tensor("layers_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137497536)))]; + tensor var_3095_cast_fp16 = conv(bias = layers_9_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3095_dilations_0, groups = var_3095_groups_0, pad = var_3095_pad_0, pad_type = var_3095_pad_type_0, strides = var_3095_strides_0, weight = layers_9_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3095_cast_fp16")]; + tensor var_3101_pad_type_0 = const()[name = tensor("op_3101_pad_type_0"), val = tensor("valid")]; + tensor var_3101_strides_0 = const()[name = tensor("op_3101_strides_0"), val = tensor([1, 1])]; + tensor var_3101_pad_0 = const()[name = tensor("op_3101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3101_dilations_0 = const()[name = tensor("op_3101_dilations_0"), val = tensor([1, 1])]; + tensor var_3101_groups_0 = const()[name = tensor("op_3101_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137504192))), name = tensor("layers_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137499136))), shape = tensor([768, 768, 1, 1])]; + tensor var_3101_cast_fp16 = conv(dilations = var_3101_dilations_0, groups = var_3101_groups_0, pad = var_3101_pad_0, pad_type = var_3101_pad_type_0, strides = var_3101_strides_0, weight = layers_9_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3101_cast_fp16")]; + tensor value_39_cast_fp16 = add(x = var_3095_cast_fp16, y = var_3101_cast_fp16)[name = tensor("value_39_cast_fp16")]; + tensor var_3105 = const()[name = tensor("op_3105"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_39_cast_fp16 = reshape(shape = var_3105, x = query_39_cast_fp16)[name = tensor("mh_q_39_cast_fp16")]; + tensor var_3107_to_fp16 = const()[name = tensor("op_3107_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3108_cast_fp16 = mul(x = mh_q_39_cast_fp16, y = var_3107_to_fp16)[name = tensor("op_3108_cast_fp16")]; + tensor var_3111 = const()[name = tensor("op_3111"), val = tensor([1, 12, 64, 1500])]; + tensor var_3112_cast_fp16 = reshape(shape = var_3111, x = key_39_cast_fp16)[name = tensor("op_3112_cast_fp16")]; + tensor mh_w_59_transpose_x_0 = const()[name = tensor("mh_w_59_transpose_x_0"), val = tensor(true)]; + tensor mh_w_59_transpose_y_0 = const()[name = tensor("mh_w_59_transpose_y_0"), val = tensor(false)]; + tensor mh_w_59_cast_fp16 = matmul(transpose_x = mh_w_59_transpose_x_0, transpose_y = mh_w_59_transpose_y_0, x = var_3108_cast_fp16, y = var_3112_cast_fp16)[name = tensor("mh_w_59_cast_fp16")]; + tensor obj_139_cast_fp16 = softmax(axis = var_2891, x = mh_w_59_cast_fp16)[name = tensor("obj_139_cast_fp16")]; + tensor var_3116 = const()[name = tensor("op_3116"), val = tensor([1, 12, 64, 1500])]; + tensor var_3117_cast_fp16 = reshape(shape = var_3116, x = value_39_cast_fp16)[name = tensor("op_3117_cast_fp16")]; + tensor attn_39_transpose_x_0 = const()[name = tensor("attn_39_transpose_x_0"), val = tensor(false)]; + tensor attn_39_transpose_y_0 = const()[name = tensor("attn_39_transpose_y_0"), val = tensor(true)]; + tensor attn_39_cast_fp16 = matmul(transpose_x = attn_39_transpose_x_0, transpose_y = attn_39_transpose_y_0, x = var_3117_cast_fp16, y = obj_139_cast_fp16)[name = tensor("attn_39_cast_fp16")]; + tensor var_3120 = const()[name = tensor("op_3120"), val = tensor([1, 768, 1, 1])]; + tensor input_93_cast_fp16 = reshape(shape = var_3120, x = attn_39_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor var_3130_pad_type_0 = const()[name = tensor("op_3130_pad_type_0"), val = tensor("valid")]; + tensor var_3130_strides_0 = const()[name = tensor("op_3130_strides_0"), val = tensor([1, 1])]; + tensor var_3130_pad_0 = const()[name = tensor("op_3130_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3130_dilations_0 = const()[name = tensor("op_3130_dilations_0"), val = tensor([1, 1])]; + tensor var_3130_groups_0 = const()[name = tensor("op_3130_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137577984))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137872960))), name = tensor("layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137873088)))]; + tensor var_3130_cast_fp16 = conv(bias = layers_9_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3130_dilations_0, groups = var_3130_groups_0, pad = var_3130_pad_0, pad_type = var_3130_pad_type_0, strides = var_3130_strides_0, weight = layers_9_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_93_cast_fp16)[name = tensor("op_3130_cast_fp16")]; + tensor var_3136_pad_type_0 = const()[name = tensor("op_3136_pad_type_0"), val = tensor("valid")]; + tensor var_3136_strides_0 = const()[name = tensor("op_3136_strides_0"), val = tensor([1, 1])]; + tensor var_3136_pad_0 = const()[name = tensor("op_3136_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3136_dilations_0 = const()[name = tensor("op_3136_dilations_0"), val = tensor([1, 1])]; + tensor var_3136_groups_0 = const()[name = tensor("op_3136_groups_0"), val = tensor(1)]; + tensor layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137880192))), name = tensor("layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137874688))), shape = tensor([768, 768, 1, 1])]; + tensor var_3136_cast_fp16 = conv(dilations = var_3136_dilations_0, groups = var_3136_groups_0, pad = var_3136_pad_0, pad_type = var_3136_pad_type_0, strides = var_3136_strides_0, weight = layers_9_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_93_cast_fp16)[name = tensor("op_3136_cast_fp16")]; + tensor obj_137_cast_fp16 = add(x = var_3130_cast_fp16, y = var_3136_cast_fp16)[name = tensor("obj_137_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_137_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor out_59_axes_0 = const()[name = tensor("out_59_axes_0"), val = tensor([1])]; + tensor var_3150_to_fp16 = const()[name = tensor("op_3150_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_3150_to_fp16, x = inputs_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; + tensor input_95_gamma_0_to_fp16 = const()[name = tensor("input_95_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137953984)))]; + tensor input_95_beta_0_to_fp16 = const()[name = tensor("input_95_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137955584)))]; + tensor input_95_epsilon_0_to_fp16 = const()[name = tensor("input_95_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_95_cast_fp16 = batch_norm(beta = input_95_beta_0_to_fp16, epsilon = input_95_epsilon_0_to_fp16, gamma = input_95_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor var_3168_pad_type_0 = const()[name = tensor("op_3168_pad_type_0"), val = tensor("valid")]; + tensor var_3168_strides_0 = const()[name = tensor("op_3168_strides_0"), val = tensor([1, 1])]; + tensor var_3168_pad_0 = const()[name = tensor("op_3168_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3168_dilations_0 = const()[name = tensor("op_3168_dilations_0"), val = tensor([1, 1])]; + tensor var_3168_groups_0 = const()[name = tensor("op_3168_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137957184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139136896))), name = tensor("layers_9_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139137024)))]; + tensor var_3168_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_3168_dilations_0, groups = var_3168_groups_0, pad = var_3168_pad_0, pad_type = var_3168_pad_type_0, strides = var_3168_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = tensor("op_3168_cast_fp16")]; + tensor var_3174_pad_type_0 = const()[name = tensor("op_3174_pad_type_0"), val = tensor("valid")]; + tensor var_3174_strides_0 = const()[name = tensor("op_3174_strides_0"), val = tensor([1, 1])]; + tensor var_3174_pad_0 = const()[name = tensor("op_3174_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3174_dilations_0 = const()[name = tensor("op_3174_dilations_0"), val = tensor([1, 1])]; + tensor var_3174_groups_0 = const()[name = tensor("op_3174_groups_0"), val = tensor(1)]; + tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139160384))), name = tensor("layers_9_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139143232))), shape = tensor([3072, 768, 1, 1])]; + tensor var_3174_cast_fp16 = conv(dilations = var_3174_dilations_0, groups = var_3174_groups_0, pad = var_3174_pad_0, pad_type = var_3174_pad_type_0, strides = var_3174_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = tensor("op_3174_cast_fp16")]; + tensor input_97_cast_fp16 = add(x = var_3168_cast_fp16, y = var_3174_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor input_99_mode_0 = const()[name = tensor("input_99_mode_0"), val = tensor("EXACT")]; + tensor input_99_cast_fp16 = gelu(mode = input_99_mode_0, x = input_97_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_3185_pad_type_0 = const()[name = tensor("op_3185_pad_type_0"), val = tensor("valid")]; + tensor var_3185_strides_0 = const()[name = tensor("op_3185_strides_0"), val = tensor([1, 1])]; + tensor var_3185_pad_0 = const()[name = tensor("op_3185_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3185_dilations_0 = const()[name = tensor("op_3185_dilations_0"), val = tensor([1, 1])]; + tensor var_3185_groups_0 = const()[name = tensor("op_3185_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(139455360))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140635072))), name = tensor("layers_9_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140635200)))]; + tensor var_3185_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_3185_dilations_0, groups = var_3185_groups_0, pad = var_3185_pad_0, pad_type = var_3185_pad_type_0, strides = var_3185_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = tensor("op_3185_cast_fp16")]; + tensor var_3191_pad_type_0 = const()[name = tensor("op_3191_pad_type_0"), val = tensor("valid")]; + tensor var_3191_strides_0 = const()[name = tensor("op_3191_strides_0"), val = tensor([1, 1])]; + tensor var_3191_pad_0 = const()[name = tensor("op_3191_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3191_dilations_0 = const()[name = tensor("op_3191_dilations_0"), val = tensor([1, 1])]; + tensor var_3191_groups_0 = const()[name = tensor("op_3191_groups_0"), val = tensor(1)]; + tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140660480))), name = tensor("layers_9_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140636800))), shape = tensor([768, 3072, 1, 1])]; + tensor var_3191_cast_fp16 = conv(dilations = var_3191_dilations_0, groups = var_3191_groups_0, pad = var_3191_pad_0, pad_type = var_3191_pad_type_0, strides = var_3191_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = tensor("op_3191_cast_fp16")]; + tensor hidden_states_21_cast_fp16 = add(x = var_3185_cast_fp16, y = var_3191_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor var_3204 = const()[name = tensor("op_3204"), val = tensor(3)]; + tensor out_61_axes_0 = const()[name = tensor("out_61_axes_0"), val = tensor([1])]; + tensor var_3229_to_fp16 = const()[name = tensor("op_3229_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_3229_to_fp16, x = inputs_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; + tensor obj_141_gamma_0_to_fp16 = const()[name = tensor("obj_141_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140955456)))]; + tensor obj_141_beta_0_to_fp16 = const()[name = tensor("obj_141_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140957056)))]; + tensor obj_141_epsilon_0_to_fp16 = const()[name = tensor("obj_141_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_141_cast_fp16 = batch_norm(beta = obj_141_beta_0_to_fp16, epsilon = obj_141_epsilon_0_to_fp16, gamma = obj_141_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_141_cast_fp16")]; + tensor var_3251_pad_type_0 = const()[name = tensor("op_3251_pad_type_0"), val = tensor("valid")]; + tensor var_3251_strides_0 = const()[name = tensor("op_3251_strides_0"), val = tensor([1, 1])]; + tensor var_3251_pad_0 = const()[name = tensor("op_3251_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3251_dilations_0 = const()[name = tensor("op_3251_dilations_0"), val = tensor([1, 1])]; + tensor var_3251_groups_0 = const()[name = tensor("op_3251_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140958656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141253632))), name = tensor("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141253760)))]; + tensor var_3251_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3251_dilations_0, groups = var_3251_groups_0, pad = var_3251_pad_0, pad_type = var_3251_pad_type_0, strides = var_3251_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = tensor("op_3251_cast_fp16")]; + tensor var_3257_pad_type_0 = const()[name = tensor("op_3257_pad_type_0"), val = tensor("valid")]; + tensor var_3257_strides_0 = const()[name = tensor("op_3257_strides_0"), val = tensor([1, 1])]; + tensor var_3257_pad_0 = const()[name = tensor("op_3257_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3257_dilations_0 = const()[name = tensor("op_3257_dilations_0"), val = tensor([1, 1])]; + tensor var_3257_groups_0 = const()[name = tensor("op_3257_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141260352))), name = tensor("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141255360))), shape = tensor([768, 768, 1, 1])]; + tensor var_3257_cast_fp16 = conv(dilations = var_3257_dilations_0, groups = var_3257_groups_0, pad = var_3257_pad_0, pad_type = var_3257_pad_type_0, strides = var_3257_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = tensor("op_3257_cast_fp16")]; + tensor query_41_cast_fp16 = add(x = var_3251_cast_fp16, y = var_3257_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor var_3266_pad_type_0 = const()[name = tensor("op_3266_pad_type_0"), val = tensor("valid")]; + tensor var_3266_strides_0 = const()[name = tensor("op_3266_strides_0"), val = tensor([1, 1])]; + tensor var_3266_pad_0 = const()[name = tensor("op_3266_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3266_dilations_0 = const()[name = tensor("op_3266_dilations_0"), val = tensor([1, 1])]; + tensor var_3266_groups_0 = const()[name = tensor("op_3266_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141334144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141629120))), name = tensor("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3266_cast_fp16 = conv(dilations = var_3266_dilations_0, groups = var_3266_groups_0, pad = var_3266_pad_0, pad_type = var_3266_pad_type_0, strides = var_3266_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = tensor("op_3266_cast_fp16")]; + tensor var_3272_pad_type_0 = const()[name = tensor("op_3272_pad_type_0"), val = tensor("valid")]; + tensor var_3272_strides_0 = const()[name = tensor("op_3272_strides_0"), val = tensor([1, 1])]; + tensor var_3272_pad_0 = const()[name = tensor("op_3272_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3272_dilations_0 = const()[name = tensor("op_3272_dilations_0"), val = tensor([1, 1])]; + tensor var_3272_groups_0 = const()[name = tensor("op_3272_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141634688))), name = tensor("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141629248))), shape = tensor([768, 768, 1, 1])]; + tensor var_3272_cast_fp16 = conv(dilations = var_3272_dilations_0, groups = var_3272_groups_0, pad = var_3272_pad_0, pad_type = var_3272_pad_type_0, strides = var_3272_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = tensor("op_3272_cast_fp16")]; + tensor current_key_21_cast_fp16 = add(x = var_3266_cast_fp16, y = var_3272_cast_fp16)[name = tensor("current_key_21_cast_fp16")]; + tensor var_3282_pad_type_0 = const()[name = tensor("op_3282_pad_type_0"), val = tensor("valid")]; + tensor var_3282_strides_0 = const()[name = tensor("op_3282_strides_0"), val = tensor([1, 1])]; + tensor var_3282_pad_0 = const()[name = tensor("op_3282_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3282_dilations_0 = const()[name = tensor("op_3282_dilations_0"), val = tensor([1, 1])]; + tensor var_3282_groups_0 = const()[name = tensor("op_3282_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(141708480))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142003456))), name = tensor("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142003584)))]; + tensor var_3282_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3282_dilations_0, groups = var_3282_groups_0, pad = var_3282_pad_0, pad_type = var_3282_pad_type_0, strides = var_3282_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_141_cast_fp16)[name = tensor("op_3282_cast_fp16")]; + tensor var_3288_pad_type_0 = const()[name = tensor("op_3288_pad_type_0"), val = tensor("valid")]; + tensor var_3288_strides_0 = const()[name = tensor("op_3288_strides_0"), val = tensor([1, 1])]; + tensor var_3288_pad_0 = const()[name = tensor("op_3288_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3288_dilations_0 = const()[name = tensor("op_3288_dilations_0"), val = tensor([1, 1])]; + tensor var_3288_groups_0 = const()[name = tensor("op_3288_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142011968))), name = tensor("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142005184))), shape = tensor([768, 768, 1, 1])]; + tensor var_3288_cast_fp16 = conv(dilations = var_3288_dilations_0, groups = var_3288_groups_0, pad = var_3288_pad_0, pad_type = var_3288_pad_type_0, strides = var_3288_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_141_cast_fp16)[name = tensor("op_3288_cast_fp16")]; + tensor current_value_21_cast_fp16 = add(x = var_3282_cast_fp16, y = var_3288_cast_fp16)[name = tensor("current_value_21_cast_fp16")]; + tensor var_3295_cast_fp16 = mul(x = var_69_cast_fp16_10, y = var_192_cast_fp16)[name = tensor("op_3295_cast_fp16")]; + tensor var_3296_cast_fp16 = mul(x = current_key_21_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3296_cast_fp16")]; + tensor key_41_cast_fp16 = add(x = var_3295_cast_fp16, y = var_3296_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor var_3299_cast_fp16 = mul(x = var_84_cast_fp16_10, y = var_192_cast_fp16)[name = tensor("op_3299_cast_fp16")]; + tensor var_3300_cast_fp16 = mul(x = current_value_21_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3300_cast_fp16")]; + tensor value_41_cast_fp16 = add(x = var_3299_cast_fp16, y = var_3300_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_3304 = const()[name = tensor("op_3304"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_41_cast_fp16 = reshape(shape = var_3304, x = query_41_cast_fp16)[name = tensor("mh_q_41_cast_fp16")]; + tensor var_3306_to_fp16 = const()[name = tensor("op_3306_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3307_cast_fp16 = mul(x = mh_q_41_cast_fp16, y = var_3306_to_fp16)[name = tensor("op_3307_cast_fp16")]; + tensor var_3310 = const()[name = tensor("op_3310"), val = tensor([1, 12, 64, 448])]; + tensor var_3311_cast_fp16 = reshape(shape = var_3310, x = key_41_cast_fp16)[name = tensor("op_3311_cast_fp16")]; + tensor mh_w_61_transpose_x_0 = const()[name = tensor("mh_w_61_transpose_x_0"), val = tensor(true)]; + tensor mh_w_61_transpose_y_0 = const()[name = tensor("mh_w_61_transpose_y_0"), val = tensor(false)]; + tensor mh_w_61_cast_fp16 = matmul(transpose_x = mh_w_61_transpose_x_0, transpose_y = mh_w_61_transpose_y_0, x = var_3307_cast_fp16, y = var_3311_cast_fp16)[name = tensor("mh_w_61_cast_fp16")]; + tensor mh_w_63_cast_fp16 = add(x = mh_w_61_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_63_cast_fp16")]; + tensor var_3319_cast_fp16 = softmax(axis = var_3204, x = mh_w_63_cast_fp16)[name = tensor("op_3319_cast_fp16")]; + tensor var_3320 = const()[name = tensor("op_3320"), val = tensor([1, 12, 64, 448])]; + tensor var_3321_cast_fp16 = reshape(shape = var_3320, x = value_41_cast_fp16)[name = tensor("op_3321_cast_fp16")]; + tensor attn_41_transpose_x_0 = const()[name = tensor("attn_41_transpose_x_0"), val = tensor(false)]; + tensor attn_41_transpose_y_0 = const()[name = tensor("attn_41_transpose_y_0"), val = tensor(true)]; + tensor attn_41_cast_fp16 = matmul(transpose_x = attn_41_transpose_x_0, transpose_y = attn_41_transpose_y_0, x = var_3321_cast_fp16, y = var_3319_cast_fp16)[name = tensor("attn_41_cast_fp16")]; + tensor var_3324 = const()[name = tensor("op_3324"), val = tensor([1, 768, 1, 1])]; + tensor input_101_cast_fp16 = reshape(shape = var_3324, x = attn_41_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor var_3334_pad_type_0 = const()[name = tensor("op_3334_pad_type_0"), val = tensor("valid")]; + tensor var_3334_strides_0 = const()[name = tensor("op_3334_strides_0"), val = tensor([1, 1])]; + tensor var_3334_pad_0 = const()[name = tensor("op_3334_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3334_dilations_0 = const()[name = tensor("op_3334_dilations_0"), val = tensor([1, 1])]; + tensor var_3334_groups_0 = const()[name = tensor("op_3334_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142085760))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142380736))), name = tensor("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142380864)))]; + tensor var_3334_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3334_dilations_0, groups = var_3334_groups_0, pad = var_3334_pad_0, pad_type = var_3334_pad_type_0, strides = var_3334_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_101_cast_fp16)[name = tensor("op_3334_cast_fp16")]; + tensor var_3340_pad_type_0 = const()[name = tensor("op_3340_pad_type_0"), val = tensor("valid")]; + tensor var_3340_strides_0 = const()[name = tensor("op_3340_strides_0"), val = tensor([1, 1])]; + tensor var_3340_pad_0 = const()[name = tensor("op_3340_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3340_dilations_0 = const()[name = tensor("op_3340_dilations_0"), val = tensor([1, 1])]; + tensor var_3340_groups_0 = const()[name = tensor("op_3340_groups_0"), val = tensor(1)]; + tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142389312))), name = tensor("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142382464))), shape = tensor([768, 768, 1, 1])]; + tensor var_3340_cast_fp16 = conv(dilations = var_3340_dilations_0, groups = var_3340_groups_0, pad = var_3340_pad_0, pad_type = var_3340_pad_type_0, strides = var_3340_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_101_cast_fp16)[name = tensor("op_3340_cast_fp16")]; + tensor obj_147_cast_fp16 = add(x = var_3334_cast_fp16, y = var_3340_cast_fp16)[name = tensor("obj_147_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_147_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor out_63_axes_0 = const()[name = tensor("out_63_axes_0"), val = tensor([1])]; + tensor var_3355_to_fp16 = const()[name = tensor("op_3355_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_3355_to_fp16, x = inputs_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; + tensor obj_149_gamma_0_to_fp16 = const()[name = tensor("obj_149_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142463104)))]; + tensor obj_149_beta_0_to_fp16 = const()[name = tensor("obj_149_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142464704)))]; + tensor obj_149_epsilon_0_to_fp16 = const()[name = tensor("obj_149_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_149_cast_fp16 = batch_norm(beta = obj_149_beta_0_to_fp16, epsilon = obj_149_epsilon_0_to_fp16, gamma = obj_149_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("obj_149_cast_fp16")]; + tensor var_3377_pad_type_0 = const()[name = tensor("op_3377_pad_type_0"), val = tensor("valid")]; + tensor var_3377_strides_0 = const()[name = tensor("op_3377_strides_0"), val = tensor([1, 1])]; + tensor var_3377_pad_0 = const()[name = tensor("op_3377_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3377_dilations_0 = const()[name = tensor("op_3377_dilations_0"), val = tensor([1, 1])]; + tensor var_3377_groups_0 = const()[name = tensor("op_3377_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142466304))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142761280))), name = tensor("layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142761408)))]; + tensor var_3377_cast_fp16 = conv(bias = layers_10_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3377_dilations_0, groups = var_3377_groups_0, pad = var_3377_pad_0, pad_type = var_3377_pad_type_0, strides = var_3377_strides_0, weight = layers_10_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_149_cast_fp16)[name = tensor("op_3377_cast_fp16")]; + tensor var_3383_pad_type_0 = const()[name = tensor("op_3383_pad_type_0"), val = tensor("valid")]; + tensor var_3383_strides_0 = const()[name = tensor("op_3383_strides_0"), val = tensor([1, 1])]; + tensor var_3383_pad_0 = const()[name = tensor("op_3383_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3383_dilations_0 = const()[name = tensor("op_3383_dilations_0"), val = tensor([1, 1])]; + tensor var_3383_groups_0 = const()[name = tensor("op_3383_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142768320))), name = tensor("layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142763008))), shape = tensor([768, 768, 1, 1])]; + tensor var_3383_cast_fp16 = conv(dilations = var_3383_dilations_0, groups = var_3383_groups_0, pad = var_3383_pad_0, pad_type = var_3383_pad_type_0, strides = var_3383_strides_0, weight = layers_10_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_149_cast_fp16)[name = tensor("op_3383_cast_fp16")]; + tensor query_43_cast_fp16 = add(x = var_3377_cast_fp16, y = var_3383_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor var_3392_pad_type_0 = const()[name = tensor("op_3392_pad_type_0"), val = tensor("valid")]; + tensor var_3392_strides_0 = const()[name = tensor("op_3392_strides_0"), val = tensor([1, 1])]; + tensor var_3392_pad_0 = const()[name = tensor("op_3392_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3392_dilations_0 = const()[name = tensor("op_3392_dilations_0"), val = tensor([1, 1])]; + tensor var_3392_groups_0 = const()[name = tensor("op_3392_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142842112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143137088))), name = tensor("layers_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3392_cast_fp16 = conv(dilations = var_3392_dilations_0, groups = var_3392_groups_0, pad = var_3392_pad_0, pad_type = var_3392_pad_type_0, strides = var_3392_strides_0, weight = layers_10_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3392_cast_fp16")]; + tensor var_3398_pad_type_0 = const()[name = tensor("op_3398_pad_type_0"), val = tensor("valid")]; + tensor var_3398_strides_0 = const()[name = tensor("op_3398_strides_0"), val = tensor([1, 1])]; + tensor var_3398_pad_0 = const()[name = tensor("op_3398_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3398_dilations_0 = const()[name = tensor("op_3398_dilations_0"), val = tensor([1, 1])]; + tensor var_3398_groups_0 = const()[name = tensor("op_3398_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143144640))), name = tensor("layers_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143137216))), shape = tensor([768, 768, 1, 1])]; + tensor var_3398_cast_fp16 = conv(dilations = var_3398_dilations_0, groups = var_3398_groups_0, pad = var_3398_pad_0, pad_type = var_3398_pad_type_0, strides = var_3398_strides_0, weight = layers_10_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3398_cast_fp16")]; + tensor key_43_cast_fp16 = add(x = var_3392_cast_fp16, y = var_3398_cast_fp16)[name = tensor("key_43_cast_fp16")]; + tensor var_3408_pad_type_0 = const()[name = tensor("op_3408_pad_type_0"), val = tensor("valid")]; + tensor var_3408_strides_0 = const()[name = tensor("op_3408_strides_0"), val = tensor([1, 1])]; + tensor var_3408_pad_0 = const()[name = tensor("op_3408_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3408_dilations_0 = const()[name = tensor("op_3408_dilations_0"), val = tensor([1, 1])]; + tensor var_3408_groups_0 = const()[name = tensor("op_3408_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143218432))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143513408))), name = tensor("layers_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143513536)))]; + tensor var_3408_cast_fp16 = conv(bias = layers_10_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3408_dilations_0, groups = var_3408_groups_0, pad = var_3408_pad_0, pad_type = var_3408_pad_type_0, strides = var_3408_strides_0, weight = layers_10_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3408_cast_fp16")]; + tensor var_3414_pad_type_0 = const()[name = tensor("op_3414_pad_type_0"), val = tensor("valid")]; + tensor var_3414_strides_0 = const()[name = tensor("op_3414_strides_0"), val = tensor([1, 1])]; + tensor var_3414_pad_0 = const()[name = tensor("op_3414_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3414_dilations_0 = const()[name = tensor("op_3414_dilations_0"), val = tensor([1, 1])]; + tensor var_3414_groups_0 = const()[name = tensor("op_3414_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143521088))), name = tensor("layers_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143515136))), shape = tensor([768, 768, 1, 1])]; + tensor var_3414_cast_fp16 = conv(dilations = var_3414_dilations_0, groups = var_3414_groups_0, pad = var_3414_pad_0, pad_type = var_3414_pad_type_0, strides = var_3414_strides_0, weight = layers_10_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3414_cast_fp16")]; + tensor value_43_cast_fp16 = add(x = var_3408_cast_fp16, y = var_3414_cast_fp16)[name = tensor("value_43_cast_fp16")]; + tensor var_3418 = const()[name = tensor("op_3418"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_43_cast_fp16 = reshape(shape = var_3418, x = query_43_cast_fp16)[name = tensor("mh_q_43_cast_fp16")]; + tensor var_3420_to_fp16 = const()[name = tensor("op_3420_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3421_cast_fp16 = mul(x = mh_q_43_cast_fp16, y = var_3420_to_fp16)[name = tensor("op_3421_cast_fp16")]; + tensor var_3424 = const()[name = tensor("op_3424"), val = tensor([1, 12, 64, 1500])]; + tensor var_3425_cast_fp16 = reshape(shape = var_3424, x = key_43_cast_fp16)[name = tensor("op_3425_cast_fp16")]; + tensor mh_w_65_transpose_x_0 = const()[name = tensor("mh_w_65_transpose_x_0"), val = tensor(true)]; + tensor mh_w_65_transpose_y_0 = const()[name = tensor("mh_w_65_transpose_y_0"), val = tensor(false)]; + tensor mh_w_65_cast_fp16 = matmul(transpose_x = mh_w_65_transpose_x_0, transpose_y = mh_w_65_transpose_y_0, x = var_3421_cast_fp16, y = var_3425_cast_fp16)[name = tensor("mh_w_65_cast_fp16")]; + tensor obj_153_cast_fp16 = softmax(axis = var_3204, x = mh_w_65_cast_fp16)[name = tensor("obj_153_cast_fp16")]; + tensor var_3429 = const()[name = tensor("op_3429"), val = tensor([1, 12, 64, 1500])]; + tensor var_3430_cast_fp16 = reshape(shape = var_3429, x = value_43_cast_fp16)[name = tensor("op_3430_cast_fp16")]; + tensor attn_43_transpose_x_0 = const()[name = tensor("attn_43_transpose_x_0"), val = tensor(false)]; + tensor attn_43_transpose_y_0 = const()[name = tensor("attn_43_transpose_y_0"), val = tensor(true)]; + tensor attn_43_cast_fp16 = matmul(transpose_x = attn_43_transpose_x_0, transpose_y = attn_43_transpose_y_0, x = var_3430_cast_fp16, y = obj_153_cast_fp16)[name = tensor("attn_43_cast_fp16")]; + tensor var_3433 = const()[name = tensor("op_3433"), val = tensor([1, 768, 1, 1])]; + tensor input_103_cast_fp16 = reshape(shape = var_3433, x = attn_43_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor var_3443_pad_type_0 = const()[name = tensor("op_3443_pad_type_0"), val = tensor("valid")]; + tensor var_3443_strides_0 = const()[name = tensor("op_3443_strides_0"), val = tensor([1, 1])]; + tensor var_3443_pad_0 = const()[name = tensor("op_3443_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3443_dilations_0 = const()[name = tensor("op_3443_dilations_0"), val = tensor([1, 1])]; + tensor var_3443_groups_0 = const()[name = tensor("op_3443_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143594880))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143889856))), name = tensor("layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143889984)))]; + tensor var_3443_cast_fp16 = conv(bias = layers_10_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3443_dilations_0, groups = var_3443_groups_0, pad = var_3443_pad_0, pad_type = var_3443_pad_type_0, strides = var_3443_strides_0, weight = layers_10_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = tensor("op_3443_cast_fp16")]; + tensor var_3449_pad_type_0 = const()[name = tensor("op_3449_pad_type_0"), val = tensor("valid")]; + tensor var_3449_strides_0 = const()[name = tensor("op_3449_strides_0"), val = tensor([1, 1])]; + tensor var_3449_pad_0 = const()[name = tensor("op_3449_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3449_dilations_0 = const()[name = tensor("op_3449_dilations_0"), val = tensor([1, 1])]; + tensor var_3449_groups_0 = const()[name = tensor("op_3449_groups_0"), val = tensor(1)]; + tensor layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143898240))), name = tensor("layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143891584))), shape = tensor([768, 768, 1, 1])]; + tensor var_3449_cast_fp16 = conv(dilations = var_3449_dilations_0, groups = var_3449_groups_0, pad = var_3449_pad_0, pad_type = var_3449_pad_type_0, strides = var_3449_strides_0, weight = layers_10_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = tensor("op_3449_cast_fp16")]; + tensor obj_151_cast_fp16 = add(x = var_3443_cast_fp16, y = var_3449_cast_fp16)[name = tensor("obj_151_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = obj_151_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor out_65_axes_0 = const()[name = tensor("out_65_axes_0"), val = tensor([1])]; + tensor var_3463_to_fp16 = const()[name = tensor("op_3463_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_3463_to_fp16, x = inputs_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; + tensor input_105_gamma_0_to_fp16 = const()[name = tensor("input_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143972032)))]; + tensor input_105_beta_0_to_fp16 = const()[name = tensor("input_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143973632)))]; + tensor input_105_epsilon_0_to_fp16 = const()[name = tensor("input_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_105_cast_fp16 = batch_norm(beta = input_105_beta_0_to_fp16, epsilon = input_105_epsilon_0_to_fp16, gamma = input_105_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor var_3481_pad_type_0 = const()[name = tensor("op_3481_pad_type_0"), val = tensor("valid")]; + tensor var_3481_strides_0 = const()[name = tensor("op_3481_strides_0"), val = tensor([1, 1])]; + tensor var_3481_pad_0 = const()[name = tensor("op_3481_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3481_dilations_0 = const()[name = tensor("op_3481_dilations_0"), val = tensor([1, 1])]; + tensor var_3481_groups_0 = const()[name = tensor("op_3481_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143975232))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145154944))), name = tensor("layers_10_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145155072)))]; + tensor var_3481_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_3481_dilations_0, groups = var_3481_groups_0, pad = var_3481_pad_0, pad_type = var_3481_pad_type_0, strides = var_3481_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = tensor("op_3481_cast_fp16")]; + tensor var_3487_pad_type_0 = const()[name = tensor("op_3487_pad_type_0"), val = tensor("valid")]; + tensor var_3487_strides_0 = const()[name = tensor("op_3487_strides_0"), val = tensor([1, 1])]; + tensor var_3487_pad_0 = const()[name = tensor("op_3487_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3487_dilations_0 = const()[name = tensor("op_3487_dilations_0"), val = tensor([1, 1])]; + tensor var_3487_groups_0 = const()[name = tensor("op_3487_groups_0"), val = tensor(1)]; + tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145182912))), name = tensor("layers_10_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145161280))), shape = tensor([3072, 768, 1, 1])]; + tensor var_3487_cast_fp16 = conv(dilations = var_3487_dilations_0, groups = var_3487_groups_0, pad = var_3487_pad_0, pad_type = var_3487_pad_type_0, strides = var_3487_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = tensor("op_3487_cast_fp16")]; + tensor input_107_cast_fp16 = add(x = var_3481_cast_fp16, y = var_3487_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor input_109_mode_0 = const()[name = tensor("input_109_mode_0"), val = tensor("EXACT")]; + tensor input_109_cast_fp16 = gelu(mode = input_109_mode_0, x = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor var_3498_pad_type_0 = const()[name = tensor("op_3498_pad_type_0"), val = tensor("valid")]; + tensor var_3498_strides_0 = const()[name = tensor("op_3498_strides_0"), val = tensor([1, 1])]; + tensor var_3498_pad_0 = const()[name = tensor("op_3498_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3498_dilations_0 = const()[name = tensor("op_3498_dilations_0"), val = tensor([1, 1])]; + tensor var_3498_groups_0 = const()[name = tensor("op_3498_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145477888))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146657600))), name = tensor("layers_10_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146657728)))]; + tensor var_3498_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_3498_dilations_0, groups = var_3498_groups_0, pad = var_3498_pad_0, pad_type = var_3498_pad_type_0, strides = var_3498_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_109_cast_fp16)[name = tensor("op_3498_cast_fp16")]; + tensor var_3504_pad_type_0 = const()[name = tensor("op_3504_pad_type_0"), val = tensor("valid")]; + tensor var_3504_strides_0 = const()[name = tensor("op_3504_strides_0"), val = tensor([1, 1])]; + tensor var_3504_pad_0 = const()[name = tensor("op_3504_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3504_dilations_0 = const()[name = tensor("op_3504_dilations_0"), val = tensor([1, 1])]; + tensor var_3504_groups_0 = const()[name = tensor("op_3504_groups_0"), val = tensor(1)]; + tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146684928))), name = tensor("layers_10_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146659328))), shape = tensor([768, 3072, 1, 1])]; + tensor var_3504_cast_fp16 = conv(dilations = var_3504_dilations_0, groups = var_3504_groups_0, pad = var_3504_pad_0, pad_type = var_3504_pad_type_0, strides = var_3504_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_109_cast_fp16)[name = tensor("op_3504_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = add(x = var_3498_cast_fp16, y = var_3504_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor var_3517 = const()[name = tensor("op_3517"), val = tensor(3)]; + tensor out_67_axes_0 = const()[name = tensor("out_67_axes_0"), val = tensor([1])]; + tensor var_3542_to_fp16 = const()[name = tensor("op_3542_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_3542_to_fp16, x = inputs_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; + tensor obj_155_gamma_0_to_fp16 = const()[name = tensor("obj_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146979904)))]; + tensor obj_155_beta_0_to_fp16 = const()[name = tensor("obj_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146981504)))]; + tensor obj_155_epsilon_0_to_fp16 = const()[name = tensor("obj_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_155_cast_fp16 = batch_norm(beta = obj_155_beta_0_to_fp16, epsilon = obj_155_epsilon_0_to_fp16, gamma = obj_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("obj_155_cast_fp16")]; + tensor var_3564_pad_type_0 = const()[name = tensor("op_3564_pad_type_0"), val = tensor("valid")]; + tensor var_3564_strides_0 = const()[name = tensor("op_3564_strides_0"), val = tensor([1, 1])]; + tensor var_3564_pad_0 = const()[name = tensor("op_3564_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3564_dilations_0 = const()[name = tensor("op_3564_dilations_0"), val = tensor([1, 1])]; + tensor var_3564_groups_0 = const()[name = tensor("op_3564_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146983104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147278080))), name = tensor("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147278208)))]; + tensor var_3564_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3564_dilations_0, groups = var_3564_groups_0, pad = var_3564_pad_0, pad_type = var_3564_pad_type_0, strides = var_3564_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_155_cast_fp16)[name = tensor("op_3564_cast_fp16")]; + tensor var_3570_pad_type_0 = const()[name = tensor("op_3570_pad_type_0"), val = tensor("valid")]; + tensor var_3570_strides_0 = const()[name = tensor("op_3570_strides_0"), val = tensor([1, 1])]; + tensor var_3570_pad_0 = const()[name = tensor("op_3570_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3570_dilations_0 = const()[name = tensor("op_3570_dilations_0"), val = tensor([1, 1])]; + tensor var_3570_groups_0 = const()[name = tensor("op_3570_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147284992))), name = tensor("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147279808))), shape = tensor([768, 768, 1, 1])]; + tensor var_3570_cast_fp16 = conv(dilations = var_3570_dilations_0, groups = var_3570_groups_0, pad = var_3570_pad_0, pad_type = var_3570_pad_type_0, strides = var_3570_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_155_cast_fp16)[name = tensor("op_3570_cast_fp16")]; + tensor query_45_cast_fp16 = add(x = var_3564_cast_fp16, y = var_3570_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor var_3579_pad_type_0 = const()[name = tensor("op_3579_pad_type_0"), val = tensor("valid")]; + tensor var_3579_strides_0 = const()[name = tensor("op_3579_strides_0"), val = tensor([1, 1])]; + tensor var_3579_pad_0 = const()[name = tensor("op_3579_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3579_dilations_0 = const()[name = tensor("op_3579_dilations_0"), val = tensor([1, 1])]; + tensor var_3579_groups_0 = const()[name = tensor("op_3579_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147358784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147653760))), name = tensor("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3579_cast_fp16 = conv(dilations = var_3579_dilations_0, groups = var_3579_groups_0, pad = var_3579_pad_0, pad_type = var_3579_pad_type_0, strides = var_3579_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_155_cast_fp16)[name = tensor("op_3579_cast_fp16")]; + tensor var_3585_pad_type_0 = const()[name = tensor("op_3585_pad_type_0"), val = tensor("valid")]; + tensor var_3585_strides_0 = const()[name = tensor("op_3585_strides_0"), val = tensor([1, 1])]; + tensor var_3585_pad_0 = const()[name = tensor("op_3585_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3585_dilations_0 = const()[name = tensor("op_3585_dilations_0"), val = tensor([1, 1])]; + tensor var_3585_groups_0 = const()[name = tensor("op_3585_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147658880))), name = tensor("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147653888))), shape = tensor([768, 768, 1, 1])]; + tensor var_3585_cast_fp16 = conv(dilations = var_3585_dilations_0, groups = var_3585_groups_0, pad = var_3585_pad_0, pad_type = var_3585_pad_type_0, strides = var_3585_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_155_cast_fp16)[name = tensor("op_3585_cast_fp16")]; + tensor current_key_cast_fp16 = add(x = var_3579_cast_fp16, y = var_3585_cast_fp16)[name = tensor("current_key_cast_fp16")]; + tensor var_3595_pad_type_0 = const()[name = tensor("op_3595_pad_type_0"), val = tensor("valid")]; + tensor var_3595_strides_0 = const()[name = tensor("op_3595_strides_0"), val = tensor([1, 1])]; + tensor var_3595_pad_0 = const()[name = tensor("op_3595_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3595_dilations_0 = const()[name = tensor("op_3595_dilations_0"), val = tensor([1, 1])]; + tensor var_3595_groups_0 = const()[name = tensor("op_3595_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147732672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148027648))), name = tensor("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148027776)))]; + tensor var_3595_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3595_dilations_0, groups = var_3595_groups_0, pad = var_3595_pad_0, pad_type = var_3595_pad_type_0, strides = var_3595_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_155_cast_fp16)[name = tensor("op_3595_cast_fp16")]; + tensor var_3601_pad_type_0 = const()[name = tensor("op_3601_pad_type_0"), val = tensor("valid")]; + tensor var_3601_strides_0 = const()[name = tensor("op_3601_strides_0"), val = tensor([1, 1])]; + tensor var_3601_pad_0 = const()[name = tensor("op_3601_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3601_dilations_0 = const()[name = tensor("op_3601_dilations_0"), val = tensor([1, 1])]; + tensor var_3601_groups_0 = const()[name = tensor("op_3601_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148035968))), name = tensor("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148029376))), shape = tensor([768, 768, 1, 1])]; + tensor var_3601_cast_fp16 = conv(dilations = var_3601_dilations_0, groups = var_3601_groups_0, pad = var_3601_pad_0, pad_type = var_3601_pad_type_0, strides = var_3601_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_155_cast_fp16)[name = tensor("op_3601_cast_fp16")]; + tensor current_value_cast_fp16 = add(x = var_3595_cast_fp16, y = var_3601_cast_fp16)[name = tensor("current_value_cast_fp16")]; + tensor var_3608_cast_fp16 = mul(x = var_69_cast_fp16_11, y = var_192_cast_fp16)[name = tensor("op_3608_cast_fp16")]; + tensor var_3609_cast_fp16 = mul(x = current_key_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3609_cast_fp16")]; + tensor key_45_cast_fp16 = add(x = var_3608_cast_fp16, y = var_3609_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor var_3612_cast_fp16 = mul(x = var_84_cast_fp16_11, y = var_192_cast_fp16)[name = tensor("op_3612_cast_fp16")]; + tensor var_3613_cast_fp16 = mul(x = current_value_cast_fp16, y = var_190_cast_fp16)[name = tensor("op_3613_cast_fp16")]; + tensor value_45_cast_fp16 = add(x = var_3612_cast_fp16, y = var_3613_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_3617 = const()[name = tensor("op_3617"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_45_cast_fp16 = reshape(shape = var_3617, x = query_45_cast_fp16)[name = tensor("mh_q_45_cast_fp16")]; + tensor var_3619_to_fp16 = const()[name = tensor("op_3619_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3620_cast_fp16 = mul(x = mh_q_45_cast_fp16, y = var_3619_to_fp16)[name = tensor("op_3620_cast_fp16")]; + tensor var_3623 = const()[name = tensor("op_3623"), val = tensor([1, 12, 64, 448])]; + tensor var_3624_cast_fp16 = reshape(shape = var_3623, x = key_45_cast_fp16)[name = tensor("op_3624_cast_fp16")]; + tensor mh_w_67_transpose_x_0 = const()[name = tensor("mh_w_67_transpose_x_0"), val = tensor(true)]; + tensor mh_w_67_transpose_y_0 = const()[name = tensor("mh_w_67_transpose_y_0"), val = tensor(false)]; + tensor mh_w_67_cast_fp16 = matmul(transpose_x = mh_w_67_transpose_x_0, transpose_y = mh_w_67_transpose_y_0, x = var_3620_cast_fp16, y = var_3624_cast_fp16)[name = tensor("mh_w_67_cast_fp16")]; + tensor mh_w_69_cast_fp16 = add(x = mh_w_67_cast_fp16, y = var_214_cast_fp16)[name = tensor("mh_w_69_cast_fp16")]; + tensor var_3632_cast_fp16 = softmax(axis = var_3517, x = mh_w_69_cast_fp16)[name = tensor("op_3632_cast_fp16")]; + tensor var_3633 = const()[name = tensor("op_3633"), val = tensor([1, 12, 64, 448])]; + tensor var_3634_cast_fp16 = reshape(shape = var_3633, x = value_45_cast_fp16)[name = tensor("op_3634_cast_fp16")]; + tensor attn_45_transpose_x_0 = const()[name = tensor("attn_45_transpose_x_0"), val = tensor(false)]; + tensor attn_45_transpose_y_0 = const()[name = tensor("attn_45_transpose_y_0"), val = tensor(true)]; + tensor attn_45_cast_fp16 = matmul(transpose_x = attn_45_transpose_x_0, transpose_y = attn_45_transpose_y_0, x = var_3634_cast_fp16, y = var_3632_cast_fp16)[name = tensor("attn_45_cast_fp16")]; + tensor var_3637 = const()[name = tensor("op_3637"), val = tensor([1, 768, 1, 1])]; + tensor input_111_cast_fp16 = reshape(shape = var_3637, x = attn_45_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_3647_pad_type_0 = const()[name = tensor("op_3647_pad_type_0"), val = tensor("valid")]; + tensor var_3647_strides_0 = const()[name = tensor("op_3647_strides_0"), val = tensor([1, 1])]; + tensor var_3647_pad_0 = const()[name = tensor("op_3647_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3647_dilations_0 = const()[name = tensor("op_3647_dilations_0"), val = tensor([1, 1])]; + tensor var_3647_groups_0 = const()[name = tensor("op_3647_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148109760))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148404736))), name = tensor("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148404864)))]; + tensor var_3647_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3647_dilations_0, groups = var_3647_groups_0, pad = var_3647_pad_0, pad_type = var_3647_pad_type_0, strides = var_3647_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = tensor("op_3647_cast_fp16")]; + tensor var_3653_pad_type_0 = const()[name = tensor("op_3653_pad_type_0"), val = tensor("valid")]; + tensor var_3653_strides_0 = const()[name = tensor("op_3653_strides_0"), val = tensor([1, 1])]; + tensor var_3653_pad_0 = const()[name = tensor("op_3653_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3653_dilations_0 = const()[name = tensor("op_3653_dilations_0"), val = tensor([1, 1])]; + tensor var_3653_groups_0 = const()[name = tensor("op_3653_groups_0"), val = tensor(1)]; + tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148415872))), name = tensor("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148406464))), shape = tensor([768, 768, 1, 1])]; + tensor var_3653_cast_fp16 = conv(dilations = var_3653_dilations_0, groups = var_3653_groups_0, pad = var_3653_pad_0, pad_type = var_3653_pad_type_0, strides = var_3653_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = tensor("op_3653_cast_fp16")]; + tensor obj_161_cast_fp16 = add(x = var_3647_cast_fp16, y = var_3653_cast_fp16)[name = tensor("obj_161_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = obj_161_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor out_69_axes_0 = const()[name = tensor("out_69_axes_0"), val = tensor([1])]; + tensor var_3668_to_fp16 = const()[name = tensor("op_3668_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_3668_to_fp16, x = inputs_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; + tensor obj_163_gamma_0_to_fp16 = const()[name = tensor("obj_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148489664)))]; + tensor obj_163_beta_0_to_fp16 = const()[name = tensor("obj_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148491264)))]; + tensor obj_163_epsilon_0_to_fp16 = const()[name = tensor("obj_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_163_cast_fp16 = batch_norm(beta = obj_163_beta_0_to_fp16, epsilon = obj_163_epsilon_0_to_fp16, gamma = obj_163_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_163_cast_fp16")]; + tensor var_3690_pad_type_0 = const()[name = tensor("op_3690_pad_type_0"), val = tensor("valid")]; + tensor var_3690_strides_0 = const()[name = tensor("op_3690_strides_0"), val = tensor([1, 1])]; + tensor var_3690_pad_0 = const()[name = tensor("op_3690_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3690_dilations_0 = const()[name = tensor("op_3690_dilations_0"), val = tensor([1, 1])]; + tensor var_3690_groups_0 = const()[name = tensor("op_3690_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148492864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148787840))), name = tensor("layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148787968)))]; + tensor var_3690_cast_fp16 = conv(bias = layers_11_encoder_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3690_dilations_0, groups = var_3690_groups_0, pad = var_3690_pad_0, pad_type = var_3690_pad_type_0, strides = var_3690_strides_0, weight = layers_11_encoder_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_163_cast_fp16)[name = tensor("op_3690_cast_fp16")]; + tensor var_3696_pad_type_0 = const()[name = tensor("op_3696_pad_type_0"), val = tensor("valid")]; + tensor var_3696_strides_0 = const()[name = tensor("op_3696_strides_0"), val = tensor([1, 1])]; + tensor var_3696_pad_0 = const()[name = tensor("op_3696_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3696_dilations_0 = const()[name = tensor("op_3696_dilations_0"), val = tensor([1, 1])]; + tensor var_3696_groups_0 = const()[name = tensor("op_3696_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148795904))), name = tensor("layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148789568))), shape = tensor([768, 768, 1, 1])]; + tensor var_3696_cast_fp16 = conv(dilations = var_3696_dilations_0, groups = var_3696_groups_0, pad = var_3696_pad_0, pad_type = var_3696_pad_type_0, strides = var_3696_strides_0, weight = layers_11_encoder_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_163_cast_fp16)[name = tensor("op_3696_cast_fp16")]; + tensor query_cast_fp16 = add(x = var_3690_cast_fp16, y = var_3696_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_3705_pad_type_0 = const()[name = tensor("op_3705_pad_type_0"), val = tensor("valid")]; + tensor var_3705_strides_0 = const()[name = tensor("op_3705_strides_0"), val = tensor([1, 1])]; + tensor var_3705_pad_0 = const()[name = tensor("op_3705_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3705_dilations_0 = const()[name = tensor("op_3705_dilations_0"), val = tensor([1, 1])]; + tensor var_3705_groups_0 = const()[name = tensor("op_3705_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148869696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149164672))), name = tensor("layers_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor var_3705_cast_fp16 = conv(dilations = var_3705_dilations_0, groups = var_3705_groups_0, pad = var_3705_pad_0, pad_type = var_3705_pad_type_0, strides = var_3705_strides_0, weight = layers_11_encoder_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3705_cast_fp16")]; + tensor var_3711_pad_type_0 = const()[name = tensor("op_3711_pad_type_0"), val = tensor("valid")]; + tensor var_3711_strides_0 = const()[name = tensor("op_3711_strides_0"), val = tensor([1, 1])]; + tensor var_3711_pad_0 = const()[name = tensor("op_3711_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3711_dilations_0 = const()[name = tensor("op_3711_dilations_0"), val = tensor([1, 1])]; + tensor var_3711_groups_0 = const()[name = tensor("op_3711_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149171072))), name = tensor("layers_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149164800))), shape = tensor([768, 768, 1, 1])]; + tensor var_3711_cast_fp16 = conv(dilations = var_3711_dilations_0, groups = var_3711_groups_0, pad = var_3711_pad_0, pad_type = var_3711_pad_type_0, strides = var_3711_strides_0, weight = layers_11_encoder_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3711_cast_fp16")]; + tensor key_cast_fp16 = add(x = var_3705_cast_fp16, y = var_3711_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_3721_pad_type_0 = const()[name = tensor("op_3721_pad_type_0"), val = tensor("valid")]; + tensor var_3721_strides_0 = const()[name = tensor("op_3721_strides_0"), val = tensor([1, 1])]; + tensor var_3721_pad_0 = const()[name = tensor("op_3721_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3721_dilations_0 = const()[name = tensor("op_3721_dilations_0"), val = tensor([1, 1])]; + tensor var_3721_groups_0 = const()[name = tensor("op_3721_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149244864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149539840))), name = tensor("layers_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_encoder_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149539968)))]; + tensor var_3721_cast_fp16 = conv(bias = layers_11_encoder_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3721_dilations_0, groups = var_3721_groups_0, pad = var_3721_pad_0, pad_type = var_3721_pad_type_0, strides = var_3721_strides_0, weight = layers_11_encoder_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = encoder_output_embeds)[name = tensor("op_3721_cast_fp16")]; + tensor var_3727_pad_type_0 = const()[name = tensor("op_3727_pad_type_0"), val = tensor("valid")]; + tensor var_3727_strides_0 = const()[name = tensor("op_3727_strides_0"), val = tensor([1, 1])]; + tensor var_3727_pad_0 = const()[name = tensor("op_3727_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3727_dilations_0 = const()[name = tensor("op_3727_dilations_0"), val = tensor([1, 1])]; + tensor var_3727_groups_0 = const()[name = tensor("op_3727_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149547904))), name = tensor("layers_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149541568))), shape = tensor([768, 768, 1, 1])]; + tensor var_3727_cast_fp16 = conv(dilations = var_3727_dilations_0, groups = var_3727_groups_0, pad = var_3727_pad_0, pad_type = var_3727_pad_type_0, strides = var_3727_strides_0, weight = layers_11_encoder_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = encoder_output_embeds)[name = tensor("op_3727_cast_fp16")]; + tensor value_cast_fp16 = add(x = var_3721_cast_fp16, y = var_3727_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_3731 = const()[name = tensor("op_3731"), val = tensor([1, 12, 64, 1])]; + tensor mh_q_cast_fp16 = reshape(shape = var_3731, x = query_cast_fp16)[name = tensor("mh_q_cast_fp16")]; + tensor var_3733_to_fp16 = const()[name = tensor("op_3733_to_fp16"), val = tensor(0x1p-3)]; + tensor var_3734_cast_fp16 = mul(x = mh_q_cast_fp16, y = var_3733_to_fp16)[name = tensor("op_3734_cast_fp16")]; + tensor var_3737 = const()[name = tensor("op_3737"), val = tensor([1, 12, 64, 1500])]; + tensor var_3738_cast_fp16 = reshape(shape = var_3737, x = key_cast_fp16)[name = tensor("op_3738_cast_fp16")]; + tensor mh_w_transpose_x_0 = const()[name = tensor("mh_w_transpose_x_0"), val = tensor(true)]; + tensor mh_w_transpose_y_0 = const()[name = tensor("mh_w_transpose_y_0"), val = tensor(false)]; + tensor mh_w_cast_fp16 = matmul(transpose_x = mh_w_transpose_x_0, transpose_y = mh_w_transpose_y_0, x = var_3734_cast_fp16, y = var_3738_cast_fp16)[name = tensor("mh_w_cast_fp16")]; + tensor obj_167_cast_fp16 = softmax(axis = var_3517, x = mh_w_cast_fp16)[name = tensor("obj_167_cast_fp16")]; + tensor var_3742 = const()[name = tensor("op_3742"), val = tensor([1, 12, 64, 1500])]; + tensor var_3743_cast_fp16 = reshape(shape = var_3742, x = value_cast_fp16)[name = tensor("op_3743_cast_fp16")]; + tensor attn_transpose_x_0 = const()[name = tensor("attn_transpose_x_0"), val = tensor(false)]; + tensor attn_transpose_y_0 = const()[name = tensor("attn_transpose_y_0"), val = tensor(true)]; + tensor attn_cast_fp16 = matmul(transpose_x = attn_transpose_x_0, transpose_y = attn_transpose_y_0, x = var_3743_cast_fp16, y = obj_167_cast_fp16)[name = tensor("attn_cast_fp16")]; + tensor var_3746 = const()[name = tensor("op_3746"), val = tensor([1, 768, 1, 1])]; + tensor input_113_cast_fp16 = reshape(shape = var_3746, x = attn_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor var_3756_pad_type_0 = const()[name = tensor("op_3756_pad_type_0"), val = tensor("valid")]; + tensor var_3756_strides_0 = const()[name = tensor("op_3756_strides_0"), val = tensor([1, 1])]; + tensor var_3756_pad_0 = const()[name = tensor("op_3756_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3756_dilations_0 = const()[name = tensor("op_3756_dilations_0"), val = tensor([1, 1])]; + tensor var_3756_groups_0 = const()[name = tensor("op_3756_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149621696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149916672))), name = tensor("layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 768, 1, 1])]; + tensor layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149916800)))]; + tensor var_3756_cast_fp16 = conv(bias = layers_11_encoder_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3756_dilations_0, groups = var_3756_groups_0, pad = var_3756_pad_0, pad_type = var_3756_pad_type_0, strides = var_3756_strides_0, weight = layers_11_encoder_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = tensor("op_3756_cast_fp16")]; + tensor var_3762_pad_type_0 = const()[name = tensor("op_3762_pad_type_0"), val = tensor("valid")]; + tensor var_3762_strides_0 = const()[name = tensor("op_3762_strides_0"), val = tensor([1, 1])]; + tensor var_3762_pad_0 = const()[name = tensor("op_3762_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3762_dilations_0 = const()[name = tensor("op_3762_dilations_0"), val = tensor([1, 1])]; + tensor var_3762_groups_0 = const()[name = tensor("op_3762_groups_0"), val = tensor(1)]; + tensor layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149929472))), name = tensor("layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149918400))), shape = tensor([768, 768, 1, 1])]; + tensor var_3762_cast_fp16 = conv(dilations = var_3762_dilations_0, groups = var_3762_groups_0, pad = var_3762_pad_0, pad_type = var_3762_pad_type_0, strides = var_3762_strides_0, weight = layers_11_encoder_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = tensor("op_3762_cast_fp16")]; + tensor obj_165_cast_fp16 = add(x = var_3756_cast_fp16, y = var_3762_cast_fp16)[name = tensor("obj_165_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_165_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor out_71_axes_0 = const()[name = tensor("out_71_axes_0"), val = tensor([1])]; + tensor var_3773_to_fp16 = const()[name = tensor("op_3773_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_3773_to_fp16, x = inputs_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150003264)))]; + tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150004864)))]; + tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_3791_pad_type_0 = const()[name = tensor("op_3791_pad_type_0"), val = tensor("valid")]; + tensor var_3791_strides_0 = const()[name = tensor("op_3791_strides_0"), val = tensor([1, 1])]; + tensor var_3791_pad_0 = const()[name = tensor("op_3791_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3791_dilations_0 = const()[name = tensor("op_3791_dilations_0"), val = tensor([1, 1])]; + tensor var_3791_groups_0 = const()[name = tensor("op_3791_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150006464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151186176))), name = tensor("layers_11_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([3072, 768, 1, 1])]; + tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151186304)))]; + tensor var_3791_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_3791_dilations_0, groups = var_3791_groups_0, pad = var_3791_pad_0, pad_type = var_3791_pad_type_0, strides = var_3791_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = tensor("op_3791_cast_fp16")]; + tensor var_3797_pad_type_0 = const()[name = tensor("op_3797_pad_type_0"), val = tensor("valid")]; + tensor var_3797_strides_0 = const()[name = tensor("op_3797_strides_0"), val = tensor([1, 1])]; + tensor var_3797_pad_0 = const()[name = tensor("op_3797_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3797_dilations_0 = const()[name = tensor("op_3797_dilations_0"), val = tensor([1, 1])]; + tensor var_3797_groups_0 = const()[name = tensor("op_3797_groups_0"), val = tensor(1)]; + tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151221184))), name = tensor("layers_11_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151192512))), shape = tensor([3072, 768, 1, 1])]; + tensor var_3797_cast_fp16 = conv(dilations = var_3797_dilations_0, groups = var_3797_groups_0, pad = var_3797_pad_0, pad_type = var_3797_pad_type_0, strides = var_3797_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = tensor("op_3797_cast_fp16")]; + tensor input_117_cast_fp16 = add(x = var_3791_cast_fp16, y = var_3797_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_117_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_3808_pad_type_0 = const()[name = tensor("op_3808_pad_type_0"), val = tensor("valid")]; + tensor var_3808_strides_0 = const()[name = tensor("op_3808_strides_0"), val = tensor([1, 1])]; + tensor var_3808_pad_0 = const()[name = tensor("op_3808_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3808_dilations_0 = const()[name = tensor("op_3808_dilations_0"), val = tensor([1, 1])]; + tensor var_3808_groups_0 = const()[name = tensor("op_3808_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151516160))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152695872))), name = tensor("layers_11_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([768, 3072, 1, 1])]; + tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152696000)))]; + tensor var_3808_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_3808_dilations_0, groups = var_3808_groups_0, pad = var_3808_pad_0, pad_type = var_3808_pad_type_0, strides = var_3808_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = tensor("op_3808_cast_fp16")]; + tensor var_3814_pad_type_0 = const()[name = tensor("op_3814_pad_type_0"), val = tensor("valid")]; + tensor var_3814_strides_0 = const()[name = tensor("op_3814_strides_0"), val = tensor([1, 1])]; + tensor var_3814_pad_0 = const()[name = tensor("op_3814_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3814_dilations_0 = const()[name = tensor("op_3814_dilations_0"), val = tensor([1, 1])]; + tensor var_3814_groups_0 = const()[name = tensor("op_3814_groups_0"), val = tensor(1)]; + tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152728512))), name = tensor("layers_11_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152697600))), shape = tensor([768, 3072, 1, 1])]; + tensor var_3814_cast_fp16 = conv(dilations = var_3814_dilations_0, groups = var_3814_groups_0, pad = var_3814_pad_0, pad_type = var_3814_pad_type_0, strides = var_3814_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = tensor("op_3814_cast_fp16")]; + tensor hidden_states_25_cast_fp16 = add(x = var_3808_cast_fp16, y = var_3814_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; + tensor var_3833_to_fp16 = const()[name = tensor("op_3833_to_fp16"), val = tensor(0x1.5p-17)]; + tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_3833_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor hidden_states_gamma_0_to_fp16 = const()[name = tensor("hidden_states_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153023488)))]; + tensor hidden_states_beta_0_to_fp16 = const()[name = tensor("hidden_states_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153025088)))]; + tensor hidden_states_epsilon_0_to_fp16 = const()[name = tensor("hidden_states_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor hidden_states_cast_fp16 = batch_norm(beta = hidden_states_beta_0_to_fp16, epsilon = hidden_states_epsilon_0_to_fp16, gamma = hidden_states_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor var_3844_axes_0 = const()[name = tensor("op_3844_axes_0"), val = tensor([2])]; + tensor var_3844_cast_fp16 = squeeze(axes = var_3844_axes_0, x = hidden_states_cast_fp16)[name = tensor("op_3844_cast_fp16")]; + tensor var_3847_perm_0 = const()[name = tensor("op_3847_perm_0"), val = tensor([0, 2, 1])]; + tensor linear_0_bias_0_to_fp16 = const()[name = tensor("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153026688)))]; + tensor var_3847_cast_fp16 = transpose(perm = var_3847_perm_0, x = var_3844_cast_fp16)[name = tensor("transpose_0")]; + tensor logits = linear(bias = linear_0_bias_0_to_fp16, weight = embed_tokens_weight_to_fp16, x = var_3847_cast_fp16)[name = tensor("linear_0_cast_fp16")]; + tensor var_3851 = const()[name = tensor("op_3851"), val = tensor(1)]; + tensor obj_171_interleave_0 = const()[name = tensor("obj_171_interleave_0"), val = tensor(false)]; + tensor key_cache_updates = concat(axis = var_3851, interleave = obj_171_interleave_0, values = (current_key_1_cast_fp16, current_key_3_cast_fp16, current_key_5_cast_fp16, current_key_7_cast_fp16, current_key_9_cast_fp16, current_key_11_cast_fp16, current_key_13_cast_fp16, current_key_15_cast_fp16, current_key_17_cast_fp16, current_key_19_cast_fp16, current_key_21_cast_fp16, current_key_cast_fp16))[name = tensor("obj_171_cast_fp16")]; + tensor var_3854 = const()[name = tensor("op_3854"), val = tensor(1)]; + tensor obj_173_interleave_0 = const()[name = tensor("obj_173_interleave_0"), val = tensor(false)]; + tensor value_cache_updates = concat(axis = var_3854, interleave = obj_173_interleave_0, values = (current_value_1_cast_fp16, current_value_3_cast_fp16, current_value_5_cast_fp16, current_value_7_cast_fp16, current_value_9_cast_fp16, current_value_11_cast_fp16, current_value_13_cast_fp16, current_value_15_cast_fp16, current_value_17_cast_fp16, current_value_19_cast_fp16, current_value_21_cast_fp16, current_value_cast_fp16))[name = tensor("obj_173_cast_fp16")]; + tensor var_3865_begin_0 = const()[name = tensor("op_3865_begin_0"), val = tensor([0, 3, 0, 0])]; + tensor var_3865_end_0 = const()[name = tensor("op_3865_end_0"), val = tensor([1, 4, 1, 1500])]; + tensor var_3865_end_mask_0 = const()[name = tensor("op_3865_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3865_cast_fp16 = slice_by_index(begin = var_3865_begin_0, end = var_3865_end_0, end_mask = var_3865_end_mask_0, x = obj_83_cast_fp16)[name = tensor("op_3865_cast_fp16")]; + tensor var_3868_begin_0 = const()[name = tensor("op_3868_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3868_end_0 = const()[name = tensor("op_3868_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3868_end_mask_0 = const()[name = tensor("op_3868_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3868_squeeze_mask_0 = const()[name = tensor("op_3868_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3868_cast_fp16 = slice_by_index(begin = var_3868_begin_0, end = var_3868_end_0, end_mask = var_3868_end_mask_0, squeeze_mask = var_3868_squeeze_mask_0, x = var_3865_cast_fp16)[name = tensor("op_3868_cast_fp16")]; + tensor var_3883_begin_0 = const()[name = tensor("op_3883_begin_0"), val = tensor([0, 9, 0, 0])]; + tensor var_3883_end_0 = const()[name = tensor("op_3883_end_0"), val = tensor([1, 10, 1, 1500])]; + tensor var_3883_end_mask_0 = const()[name = tensor("op_3883_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3883_cast_fp16 = slice_by_index(begin = var_3883_begin_0, end = var_3883_end_0, end_mask = var_3883_end_mask_0, x = obj_83_cast_fp16)[name = tensor("op_3883_cast_fp16")]; + tensor var_3886_begin_0 = const()[name = tensor("op_3886_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3886_end_0 = const()[name = tensor("op_3886_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3886_end_mask_0 = const()[name = tensor("op_3886_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3886_squeeze_mask_0 = const()[name = tensor("op_3886_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3886_cast_fp16 = slice_by_index(begin = var_3886_begin_0, end = var_3886_end_0, end_mask = var_3886_end_mask_0, squeeze_mask = var_3886_squeeze_mask_0, x = var_3883_cast_fp16)[name = tensor("op_3886_cast_fp16")]; + tensor var_3901_begin_0 = const()[name = tensor("op_3901_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3901_end_0 = const()[name = tensor("op_3901_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3901_end_mask_0 = const()[name = tensor("op_3901_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3901_cast_fp16 = slice_by_index(begin = var_3901_begin_0, end = var_3901_end_0, end_mask = var_3901_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_3901_cast_fp16")]; + tensor var_3904_begin_0 = const()[name = tensor("op_3904_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3904_end_0 = const()[name = tensor("op_3904_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3904_end_mask_0 = const()[name = tensor("op_3904_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3904_squeeze_mask_0 = const()[name = tensor("op_3904_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3904_cast_fp16 = slice_by_index(begin = var_3904_begin_0, end = var_3904_end_0, end_mask = var_3904_end_mask_0, squeeze_mask = var_3904_squeeze_mask_0, x = var_3901_cast_fp16)[name = tensor("op_3904_cast_fp16")]; + tensor var_3919_begin_0 = const()[name = tensor("op_3919_begin_0"), val = tensor([0, 4, 0, 0])]; + tensor var_3919_end_0 = const()[name = tensor("op_3919_end_0"), val = tensor([1, 5, 1, 1500])]; + tensor var_3919_end_mask_0 = const()[name = tensor("op_3919_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3919_cast_fp16 = slice_by_index(begin = var_3919_begin_0, end = var_3919_end_0, end_mask = var_3919_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_3919_cast_fp16")]; + tensor var_3922_begin_0 = const()[name = tensor("op_3922_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3922_end_0 = const()[name = tensor("op_3922_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3922_end_mask_0 = const()[name = tensor("op_3922_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3922_squeeze_mask_0 = const()[name = tensor("op_3922_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3922_cast_fp16 = slice_by_index(begin = var_3922_begin_0, end = var_3922_end_0, end_mask = var_3922_end_mask_0, squeeze_mask = var_3922_squeeze_mask_0, x = var_3919_cast_fp16)[name = tensor("op_3922_cast_fp16")]; + tensor var_3937_begin_0 = const()[name = tensor("op_3937_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3937_end_0 = const()[name = tensor("op_3937_end_0"), val = tensor([1, 8, 1, 1500])]; + tensor var_3937_end_mask_0 = const()[name = tensor("op_3937_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3937_cast_fp16 = slice_by_index(begin = var_3937_begin_0, end = var_3937_end_0, end_mask = var_3937_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_3937_cast_fp16")]; + tensor var_3940_begin_0 = const()[name = tensor("op_3940_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3940_end_0 = const()[name = tensor("op_3940_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3940_end_mask_0 = const()[name = tensor("op_3940_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3940_squeeze_mask_0 = const()[name = tensor("op_3940_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3940_cast_fp16 = slice_by_index(begin = var_3940_begin_0, end = var_3940_end_0, end_mask = var_3940_end_mask_0, squeeze_mask = var_3940_squeeze_mask_0, x = var_3937_cast_fp16)[name = tensor("op_3940_cast_fp16")]; + tensor var_3955_begin_0 = const()[name = tensor("op_3955_begin_0"), val = tensor([0, 8, 0, 0])]; + tensor var_3955_end_0 = const()[name = tensor("op_3955_end_0"), val = tensor([1, 9, 1, 1500])]; + tensor var_3955_end_mask_0 = const()[name = tensor("op_3955_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3955_cast_fp16 = slice_by_index(begin = var_3955_begin_0, end = var_3955_end_0, end_mask = var_3955_end_mask_0, x = obj_125_cast_fp16)[name = tensor("op_3955_cast_fp16")]; + tensor var_3958_begin_0 = const()[name = tensor("op_3958_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3958_end_0 = const()[name = tensor("op_3958_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3958_end_mask_0 = const()[name = tensor("op_3958_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3958_squeeze_mask_0 = const()[name = tensor("op_3958_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3958_cast_fp16 = slice_by_index(begin = var_3958_begin_0, end = var_3958_end_0, end_mask = var_3958_end_mask_0, squeeze_mask = var_3958_squeeze_mask_0, x = var_3955_cast_fp16)[name = tensor("op_3958_cast_fp16")]; + tensor var_3973_begin_0 = const()[name = tensor("op_3973_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3973_end_0 = const()[name = tensor("op_3973_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3973_end_mask_0 = const()[name = tensor("op_3973_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3973_cast_fp16 = slice_by_index(begin = var_3973_begin_0, end = var_3973_end_0, end_mask = var_3973_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_3973_cast_fp16")]; + tensor var_3976_begin_0 = const()[name = tensor("op_3976_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3976_end_0 = const()[name = tensor("op_3976_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3976_end_mask_0 = const()[name = tensor("op_3976_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3976_squeeze_mask_0 = const()[name = tensor("op_3976_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3976_cast_fp16 = slice_by_index(begin = var_3976_begin_0, end = var_3976_end_0, end_mask = var_3976_end_mask_0, squeeze_mask = var_3976_squeeze_mask_0, x = var_3973_cast_fp16)[name = tensor("op_3976_cast_fp16")]; + tensor var_3991_begin_0 = const()[name = tensor("op_3991_begin_0"), val = tensor([0, 7, 0, 0])]; + tensor var_3991_end_0 = const()[name = tensor("op_3991_end_0"), val = tensor([1, 8, 1, 1500])]; + tensor var_3991_end_mask_0 = const()[name = tensor("op_3991_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3991_cast_fp16 = slice_by_index(begin = var_3991_begin_0, end = var_3991_end_0, end_mask = var_3991_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_3991_cast_fp16")]; + tensor var_3994_begin_0 = const()[name = tensor("op_3994_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3994_end_0 = const()[name = tensor("op_3994_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_3994_end_mask_0 = const()[name = tensor("op_3994_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3994_squeeze_mask_0 = const()[name = tensor("op_3994_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3994_cast_fp16 = slice_by_index(begin = var_3994_begin_0, end = var_3994_end_0, end_mask = var_3994_end_mask_0, squeeze_mask = var_3994_squeeze_mask_0, x = var_3991_cast_fp16)[name = tensor("op_3994_cast_fp16")]; + tensor var_4009_begin_0 = const()[name = tensor("op_4009_begin_0"), val = tensor([0, 9, 0, 0])]; + tensor var_4009_end_0 = const()[name = tensor("op_4009_end_0"), val = tensor([1, 10, 1, 1500])]; + tensor var_4009_end_mask_0 = const()[name = tensor("op_4009_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4009_cast_fp16 = slice_by_index(begin = var_4009_begin_0, end = var_4009_end_0, end_mask = var_4009_end_mask_0, x = obj_139_cast_fp16)[name = tensor("op_4009_cast_fp16")]; + tensor var_4012_begin_0 = const()[name = tensor("op_4012_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4012_end_0 = const()[name = tensor("op_4012_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4012_end_mask_0 = const()[name = tensor("op_4012_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4012_squeeze_mask_0 = const()[name = tensor("op_4012_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4012_cast_fp16 = slice_by_index(begin = var_4012_begin_0, end = var_4012_end_0, end_mask = var_4012_end_mask_0, squeeze_mask = var_4012_squeeze_mask_0, x = var_4009_cast_fp16)[name = tensor("op_4012_cast_fp16")]; + tensor var_4027_begin_0 = const()[name = tensor("op_4027_begin_0"), val = tensor([0, 5, 0, 0])]; + tensor var_4027_end_0 = const()[name = tensor("op_4027_end_0"), val = tensor([1, 6, 1, 1500])]; + tensor var_4027_end_mask_0 = const()[name = tensor("op_4027_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4027_cast_fp16 = slice_by_index(begin = var_4027_begin_0, end = var_4027_end_0, end_mask = var_4027_end_mask_0, x = obj_153_cast_fp16)[name = tensor("op_4027_cast_fp16")]; + tensor var_4030_begin_0 = const()[name = tensor("op_4030_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4030_end_0 = const()[name = tensor("op_4030_end_0"), val = tensor([1, 1, 1, 1500])]; + tensor var_4030_end_mask_0 = const()[name = tensor("op_4030_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4030_squeeze_mask_0 = const()[name = tensor("op_4030_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_4030_cast_fp16 = slice_by_index(begin = var_4030_begin_0, end = var_4030_end_0, end_mask = var_4030_end_mask_0, squeeze_mask = var_4030_squeeze_mask_0, x = var_4027_cast_fp16)[name = tensor("op_4030_cast_fp16")]; + tensor var_4037 = const()[name = tensor("op_4037"), val = tensor(1)]; + tensor var_4038_interleave_0 = const()[name = tensor("op_4038_interleave_0"), val = tensor(false)]; + tensor var_4038_cast_fp16 = concat(axis = var_4037, interleave = var_4038_interleave_0, values = (var_3868_cast_fp16, var_3886_cast_fp16, var_3904_cast_fp16, var_3922_cast_fp16, var_3940_cast_fp16, var_3958_cast_fp16, var_3976_cast_fp16, var_3994_cast_fp16, var_4012_cast_fp16, var_4030_cast_fp16))[name = tensor("op_4038_cast_fp16")]; + tensor obj_axes_0 = const()[name = tensor("obj_axes_0"), val = tensor([1])]; + tensor obj_keep_dims_0 = const()[name = tensor("obj_keep_dims_0"), val = tensor(false)]; + tensor alignment_heads_weights = reduce_mean(axes = obj_axes_0, keep_dims = obj_keep_dims_0, x = var_4038_cast_fp16)[name = tensor("obj_cast_fp16")]; + } -> (logits, key_cache_updates, value_cache_updates, alignment_heads_weights); +} \ No newline at end of file