program(1.0) [buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}})] { func main(tensor melspectrogram_features) { tensor var_116_pad_type_0 = const()[name = tensor("op_116_pad_type_0"), val = tensor("custom")]; tensor var_116_pad_0 = const()[name = tensor("op_116_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_116_strides_0 = const()[name = tensor("op_116_strides_0"), val = tensor([1, 1])]; tensor var_116_dilations_0 = const()[name = tensor("op_116_dilations_0"), val = tensor([1, 1])]; tensor var_116_groups_0 = const()[name = tensor("op_116_groups_0"), val = tensor(1)]; tensor var_85_to_fp16 = const()[name = tensor("op_85_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; tensor var_97_to_fp16 = const()[name = tensor("op_97_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(983168)))]; tensor var_116_cast_fp16 = conv(bias = var_97_to_fp16, dilations = var_116_dilations_0, groups = var_116_groups_0, pad = var_116_pad_0, pad_type = var_116_pad_type_0, strides = var_116_strides_0, weight = var_85_to_fp16, x = melspectrogram_features)[name = tensor("op_116_cast_fp16")]; tensor var_154_pad_type_0 = const()[name = tensor("op_154_pad_type_0"), val = tensor("custom")]; tensor var_154_pad_0 = const()[name = tensor("op_154_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_154_strides_0 = const()[name = tensor("op_154_strides_0"), val = tensor([1, 1])]; tensor var_154_dilations_0 = const()[name = tensor("op_154_dilations_0"), val = tensor([1, 1])]; tensor var_154_groups_0 = const()[name = tensor("op_154_groups_0"), val = tensor(1)]; tensor op_129_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(985792))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1231616))), name = tensor("op_129_to_fp16_palettized"), shape = tensor([1280, 128, 1, 3])]; tensor var_135_to_fp16 = const()[name = tensor("op_135_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1231744)))]; tensor var_154_cast_fp16 = conv(bias = var_135_to_fp16, dilations = var_154_dilations_0, groups = var_154_groups_0, pad = var_154_pad_0, pad_type = var_154_pad_type_0, strides = var_154_strides_0, weight = op_129_to_fp16_palettized, x = melspectrogram_features)[name = tensor("op_154_cast_fp16")]; tensor var_156_cast_fp16 = add(x = var_116_cast_fp16, y = var_154_cast_fp16)[name = tensor("op_156_cast_fp16")]; tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_156_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; tensor var_202_pad_type_0 = const()[name = tensor("op_202_pad_type_0"), val = tensor("custom")]; tensor var_202_pad_0 = const()[name = tensor("op_202_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_202_strides_0 = const()[name = tensor("op_202_strides_0"), val = tensor([2, 2])]; tensor var_202_dilations_0 = const()[name = tensor("op_202_dilations_0"), val = tensor([1, 1])]; tensor var_202_groups_0 = const()[name = tensor("op_202_groups_0"), val = tensor(1)]; tensor var_171_to_fp16 = const()[name = tensor("op_171_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1234368)))]; tensor var_202_cast_fp16 = conv(bias = var_97_to_fp16, dilations = var_202_dilations_0, groups = var_202_groups_0, pad = var_202_pad_0, pad_type = var_202_pad_type_0, strides = var_202_strides_0, weight = var_171_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_202_cast_fp16")]; tensor var_240_pad_type_0 = const()[name = tensor("op_240_pad_type_0"), val = tensor("custom")]; tensor var_240_pad_0 = const()[name = tensor("op_240_pad_0"), val = tensor([0, 0, 1, 1])]; tensor var_240_strides_0 = const()[name = tensor("op_240_strides_0"), val = tensor([2, 2])]; tensor var_240_dilations_0 = const()[name = tensor("op_240_dilations_0"), val = tensor([1, 1])]; tensor var_240_groups_0 = const()[name = tensor("op_240_groups_0"), val = tensor(1)]; tensor op_215_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(11064832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13522496))), name = tensor("op_215_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 3])]; tensor var_221_to_fp16 = const()[name = tensor("op_221_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13522624)))]; tensor var_240_cast_fp16 = conv(bias = var_221_to_fp16, dilations = var_240_dilations_0, groups = var_240_groups_0, pad = var_240_pad_0, pad_type = var_240_pad_type_0, strides = var_240_strides_0, weight = op_215_to_fp16_palettized, x = hidden_states_1_cast_fp16)[name = tensor("op_240_cast_fp16")]; tensor var_242_cast_fp16 = add(x = var_202_cast_fp16, y = var_240_cast_fp16)[name = tensor("op_242_cast_fp16")]; tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_242_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; tensor var_262_to_fp16 = const()[name = tensor("op_262_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(13525248)))]; tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_262_to_fp16)[name = tensor("inputs_1_cast_fp16")]; tensor var_272 = const()[name = tensor("op_272"), val = tensor(3)]; tensor var_297 = const()[name = tensor("op_297"), val = tensor(1)]; tensor out_1_axes_0 = const()[name = tensor("out_1_axes_0"), val = tensor([1])]; tensor var_314_to_fp16 = const()[name = tensor("op_314_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_1_cast_fp16 = layer_norm(axes = out_1_axes_0, epsilon = var_314_to_fp16, x = inputs_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17365312)))]; tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17367936)))]; tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17370560)))]; tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; tensor var_336_pad_type_0 = const()[name = tensor("op_336_pad_type_0"), val = tensor("valid")]; tensor var_336_strides_0 = const()[name = tensor("op_336_strides_0"), val = tensor([1, 1])]; tensor var_336_pad_0 = const()[name = tensor("op_336_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_336_dilations_0 = const()[name = tensor("op_336_dilations_0"), val = tensor([1, 1])]; tensor var_336_groups_0 = const()[name = tensor("op_336_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17373184))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18192448))), name = tensor("layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_0_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18192576)))]; tensor var_336_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_336_dilations_0, groups = var_336_groups_0, pad = var_336_pad_0, pad_type = var_336_pad_type_0, strides = var_336_strides_0, weight = layers_0_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_336_cast_fp16")]; tensor var_342_pad_type_0 = const()[name = tensor("op_342_pad_type_0"), val = tensor("valid")]; tensor var_342_strides_0 = const()[name = tensor("op_342_strides_0"), val = tensor([1, 1])]; tensor var_342_pad_0 = const()[name = tensor("op_342_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_342_dilations_0 = const()[name = tensor("op_342_dilations_0"), val = tensor([1, 1])]; tensor var_342_groups_0 = const()[name = tensor("op_342_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18275392))), name = tensor("layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18195200))), shape = tensor([1280, 1280, 1, 1])]; tensor var_342_cast_fp16 = conv(dilations = var_342_dilations_0, groups = var_342_groups_0, pad = var_342_pad_0, pad_type = var_342_pad_type_0, strides = var_342_strides_0, weight = layers_0_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_342_cast_fp16")]; tensor query_1_cast_fp16 = add(x = var_336_cast_fp16, y = var_342_cast_fp16)[name = tensor("query_1_cast_fp16")]; tensor var_351_pad_type_0 = const()[name = tensor("op_351_pad_type_0"), val = tensor("valid")]; tensor var_351_strides_0 = const()[name = tensor("op_351_strides_0"), val = tensor([1, 1])]; tensor var_351_pad_0 = const()[name = tensor("op_351_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_351_dilations_0 = const()[name = tensor("op_351_dilations_0"), val = tensor([1, 1])]; tensor var_351_groups_0 = const()[name = tensor("op_351_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18480256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19299520))), name = tensor("layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_351_cast_fp16 = conv(dilations = var_351_dilations_0, groups = var_351_groups_0, pad = var_351_pad_0, pad_type = var_351_pad_type_0, strides = var_351_strides_0, weight = layers_0_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_351_cast_fp16")]; tensor var_357_pad_type_0 = const()[name = tensor("op_357_pad_type_0"), val = tensor("valid")]; tensor var_357_strides_0 = const()[name = tensor("op_357_strides_0"), val = tensor([1, 1])]; tensor var_357_pad_0 = const()[name = tensor("op_357_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_357_dilations_0 = const()[name = tensor("op_357_dilations_0"), val = tensor([1, 1])]; tensor var_357_groups_0 = const()[name = tensor("op_357_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19363520))), name = tensor("layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19299648))), shape = tensor([1280, 1280, 1, 1])]; tensor var_357_cast_fp16 = conv(dilations = var_357_dilations_0, groups = var_357_groups_0, pad = var_357_pad_0, pad_type = var_357_pad_type_0, strides = var_357_strides_0, weight = layers_0_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_357_cast_fp16")]; tensor key_1_cast_fp16 = add(x = var_351_cast_fp16, y = var_357_cast_fp16)[name = tensor("key_1_cast_fp16")]; tensor var_367_pad_type_0 = const()[name = tensor("op_367_pad_type_0"), val = tensor("valid")]; tensor var_367_strides_0 = const()[name = tensor("op_367_strides_0"), val = tensor([1, 1])]; tensor var_367_pad_0 = const()[name = tensor("op_367_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_367_dilations_0 = const()[name = tensor("op_367_dilations_0"), val = tensor([1, 1])]; tensor var_367_groups_0 = const()[name = tensor("op_367_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(19568384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20387648))), name = tensor("layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_0_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20387776)))]; tensor var_367_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_367_dilations_0, groups = var_367_groups_0, pad = var_367_pad_0, pad_type = var_367_pad_type_0, strides = var_367_strides_0, weight = layers_0_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_1_cast_fp16)[name = tensor("op_367_cast_fp16")]; tensor var_373_pad_type_0 = const()[name = tensor("op_373_pad_type_0"), val = tensor("valid")]; tensor var_373_strides_0 = const()[name = tensor("op_373_strides_0"), val = tensor([1, 1])]; tensor var_373_pad_0 = const()[name = tensor("op_373_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_373_dilations_0 = const()[name = tensor("op_373_dilations_0"), val = tensor([1, 1])]; tensor var_373_groups_0 = const()[name = tensor("op_373_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20474048))), name = tensor("layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20390400))), shape = tensor([1280, 1280, 1, 1])]; tensor var_373_cast_fp16 = conv(dilations = var_373_dilations_0, groups = var_373_groups_0, pad = var_373_pad_0, pad_type = var_373_pad_type_0, strides = var_373_strides_0, weight = layers_0_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_1_cast_fp16)[name = tensor("op_373_cast_fp16")]; tensor value_1_cast_fp16 = add(x = var_367_cast_fp16, y = var_373_cast_fp16)[name = tensor("value_1_cast_fp16")]; tensor var_379_begin_0 = const()[name = tensor("op_379_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_379_end_0 = const()[name = tensor("op_379_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_379_end_mask_0 = const()[name = tensor("op_379_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_379_cast_fp16 = slice_by_index(begin = var_379_begin_0, end = var_379_end_0, end_mask = var_379_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_379_cast_fp16")]; tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_383_cast_fp16")]; tensor var_387_begin_0 = const()[name = tensor("op_387_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_387_end_0 = const()[name = tensor("op_387_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_387_end_mask_0 = const()[name = tensor("op_387_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_387_cast_fp16 = slice_by_index(begin = var_387_begin_0, end = var_387_end_0, end_mask = var_387_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_387_cast_fp16")]; tensor var_391_begin_0 = const()[name = tensor("op_391_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_391_end_0 = const()[name = tensor("op_391_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_391_end_mask_0 = const()[name = tensor("op_391_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_391_cast_fp16 = slice_by_index(begin = var_391_begin_0, end = var_391_end_0, end_mask = var_391_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_391_cast_fp16")]; tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_395_cast_fp16")]; tensor var_399_begin_0 = const()[name = tensor("op_399_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_399_end_0 = const()[name = tensor("op_399_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_399_end_mask_0 = const()[name = tensor("op_399_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_399_cast_fp16 = slice_by_index(begin = var_399_begin_0, end = var_399_end_0, end_mask = var_399_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_399_cast_fp16")]; tensor var_403_begin_0 = const()[name = tensor("op_403_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_403_end_0 = const()[name = tensor("op_403_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_403_end_mask_0 = const()[name = tensor("op_403_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_403_cast_fp16 = slice_by_index(begin = var_403_begin_0, end = var_403_end_0, end_mask = var_403_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_403_cast_fp16")]; tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_407_cast_fp16")]; tensor var_411_begin_0 = const()[name = tensor("op_411_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_411_end_0 = const()[name = tensor("op_411_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_411_end_mask_0 = const()[name = tensor("op_411_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_411_cast_fp16 = slice_by_index(begin = var_411_begin_0, end = var_411_end_0, end_mask = var_411_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_411_cast_fp16")]; tensor var_415_begin_0 = const()[name = tensor("op_415_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_415_end_0 = const()[name = tensor("op_415_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_415_end_mask_0 = const()[name = tensor("op_415_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_415_cast_fp16 = slice_by_index(begin = var_415_begin_0, end = var_415_end_0, end_mask = var_415_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_415_cast_fp16")]; tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_419_cast_fp16")]; tensor var_423_begin_0 = const()[name = tensor("op_423_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_423_end_0 = const()[name = tensor("op_423_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_423_end_mask_0 = const()[name = tensor("op_423_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_423_cast_fp16 = slice_by_index(begin = var_423_begin_0, end = var_423_end_0, end_mask = var_423_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_423_cast_fp16")]; tensor var_427_begin_0 = const()[name = tensor("op_427_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_427_end_0 = const()[name = tensor("op_427_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_427_end_mask_0 = const()[name = tensor("op_427_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_427_cast_fp16 = slice_by_index(begin = var_427_begin_0, end = var_427_end_0, end_mask = var_427_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_427_cast_fp16")]; tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_431_cast_fp16")]; tensor var_435_begin_0 = const()[name = tensor("op_435_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_435_end_0 = const()[name = tensor("op_435_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_435_end_mask_0 = const()[name = tensor("op_435_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_435_cast_fp16 = slice_by_index(begin = var_435_begin_0, end = var_435_end_0, end_mask = var_435_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_435_cast_fp16")]; tensor var_439_begin_0 = const()[name = tensor("op_439_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_439_end_0 = const()[name = tensor("op_439_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_439_end_mask_0 = const()[name = tensor("op_439_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_439_cast_fp16 = slice_by_index(begin = var_439_begin_0, end = var_439_end_0, end_mask = var_439_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_439_cast_fp16")]; tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_443_cast_fp16")]; tensor var_447_begin_0 = const()[name = tensor("op_447_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_447_end_0 = const()[name = tensor("op_447_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_447_end_mask_0 = const()[name = tensor("op_447_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_447_cast_fp16 = slice_by_index(begin = var_447_begin_0, end = var_447_end_0, end_mask = var_447_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_447_cast_fp16")]; tensor var_451_begin_0 = const()[name = tensor("op_451_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_451_end_0 = const()[name = tensor("op_451_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_451_end_mask_0 = const()[name = tensor("op_451_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_451_cast_fp16 = slice_by_index(begin = var_451_begin_0, end = var_451_end_0, end_mask = var_451_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_451_cast_fp16")]; tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_455_cast_fp16")]; tensor var_464_begin_0 = const()[name = tensor("op_464_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_464_end_0 = const()[name = tensor("op_464_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_464_end_mask_0 = const()[name = tensor("op_464_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_464_cast_fp16 = slice_by_index(begin = var_464_begin_0, end = var_464_end_0, end_mask = var_464_end_mask_0, x = var_379_cast_fp16)[name = tensor("op_464_cast_fp16")]; tensor var_471_begin_0 = const()[name = tensor("op_471_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_471_end_0 = const()[name = tensor("op_471_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_471_end_mask_0 = const()[name = tensor("op_471_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_471_cast_fp16 = slice_by_index(begin = var_471_begin_0, end = var_471_end_0, end_mask = var_471_end_mask_0, x = var_379_cast_fp16)[name = tensor("op_471_cast_fp16")]; tensor var_478_begin_0 = const()[name = tensor("op_478_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_478_end_0 = const()[name = tensor("op_478_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_478_end_mask_0 = const()[name = tensor("op_478_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_478_cast_fp16 = slice_by_index(begin = var_478_begin_0, end = var_478_end_0, end_mask = var_478_end_mask_0, x = var_379_cast_fp16)[name = tensor("op_478_cast_fp16")]; tensor var_485_begin_0 = const()[name = tensor("op_485_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_485_end_0 = const()[name = tensor("op_485_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_485_end_mask_0 = const()[name = tensor("op_485_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_485_cast_fp16 = slice_by_index(begin = var_485_begin_0, end = var_485_end_0, end_mask = var_485_end_mask_0, x = var_379_cast_fp16)[name = tensor("op_485_cast_fp16")]; tensor var_492_begin_0 = const()[name = tensor("op_492_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_492_end_0 = const()[name = tensor("op_492_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_492_end_mask_0 = const()[name = tensor("op_492_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_492_cast_fp16 = slice_by_index(begin = var_492_begin_0, end = var_492_end_0, end_mask = var_492_end_mask_0, x = var_383_cast_fp16)[name = tensor("op_492_cast_fp16")]; tensor var_499_begin_0 = const()[name = tensor("op_499_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_499_end_0 = const()[name = tensor("op_499_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_499_end_mask_0 = const()[name = tensor("op_499_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_499_cast_fp16 = slice_by_index(begin = var_499_begin_0, end = var_499_end_0, end_mask = var_499_end_mask_0, x = var_383_cast_fp16)[name = tensor("op_499_cast_fp16")]; tensor var_506_begin_0 = const()[name = tensor("op_506_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_506_end_0 = const()[name = tensor("op_506_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_506_end_mask_0 = const()[name = tensor("op_506_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_506_cast_fp16 = slice_by_index(begin = var_506_begin_0, end = var_506_end_0, end_mask = var_506_end_mask_0, x = var_383_cast_fp16)[name = tensor("op_506_cast_fp16")]; tensor var_513_begin_0 = const()[name = tensor("op_513_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_513_end_0 = const()[name = tensor("op_513_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_513_end_mask_0 = const()[name = tensor("op_513_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_513_cast_fp16 = slice_by_index(begin = var_513_begin_0, end = var_513_end_0, end_mask = var_513_end_mask_0, x = var_383_cast_fp16)[name = tensor("op_513_cast_fp16")]; tensor var_520_begin_0 = const()[name = tensor("op_520_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_520_end_0 = const()[name = tensor("op_520_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_520_end_mask_0 = const()[name = tensor("op_520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_520_cast_fp16 = slice_by_index(begin = var_520_begin_0, end = var_520_end_0, end_mask = var_520_end_mask_0, x = var_387_cast_fp16)[name = tensor("op_520_cast_fp16")]; tensor var_527_begin_0 = const()[name = tensor("op_527_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_527_end_0 = const()[name = tensor("op_527_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_527_end_mask_0 = const()[name = tensor("op_527_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_527_cast_fp16 = slice_by_index(begin = var_527_begin_0, end = var_527_end_0, end_mask = var_527_end_mask_0, x = var_387_cast_fp16)[name = tensor("op_527_cast_fp16")]; tensor var_534_begin_0 = const()[name = tensor("op_534_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_534_end_0 = const()[name = tensor("op_534_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_534_end_mask_0 = const()[name = tensor("op_534_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_534_cast_fp16 = slice_by_index(begin = var_534_begin_0, end = var_534_end_0, end_mask = var_534_end_mask_0, x = var_387_cast_fp16)[name = tensor("op_534_cast_fp16")]; tensor var_541_begin_0 = const()[name = tensor("op_541_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_541_end_0 = const()[name = tensor("op_541_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_541_end_mask_0 = const()[name = tensor("op_541_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_541_cast_fp16 = slice_by_index(begin = var_541_begin_0, end = var_541_end_0, end_mask = var_541_end_mask_0, x = var_387_cast_fp16)[name = tensor("op_541_cast_fp16")]; tensor var_548_begin_0 = const()[name = tensor("op_548_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_548_end_0 = const()[name = tensor("op_548_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_548_end_mask_0 = const()[name = tensor("op_548_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_548_cast_fp16 = slice_by_index(begin = var_548_begin_0, end = var_548_end_0, end_mask = var_548_end_mask_0, x = var_391_cast_fp16)[name = tensor("op_548_cast_fp16")]; tensor var_555_begin_0 = const()[name = tensor("op_555_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_555_end_0 = const()[name = tensor("op_555_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_555_end_mask_0 = const()[name = tensor("op_555_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_555_cast_fp16 = slice_by_index(begin = var_555_begin_0, end = var_555_end_0, end_mask = var_555_end_mask_0, x = var_391_cast_fp16)[name = tensor("op_555_cast_fp16")]; tensor var_562_begin_0 = const()[name = tensor("op_562_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_562_end_0 = const()[name = tensor("op_562_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_562_end_mask_0 = const()[name = tensor("op_562_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_562_cast_fp16 = slice_by_index(begin = var_562_begin_0, end = var_562_end_0, end_mask = var_562_end_mask_0, x = var_391_cast_fp16)[name = tensor("op_562_cast_fp16")]; tensor var_569_begin_0 = const()[name = tensor("op_569_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_569_end_0 = const()[name = tensor("op_569_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_569_end_mask_0 = const()[name = tensor("op_569_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_569_cast_fp16 = slice_by_index(begin = var_569_begin_0, end = var_569_end_0, end_mask = var_569_end_mask_0, x = var_391_cast_fp16)[name = tensor("op_569_cast_fp16")]; tensor var_576_begin_0 = const()[name = tensor("op_576_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_576_end_0 = const()[name = tensor("op_576_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_576_end_mask_0 = const()[name = tensor("op_576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_576_cast_fp16 = slice_by_index(begin = var_576_begin_0, end = var_576_end_0, end_mask = var_576_end_mask_0, x = var_395_cast_fp16)[name = tensor("op_576_cast_fp16")]; tensor var_583_begin_0 = const()[name = tensor("op_583_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_583_end_0 = const()[name = tensor("op_583_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_583_end_mask_0 = const()[name = tensor("op_583_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_583_cast_fp16 = slice_by_index(begin = var_583_begin_0, end = var_583_end_0, end_mask = var_583_end_mask_0, x = var_395_cast_fp16)[name = tensor("op_583_cast_fp16")]; tensor var_590_begin_0 = const()[name = tensor("op_590_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_590_end_0 = const()[name = tensor("op_590_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_590_end_mask_0 = const()[name = tensor("op_590_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_590_cast_fp16 = slice_by_index(begin = var_590_begin_0, end = var_590_end_0, end_mask = var_590_end_mask_0, x = var_395_cast_fp16)[name = tensor("op_590_cast_fp16")]; tensor var_597_begin_0 = const()[name = tensor("op_597_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_597_end_0 = const()[name = tensor("op_597_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_597_end_mask_0 = const()[name = tensor("op_597_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_597_cast_fp16 = slice_by_index(begin = var_597_begin_0, end = var_597_end_0, end_mask = var_597_end_mask_0, x = var_395_cast_fp16)[name = tensor("op_597_cast_fp16")]; tensor var_604_begin_0 = const()[name = tensor("op_604_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_604_end_0 = const()[name = tensor("op_604_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_604_end_mask_0 = const()[name = tensor("op_604_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_604_cast_fp16 = slice_by_index(begin = var_604_begin_0, end = var_604_end_0, end_mask = var_604_end_mask_0, x = var_399_cast_fp16)[name = tensor("op_604_cast_fp16")]; tensor var_611_begin_0 = const()[name = tensor("op_611_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_611_end_0 = const()[name = tensor("op_611_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_611_end_mask_0 = const()[name = tensor("op_611_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_611_cast_fp16 = slice_by_index(begin = var_611_begin_0, end = var_611_end_0, end_mask = var_611_end_mask_0, x = var_399_cast_fp16)[name = tensor("op_611_cast_fp16")]; tensor var_618_begin_0 = const()[name = tensor("op_618_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_618_end_0 = const()[name = tensor("op_618_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_618_end_mask_0 = const()[name = tensor("op_618_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_618_cast_fp16 = slice_by_index(begin = var_618_begin_0, end = var_618_end_0, end_mask = var_618_end_mask_0, x = var_399_cast_fp16)[name = tensor("op_618_cast_fp16")]; tensor var_625_begin_0 = const()[name = tensor("op_625_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_625_end_0 = const()[name = tensor("op_625_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_625_end_mask_0 = const()[name = tensor("op_625_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_625_cast_fp16 = slice_by_index(begin = var_625_begin_0, end = var_625_end_0, end_mask = var_625_end_mask_0, x = var_399_cast_fp16)[name = tensor("op_625_cast_fp16")]; tensor var_632_begin_0 = const()[name = tensor("op_632_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_632_end_0 = const()[name = tensor("op_632_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_632_end_mask_0 = const()[name = tensor("op_632_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_632_cast_fp16 = slice_by_index(begin = var_632_begin_0, end = var_632_end_0, end_mask = var_632_end_mask_0, x = var_403_cast_fp16)[name = tensor("op_632_cast_fp16")]; tensor var_639_begin_0 = const()[name = tensor("op_639_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_639_end_0 = const()[name = tensor("op_639_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_639_end_mask_0 = const()[name = tensor("op_639_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_639_cast_fp16 = slice_by_index(begin = var_639_begin_0, end = var_639_end_0, end_mask = var_639_end_mask_0, x = var_403_cast_fp16)[name = tensor("op_639_cast_fp16")]; tensor var_646_begin_0 = const()[name = tensor("op_646_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_646_end_0 = const()[name = tensor("op_646_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_646_end_mask_0 = const()[name = tensor("op_646_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_646_cast_fp16 = slice_by_index(begin = var_646_begin_0, end = var_646_end_0, end_mask = var_646_end_mask_0, x = var_403_cast_fp16)[name = tensor("op_646_cast_fp16")]; tensor var_653_begin_0 = const()[name = tensor("op_653_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_653_end_0 = const()[name = tensor("op_653_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_653_end_mask_0 = const()[name = tensor("op_653_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_653_cast_fp16 = slice_by_index(begin = var_653_begin_0, end = var_653_end_0, end_mask = var_653_end_mask_0, x = var_403_cast_fp16)[name = tensor("op_653_cast_fp16")]; tensor var_660_begin_0 = const()[name = tensor("op_660_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_660_end_0 = const()[name = tensor("op_660_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_660_end_mask_0 = const()[name = tensor("op_660_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_660_cast_fp16 = slice_by_index(begin = var_660_begin_0, end = var_660_end_0, end_mask = var_660_end_mask_0, x = var_407_cast_fp16)[name = tensor("op_660_cast_fp16")]; tensor var_667_begin_0 = const()[name = tensor("op_667_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_667_end_0 = const()[name = tensor("op_667_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_667_end_mask_0 = const()[name = tensor("op_667_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_667_cast_fp16 = slice_by_index(begin = var_667_begin_0, end = var_667_end_0, end_mask = var_667_end_mask_0, x = var_407_cast_fp16)[name = tensor("op_667_cast_fp16")]; tensor var_674_begin_0 = const()[name = tensor("op_674_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_674_end_0 = const()[name = tensor("op_674_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_674_end_mask_0 = const()[name = tensor("op_674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_674_cast_fp16 = slice_by_index(begin = var_674_begin_0, end = var_674_end_0, end_mask = var_674_end_mask_0, x = var_407_cast_fp16)[name = tensor("op_674_cast_fp16")]; tensor var_681_begin_0 = const()[name = tensor("op_681_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_681_end_0 = const()[name = tensor("op_681_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_681_end_mask_0 = const()[name = tensor("op_681_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_681_cast_fp16 = slice_by_index(begin = var_681_begin_0, end = var_681_end_0, end_mask = var_681_end_mask_0, x = var_407_cast_fp16)[name = tensor("op_681_cast_fp16")]; tensor var_688_begin_0 = const()[name = tensor("op_688_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_688_end_0 = const()[name = tensor("op_688_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_688_end_mask_0 = const()[name = tensor("op_688_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_688_cast_fp16 = slice_by_index(begin = var_688_begin_0, end = var_688_end_0, end_mask = var_688_end_mask_0, x = var_411_cast_fp16)[name = tensor("op_688_cast_fp16")]; tensor var_695_begin_0 = const()[name = tensor("op_695_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_695_end_0 = const()[name = tensor("op_695_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_695_end_mask_0 = const()[name = tensor("op_695_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_695_cast_fp16 = slice_by_index(begin = var_695_begin_0, end = var_695_end_0, end_mask = var_695_end_mask_0, x = var_411_cast_fp16)[name = tensor("op_695_cast_fp16")]; tensor var_702_begin_0 = const()[name = tensor("op_702_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_702_end_0 = const()[name = tensor("op_702_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_702_end_mask_0 = const()[name = tensor("op_702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_702_cast_fp16 = slice_by_index(begin = var_702_begin_0, end = var_702_end_0, end_mask = var_702_end_mask_0, x = var_411_cast_fp16)[name = tensor("op_702_cast_fp16")]; tensor var_709_begin_0 = const()[name = tensor("op_709_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_709_end_0 = const()[name = tensor("op_709_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_709_end_mask_0 = const()[name = tensor("op_709_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_709_cast_fp16 = slice_by_index(begin = var_709_begin_0, end = var_709_end_0, end_mask = var_709_end_mask_0, x = var_411_cast_fp16)[name = tensor("op_709_cast_fp16")]; tensor var_716_begin_0 = const()[name = tensor("op_716_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_716_end_0 = const()[name = tensor("op_716_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_716_end_mask_0 = const()[name = tensor("op_716_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_716_cast_fp16 = slice_by_index(begin = var_716_begin_0, end = var_716_end_0, end_mask = var_716_end_mask_0, x = var_415_cast_fp16)[name = tensor("op_716_cast_fp16")]; tensor var_723_begin_0 = const()[name = tensor("op_723_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_723_end_0 = const()[name = tensor("op_723_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_723_end_mask_0 = const()[name = tensor("op_723_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_723_cast_fp16 = slice_by_index(begin = var_723_begin_0, end = var_723_end_0, end_mask = var_723_end_mask_0, x = var_415_cast_fp16)[name = tensor("op_723_cast_fp16")]; tensor var_730_begin_0 = const()[name = tensor("op_730_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_730_end_0 = const()[name = tensor("op_730_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_730_end_mask_0 = const()[name = tensor("op_730_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_730_cast_fp16 = slice_by_index(begin = var_730_begin_0, end = var_730_end_0, end_mask = var_730_end_mask_0, x = var_415_cast_fp16)[name = tensor("op_730_cast_fp16")]; tensor var_737_begin_0 = const()[name = tensor("op_737_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_737_end_0 = const()[name = tensor("op_737_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_737_end_mask_0 = const()[name = tensor("op_737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_737_cast_fp16 = slice_by_index(begin = var_737_begin_0, end = var_737_end_0, end_mask = var_737_end_mask_0, x = var_415_cast_fp16)[name = tensor("op_737_cast_fp16")]; tensor var_744_begin_0 = const()[name = tensor("op_744_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_744_end_0 = const()[name = tensor("op_744_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_744_end_mask_0 = const()[name = tensor("op_744_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_744_cast_fp16 = slice_by_index(begin = var_744_begin_0, end = var_744_end_0, end_mask = var_744_end_mask_0, x = var_419_cast_fp16)[name = tensor("op_744_cast_fp16")]; tensor var_751_begin_0 = const()[name = tensor("op_751_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_751_end_0 = const()[name = tensor("op_751_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_751_end_mask_0 = const()[name = tensor("op_751_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_751_cast_fp16 = slice_by_index(begin = var_751_begin_0, end = var_751_end_0, end_mask = var_751_end_mask_0, x = var_419_cast_fp16)[name = tensor("op_751_cast_fp16")]; tensor var_758_begin_0 = const()[name = tensor("op_758_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_758_end_0 = const()[name = tensor("op_758_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_758_end_mask_0 = const()[name = tensor("op_758_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_758_cast_fp16 = slice_by_index(begin = var_758_begin_0, end = var_758_end_0, end_mask = var_758_end_mask_0, x = var_419_cast_fp16)[name = tensor("op_758_cast_fp16")]; tensor var_765_begin_0 = const()[name = tensor("op_765_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_765_end_0 = const()[name = tensor("op_765_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_765_end_mask_0 = const()[name = tensor("op_765_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_765_cast_fp16 = slice_by_index(begin = var_765_begin_0, end = var_765_end_0, end_mask = var_765_end_mask_0, x = var_419_cast_fp16)[name = tensor("op_765_cast_fp16")]; tensor var_772_begin_0 = const()[name = tensor("op_772_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_772_end_0 = const()[name = tensor("op_772_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_772_end_mask_0 = const()[name = tensor("op_772_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_772_cast_fp16 = slice_by_index(begin = var_772_begin_0, end = var_772_end_0, end_mask = var_772_end_mask_0, x = var_423_cast_fp16)[name = tensor("op_772_cast_fp16")]; tensor var_779_begin_0 = const()[name = tensor("op_779_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_779_end_0 = const()[name = tensor("op_779_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_779_end_mask_0 = const()[name = tensor("op_779_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_779_cast_fp16 = slice_by_index(begin = var_779_begin_0, end = var_779_end_0, end_mask = var_779_end_mask_0, x = var_423_cast_fp16)[name = tensor("op_779_cast_fp16")]; tensor var_786_begin_0 = const()[name = tensor("op_786_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_786_end_0 = const()[name = tensor("op_786_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_786_end_mask_0 = const()[name = tensor("op_786_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_786_cast_fp16 = slice_by_index(begin = var_786_begin_0, end = var_786_end_0, end_mask = var_786_end_mask_0, x = var_423_cast_fp16)[name = tensor("op_786_cast_fp16")]; tensor var_793_begin_0 = const()[name = tensor("op_793_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_793_end_0 = const()[name = tensor("op_793_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_793_end_mask_0 = const()[name = tensor("op_793_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_793_cast_fp16 = slice_by_index(begin = var_793_begin_0, end = var_793_end_0, end_mask = var_793_end_mask_0, x = var_423_cast_fp16)[name = tensor("op_793_cast_fp16")]; tensor var_800_begin_0 = const()[name = tensor("op_800_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_800_end_0 = const()[name = tensor("op_800_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_800_end_mask_0 = const()[name = tensor("op_800_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_800_cast_fp16 = slice_by_index(begin = var_800_begin_0, end = var_800_end_0, end_mask = var_800_end_mask_0, x = var_427_cast_fp16)[name = tensor("op_800_cast_fp16")]; tensor var_807_begin_0 = const()[name = tensor("op_807_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_807_end_0 = const()[name = tensor("op_807_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_807_end_mask_0 = const()[name = tensor("op_807_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_807_cast_fp16 = slice_by_index(begin = var_807_begin_0, end = var_807_end_0, end_mask = var_807_end_mask_0, x = var_427_cast_fp16)[name = tensor("op_807_cast_fp16")]; tensor var_814_begin_0 = const()[name = tensor("op_814_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_814_end_0 = const()[name = tensor("op_814_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_814_end_mask_0 = const()[name = tensor("op_814_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_814_cast_fp16 = slice_by_index(begin = var_814_begin_0, end = var_814_end_0, end_mask = var_814_end_mask_0, x = var_427_cast_fp16)[name = tensor("op_814_cast_fp16")]; tensor var_821_begin_0 = const()[name = tensor("op_821_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_821_end_0 = const()[name = tensor("op_821_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_821_end_mask_0 = const()[name = tensor("op_821_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_821_cast_fp16 = slice_by_index(begin = var_821_begin_0, end = var_821_end_0, end_mask = var_821_end_mask_0, x = var_427_cast_fp16)[name = tensor("op_821_cast_fp16")]; tensor var_828_begin_0 = const()[name = tensor("op_828_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_828_end_0 = const()[name = tensor("op_828_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_828_end_mask_0 = const()[name = tensor("op_828_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_828_cast_fp16 = slice_by_index(begin = var_828_begin_0, end = var_828_end_0, end_mask = var_828_end_mask_0, x = var_431_cast_fp16)[name = tensor("op_828_cast_fp16")]; tensor var_835_begin_0 = const()[name = tensor("op_835_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_835_end_0 = const()[name = tensor("op_835_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_835_end_mask_0 = const()[name = tensor("op_835_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_835_cast_fp16 = slice_by_index(begin = var_835_begin_0, end = var_835_end_0, end_mask = var_835_end_mask_0, x = var_431_cast_fp16)[name = tensor("op_835_cast_fp16")]; tensor var_842_begin_0 = const()[name = tensor("op_842_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_842_end_0 = const()[name = tensor("op_842_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_842_end_mask_0 = const()[name = tensor("op_842_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_842_cast_fp16 = slice_by_index(begin = var_842_begin_0, end = var_842_end_0, end_mask = var_842_end_mask_0, x = var_431_cast_fp16)[name = tensor("op_842_cast_fp16")]; tensor var_849_begin_0 = const()[name = tensor("op_849_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_849_end_0 = const()[name = tensor("op_849_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_849_end_mask_0 = const()[name = tensor("op_849_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_849_cast_fp16 = slice_by_index(begin = var_849_begin_0, end = var_849_end_0, end_mask = var_849_end_mask_0, x = var_431_cast_fp16)[name = tensor("op_849_cast_fp16")]; tensor var_856_begin_0 = const()[name = tensor("op_856_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_856_end_0 = const()[name = tensor("op_856_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_856_end_mask_0 = const()[name = tensor("op_856_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_856_cast_fp16 = slice_by_index(begin = var_856_begin_0, end = var_856_end_0, end_mask = var_856_end_mask_0, x = var_435_cast_fp16)[name = tensor("op_856_cast_fp16")]; tensor var_863_begin_0 = const()[name = tensor("op_863_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_863_end_0 = const()[name = tensor("op_863_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_863_end_mask_0 = const()[name = tensor("op_863_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_863_cast_fp16 = slice_by_index(begin = var_863_begin_0, end = var_863_end_0, end_mask = var_863_end_mask_0, x = var_435_cast_fp16)[name = tensor("op_863_cast_fp16")]; tensor var_870_begin_0 = const()[name = tensor("op_870_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_870_end_0 = const()[name = tensor("op_870_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_870_end_mask_0 = const()[name = tensor("op_870_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_870_cast_fp16 = slice_by_index(begin = var_870_begin_0, end = var_870_end_0, end_mask = var_870_end_mask_0, x = var_435_cast_fp16)[name = tensor("op_870_cast_fp16")]; tensor var_877_begin_0 = const()[name = tensor("op_877_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_877_end_0 = const()[name = tensor("op_877_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_877_end_mask_0 = const()[name = tensor("op_877_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_877_cast_fp16 = slice_by_index(begin = var_877_begin_0, end = var_877_end_0, end_mask = var_877_end_mask_0, x = var_435_cast_fp16)[name = tensor("op_877_cast_fp16")]; tensor var_884_begin_0 = const()[name = tensor("op_884_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_884_end_0 = const()[name = tensor("op_884_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_884_end_mask_0 = const()[name = tensor("op_884_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_884_cast_fp16 = slice_by_index(begin = var_884_begin_0, end = var_884_end_0, end_mask = var_884_end_mask_0, x = var_439_cast_fp16)[name = tensor("op_884_cast_fp16")]; tensor var_891_begin_0 = const()[name = tensor("op_891_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_891_end_0 = const()[name = tensor("op_891_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_891_end_mask_0 = const()[name = tensor("op_891_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_891_cast_fp16 = slice_by_index(begin = var_891_begin_0, end = var_891_end_0, end_mask = var_891_end_mask_0, x = var_439_cast_fp16)[name = tensor("op_891_cast_fp16")]; tensor var_898_begin_0 = const()[name = tensor("op_898_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_898_end_0 = const()[name = tensor("op_898_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_898_end_mask_0 = const()[name = tensor("op_898_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_898_cast_fp16 = slice_by_index(begin = var_898_begin_0, end = var_898_end_0, end_mask = var_898_end_mask_0, x = var_439_cast_fp16)[name = tensor("op_898_cast_fp16")]; tensor var_905_begin_0 = const()[name = tensor("op_905_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_905_end_0 = const()[name = tensor("op_905_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_905_end_mask_0 = const()[name = tensor("op_905_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_905_cast_fp16 = slice_by_index(begin = var_905_begin_0, end = var_905_end_0, end_mask = var_905_end_mask_0, x = var_439_cast_fp16)[name = tensor("op_905_cast_fp16")]; tensor var_912_begin_0 = const()[name = tensor("op_912_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_912_end_0 = const()[name = tensor("op_912_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_912_end_mask_0 = const()[name = tensor("op_912_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_912_cast_fp16 = slice_by_index(begin = var_912_begin_0, end = var_912_end_0, end_mask = var_912_end_mask_0, x = var_443_cast_fp16)[name = tensor("op_912_cast_fp16")]; tensor var_919_begin_0 = const()[name = tensor("op_919_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_919_end_0 = const()[name = tensor("op_919_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_919_end_mask_0 = const()[name = tensor("op_919_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_919_cast_fp16 = slice_by_index(begin = var_919_begin_0, end = var_919_end_0, end_mask = var_919_end_mask_0, x = var_443_cast_fp16)[name = tensor("op_919_cast_fp16")]; tensor var_926_begin_0 = const()[name = tensor("op_926_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_926_end_0 = const()[name = tensor("op_926_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_926_end_mask_0 = const()[name = tensor("op_926_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_926_cast_fp16 = slice_by_index(begin = var_926_begin_0, end = var_926_end_0, end_mask = var_926_end_mask_0, x = var_443_cast_fp16)[name = tensor("op_926_cast_fp16")]; tensor var_933_begin_0 = const()[name = tensor("op_933_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_933_end_0 = const()[name = tensor("op_933_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_933_end_mask_0 = const()[name = tensor("op_933_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_933_cast_fp16 = slice_by_index(begin = var_933_begin_0, end = var_933_end_0, end_mask = var_933_end_mask_0, x = var_443_cast_fp16)[name = tensor("op_933_cast_fp16")]; tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = var_447_cast_fp16)[name = tensor("op_940_cast_fp16")]; tensor var_947_begin_0 = const()[name = tensor("op_947_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_947_end_0 = const()[name = tensor("op_947_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_947_end_mask_0 = const()[name = tensor("op_947_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_947_cast_fp16 = slice_by_index(begin = var_947_begin_0, end = var_947_end_0, end_mask = var_947_end_mask_0, x = var_447_cast_fp16)[name = tensor("op_947_cast_fp16")]; tensor var_954_begin_0 = const()[name = tensor("op_954_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_954_end_0 = const()[name = tensor("op_954_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_954_end_mask_0 = const()[name = tensor("op_954_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_954_cast_fp16 = slice_by_index(begin = var_954_begin_0, end = var_954_end_0, end_mask = var_954_end_mask_0, x = var_447_cast_fp16)[name = tensor("op_954_cast_fp16")]; tensor var_961_begin_0 = const()[name = tensor("op_961_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_961_end_0 = const()[name = tensor("op_961_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_961_end_mask_0 = const()[name = tensor("op_961_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_961_cast_fp16 = slice_by_index(begin = var_961_begin_0, end = var_961_end_0, end_mask = var_961_end_mask_0, x = var_447_cast_fp16)[name = tensor("op_961_cast_fp16")]; tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = var_451_cast_fp16)[name = tensor("op_968_cast_fp16")]; tensor var_975_begin_0 = const()[name = tensor("op_975_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_975_end_0 = const()[name = tensor("op_975_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_975_end_mask_0 = const()[name = tensor("op_975_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_975_cast_fp16 = slice_by_index(begin = var_975_begin_0, end = var_975_end_0, end_mask = var_975_end_mask_0, x = var_451_cast_fp16)[name = tensor("op_975_cast_fp16")]; tensor var_982_begin_0 = const()[name = tensor("op_982_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_982_end_0 = const()[name = tensor("op_982_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_982_end_mask_0 = const()[name = tensor("op_982_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_982_cast_fp16 = slice_by_index(begin = var_982_begin_0, end = var_982_end_0, end_mask = var_982_end_mask_0, x = var_451_cast_fp16)[name = tensor("op_982_cast_fp16")]; tensor var_989_begin_0 = const()[name = tensor("op_989_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_989_end_0 = const()[name = tensor("op_989_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_989_end_mask_0 = const()[name = tensor("op_989_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_989_cast_fp16 = slice_by_index(begin = var_989_begin_0, end = var_989_end_0, end_mask = var_989_end_mask_0, x = var_451_cast_fp16)[name = tensor("op_989_cast_fp16")]; tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = var_455_cast_fp16)[name = tensor("op_996_cast_fp16")]; tensor var_1003_begin_0 = const()[name = tensor("op_1003_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_1003_end_0 = const()[name = tensor("op_1003_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_1003_end_mask_0 = const()[name = tensor("op_1003_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = var_455_cast_fp16)[name = tensor("op_1003_cast_fp16")]; tensor var_1010_begin_0 = const()[name = tensor("op_1010_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_1010_end_0 = const()[name = tensor("op_1010_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_1010_end_mask_0 = const()[name = tensor("op_1010_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1010_cast_fp16 = slice_by_index(begin = var_1010_begin_0, end = var_1010_end_0, end_mask = var_1010_end_mask_0, x = var_455_cast_fp16)[name = tensor("op_1010_cast_fp16")]; tensor var_1017_begin_0 = const()[name = tensor("op_1017_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_1017_end_0 = const()[name = tensor("op_1017_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1017_end_mask_0 = const()[name = tensor("op_1017_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1017_cast_fp16 = slice_by_index(begin = var_1017_begin_0, end = var_1017_end_0, end_mask = var_1017_end_mask_0, x = var_455_cast_fp16)[name = tensor("op_1017_cast_fp16")]; tensor k_1_perm_0 = const()[name = tensor("k_1_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_1022_begin_0 = const()[name = tensor("op_1022_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1022_end_0 = const()[name = tensor("op_1022_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_1022_end_mask_0 = const()[name = tensor("op_1022_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor("transpose_31")]; tensor var_1022_cast_fp16 = slice_by_index(begin = var_1022_begin_0, end = var_1022_end_0, end_mask = var_1022_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1022_cast_fp16")]; tensor var_1026_begin_0 = const()[name = tensor("op_1026_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_1026_end_0 = const()[name = tensor("op_1026_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_1026_end_mask_0 = const()[name = tensor("op_1026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1026_cast_fp16 = slice_by_index(begin = var_1026_begin_0, end = var_1026_end_0, end_mask = var_1026_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1026_cast_fp16")]; tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1030_cast_fp16")]; tensor var_1034_begin_0 = const()[name = tensor("op_1034_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_1034_end_0 = const()[name = tensor("op_1034_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_1034_end_mask_0 = const()[name = tensor("op_1034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1034_cast_fp16 = slice_by_index(begin = var_1034_begin_0, end = var_1034_end_0, end_mask = var_1034_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1034_cast_fp16")]; tensor var_1038_begin_0 = const()[name = tensor("op_1038_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_1038_end_0 = const()[name = tensor("op_1038_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_1038_end_mask_0 = const()[name = tensor("op_1038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1038_cast_fp16 = slice_by_index(begin = var_1038_begin_0, end = var_1038_end_0, end_mask = var_1038_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1038_cast_fp16")]; tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1042_cast_fp16")]; tensor var_1046_begin_0 = const()[name = tensor("op_1046_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_1046_end_0 = const()[name = tensor("op_1046_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_1046_end_mask_0 = const()[name = tensor("op_1046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1046_cast_fp16 = slice_by_index(begin = var_1046_begin_0, end = var_1046_end_0, end_mask = var_1046_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1046_cast_fp16")]; tensor var_1050_begin_0 = const()[name = tensor("op_1050_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_1050_end_0 = const()[name = tensor("op_1050_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_1050_end_mask_0 = const()[name = tensor("op_1050_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1050_cast_fp16 = slice_by_index(begin = var_1050_begin_0, end = var_1050_end_0, end_mask = var_1050_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1050_cast_fp16")]; tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1054_cast_fp16")]; tensor var_1058_begin_0 = const()[name = tensor("op_1058_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_1058_end_0 = const()[name = tensor("op_1058_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_1058_end_mask_0 = const()[name = tensor("op_1058_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1058_cast_fp16 = slice_by_index(begin = var_1058_begin_0, end = var_1058_end_0, end_mask = var_1058_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1058_cast_fp16")]; tensor var_1062_begin_0 = const()[name = tensor("op_1062_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_1062_end_0 = const()[name = tensor("op_1062_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_1062_end_mask_0 = const()[name = tensor("op_1062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1062_cast_fp16 = slice_by_index(begin = var_1062_begin_0, end = var_1062_end_0, end_mask = var_1062_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1062_cast_fp16")]; tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1066_cast_fp16")]; tensor var_1070_begin_0 = const()[name = tensor("op_1070_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_1070_end_0 = const()[name = tensor("op_1070_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_1070_end_mask_0 = const()[name = tensor("op_1070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1070_cast_fp16 = slice_by_index(begin = var_1070_begin_0, end = var_1070_end_0, end_mask = var_1070_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1070_cast_fp16")]; tensor var_1074_begin_0 = const()[name = tensor("op_1074_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_1074_end_0 = const()[name = tensor("op_1074_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_1074_end_mask_0 = const()[name = tensor("op_1074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1074_cast_fp16 = slice_by_index(begin = var_1074_begin_0, end = var_1074_end_0, end_mask = var_1074_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1074_cast_fp16")]; tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1078_cast_fp16")]; tensor var_1082_begin_0 = const()[name = tensor("op_1082_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_1082_end_0 = const()[name = tensor("op_1082_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_1082_end_mask_0 = const()[name = tensor("op_1082_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1082_cast_fp16 = slice_by_index(begin = var_1082_begin_0, end = var_1082_end_0, end_mask = var_1082_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1082_cast_fp16")]; tensor var_1086_begin_0 = const()[name = tensor("op_1086_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_1086_end_0 = const()[name = tensor("op_1086_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_1086_end_mask_0 = const()[name = tensor("op_1086_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1086_cast_fp16 = slice_by_index(begin = var_1086_begin_0, end = var_1086_end_0, end_mask = var_1086_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1086_cast_fp16")]; tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1090_cast_fp16")]; tensor var_1094_begin_0 = const()[name = tensor("op_1094_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_1094_end_0 = const()[name = tensor("op_1094_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_1094_end_mask_0 = const()[name = tensor("op_1094_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1094_cast_fp16 = slice_by_index(begin = var_1094_begin_0, end = var_1094_end_0, end_mask = var_1094_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1094_cast_fp16")]; tensor var_1098_begin_0 = const()[name = tensor("op_1098_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_1098_end_0 = const()[name = tensor("op_1098_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_1098_end_mask_0 = const()[name = tensor("op_1098_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_1098_cast_fp16 = slice_by_index(begin = var_1098_begin_0, end = var_1098_end_0, end_mask = var_1098_end_mask_0, x = k_1_cast_fp16)[name = tensor("op_1098_cast_fp16")]; tensor var_1100_begin_0 = const()[name = tensor("op_1100_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1100_end_0 = const()[name = tensor("op_1100_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1100_end_mask_0 = const()[name = tensor("op_1100_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1100_cast_fp16 = slice_by_index(begin = var_1100_begin_0, end = var_1100_end_0, end_mask = var_1100_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1100_cast_fp16")]; tensor var_1104_begin_0 = const()[name = tensor("op_1104_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1104_end_0 = const()[name = tensor("op_1104_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1104_end_mask_0 = const()[name = tensor("op_1104_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1104_cast_fp16 = slice_by_index(begin = var_1104_begin_0, end = var_1104_end_0, end_mask = var_1104_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1104_cast_fp16")]; tensor var_1108_begin_0 = const()[name = tensor("op_1108_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1108_end_0 = const()[name = tensor("op_1108_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1108_end_mask_0 = const()[name = tensor("op_1108_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1108_cast_fp16 = slice_by_index(begin = var_1108_begin_0, end = var_1108_end_0, end_mask = var_1108_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1108_cast_fp16")]; tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1112_cast_fp16")]; tensor var_1116_begin_0 = const()[name = tensor("op_1116_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1116_end_0 = const()[name = tensor("op_1116_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1116_end_mask_0 = const()[name = tensor("op_1116_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1116_cast_fp16 = slice_by_index(begin = var_1116_begin_0, end = var_1116_end_0, end_mask = var_1116_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1116_cast_fp16")]; tensor var_1120_begin_0 = const()[name = tensor("op_1120_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1120_end_0 = const()[name = tensor("op_1120_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_1120_end_mask_0 = const()[name = tensor("op_1120_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1120_cast_fp16 = slice_by_index(begin = var_1120_begin_0, end = var_1120_end_0, end_mask = var_1120_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1120_cast_fp16")]; tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1124_cast_fp16")]; tensor var_1128_begin_0 = const()[name = tensor("op_1128_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_1128_end_0 = const()[name = tensor("op_1128_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_1128_end_mask_0 = const()[name = tensor("op_1128_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1128_cast_fp16 = slice_by_index(begin = var_1128_begin_0, end = var_1128_end_0, end_mask = var_1128_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1128_cast_fp16")]; tensor var_1132_begin_0 = const()[name = tensor("op_1132_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_1132_end_0 = const()[name = tensor("op_1132_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_1132_end_mask_0 = const()[name = tensor("op_1132_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1132_cast_fp16 = slice_by_index(begin = var_1132_begin_0, end = var_1132_end_0, end_mask = var_1132_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1132_cast_fp16")]; tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1136_cast_fp16")]; tensor var_1140_begin_0 = const()[name = tensor("op_1140_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_1140_end_0 = const()[name = tensor("op_1140_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_1140_end_mask_0 = const()[name = tensor("op_1140_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1140_cast_fp16 = slice_by_index(begin = var_1140_begin_0, end = var_1140_end_0, end_mask = var_1140_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1140_cast_fp16")]; tensor var_1144_begin_0 = const()[name = tensor("op_1144_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_1144_end_0 = const()[name = tensor("op_1144_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_1144_end_mask_0 = const()[name = tensor("op_1144_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1144_cast_fp16 = slice_by_index(begin = var_1144_begin_0, end = var_1144_end_0, end_mask = var_1144_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1144_cast_fp16")]; tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1148_cast_fp16")]; tensor var_1152_begin_0 = const()[name = tensor("op_1152_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_1152_end_0 = const()[name = tensor("op_1152_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_1152_end_mask_0 = const()[name = tensor("op_1152_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1152_cast_fp16 = slice_by_index(begin = var_1152_begin_0, end = var_1152_end_0, end_mask = var_1152_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1152_cast_fp16")]; tensor var_1156_begin_0 = const()[name = tensor("op_1156_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_1156_end_0 = const()[name = tensor("op_1156_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_1156_end_mask_0 = const()[name = tensor("op_1156_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1156_cast_fp16 = slice_by_index(begin = var_1156_begin_0, end = var_1156_end_0, end_mask = var_1156_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1156_cast_fp16")]; tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1160_cast_fp16")]; tensor var_1164_begin_0 = const()[name = tensor("op_1164_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_1164_end_0 = const()[name = tensor("op_1164_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_1164_end_mask_0 = const()[name = tensor("op_1164_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1164_cast_fp16 = slice_by_index(begin = var_1164_begin_0, end = var_1164_end_0, end_mask = var_1164_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1164_cast_fp16")]; tensor var_1168_begin_0 = const()[name = tensor("op_1168_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_1168_end_0 = const()[name = tensor("op_1168_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_1168_end_mask_0 = const()[name = tensor("op_1168_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1168_cast_fp16 = slice_by_index(begin = var_1168_begin_0, end = var_1168_end_0, end_mask = var_1168_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1168_cast_fp16")]; tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1172_cast_fp16")]; tensor var_1176_begin_0 = const()[name = tensor("op_1176_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_1176_end_0 = const()[name = tensor("op_1176_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_1176_end_mask_0 = const()[name = tensor("op_1176_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1176_cast_fp16 = slice_by_index(begin = var_1176_begin_0, end = var_1176_end_0, end_mask = var_1176_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1176_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1_equation_0, values = (var_1022_cast_fp16, var_464_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3_equation_0, values = (var_1022_cast_fp16, var_471_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5_equation_0, values = (var_1022_cast_fp16, var_478_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5_cast_fp16")]; tensor _SplitHeadsQ__mh_w_7_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_7_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_7_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_7_equation_0, values = (var_1022_cast_fp16, var_485_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_7_cast_fp16")]; tensor _SplitHeadsQ__mh_w_9_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_9_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_9_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_9_equation_0, values = (var_1026_cast_fp16, var_492_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_9_cast_fp16")]; tensor _SplitHeadsQ__mh_w_11_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_11_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_11_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_11_equation_0, values = (var_1026_cast_fp16, var_499_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_11_cast_fp16")]; tensor _SplitHeadsQ__mh_w_13_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_13_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_13_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_13_equation_0, values = (var_1026_cast_fp16, var_506_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_13_cast_fp16")]; tensor _SplitHeadsQ__mh_w_15_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_15_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_15_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_15_equation_0, values = (var_1026_cast_fp16, var_513_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_15_cast_fp16")]; tensor _SplitHeadsQ__mh_w_17_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_17_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_17_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_17_equation_0, values = (var_1030_cast_fp16, var_520_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_17_cast_fp16")]; tensor _SplitHeadsQ__mh_w_19_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_19_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_19_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_19_equation_0, values = (var_1030_cast_fp16, var_527_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_19_cast_fp16")]; tensor _SplitHeadsQ__mh_w_21_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_21_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_21_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_21_equation_0, values = (var_1030_cast_fp16, var_534_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_21_cast_fp16")]; tensor _SplitHeadsQ__mh_w_23_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_23_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_23_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_23_equation_0, values = (var_1030_cast_fp16, var_541_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_23_cast_fp16")]; tensor _SplitHeadsQ__mh_w_25_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_25_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_25_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_25_equation_0, values = (var_1034_cast_fp16, var_548_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_25_cast_fp16")]; tensor _SplitHeadsQ__mh_w_27_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_27_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_27_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_27_equation_0, values = (var_1034_cast_fp16, var_555_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_27_cast_fp16")]; tensor _SplitHeadsQ__mh_w_29_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_29_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_29_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_29_equation_0, values = (var_1034_cast_fp16, var_562_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_29_cast_fp16")]; tensor _SplitHeadsQ__mh_w_31_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_31_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_31_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_31_equation_0, values = (var_1034_cast_fp16, var_569_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_31_cast_fp16")]; tensor _SplitHeadsQ__mh_w_33_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_33_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_33_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_33_equation_0, values = (var_1038_cast_fp16, var_576_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_33_cast_fp16")]; tensor _SplitHeadsQ__mh_w_35_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_35_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_35_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_35_equation_0, values = (var_1038_cast_fp16, var_583_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_35_cast_fp16")]; tensor _SplitHeadsQ__mh_w_37_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_37_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_37_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_37_equation_0, values = (var_1038_cast_fp16, var_590_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_37_cast_fp16")]; tensor _SplitHeadsQ__mh_w_39_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_39_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_39_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_39_equation_0, values = (var_1038_cast_fp16, var_597_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_39_cast_fp16")]; tensor _SplitHeadsQ__mh_w_41_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_41_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_41_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_41_equation_0, values = (var_1042_cast_fp16, var_604_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_41_cast_fp16")]; tensor _SplitHeadsQ__mh_w_43_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_43_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_43_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_43_equation_0, values = (var_1042_cast_fp16, var_611_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_43_cast_fp16")]; tensor _SplitHeadsQ__mh_w_45_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_45_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_45_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_45_equation_0, values = (var_1042_cast_fp16, var_618_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_45_cast_fp16")]; tensor _SplitHeadsQ__mh_w_47_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_47_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_47_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_47_equation_0, values = (var_1042_cast_fp16, var_625_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_47_cast_fp16")]; tensor _SplitHeadsQ__mh_w_49_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_49_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_49_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_49_equation_0, values = (var_1046_cast_fp16, var_632_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_49_cast_fp16")]; tensor _SplitHeadsQ__mh_w_51_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_51_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_51_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_51_equation_0, values = (var_1046_cast_fp16, var_639_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_51_cast_fp16")]; tensor _SplitHeadsQ__mh_w_53_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_53_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_53_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_53_equation_0, values = (var_1046_cast_fp16, var_646_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_53_cast_fp16")]; tensor _SplitHeadsQ__mh_w_55_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_55_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_55_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_55_equation_0, values = (var_1046_cast_fp16, var_653_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_55_cast_fp16")]; tensor _SplitHeadsQ__mh_w_57_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_57_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_57_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_57_equation_0, values = (var_1050_cast_fp16, var_660_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_57_cast_fp16")]; tensor _SplitHeadsQ__mh_w_59_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_59_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_59_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_59_equation_0, values = (var_1050_cast_fp16, var_667_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_59_cast_fp16")]; tensor _SplitHeadsQ__mh_w_61_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_61_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_61_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_61_equation_0, values = (var_1050_cast_fp16, var_674_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_61_cast_fp16")]; tensor _SplitHeadsQ__mh_w_63_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_63_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_63_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_63_equation_0, values = (var_1050_cast_fp16, var_681_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_63_cast_fp16")]; tensor _SplitHeadsQ__mh_w_65_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_65_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_65_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_65_equation_0, values = (var_1054_cast_fp16, var_688_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_65_cast_fp16")]; tensor _SplitHeadsQ__mh_w_67_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_67_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_67_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_67_equation_0, values = (var_1054_cast_fp16, var_695_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_67_cast_fp16")]; tensor _SplitHeadsQ__mh_w_69_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_69_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_69_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_69_equation_0, values = (var_1054_cast_fp16, var_702_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_69_cast_fp16")]; tensor _SplitHeadsQ__mh_w_71_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_71_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_71_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_71_equation_0, values = (var_1054_cast_fp16, var_709_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_71_cast_fp16")]; tensor _SplitHeadsQ__mh_w_73_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_73_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_73_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_73_equation_0, values = (var_1058_cast_fp16, var_716_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_73_cast_fp16")]; tensor _SplitHeadsQ__mh_w_75_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_75_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_75_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_75_equation_0, values = (var_1058_cast_fp16, var_723_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_75_cast_fp16")]; tensor _SplitHeadsQ__mh_w_77_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_77_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_77_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_77_equation_0, values = (var_1058_cast_fp16, var_730_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_77_cast_fp16")]; tensor _SplitHeadsQ__mh_w_79_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_79_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_79_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_79_equation_0, values = (var_1058_cast_fp16, var_737_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_79_cast_fp16")]; tensor _SplitHeadsQ__mh_w_81_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_81_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_81_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_81_equation_0, values = (var_1062_cast_fp16, var_744_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_81_cast_fp16")]; tensor _SplitHeadsQ__mh_w_83_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_83_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_83_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_83_equation_0, values = (var_1062_cast_fp16, var_751_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_83_cast_fp16")]; tensor _SplitHeadsQ__mh_w_85_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_85_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_85_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_85_equation_0, values = (var_1062_cast_fp16, var_758_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_85_cast_fp16")]; tensor _SplitHeadsQ__mh_w_87_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_87_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_87_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_87_equation_0, values = (var_1062_cast_fp16, var_765_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_87_cast_fp16")]; tensor _SplitHeadsQ__mh_w_89_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_89_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_89_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_89_equation_0, values = (var_1066_cast_fp16, var_772_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_89_cast_fp16")]; tensor _SplitHeadsQ__mh_w_91_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_91_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_91_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_91_equation_0, values = (var_1066_cast_fp16, var_779_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_91_cast_fp16")]; tensor _SplitHeadsQ__mh_w_93_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_93_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_93_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_93_equation_0, values = (var_1066_cast_fp16, var_786_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_93_cast_fp16")]; tensor _SplitHeadsQ__mh_w_95_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_95_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_95_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_95_equation_0, values = (var_1066_cast_fp16, var_793_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_95_cast_fp16")]; tensor _SplitHeadsQ__mh_w_97_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_97_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_97_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_97_equation_0, values = (var_1070_cast_fp16, var_800_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_97_cast_fp16")]; tensor _SplitHeadsQ__mh_w_99_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_99_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_99_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_99_equation_0, values = (var_1070_cast_fp16, var_807_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_99_cast_fp16")]; tensor _SplitHeadsQ__mh_w_101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_101_equation_0, values = (var_1070_cast_fp16, var_814_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_103_equation_0, values = (var_1070_cast_fp16, var_821_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_105_equation_0, values = (var_1074_cast_fp16, var_828_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_107_equation_0, values = (var_1074_cast_fp16, var_835_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_109_equation_0, values = (var_1074_cast_fp16, var_842_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_111_equation_0, values = (var_1074_cast_fp16, var_849_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_113_equation_0, values = (var_1078_cast_fp16, var_856_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_115_equation_0, values = (var_1078_cast_fp16, var_863_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_117_equation_0, values = (var_1078_cast_fp16, var_870_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_119_equation_0, values = (var_1078_cast_fp16, var_877_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_121_equation_0, values = (var_1082_cast_fp16, var_884_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_123_equation_0, values = (var_1082_cast_fp16, var_891_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_125_equation_0, values = (var_1082_cast_fp16, var_898_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_127_equation_0, values = (var_1082_cast_fp16, var_905_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_129_equation_0, values = (var_1086_cast_fp16, var_912_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_131_equation_0, values = (var_1086_cast_fp16, var_919_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_133_equation_0, values = (var_1086_cast_fp16, var_926_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_135_equation_0, values = (var_1086_cast_fp16, var_933_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_137_equation_0, values = (var_1090_cast_fp16, var_940_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_139_equation_0, values = (var_1090_cast_fp16, var_947_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_141_equation_0, values = (var_1090_cast_fp16, var_954_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_143_equation_0, values = (var_1090_cast_fp16, var_961_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_143_cast_fp16")]; tensor _SplitHeadsQ__mh_w_145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_145_equation_0, values = (var_1094_cast_fp16, var_968_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_147_equation_0, values = (var_1094_cast_fp16, var_975_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_149_equation_0, values = (var_1094_cast_fp16, var_982_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_151_equation_0, values = (var_1094_cast_fp16, var_989_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_153_equation_0, values = (var_1098_cast_fp16, var_996_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_155_equation_0, values = (var_1098_cast_fp16, var_1003_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_157_equation_0, values = (var_1098_cast_fp16, var_1010_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_159_equation_0, values = (var_1098_cast_fp16, var_1017_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_159_cast_fp16")]; tensor var_1339_to_fp16 = const()[name = tensor("op_1339_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1_cast_fp16, y = var_1339_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; tensor var_1341_to_fp16 = const()[name = tensor("op_1341_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3_cast_fp16, y = var_1341_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; tensor var_1343_to_fp16 = const()[name = tensor("op_1343_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5_cast_fp16, y = var_1343_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; tensor var_1345_to_fp16 = const()[name = tensor("op_1345_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_7_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_7_cast_fp16, y = var_1345_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; tensor var_1347_to_fp16 = const()[name = tensor("op_1347_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_9_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_9_cast_fp16, y = var_1347_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; tensor var_1349_to_fp16 = const()[name = tensor("op_1349_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_11_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_11_cast_fp16, y = var_1349_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; tensor var_1351_to_fp16 = const()[name = tensor("op_1351_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_13_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_13_cast_fp16, y = var_1351_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; tensor var_1353_to_fp16 = const()[name = tensor("op_1353_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_15_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_15_cast_fp16, y = var_1353_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; tensor var_1355_to_fp16 = const()[name = tensor("op_1355_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_17_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_17_cast_fp16, y = var_1355_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; tensor var_1357_to_fp16 = const()[name = tensor("op_1357_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_19_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_19_cast_fp16, y = var_1357_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; tensor var_1359_to_fp16 = const()[name = tensor("op_1359_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_21_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_21_cast_fp16, y = var_1359_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; tensor var_1361_to_fp16 = const()[name = tensor("op_1361_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_23_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_23_cast_fp16, y = var_1361_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; tensor var_1363_to_fp16 = const()[name = tensor("op_1363_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_25_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_25_cast_fp16, y = var_1363_to_fp16)[name = tensor("aw_chunk_25_cast_fp16")]; tensor var_1365_to_fp16 = const()[name = tensor("op_1365_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_27_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_27_cast_fp16, y = var_1365_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; tensor var_1367_to_fp16 = const()[name = tensor("op_1367_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_29_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_29_cast_fp16, y = var_1367_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; tensor var_1369_to_fp16 = const()[name = tensor("op_1369_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_31_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_31_cast_fp16, y = var_1369_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; tensor var_1371_to_fp16 = const()[name = tensor("op_1371_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_33_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_33_cast_fp16, y = var_1371_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; tensor var_1373_to_fp16 = const()[name = tensor("op_1373_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_35_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_35_cast_fp16, y = var_1373_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; tensor var_1375_to_fp16 = const()[name = tensor("op_1375_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_37_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_37_cast_fp16, y = var_1375_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; tensor var_1377_to_fp16 = const()[name = tensor("op_1377_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_39_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_39_cast_fp16, y = var_1377_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; tensor var_1379_to_fp16 = const()[name = tensor("op_1379_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_41_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_41_cast_fp16, y = var_1379_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; tensor var_1381_to_fp16 = const()[name = tensor("op_1381_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_43_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_43_cast_fp16, y = var_1381_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; tensor var_1383_to_fp16 = const()[name = tensor("op_1383_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_45_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_45_cast_fp16, y = var_1383_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; tensor var_1385_to_fp16 = const()[name = tensor("op_1385_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_47_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_47_cast_fp16, y = var_1385_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; tensor var_1387_to_fp16 = const()[name = tensor("op_1387_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_49_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_49_cast_fp16, y = var_1387_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; tensor var_1389_to_fp16 = const()[name = tensor("op_1389_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_51_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_51_cast_fp16, y = var_1389_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; tensor var_1391_to_fp16 = const()[name = tensor("op_1391_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_53_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_53_cast_fp16, y = var_1391_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; tensor var_1393_to_fp16 = const()[name = tensor("op_1393_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_55_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_55_cast_fp16, y = var_1393_to_fp16)[name = tensor("aw_chunk_55_cast_fp16")]; tensor var_1395_to_fp16 = const()[name = tensor("op_1395_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_57_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_57_cast_fp16, y = var_1395_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; tensor var_1397_to_fp16 = const()[name = tensor("op_1397_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_59_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_59_cast_fp16, y = var_1397_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; tensor var_1399_to_fp16 = const()[name = tensor("op_1399_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_61_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_61_cast_fp16, y = var_1399_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; tensor var_1401_to_fp16 = const()[name = tensor("op_1401_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_63_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_63_cast_fp16, y = var_1401_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; tensor var_1403_to_fp16 = const()[name = tensor("op_1403_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_65_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_65_cast_fp16, y = var_1403_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; tensor var_1405_to_fp16 = const()[name = tensor("op_1405_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_67_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_67_cast_fp16, y = var_1405_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; tensor var_1407_to_fp16 = const()[name = tensor("op_1407_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_69_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_69_cast_fp16, y = var_1407_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; tensor var_1409_to_fp16 = const()[name = tensor("op_1409_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_71_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_71_cast_fp16, y = var_1409_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; tensor var_1411_to_fp16 = const()[name = tensor("op_1411_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_73_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_73_cast_fp16, y = var_1411_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; tensor var_1413_to_fp16 = const()[name = tensor("op_1413_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_75_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_75_cast_fp16, y = var_1413_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; tensor var_1415_to_fp16 = const()[name = tensor("op_1415_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_77_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_77_cast_fp16, y = var_1415_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; tensor var_1417_to_fp16 = const()[name = tensor("op_1417_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_79_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_79_cast_fp16, y = var_1417_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; tensor var_1419_to_fp16 = const()[name = tensor("op_1419_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_81_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_81_cast_fp16, y = var_1419_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; tensor var_1421_to_fp16 = const()[name = tensor("op_1421_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_83_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_83_cast_fp16, y = var_1421_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; tensor var_1423_to_fp16 = const()[name = tensor("op_1423_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_85_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_85_cast_fp16, y = var_1423_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; tensor var_1425_to_fp16 = const()[name = tensor("op_1425_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_87_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_87_cast_fp16, y = var_1425_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; tensor var_1427_to_fp16 = const()[name = tensor("op_1427_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_89_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_89_cast_fp16, y = var_1427_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; tensor var_1429_to_fp16 = const()[name = tensor("op_1429_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_91_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_91_cast_fp16, y = var_1429_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; tensor var_1431_to_fp16 = const()[name = tensor("op_1431_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_93_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_93_cast_fp16, y = var_1431_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; tensor var_1433_to_fp16 = const()[name = tensor("op_1433_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_95_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_95_cast_fp16, y = var_1433_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; tensor var_1435_to_fp16 = const()[name = tensor("op_1435_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_97_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_97_cast_fp16, y = var_1435_to_fp16)[name = tensor("aw_chunk_97_cast_fp16")]; tensor var_1437_to_fp16 = const()[name = tensor("op_1437_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_99_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_99_cast_fp16, y = var_1437_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; tensor var_1439_to_fp16 = const()[name = tensor("op_1439_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_101_cast_fp16, y = var_1439_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; tensor var_1441_to_fp16 = const()[name = tensor("op_1441_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_103_cast_fp16, y = var_1441_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; tensor var_1443_to_fp16 = const()[name = tensor("op_1443_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_105_cast_fp16, y = var_1443_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; tensor var_1445_to_fp16 = const()[name = tensor("op_1445_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_107_cast_fp16, y = var_1445_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; tensor var_1447_to_fp16 = const()[name = tensor("op_1447_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_109_cast_fp16, y = var_1447_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; tensor var_1449_to_fp16 = const()[name = tensor("op_1449_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_111_cast_fp16, y = var_1449_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; tensor var_1451_to_fp16 = const()[name = tensor("op_1451_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_113_cast_fp16, y = var_1451_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; tensor var_1453_to_fp16 = const()[name = tensor("op_1453_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_115_cast_fp16, y = var_1453_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; tensor var_1455_to_fp16 = const()[name = tensor("op_1455_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_117_cast_fp16, y = var_1455_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; tensor var_1457_to_fp16 = const()[name = tensor("op_1457_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_119_cast_fp16, y = var_1457_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; tensor var_1459_to_fp16 = const()[name = tensor("op_1459_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_121_cast_fp16, y = var_1459_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; tensor var_1461_to_fp16 = const()[name = tensor("op_1461_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_123_cast_fp16, y = var_1461_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; tensor var_1463_to_fp16 = const()[name = tensor("op_1463_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_125_cast_fp16, y = var_1463_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; tensor var_1465_to_fp16 = const()[name = tensor("op_1465_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_127_cast_fp16, y = var_1465_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; tensor var_1467_to_fp16 = const()[name = tensor("op_1467_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_129_cast_fp16, y = var_1467_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; tensor var_1469_to_fp16 = const()[name = tensor("op_1469_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_131_cast_fp16, y = var_1469_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; tensor var_1471_to_fp16 = const()[name = tensor("op_1471_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_133_cast_fp16, y = var_1471_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; tensor var_1473_to_fp16 = const()[name = tensor("op_1473_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_135_cast_fp16, y = var_1473_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; tensor var_1475_to_fp16 = const()[name = tensor("op_1475_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_137_cast_fp16, y = var_1475_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; tensor var_1477_to_fp16 = const()[name = tensor("op_1477_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_139_cast_fp16, y = var_1477_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; tensor var_1479_to_fp16 = const()[name = tensor("op_1479_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_141_cast_fp16, y = var_1479_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; tensor var_1481_to_fp16 = const()[name = tensor("op_1481_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_143_cast_fp16, y = var_1481_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; tensor var_1483_to_fp16 = const()[name = tensor("op_1483_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_145_cast_fp16, y = var_1483_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; tensor var_1485_to_fp16 = const()[name = tensor("op_1485_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_147_cast_fp16, y = var_1485_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; tensor var_1487_to_fp16 = const()[name = tensor("op_1487_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_149_cast_fp16, y = var_1487_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; tensor var_1489_to_fp16 = const()[name = tensor("op_1489_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_151_cast_fp16, y = var_1489_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; tensor var_1491_to_fp16 = const()[name = tensor("op_1491_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_153_cast_fp16, y = var_1491_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; tensor var_1493_to_fp16 = const()[name = tensor("op_1493_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_155_cast_fp16, y = var_1493_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; tensor var_1495_to_fp16 = const()[name = tensor("op_1495_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_157_cast_fp16, y = var_1495_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; tensor var_1497_to_fp16 = const()[name = tensor("op_1497_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_159_cast_fp16, y = var_1497_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; tensor var_1499_cast_fp16 = softmax(axis = var_297, x = aw_chunk_1_cast_fp16)[name = tensor("op_1499_cast_fp16")]; tensor var_1500_cast_fp16 = softmax(axis = var_297, x = aw_chunk_3_cast_fp16)[name = tensor("op_1500_cast_fp16")]; tensor var_1501_cast_fp16 = softmax(axis = var_297, x = aw_chunk_5_cast_fp16)[name = tensor("op_1501_cast_fp16")]; tensor var_1502_cast_fp16 = softmax(axis = var_297, x = aw_chunk_7_cast_fp16)[name = tensor("op_1502_cast_fp16")]; tensor var_1503_cast_fp16 = softmax(axis = var_297, x = aw_chunk_9_cast_fp16)[name = tensor("op_1503_cast_fp16")]; tensor var_1504_cast_fp16 = softmax(axis = var_297, x = aw_chunk_11_cast_fp16)[name = tensor("op_1504_cast_fp16")]; tensor var_1505_cast_fp16 = softmax(axis = var_297, x = aw_chunk_13_cast_fp16)[name = tensor("op_1505_cast_fp16")]; tensor var_1506_cast_fp16 = softmax(axis = var_297, x = aw_chunk_15_cast_fp16)[name = tensor("op_1506_cast_fp16")]; tensor var_1507_cast_fp16 = softmax(axis = var_297, x = aw_chunk_17_cast_fp16)[name = tensor("op_1507_cast_fp16")]; tensor var_1508_cast_fp16 = softmax(axis = var_297, x = aw_chunk_19_cast_fp16)[name = tensor("op_1508_cast_fp16")]; tensor var_1509_cast_fp16 = softmax(axis = var_297, x = aw_chunk_21_cast_fp16)[name = tensor("op_1509_cast_fp16")]; tensor var_1510_cast_fp16 = softmax(axis = var_297, x = aw_chunk_23_cast_fp16)[name = tensor("op_1510_cast_fp16")]; tensor var_1511_cast_fp16 = softmax(axis = var_297, x = aw_chunk_25_cast_fp16)[name = tensor("op_1511_cast_fp16")]; tensor var_1512_cast_fp16 = softmax(axis = var_297, x = aw_chunk_27_cast_fp16)[name = tensor("op_1512_cast_fp16")]; tensor var_1513_cast_fp16 = softmax(axis = var_297, x = aw_chunk_29_cast_fp16)[name = tensor("op_1513_cast_fp16")]; tensor var_1514_cast_fp16 = softmax(axis = var_297, x = aw_chunk_31_cast_fp16)[name = tensor("op_1514_cast_fp16")]; tensor var_1515_cast_fp16 = softmax(axis = var_297, x = aw_chunk_33_cast_fp16)[name = tensor("op_1515_cast_fp16")]; tensor var_1516_cast_fp16 = softmax(axis = var_297, x = aw_chunk_35_cast_fp16)[name = tensor("op_1516_cast_fp16")]; tensor var_1517_cast_fp16 = softmax(axis = var_297, x = aw_chunk_37_cast_fp16)[name = tensor("op_1517_cast_fp16")]; tensor var_1518_cast_fp16 = softmax(axis = var_297, x = aw_chunk_39_cast_fp16)[name = tensor("op_1518_cast_fp16")]; tensor var_1519_cast_fp16 = softmax(axis = var_297, x = aw_chunk_41_cast_fp16)[name = tensor("op_1519_cast_fp16")]; tensor var_1520_cast_fp16 = softmax(axis = var_297, x = aw_chunk_43_cast_fp16)[name = tensor("op_1520_cast_fp16")]; tensor var_1521_cast_fp16 = softmax(axis = var_297, x = aw_chunk_45_cast_fp16)[name = tensor("op_1521_cast_fp16")]; tensor var_1522_cast_fp16 = softmax(axis = var_297, x = aw_chunk_47_cast_fp16)[name = tensor("op_1522_cast_fp16")]; tensor var_1523_cast_fp16 = softmax(axis = var_297, x = aw_chunk_49_cast_fp16)[name = tensor("op_1523_cast_fp16")]; tensor var_1524_cast_fp16 = softmax(axis = var_297, x = aw_chunk_51_cast_fp16)[name = tensor("op_1524_cast_fp16")]; tensor var_1525_cast_fp16 = softmax(axis = var_297, x = aw_chunk_53_cast_fp16)[name = tensor("op_1525_cast_fp16")]; tensor var_1526_cast_fp16 = softmax(axis = var_297, x = aw_chunk_55_cast_fp16)[name = tensor("op_1526_cast_fp16")]; tensor var_1527_cast_fp16 = softmax(axis = var_297, x = aw_chunk_57_cast_fp16)[name = tensor("op_1527_cast_fp16")]; tensor var_1528_cast_fp16 = softmax(axis = var_297, x = aw_chunk_59_cast_fp16)[name = tensor("op_1528_cast_fp16")]; tensor var_1529_cast_fp16 = softmax(axis = var_297, x = aw_chunk_61_cast_fp16)[name = tensor("op_1529_cast_fp16")]; tensor var_1530_cast_fp16 = softmax(axis = var_297, x = aw_chunk_63_cast_fp16)[name = tensor("op_1530_cast_fp16")]; tensor var_1531_cast_fp16 = softmax(axis = var_297, x = aw_chunk_65_cast_fp16)[name = tensor("op_1531_cast_fp16")]; tensor var_1532_cast_fp16 = softmax(axis = var_297, x = aw_chunk_67_cast_fp16)[name = tensor("op_1532_cast_fp16")]; tensor var_1533_cast_fp16 = softmax(axis = var_297, x = aw_chunk_69_cast_fp16)[name = tensor("op_1533_cast_fp16")]; tensor var_1534_cast_fp16 = softmax(axis = var_297, x = aw_chunk_71_cast_fp16)[name = tensor("op_1534_cast_fp16")]; tensor var_1535_cast_fp16 = softmax(axis = var_297, x = aw_chunk_73_cast_fp16)[name = tensor("op_1535_cast_fp16")]; tensor var_1536_cast_fp16 = softmax(axis = var_297, x = aw_chunk_75_cast_fp16)[name = tensor("op_1536_cast_fp16")]; tensor var_1537_cast_fp16 = softmax(axis = var_297, x = aw_chunk_77_cast_fp16)[name = tensor("op_1537_cast_fp16")]; tensor var_1538_cast_fp16 = softmax(axis = var_297, x = aw_chunk_79_cast_fp16)[name = tensor("op_1538_cast_fp16")]; tensor var_1539_cast_fp16 = softmax(axis = var_297, x = aw_chunk_81_cast_fp16)[name = tensor("op_1539_cast_fp16")]; tensor var_1540_cast_fp16 = softmax(axis = var_297, x = aw_chunk_83_cast_fp16)[name = tensor("op_1540_cast_fp16")]; tensor var_1541_cast_fp16 = softmax(axis = var_297, x = aw_chunk_85_cast_fp16)[name = tensor("op_1541_cast_fp16")]; tensor var_1542_cast_fp16 = softmax(axis = var_297, x = aw_chunk_87_cast_fp16)[name = tensor("op_1542_cast_fp16")]; tensor var_1543_cast_fp16 = softmax(axis = var_297, x = aw_chunk_89_cast_fp16)[name = tensor("op_1543_cast_fp16")]; tensor var_1544_cast_fp16 = softmax(axis = var_297, x = aw_chunk_91_cast_fp16)[name = tensor("op_1544_cast_fp16")]; tensor var_1545_cast_fp16 = softmax(axis = var_297, x = aw_chunk_93_cast_fp16)[name = tensor("op_1545_cast_fp16")]; tensor var_1546_cast_fp16 = softmax(axis = var_297, x = aw_chunk_95_cast_fp16)[name = tensor("op_1546_cast_fp16")]; tensor var_1547_cast_fp16 = softmax(axis = var_297, x = aw_chunk_97_cast_fp16)[name = tensor("op_1547_cast_fp16")]; tensor var_1548_cast_fp16 = softmax(axis = var_297, x = aw_chunk_99_cast_fp16)[name = tensor("op_1548_cast_fp16")]; tensor var_1549_cast_fp16 = softmax(axis = var_297, x = aw_chunk_101_cast_fp16)[name = tensor("op_1549_cast_fp16")]; tensor var_1550_cast_fp16 = softmax(axis = var_297, x = aw_chunk_103_cast_fp16)[name = tensor("op_1550_cast_fp16")]; tensor var_1551_cast_fp16 = softmax(axis = var_297, x = aw_chunk_105_cast_fp16)[name = tensor("op_1551_cast_fp16")]; tensor var_1552_cast_fp16 = softmax(axis = var_297, x = aw_chunk_107_cast_fp16)[name = tensor("op_1552_cast_fp16")]; tensor var_1553_cast_fp16 = softmax(axis = var_297, x = aw_chunk_109_cast_fp16)[name = tensor("op_1553_cast_fp16")]; tensor var_1554_cast_fp16 = softmax(axis = var_297, x = aw_chunk_111_cast_fp16)[name = tensor("op_1554_cast_fp16")]; tensor var_1555_cast_fp16 = softmax(axis = var_297, x = aw_chunk_113_cast_fp16)[name = tensor("op_1555_cast_fp16")]; tensor var_1556_cast_fp16 = softmax(axis = var_297, x = aw_chunk_115_cast_fp16)[name = tensor("op_1556_cast_fp16")]; tensor var_1557_cast_fp16 = softmax(axis = var_297, x = aw_chunk_117_cast_fp16)[name = tensor("op_1557_cast_fp16")]; tensor var_1558_cast_fp16 = softmax(axis = var_297, x = aw_chunk_119_cast_fp16)[name = tensor("op_1558_cast_fp16")]; tensor var_1559_cast_fp16 = softmax(axis = var_297, x = aw_chunk_121_cast_fp16)[name = tensor("op_1559_cast_fp16")]; tensor var_1560_cast_fp16 = softmax(axis = var_297, x = aw_chunk_123_cast_fp16)[name = tensor("op_1560_cast_fp16")]; tensor var_1561_cast_fp16 = softmax(axis = var_297, x = aw_chunk_125_cast_fp16)[name = tensor("op_1561_cast_fp16")]; tensor var_1562_cast_fp16 = softmax(axis = var_297, x = aw_chunk_127_cast_fp16)[name = tensor("op_1562_cast_fp16")]; tensor var_1563_cast_fp16 = softmax(axis = var_297, x = aw_chunk_129_cast_fp16)[name = tensor("op_1563_cast_fp16")]; tensor var_1564_cast_fp16 = softmax(axis = var_297, x = aw_chunk_131_cast_fp16)[name = tensor("op_1564_cast_fp16")]; tensor var_1565_cast_fp16 = softmax(axis = var_297, x = aw_chunk_133_cast_fp16)[name = tensor("op_1565_cast_fp16")]; tensor var_1566_cast_fp16 = softmax(axis = var_297, x = aw_chunk_135_cast_fp16)[name = tensor("op_1566_cast_fp16")]; tensor var_1567_cast_fp16 = softmax(axis = var_297, x = aw_chunk_137_cast_fp16)[name = tensor("op_1567_cast_fp16")]; tensor var_1568_cast_fp16 = softmax(axis = var_297, x = aw_chunk_139_cast_fp16)[name = tensor("op_1568_cast_fp16")]; tensor var_1569_cast_fp16 = softmax(axis = var_297, x = aw_chunk_141_cast_fp16)[name = tensor("op_1569_cast_fp16")]; tensor var_1570_cast_fp16 = softmax(axis = var_297, x = aw_chunk_143_cast_fp16)[name = tensor("op_1570_cast_fp16")]; tensor var_1571_cast_fp16 = softmax(axis = var_297, x = aw_chunk_145_cast_fp16)[name = tensor("op_1571_cast_fp16")]; tensor var_1572_cast_fp16 = softmax(axis = var_297, x = aw_chunk_147_cast_fp16)[name = tensor("op_1572_cast_fp16")]; tensor var_1573_cast_fp16 = softmax(axis = var_297, x = aw_chunk_149_cast_fp16)[name = tensor("op_1573_cast_fp16")]; tensor var_1574_cast_fp16 = softmax(axis = var_297, x = aw_chunk_151_cast_fp16)[name = tensor("op_1574_cast_fp16")]; tensor var_1575_cast_fp16 = softmax(axis = var_297, x = aw_chunk_153_cast_fp16)[name = tensor("op_1575_cast_fp16")]; tensor var_1576_cast_fp16 = softmax(axis = var_297, x = aw_chunk_155_cast_fp16)[name = tensor("op_1576_cast_fp16")]; tensor var_1577_cast_fp16 = softmax(axis = var_297, x = aw_chunk_157_cast_fp16)[name = tensor("op_1577_cast_fp16")]; tensor var_1578_cast_fp16 = softmax(axis = var_297, x = aw_chunk_159_cast_fp16)[name = tensor("op_1578_cast_fp16")]; tensor var_1580_equation_0 = const()[name = tensor("op_1580_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1580_cast_fp16 = einsum(equation = var_1580_equation_0, values = (var_1100_cast_fp16, var_1499_cast_fp16))[name = tensor("op_1580_cast_fp16")]; tensor var_1582_equation_0 = const()[name = tensor("op_1582_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1582_cast_fp16 = einsum(equation = var_1582_equation_0, values = (var_1100_cast_fp16, var_1500_cast_fp16))[name = tensor("op_1582_cast_fp16")]; tensor var_1584_equation_0 = const()[name = tensor("op_1584_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1584_cast_fp16 = einsum(equation = var_1584_equation_0, values = (var_1100_cast_fp16, var_1501_cast_fp16))[name = tensor("op_1584_cast_fp16")]; tensor var_1586_equation_0 = const()[name = tensor("op_1586_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1586_cast_fp16 = einsum(equation = var_1586_equation_0, values = (var_1100_cast_fp16, var_1502_cast_fp16))[name = tensor("op_1586_cast_fp16")]; tensor var_1588_equation_0 = const()[name = tensor("op_1588_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1588_cast_fp16 = einsum(equation = var_1588_equation_0, values = (var_1104_cast_fp16, var_1503_cast_fp16))[name = tensor("op_1588_cast_fp16")]; tensor var_1590_equation_0 = const()[name = tensor("op_1590_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1590_cast_fp16 = einsum(equation = var_1590_equation_0, values = (var_1104_cast_fp16, var_1504_cast_fp16))[name = tensor("op_1590_cast_fp16")]; tensor var_1592_equation_0 = const()[name = tensor("op_1592_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1592_cast_fp16 = einsum(equation = var_1592_equation_0, values = (var_1104_cast_fp16, var_1505_cast_fp16))[name = tensor("op_1592_cast_fp16")]; tensor var_1594_equation_0 = const()[name = tensor("op_1594_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1594_cast_fp16 = einsum(equation = var_1594_equation_0, values = (var_1104_cast_fp16, var_1506_cast_fp16))[name = tensor("op_1594_cast_fp16")]; tensor var_1596_equation_0 = const()[name = tensor("op_1596_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1596_cast_fp16 = einsum(equation = var_1596_equation_0, values = (var_1108_cast_fp16, var_1507_cast_fp16))[name = tensor("op_1596_cast_fp16")]; tensor var_1598_equation_0 = const()[name = tensor("op_1598_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1598_cast_fp16 = einsum(equation = var_1598_equation_0, values = (var_1108_cast_fp16, var_1508_cast_fp16))[name = tensor("op_1598_cast_fp16")]; tensor var_1600_equation_0 = const()[name = tensor("op_1600_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1600_cast_fp16 = einsum(equation = var_1600_equation_0, values = (var_1108_cast_fp16, var_1509_cast_fp16))[name = tensor("op_1600_cast_fp16")]; tensor var_1602_equation_0 = const()[name = tensor("op_1602_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1602_cast_fp16 = einsum(equation = var_1602_equation_0, values = (var_1108_cast_fp16, var_1510_cast_fp16))[name = tensor("op_1602_cast_fp16")]; tensor var_1604_equation_0 = const()[name = tensor("op_1604_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1604_cast_fp16 = einsum(equation = var_1604_equation_0, values = (var_1112_cast_fp16, var_1511_cast_fp16))[name = tensor("op_1604_cast_fp16")]; tensor var_1606_equation_0 = const()[name = tensor("op_1606_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1606_cast_fp16 = einsum(equation = var_1606_equation_0, values = (var_1112_cast_fp16, var_1512_cast_fp16))[name = tensor("op_1606_cast_fp16")]; tensor var_1608_equation_0 = const()[name = tensor("op_1608_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1608_cast_fp16 = einsum(equation = var_1608_equation_0, values = (var_1112_cast_fp16, var_1513_cast_fp16))[name = tensor("op_1608_cast_fp16")]; tensor var_1610_equation_0 = const()[name = tensor("op_1610_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1610_cast_fp16 = einsum(equation = var_1610_equation_0, values = (var_1112_cast_fp16, var_1514_cast_fp16))[name = tensor("op_1610_cast_fp16")]; tensor var_1612_equation_0 = const()[name = tensor("op_1612_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1612_cast_fp16 = einsum(equation = var_1612_equation_0, values = (var_1116_cast_fp16, var_1515_cast_fp16))[name = tensor("op_1612_cast_fp16")]; tensor var_1614_equation_0 = const()[name = tensor("op_1614_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1614_cast_fp16 = einsum(equation = var_1614_equation_0, values = (var_1116_cast_fp16, var_1516_cast_fp16))[name = tensor("op_1614_cast_fp16")]; tensor var_1616_equation_0 = const()[name = tensor("op_1616_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1616_cast_fp16 = einsum(equation = var_1616_equation_0, values = (var_1116_cast_fp16, var_1517_cast_fp16))[name = tensor("op_1616_cast_fp16")]; tensor var_1618_equation_0 = const()[name = tensor("op_1618_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1618_cast_fp16 = einsum(equation = var_1618_equation_0, values = (var_1116_cast_fp16, var_1518_cast_fp16))[name = tensor("op_1618_cast_fp16")]; tensor var_1620_equation_0 = const()[name = tensor("op_1620_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1620_cast_fp16 = einsum(equation = var_1620_equation_0, values = (var_1120_cast_fp16, var_1519_cast_fp16))[name = tensor("op_1620_cast_fp16")]; tensor var_1622_equation_0 = const()[name = tensor("op_1622_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1622_cast_fp16 = einsum(equation = var_1622_equation_0, values = (var_1120_cast_fp16, var_1520_cast_fp16))[name = tensor("op_1622_cast_fp16")]; tensor var_1624_equation_0 = const()[name = tensor("op_1624_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1624_cast_fp16 = einsum(equation = var_1624_equation_0, values = (var_1120_cast_fp16, var_1521_cast_fp16))[name = tensor("op_1624_cast_fp16")]; tensor var_1626_equation_0 = const()[name = tensor("op_1626_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1626_cast_fp16 = einsum(equation = var_1626_equation_0, values = (var_1120_cast_fp16, var_1522_cast_fp16))[name = tensor("op_1626_cast_fp16")]; tensor var_1628_equation_0 = const()[name = tensor("op_1628_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1628_cast_fp16 = einsum(equation = var_1628_equation_0, values = (var_1124_cast_fp16, var_1523_cast_fp16))[name = tensor("op_1628_cast_fp16")]; tensor var_1630_equation_0 = const()[name = tensor("op_1630_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1630_cast_fp16 = einsum(equation = var_1630_equation_0, values = (var_1124_cast_fp16, var_1524_cast_fp16))[name = tensor("op_1630_cast_fp16")]; tensor var_1632_equation_0 = const()[name = tensor("op_1632_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1632_cast_fp16 = einsum(equation = var_1632_equation_0, values = (var_1124_cast_fp16, var_1525_cast_fp16))[name = tensor("op_1632_cast_fp16")]; tensor var_1634_equation_0 = const()[name = tensor("op_1634_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1634_cast_fp16 = einsum(equation = var_1634_equation_0, values = (var_1124_cast_fp16, var_1526_cast_fp16))[name = tensor("op_1634_cast_fp16")]; tensor var_1636_equation_0 = const()[name = tensor("op_1636_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1636_cast_fp16 = einsum(equation = var_1636_equation_0, values = (var_1128_cast_fp16, var_1527_cast_fp16))[name = tensor("op_1636_cast_fp16")]; tensor var_1638_equation_0 = const()[name = tensor("op_1638_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1638_cast_fp16 = einsum(equation = var_1638_equation_0, values = (var_1128_cast_fp16, var_1528_cast_fp16))[name = tensor("op_1638_cast_fp16")]; tensor var_1640_equation_0 = const()[name = tensor("op_1640_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1640_cast_fp16 = einsum(equation = var_1640_equation_0, values = (var_1128_cast_fp16, var_1529_cast_fp16))[name = tensor("op_1640_cast_fp16")]; tensor var_1642_equation_0 = const()[name = tensor("op_1642_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1642_cast_fp16 = einsum(equation = var_1642_equation_0, values = (var_1128_cast_fp16, var_1530_cast_fp16))[name = tensor("op_1642_cast_fp16")]; tensor var_1644_equation_0 = const()[name = tensor("op_1644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1644_cast_fp16 = einsum(equation = var_1644_equation_0, values = (var_1132_cast_fp16, var_1531_cast_fp16))[name = tensor("op_1644_cast_fp16")]; tensor var_1646_equation_0 = const()[name = tensor("op_1646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1646_cast_fp16 = einsum(equation = var_1646_equation_0, values = (var_1132_cast_fp16, var_1532_cast_fp16))[name = tensor("op_1646_cast_fp16")]; tensor var_1648_equation_0 = const()[name = tensor("op_1648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1648_cast_fp16 = einsum(equation = var_1648_equation_0, values = (var_1132_cast_fp16, var_1533_cast_fp16))[name = tensor("op_1648_cast_fp16")]; tensor var_1650_equation_0 = const()[name = tensor("op_1650_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1650_cast_fp16 = einsum(equation = var_1650_equation_0, values = (var_1132_cast_fp16, var_1534_cast_fp16))[name = tensor("op_1650_cast_fp16")]; tensor var_1652_equation_0 = const()[name = tensor("op_1652_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1652_cast_fp16 = einsum(equation = var_1652_equation_0, values = (var_1136_cast_fp16, var_1535_cast_fp16))[name = tensor("op_1652_cast_fp16")]; tensor var_1654_equation_0 = const()[name = tensor("op_1654_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1654_cast_fp16 = einsum(equation = var_1654_equation_0, values = (var_1136_cast_fp16, var_1536_cast_fp16))[name = tensor("op_1654_cast_fp16")]; tensor var_1656_equation_0 = const()[name = tensor("op_1656_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1656_cast_fp16 = einsum(equation = var_1656_equation_0, values = (var_1136_cast_fp16, var_1537_cast_fp16))[name = tensor("op_1656_cast_fp16")]; tensor var_1658_equation_0 = const()[name = tensor("op_1658_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1658_cast_fp16 = einsum(equation = var_1658_equation_0, values = (var_1136_cast_fp16, var_1538_cast_fp16))[name = tensor("op_1658_cast_fp16")]; tensor var_1660_equation_0 = const()[name = tensor("op_1660_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1660_cast_fp16 = einsum(equation = var_1660_equation_0, values = (var_1140_cast_fp16, var_1539_cast_fp16))[name = tensor("op_1660_cast_fp16")]; tensor var_1662_equation_0 = const()[name = tensor("op_1662_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1662_cast_fp16 = einsum(equation = var_1662_equation_0, values = (var_1140_cast_fp16, var_1540_cast_fp16))[name = tensor("op_1662_cast_fp16")]; tensor var_1664_equation_0 = const()[name = tensor("op_1664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1664_cast_fp16 = einsum(equation = var_1664_equation_0, values = (var_1140_cast_fp16, var_1541_cast_fp16))[name = tensor("op_1664_cast_fp16")]; tensor var_1666_equation_0 = const()[name = tensor("op_1666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1666_cast_fp16 = einsum(equation = var_1666_equation_0, values = (var_1140_cast_fp16, var_1542_cast_fp16))[name = tensor("op_1666_cast_fp16")]; tensor var_1668_equation_0 = const()[name = tensor("op_1668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1668_cast_fp16 = einsum(equation = var_1668_equation_0, values = (var_1144_cast_fp16, var_1543_cast_fp16))[name = tensor("op_1668_cast_fp16")]; tensor var_1670_equation_0 = const()[name = tensor("op_1670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1670_cast_fp16 = einsum(equation = var_1670_equation_0, values = (var_1144_cast_fp16, var_1544_cast_fp16))[name = tensor("op_1670_cast_fp16")]; tensor var_1672_equation_0 = const()[name = tensor("op_1672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1672_cast_fp16 = einsum(equation = var_1672_equation_0, values = (var_1144_cast_fp16, var_1545_cast_fp16))[name = tensor("op_1672_cast_fp16")]; tensor var_1674_equation_0 = const()[name = tensor("op_1674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1674_cast_fp16 = einsum(equation = var_1674_equation_0, values = (var_1144_cast_fp16, var_1546_cast_fp16))[name = tensor("op_1674_cast_fp16")]; tensor var_1676_equation_0 = const()[name = tensor("op_1676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1676_cast_fp16 = einsum(equation = var_1676_equation_0, values = (var_1148_cast_fp16, var_1547_cast_fp16))[name = tensor("op_1676_cast_fp16")]; tensor var_1678_equation_0 = const()[name = tensor("op_1678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1678_cast_fp16 = einsum(equation = var_1678_equation_0, values = (var_1148_cast_fp16, var_1548_cast_fp16))[name = tensor("op_1678_cast_fp16")]; tensor var_1680_equation_0 = const()[name = tensor("op_1680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1680_cast_fp16 = einsum(equation = var_1680_equation_0, values = (var_1148_cast_fp16, var_1549_cast_fp16))[name = tensor("op_1680_cast_fp16")]; tensor var_1682_equation_0 = const()[name = tensor("op_1682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1682_cast_fp16 = einsum(equation = var_1682_equation_0, values = (var_1148_cast_fp16, var_1550_cast_fp16))[name = tensor("op_1682_cast_fp16")]; tensor var_1684_equation_0 = const()[name = tensor("op_1684_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1684_cast_fp16 = einsum(equation = var_1684_equation_0, values = (var_1152_cast_fp16, var_1551_cast_fp16))[name = tensor("op_1684_cast_fp16")]; tensor var_1686_equation_0 = const()[name = tensor("op_1686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1686_cast_fp16 = einsum(equation = var_1686_equation_0, values = (var_1152_cast_fp16, var_1552_cast_fp16))[name = tensor("op_1686_cast_fp16")]; tensor var_1688_equation_0 = const()[name = tensor("op_1688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1688_cast_fp16 = einsum(equation = var_1688_equation_0, values = (var_1152_cast_fp16, var_1553_cast_fp16))[name = tensor("op_1688_cast_fp16")]; tensor var_1690_equation_0 = const()[name = tensor("op_1690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1690_cast_fp16 = einsum(equation = var_1690_equation_0, values = (var_1152_cast_fp16, var_1554_cast_fp16))[name = tensor("op_1690_cast_fp16")]; tensor var_1692_equation_0 = const()[name = tensor("op_1692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1692_cast_fp16 = einsum(equation = var_1692_equation_0, values = (var_1156_cast_fp16, var_1555_cast_fp16))[name = tensor("op_1692_cast_fp16")]; tensor var_1694_equation_0 = const()[name = tensor("op_1694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1694_cast_fp16 = einsum(equation = var_1694_equation_0, values = (var_1156_cast_fp16, var_1556_cast_fp16))[name = tensor("op_1694_cast_fp16")]; tensor var_1696_equation_0 = const()[name = tensor("op_1696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1696_cast_fp16 = einsum(equation = var_1696_equation_0, values = (var_1156_cast_fp16, var_1557_cast_fp16))[name = tensor("op_1696_cast_fp16")]; tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1698_cast_fp16 = einsum(equation = var_1698_equation_0, values = (var_1156_cast_fp16, var_1558_cast_fp16))[name = tensor("op_1698_cast_fp16")]; tensor var_1700_equation_0 = const()[name = tensor("op_1700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1700_cast_fp16 = einsum(equation = var_1700_equation_0, values = (var_1160_cast_fp16, var_1559_cast_fp16))[name = tensor("op_1700_cast_fp16")]; tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1702_cast_fp16 = einsum(equation = var_1702_equation_0, values = (var_1160_cast_fp16, var_1560_cast_fp16))[name = tensor("op_1702_cast_fp16")]; tensor var_1704_equation_0 = const()[name = tensor("op_1704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1704_cast_fp16 = einsum(equation = var_1704_equation_0, values = (var_1160_cast_fp16, var_1561_cast_fp16))[name = tensor("op_1704_cast_fp16")]; tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1706_cast_fp16 = einsum(equation = var_1706_equation_0, values = (var_1160_cast_fp16, var_1562_cast_fp16))[name = tensor("op_1706_cast_fp16")]; tensor var_1708_equation_0 = const()[name = tensor("op_1708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1708_cast_fp16 = einsum(equation = var_1708_equation_0, values = (var_1164_cast_fp16, var_1563_cast_fp16))[name = tensor("op_1708_cast_fp16")]; tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1710_cast_fp16 = einsum(equation = var_1710_equation_0, values = (var_1164_cast_fp16, var_1564_cast_fp16))[name = tensor("op_1710_cast_fp16")]; tensor var_1712_equation_0 = const()[name = tensor("op_1712_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1712_cast_fp16 = einsum(equation = var_1712_equation_0, values = (var_1164_cast_fp16, var_1565_cast_fp16))[name = tensor("op_1712_cast_fp16")]; tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1714_cast_fp16 = einsum(equation = var_1714_equation_0, values = (var_1164_cast_fp16, var_1566_cast_fp16))[name = tensor("op_1714_cast_fp16")]; tensor var_1716_equation_0 = const()[name = tensor("op_1716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1716_cast_fp16 = einsum(equation = var_1716_equation_0, values = (var_1168_cast_fp16, var_1567_cast_fp16))[name = tensor("op_1716_cast_fp16")]; tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1718_cast_fp16 = einsum(equation = var_1718_equation_0, values = (var_1168_cast_fp16, var_1568_cast_fp16))[name = tensor("op_1718_cast_fp16")]; tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1168_cast_fp16, var_1569_cast_fp16))[name = tensor("op_1720_cast_fp16")]; tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1168_cast_fp16, var_1570_cast_fp16))[name = tensor("op_1722_cast_fp16")]; tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1172_cast_fp16, var_1571_cast_fp16))[name = tensor("op_1724_cast_fp16")]; tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1172_cast_fp16, var_1572_cast_fp16))[name = tensor("op_1726_cast_fp16")]; tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1172_cast_fp16, var_1573_cast_fp16))[name = tensor("op_1728_cast_fp16")]; tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1172_cast_fp16, var_1574_cast_fp16))[name = tensor("op_1730_cast_fp16")]; tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1176_cast_fp16, var_1575_cast_fp16))[name = tensor("op_1732_cast_fp16")]; tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1176_cast_fp16, var_1576_cast_fp16))[name = tensor("op_1734_cast_fp16")]; tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1176_cast_fp16, var_1577_cast_fp16))[name = tensor("op_1736_cast_fp16")]; tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1176_cast_fp16, var_1578_cast_fp16))[name = tensor("op_1738_cast_fp16")]; tensor var_1740_interleave_0 = const()[name = tensor("op_1740_interleave_0"), val = tensor(false)]; tensor var_1740_cast_fp16 = concat(axis = var_272, interleave = var_1740_interleave_0, values = (var_1580_cast_fp16, var_1582_cast_fp16, var_1584_cast_fp16, var_1586_cast_fp16))[name = tensor("op_1740_cast_fp16")]; tensor var_1742_interleave_0 = const()[name = tensor("op_1742_interleave_0"), val = tensor(false)]; tensor var_1742_cast_fp16 = concat(axis = var_272, interleave = var_1742_interleave_0, values = (var_1588_cast_fp16, var_1590_cast_fp16, var_1592_cast_fp16, var_1594_cast_fp16))[name = tensor("op_1742_cast_fp16")]; tensor var_1744_interleave_0 = const()[name = tensor("op_1744_interleave_0"), val = tensor(false)]; tensor var_1744_cast_fp16 = concat(axis = var_272, interleave = var_1744_interleave_0, values = (var_1596_cast_fp16, var_1598_cast_fp16, var_1600_cast_fp16, var_1602_cast_fp16))[name = tensor("op_1744_cast_fp16")]; tensor var_1746_interleave_0 = const()[name = tensor("op_1746_interleave_0"), val = tensor(false)]; tensor var_1746_cast_fp16 = concat(axis = var_272, interleave = var_1746_interleave_0, values = (var_1604_cast_fp16, var_1606_cast_fp16, var_1608_cast_fp16, var_1610_cast_fp16))[name = tensor("op_1746_cast_fp16")]; tensor var_1748_interleave_0 = const()[name = tensor("op_1748_interleave_0"), val = tensor(false)]; tensor var_1748_cast_fp16 = concat(axis = var_272, interleave = var_1748_interleave_0, values = (var_1612_cast_fp16, var_1614_cast_fp16, var_1616_cast_fp16, var_1618_cast_fp16))[name = tensor("op_1748_cast_fp16")]; tensor var_1750_interleave_0 = const()[name = tensor("op_1750_interleave_0"), val = tensor(false)]; tensor var_1750_cast_fp16 = concat(axis = var_272, interleave = var_1750_interleave_0, values = (var_1620_cast_fp16, var_1622_cast_fp16, var_1624_cast_fp16, var_1626_cast_fp16))[name = tensor("op_1750_cast_fp16")]; tensor var_1752_interleave_0 = const()[name = tensor("op_1752_interleave_0"), val = tensor(false)]; tensor var_1752_cast_fp16 = concat(axis = var_272, interleave = var_1752_interleave_0, values = (var_1628_cast_fp16, var_1630_cast_fp16, var_1632_cast_fp16, var_1634_cast_fp16))[name = tensor("op_1752_cast_fp16")]; tensor var_1754_interleave_0 = const()[name = tensor("op_1754_interleave_0"), val = tensor(false)]; tensor var_1754_cast_fp16 = concat(axis = var_272, interleave = var_1754_interleave_0, values = (var_1636_cast_fp16, var_1638_cast_fp16, var_1640_cast_fp16, var_1642_cast_fp16))[name = tensor("op_1754_cast_fp16")]; tensor var_1756_interleave_0 = const()[name = tensor("op_1756_interleave_0"), val = tensor(false)]; tensor var_1756_cast_fp16 = concat(axis = var_272, interleave = var_1756_interleave_0, values = (var_1644_cast_fp16, var_1646_cast_fp16, var_1648_cast_fp16, var_1650_cast_fp16))[name = tensor("op_1756_cast_fp16")]; tensor var_1758_interleave_0 = const()[name = tensor("op_1758_interleave_0"), val = tensor(false)]; tensor var_1758_cast_fp16 = concat(axis = var_272, interleave = var_1758_interleave_0, values = (var_1652_cast_fp16, var_1654_cast_fp16, var_1656_cast_fp16, var_1658_cast_fp16))[name = tensor("op_1758_cast_fp16")]; tensor var_1760_interleave_0 = const()[name = tensor("op_1760_interleave_0"), val = tensor(false)]; tensor var_1760_cast_fp16 = concat(axis = var_272, interleave = var_1760_interleave_0, values = (var_1660_cast_fp16, var_1662_cast_fp16, var_1664_cast_fp16, var_1666_cast_fp16))[name = tensor("op_1760_cast_fp16")]; tensor var_1762_interleave_0 = const()[name = tensor("op_1762_interleave_0"), val = tensor(false)]; tensor var_1762_cast_fp16 = concat(axis = var_272, interleave = var_1762_interleave_0, values = (var_1668_cast_fp16, var_1670_cast_fp16, var_1672_cast_fp16, var_1674_cast_fp16))[name = tensor("op_1762_cast_fp16")]; tensor var_1764_interleave_0 = const()[name = tensor("op_1764_interleave_0"), val = tensor(false)]; tensor var_1764_cast_fp16 = concat(axis = var_272, interleave = var_1764_interleave_0, values = (var_1676_cast_fp16, var_1678_cast_fp16, var_1680_cast_fp16, var_1682_cast_fp16))[name = tensor("op_1764_cast_fp16")]; tensor var_1766_interleave_0 = const()[name = tensor("op_1766_interleave_0"), val = tensor(false)]; tensor var_1766_cast_fp16 = concat(axis = var_272, interleave = var_1766_interleave_0, values = (var_1684_cast_fp16, var_1686_cast_fp16, var_1688_cast_fp16, var_1690_cast_fp16))[name = tensor("op_1766_cast_fp16")]; tensor var_1768_interleave_0 = const()[name = tensor("op_1768_interleave_0"), val = tensor(false)]; tensor var_1768_cast_fp16 = concat(axis = var_272, interleave = var_1768_interleave_0, values = (var_1692_cast_fp16, var_1694_cast_fp16, var_1696_cast_fp16, var_1698_cast_fp16))[name = tensor("op_1768_cast_fp16")]; tensor var_1770_interleave_0 = const()[name = tensor("op_1770_interleave_0"), val = tensor(false)]; tensor var_1770_cast_fp16 = concat(axis = var_272, interleave = var_1770_interleave_0, values = (var_1700_cast_fp16, var_1702_cast_fp16, var_1704_cast_fp16, var_1706_cast_fp16))[name = tensor("op_1770_cast_fp16")]; tensor var_1772_interleave_0 = const()[name = tensor("op_1772_interleave_0"), val = tensor(false)]; tensor var_1772_cast_fp16 = concat(axis = var_272, interleave = var_1772_interleave_0, values = (var_1708_cast_fp16, var_1710_cast_fp16, var_1712_cast_fp16, var_1714_cast_fp16))[name = tensor("op_1772_cast_fp16")]; tensor var_1774_interleave_0 = const()[name = tensor("op_1774_interleave_0"), val = tensor(false)]; tensor var_1774_cast_fp16 = concat(axis = var_272, interleave = var_1774_interleave_0, values = (var_1716_cast_fp16, var_1718_cast_fp16, var_1720_cast_fp16, var_1722_cast_fp16))[name = tensor("op_1774_cast_fp16")]; tensor var_1776_interleave_0 = const()[name = tensor("op_1776_interleave_0"), val = tensor(false)]; tensor var_1776_cast_fp16 = concat(axis = var_272, interleave = var_1776_interleave_0, values = (var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16))[name = tensor("op_1776_cast_fp16")]; tensor var_1778_interleave_0 = const()[name = tensor("op_1778_interleave_0"), val = tensor(false)]; tensor var_1778_cast_fp16 = concat(axis = var_272, interleave = var_1778_interleave_0, values = (var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16))[name = tensor("op_1778_cast_fp16")]; tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; tensor input_1_cast_fp16 = concat(axis = var_297, interleave = input_1_interleave_0, values = (var_1740_cast_fp16, var_1742_cast_fp16, var_1744_cast_fp16, var_1746_cast_fp16, var_1748_cast_fp16, var_1750_cast_fp16, var_1752_cast_fp16, var_1754_cast_fp16, var_1756_cast_fp16, var_1758_cast_fp16, var_1760_cast_fp16, var_1762_cast_fp16, var_1764_cast_fp16, var_1766_cast_fp16, var_1768_cast_fp16, var_1770_cast_fp16, var_1772_cast_fp16, var_1774_cast_fp16, var_1776_cast_fp16, var_1778_cast_fp16))[name = tensor("input_1_cast_fp16")]; tensor var_1789_pad_type_0 = const()[name = tensor("op_1789_pad_type_0"), val = tensor("valid")]; tensor var_1789_strides_0 = const()[name = tensor("op_1789_strides_0"), val = tensor([1, 1])]; tensor var_1789_pad_0 = const()[name = tensor("op_1789_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1789_dilations_0 = const()[name = tensor("op_1789_dilations_0"), val = tensor([1, 1])]; tensor var_1789_groups_0 = const()[name = tensor("op_1789_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20678912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21498176))), name = tensor("layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_0_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21498304)))]; tensor var_1789_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_1789_dilations_0, groups = var_1789_groups_0, pad = var_1789_pad_0, pad_type = var_1789_pad_type_0, strides = var_1789_strides_0, weight = layers_0_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_1_cast_fp16)[name = tensor("op_1789_cast_fp16")]; tensor var_1795_pad_type_0 = const()[name = tensor("op_1795_pad_type_0"), val = tensor("valid")]; tensor var_1795_strides_0 = const()[name = tensor("op_1795_strides_0"), val = tensor([1, 1])]; tensor var_1795_pad_0 = const()[name = tensor("op_1795_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1795_dilations_0 = const()[name = tensor("op_1795_dilations_0"), val = tensor([1, 1])]; tensor var_1795_groups_0 = const()[name = tensor("op_1795_groups_0"), val = tensor(1)]; tensor layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21553024))), name = tensor("layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21500928))), shape = tensor([1280, 1280, 1, 1])]; tensor var_1795_cast_fp16 = conv(dilations = var_1795_dilations_0, groups = var_1795_groups_0, pad = var_1795_pad_0, pad_type = var_1795_pad_type_0, strides = var_1795_strides_0, weight = layers_0_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_1_cast_fp16)[name = tensor("op_1795_cast_fp16")]; tensor obj_3_cast_fp16 = add(x = var_1789_cast_fp16, y = var_1795_cast_fp16)[name = tensor("obj_3_cast_fp16")]; tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; tensor out_3_axes_0 = const()[name = tensor("out_3_axes_0"), val = tensor([1])]; tensor var_1806_to_fp16 = const()[name = tensor("op_1806_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_3_cast_fp16 = layer_norm(axes = out_3_axes_0, epsilon = var_1806_to_fp16, x = inputs_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21757888)))]; tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21760512)))]; tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; tensor var_1824_pad_type_0 = const()[name = tensor("op_1824_pad_type_0"), val = tensor("valid")]; tensor var_1824_strides_0 = const()[name = tensor("op_1824_strides_0"), val = tensor([1, 1])]; tensor var_1824_pad_0 = const()[name = tensor("op_1824_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1824_dilations_0 = const()[name = tensor("op_1824_dilations_0"), val = tensor([1, 1])]; tensor var_1824_groups_0 = const()[name = tensor("op_1824_groups_0"), val = tensor(1)]; tensor layers_0_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21763136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25040000))), name = tensor("layers_0_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_0_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25040128)))]; tensor var_1824_cast_fp16 = conv(bias = layers_0_fc1_inlier_module_bias_to_fp16, dilations = var_1824_dilations_0, groups = var_1824_groups_0, pad = var_1824_pad_0, pad_type = var_1824_pad_type_0, strides = var_1824_strides_0, weight = layers_0_fc1_inlier_module_weight_to_fp16_palettized, x = input_3_cast_fp16)[name = tensor("op_1824_cast_fp16")]; tensor var_1830_pad_type_0 = const()[name = tensor("op_1830_pad_type_0"), val = tensor("valid")]; tensor var_1830_strides_0 = const()[name = tensor("op_1830_strides_0"), val = tensor([1, 1])]; tensor var_1830_pad_0 = const()[name = tensor("op_1830_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1830_dilations_0 = const()[name = tensor("op_1830_dilations_0"), val = tensor([1, 1])]; tensor var_1830_groups_0 = const()[name = tensor("op_1830_groups_0"), val = tensor(1)]; tensor layers_0_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25353152))), name = tensor("layers_0_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25050432))), shape = tensor([5120, 1280, 1, 1])]; tensor var_1830_cast_fp16 = conv(dilations = var_1830_dilations_0, groups = var_1830_groups_0, pad = var_1830_pad_0, pad_type = var_1830_pad_type_0, strides = var_1830_strides_0, weight = layers_0_fc1_outlier_module_weight_to_fp16_sparsified, x = input_3_cast_fp16)[name = tensor("op_1830_cast_fp16")]; tensor input_5_cast_fp16 = add(x = var_1824_cast_fp16, y = var_1830_cast_fp16)[name = tensor("input_5_cast_fp16")]; tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; tensor var_1841_pad_type_0 = const()[name = tensor("op_1841_pad_type_0"), val = tensor("valid")]; tensor var_1841_strides_0 = const()[name = tensor("op_1841_strides_0"), val = tensor([1, 1])]; tensor var_1841_pad_0 = const()[name = tensor("op_1841_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1841_dilations_0 = const()[name = tensor("op_1841_dilations_0"), val = tensor([1, 1])]; tensor var_1841_groups_0 = const()[name = tensor("op_1841_groups_0"), val = tensor(1)]; tensor layers_0_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(26172416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29449280))), name = tensor("layers_0_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_0_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_0_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29449408)))]; tensor var_1841_cast_fp16 = conv(bias = layers_0_fc2_inlier_module_bias_to_fp16, dilations = var_1841_dilations_0, groups = var_1841_groups_0, pad = var_1841_pad_0, pad_type = var_1841_pad_type_0, strides = var_1841_strides_0, weight = layers_0_fc2_inlier_module_weight_to_fp16_palettized, x = input_7_cast_fp16)[name = tensor("op_1841_cast_fp16")]; tensor var_1847_pad_type_0 = const()[name = tensor("op_1847_pad_type_0"), val = tensor("valid")]; tensor var_1847_strides_0 = const()[name = tensor("op_1847_strides_0"), val = tensor([1, 1])]; tensor var_1847_pad_0 = const()[name = tensor("op_1847_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1847_dilations_0 = const()[name = tensor("op_1847_dilations_0"), val = tensor([1, 1])]; tensor var_1847_groups_0 = const()[name = tensor("op_1847_groups_0"), val = tensor(1)]; tensor layers_0_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29689088))), name = tensor("layers_0_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29452032))), shape = tensor([1280, 5120, 1, 1])]; tensor var_1847_cast_fp16 = conv(dilations = var_1847_dilations_0, groups = var_1847_groups_0, pad = var_1847_pad_0, pad_type = var_1847_pad_type_0, strides = var_1847_strides_0, weight = layers_0_fc2_outlier_module_weight_to_fp16_sparsified, x = input_7_cast_fp16)[name = tensor("op_1847_cast_fp16")]; tensor hidden_states_5_cast_fp16 = add(x = var_1841_cast_fp16, y = var_1847_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; tensor var_1853 = const()[name = tensor("op_1853"), val = tensor(3)]; tensor var_1878 = const()[name = tensor("op_1878"), val = tensor(1)]; tensor out_5_axes_0 = const()[name = tensor("out_5_axes_0"), val = tensor([1])]; tensor var_1895_to_fp16 = const()[name = tensor("op_1895_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_5_cast_fp16 = layer_norm(axes = out_5_axes_0, epsilon = var_1895_to_fp16, x = inputs_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30508352)))]; tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30510976)))]; tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; tensor var_1917_pad_type_0 = const()[name = tensor("op_1917_pad_type_0"), val = tensor("valid")]; tensor var_1917_strides_0 = const()[name = tensor("op_1917_strides_0"), val = tensor([1, 1])]; tensor var_1917_pad_0 = const()[name = tensor("op_1917_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1917_dilations_0 = const()[name = tensor("op_1917_dilations_0"), val = tensor([1, 1])]; tensor var_1917_groups_0 = const()[name = tensor("op_1917_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(30513600))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31332864))), name = tensor("layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_1_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31332992)))]; tensor var_1917_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_1917_dilations_0, groups = var_1917_groups_0, pad = var_1917_pad_0, pad_type = var_1917_pad_type_0, strides = var_1917_strides_0, weight = layers_1_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_1917_cast_fp16")]; tensor var_1923_pad_type_0 = const()[name = tensor("op_1923_pad_type_0"), val = tensor("valid")]; tensor var_1923_strides_0 = const()[name = tensor("op_1923_strides_0"), val = tensor([1, 1])]; tensor var_1923_pad_0 = const()[name = tensor("op_1923_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1923_dilations_0 = const()[name = tensor("op_1923_dilations_0"), val = tensor([1, 1])]; tensor var_1923_groups_0 = const()[name = tensor("op_1923_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31399552))), name = tensor("layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31335616))), shape = tensor([1280, 1280, 1, 1])]; tensor var_1923_cast_fp16 = conv(dilations = var_1923_dilations_0, groups = var_1923_groups_0, pad = var_1923_pad_0, pad_type = var_1923_pad_type_0, strides = var_1923_strides_0, weight = layers_1_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_1923_cast_fp16")]; tensor query_3_cast_fp16 = add(x = var_1917_cast_fp16, y = var_1923_cast_fp16)[name = tensor("query_3_cast_fp16")]; tensor var_1932_pad_type_0 = const()[name = tensor("op_1932_pad_type_0"), val = tensor("valid")]; tensor var_1932_strides_0 = const()[name = tensor("op_1932_strides_0"), val = tensor([1, 1])]; tensor var_1932_pad_0 = const()[name = tensor("op_1932_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1932_dilations_0 = const()[name = tensor("op_1932_dilations_0"), val = tensor([1, 1])]; tensor var_1932_groups_0 = const()[name = tensor("op_1932_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31604416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32423680))), name = tensor("layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_1932_cast_fp16 = conv(dilations = var_1932_dilations_0, groups = var_1932_groups_0, pad = var_1932_pad_0, pad_type = var_1932_pad_type_0, strides = var_1932_strides_0, weight = layers_1_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_1932_cast_fp16")]; tensor var_1938_pad_type_0 = const()[name = tensor("op_1938_pad_type_0"), val = tensor("valid")]; tensor var_1938_strides_0 = const()[name = tensor("op_1938_strides_0"), val = tensor([1, 1])]; tensor var_1938_pad_0 = const()[name = tensor("op_1938_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1938_dilations_0 = const()[name = tensor("op_1938_dilations_0"), val = tensor([1, 1])]; tensor var_1938_groups_0 = const()[name = tensor("op_1938_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32475264))), name = tensor("layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32423808))), shape = tensor([1280, 1280, 1, 1])]; tensor var_1938_cast_fp16 = conv(dilations = var_1938_dilations_0, groups = var_1938_groups_0, pad = var_1938_pad_0, pad_type = var_1938_pad_type_0, strides = var_1938_strides_0, weight = layers_1_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_1938_cast_fp16")]; tensor key_3_cast_fp16 = add(x = var_1932_cast_fp16, y = var_1938_cast_fp16)[name = tensor("key_3_cast_fp16")]; tensor var_1948_pad_type_0 = const()[name = tensor("op_1948_pad_type_0"), val = tensor("valid")]; tensor var_1948_strides_0 = const()[name = tensor("op_1948_strides_0"), val = tensor([1, 1])]; tensor var_1948_pad_0 = const()[name = tensor("op_1948_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1948_dilations_0 = const()[name = tensor("op_1948_dilations_0"), val = tensor([1, 1])]; tensor var_1948_groups_0 = const()[name = tensor("op_1948_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(32680128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33499392))), name = tensor("layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_1_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33499520)))]; tensor var_1948_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_1948_dilations_0, groups = var_1948_groups_0, pad = var_1948_pad_0, pad_type = var_1948_pad_type_0, strides = var_1948_strides_0, weight = layers_1_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_5_cast_fp16)[name = tensor("op_1948_cast_fp16")]; tensor var_1954_pad_type_0 = const()[name = tensor("op_1954_pad_type_0"), val = tensor("valid")]; tensor var_1954_strides_0 = const()[name = tensor("op_1954_strides_0"), val = tensor([1, 1])]; tensor var_1954_pad_0 = const()[name = tensor("op_1954_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_1954_dilations_0 = const()[name = tensor("op_1954_dilations_0"), val = tensor([1, 1])]; tensor var_1954_groups_0 = const()[name = tensor("op_1954_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33549632))), name = tensor("layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33502144))), shape = tensor([1280, 1280, 1, 1])]; tensor var_1954_cast_fp16 = conv(dilations = var_1954_dilations_0, groups = var_1954_groups_0, pad = var_1954_pad_0, pad_type = var_1954_pad_type_0, strides = var_1954_strides_0, weight = layers_1_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_5_cast_fp16)[name = tensor("op_1954_cast_fp16")]; tensor value_3_cast_fp16 = add(x = var_1948_cast_fp16, y = var_1954_cast_fp16)[name = tensor("value_3_cast_fp16")]; tensor var_1960_begin_0 = const()[name = tensor("op_1960_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1960_end_0 = const()[name = tensor("op_1960_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_1960_end_mask_0 = const()[name = tensor("op_1960_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1960_cast_fp16 = slice_by_index(begin = var_1960_begin_0, end = var_1960_end_0, end_mask = var_1960_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1960_cast_fp16")]; tensor var_1964_begin_0 = const()[name = tensor("op_1964_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_1964_end_0 = const()[name = tensor("op_1964_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_1964_end_mask_0 = const()[name = tensor("op_1964_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1964_cast_fp16 = slice_by_index(begin = var_1964_begin_0, end = var_1964_end_0, end_mask = var_1964_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1964_cast_fp16")]; tensor var_1968_begin_0 = const()[name = tensor("op_1968_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_1968_end_0 = const()[name = tensor("op_1968_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_1968_end_mask_0 = const()[name = tensor("op_1968_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1968_cast_fp16 = slice_by_index(begin = var_1968_begin_0, end = var_1968_end_0, end_mask = var_1968_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1968_cast_fp16")]; tensor var_1972_begin_0 = const()[name = tensor("op_1972_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_1972_end_0 = const()[name = tensor("op_1972_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_1972_end_mask_0 = const()[name = tensor("op_1972_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1972_cast_fp16 = slice_by_index(begin = var_1972_begin_0, end = var_1972_end_0, end_mask = var_1972_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1972_cast_fp16")]; tensor var_1976_begin_0 = const()[name = tensor("op_1976_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_1976_end_0 = const()[name = tensor("op_1976_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_1976_end_mask_0 = const()[name = tensor("op_1976_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1976_cast_fp16 = slice_by_index(begin = var_1976_begin_0, end = var_1976_end_0, end_mask = var_1976_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1976_cast_fp16")]; tensor var_1980_begin_0 = const()[name = tensor("op_1980_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_1980_end_0 = const()[name = tensor("op_1980_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_1980_end_mask_0 = const()[name = tensor("op_1980_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1980_cast_fp16 = slice_by_index(begin = var_1980_begin_0, end = var_1980_end_0, end_mask = var_1980_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1980_cast_fp16")]; tensor var_1984_begin_0 = const()[name = tensor("op_1984_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_1984_end_0 = const()[name = tensor("op_1984_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_1984_end_mask_0 = const()[name = tensor("op_1984_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1984_cast_fp16 = slice_by_index(begin = var_1984_begin_0, end = var_1984_end_0, end_mask = var_1984_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1984_cast_fp16")]; tensor var_1988_begin_0 = const()[name = tensor("op_1988_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_1988_end_0 = const()[name = tensor("op_1988_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_1988_end_mask_0 = const()[name = tensor("op_1988_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1988_cast_fp16 = slice_by_index(begin = var_1988_begin_0, end = var_1988_end_0, end_mask = var_1988_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1988_cast_fp16")]; tensor var_1992_begin_0 = const()[name = tensor("op_1992_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_1992_end_0 = const()[name = tensor("op_1992_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_1992_end_mask_0 = const()[name = tensor("op_1992_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1992_cast_fp16 = slice_by_index(begin = var_1992_begin_0, end = var_1992_end_0, end_mask = var_1992_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1992_cast_fp16")]; tensor var_1996_begin_0 = const()[name = tensor("op_1996_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_1996_end_0 = const()[name = tensor("op_1996_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_1996_end_mask_0 = const()[name = tensor("op_1996_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_1996_cast_fp16 = slice_by_index(begin = var_1996_begin_0, end = var_1996_end_0, end_mask = var_1996_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1996_cast_fp16")]; tensor var_2000_begin_0 = const()[name = tensor("op_2000_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_2000_end_0 = const()[name = tensor("op_2000_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_2000_end_mask_0 = const()[name = tensor("op_2000_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2000_cast_fp16 = slice_by_index(begin = var_2000_begin_0, end = var_2000_end_0, end_mask = var_2000_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2000_cast_fp16")]; tensor var_2004_begin_0 = const()[name = tensor("op_2004_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_2004_end_0 = const()[name = tensor("op_2004_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_2004_end_mask_0 = const()[name = tensor("op_2004_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2004_cast_fp16 = slice_by_index(begin = var_2004_begin_0, end = var_2004_end_0, end_mask = var_2004_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2004_cast_fp16")]; tensor var_2008_begin_0 = const()[name = tensor("op_2008_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_2008_end_0 = const()[name = tensor("op_2008_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_2008_end_mask_0 = const()[name = tensor("op_2008_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2008_cast_fp16 = slice_by_index(begin = var_2008_begin_0, end = var_2008_end_0, end_mask = var_2008_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2008_cast_fp16")]; tensor var_2012_begin_0 = const()[name = tensor("op_2012_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_2012_end_0 = const()[name = tensor("op_2012_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_2012_end_mask_0 = const()[name = tensor("op_2012_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2012_cast_fp16 = slice_by_index(begin = var_2012_begin_0, end = var_2012_end_0, end_mask = var_2012_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2012_cast_fp16")]; tensor var_2016_begin_0 = const()[name = tensor("op_2016_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_2016_end_0 = const()[name = tensor("op_2016_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_2016_end_mask_0 = const()[name = tensor("op_2016_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2016_cast_fp16 = slice_by_index(begin = var_2016_begin_0, end = var_2016_end_0, end_mask = var_2016_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2016_cast_fp16")]; tensor var_2020_begin_0 = const()[name = tensor("op_2020_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_2020_end_0 = const()[name = tensor("op_2020_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_2020_end_mask_0 = const()[name = tensor("op_2020_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2020_cast_fp16 = slice_by_index(begin = var_2020_begin_0, end = var_2020_end_0, end_mask = var_2020_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2020_cast_fp16")]; tensor var_2024_begin_0 = const()[name = tensor("op_2024_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_2024_end_0 = const()[name = tensor("op_2024_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_2024_end_mask_0 = const()[name = tensor("op_2024_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2024_cast_fp16 = slice_by_index(begin = var_2024_begin_0, end = var_2024_end_0, end_mask = var_2024_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2024_cast_fp16")]; tensor var_2028_begin_0 = const()[name = tensor("op_2028_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_2028_end_0 = const()[name = tensor("op_2028_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_2028_end_mask_0 = const()[name = tensor("op_2028_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2028_cast_fp16 = slice_by_index(begin = var_2028_begin_0, end = var_2028_end_0, end_mask = var_2028_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2028_cast_fp16")]; tensor var_2032_begin_0 = const()[name = tensor("op_2032_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_2032_end_0 = const()[name = tensor("op_2032_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_2032_end_mask_0 = const()[name = tensor("op_2032_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2032_cast_fp16 = slice_by_index(begin = var_2032_begin_0, end = var_2032_end_0, end_mask = var_2032_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2032_cast_fp16")]; tensor var_2036_begin_0 = const()[name = tensor("op_2036_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_2036_end_0 = const()[name = tensor("op_2036_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_2036_end_mask_0 = const()[name = tensor("op_2036_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2036_cast_fp16 = slice_by_index(begin = var_2036_begin_0, end = var_2036_end_0, end_mask = var_2036_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_2036_cast_fp16")]; tensor var_2045_begin_0 = const()[name = tensor("op_2045_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2045_end_0 = const()[name = tensor("op_2045_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2045_end_mask_0 = const()[name = tensor("op_2045_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2045_cast_fp16 = slice_by_index(begin = var_2045_begin_0, end = var_2045_end_0, end_mask = var_2045_end_mask_0, x = var_1960_cast_fp16)[name = tensor("op_2045_cast_fp16")]; tensor var_2052_begin_0 = const()[name = tensor("op_2052_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2052_end_0 = const()[name = tensor("op_2052_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2052_end_mask_0 = const()[name = tensor("op_2052_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2052_cast_fp16 = slice_by_index(begin = var_2052_begin_0, end = var_2052_end_0, end_mask = var_2052_end_mask_0, x = var_1960_cast_fp16)[name = tensor("op_2052_cast_fp16")]; tensor var_2059_begin_0 = const()[name = tensor("op_2059_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2059_end_0 = const()[name = tensor("op_2059_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2059_end_mask_0 = const()[name = tensor("op_2059_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2059_cast_fp16 = slice_by_index(begin = var_2059_begin_0, end = var_2059_end_0, end_mask = var_2059_end_mask_0, x = var_1960_cast_fp16)[name = tensor("op_2059_cast_fp16")]; tensor var_2066_begin_0 = const()[name = tensor("op_2066_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2066_end_0 = const()[name = tensor("op_2066_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2066_end_mask_0 = const()[name = tensor("op_2066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2066_cast_fp16 = slice_by_index(begin = var_2066_begin_0, end = var_2066_end_0, end_mask = var_2066_end_mask_0, x = var_1960_cast_fp16)[name = tensor("op_2066_cast_fp16")]; tensor var_2073_begin_0 = const()[name = tensor("op_2073_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2073_end_0 = const()[name = tensor("op_2073_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2073_end_mask_0 = const()[name = tensor("op_2073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2073_cast_fp16 = slice_by_index(begin = var_2073_begin_0, end = var_2073_end_0, end_mask = var_2073_end_mask_0, x = var_1964_cast_fp16)[name = tensor("op_2073_cast_fp16")]; tensor var_2080_begin_0 = const()[name = tensor("op_2080_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2080_end_0 = const()[name = tensor("op_2080_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2080_end_mask_0 = const()[name = tensor("op_2080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2080_cast_fp16 = slice_by_index(begin = var_2080_begin_0, end = var_2080_end_0, end_mask = var_2080_end_mask_0, x = var_1964_cast_fp16)[name = tensor("op_2080_cast_fp16")]; tensor var_2087_begin_0 = const()[name = tensor("op_2087_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2087_end_0 = const()[name = tensor("op_2087_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2087_end_mask_0 = const()[name = tensor("op_2087_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2087_cast_fp16 = slice_by_index(begin = var_2087_begin_0, end = var_2087_end_0, end_mask = var_2087_end_mask_0, x = var_1964_cast_fp16)[name = tensor("op_2087_cast_fp16")]; tensor var_2094_begin_0 = const()[name = tensor("op_2094_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2094_end_0 = const()[name = tensor("op_2094_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2094_end_mask_0 = const()[name = tensor("op_2094_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = var_2094_end_0, end_mask = var_2094_end_mask_0, x = var_1964_cast_fp16)[name = tensor("op_2094_cast_fp16")]; tensor var_2101_begin_0 = const()[name = tensor("op_2101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2101_end_0 = const()[name = tensor("op_2101_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2101_end_mask_0 = const()[name = tensor("op_2101_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2101_cast_fp16 = slice_by_index(begin = var_2101_begin_0, end = var_2101_end_0, end_mask = var_2101_end_mask_0, x = var_1968_cast_fp16)[name = tensor("op_2101_cast_fp16")]; tensor var_2108_begin_0 = const()[name = tensor("op_2108_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2108_end_0 = const()[name = tensor("op_2108_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2108_end_mask_0 = const()[name = tensor("op_2108_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2108_cast_fp16 = slice_by_index(begin = var_2108_begin_0, end = var_2108_end_0, end_mask = var_2108_end_mask_0, x = var_1968_cast_fp16)[name = tensor("op_2108_cast_fp16")]; tensor var_2115_begin_0 = const()[name = tensor("op_2115_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2115_end_0 = const()[name = tensor("op_2115_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2115_end_mask_0 = const()[name = tensor("op_2115_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2115_cast_fp16 = slice_by_index(begin = var_2115_begin_0, end = var_2115_end_0, end_mask = var_2115_end_mask_0, x = var_1968_cast_fp16)[name = tensor("op_2115_cast_fp16")]; tensor var_2122_begin_0 = const()[name = tensor("op_2122_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2122_end_0 = const()[name = tensor("op_2122_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2122_end_mask_0 = const()[name = tensor("op_2122_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2122_cast_fp16 = slice_by_index(begin = var_2122_begin_0, end = var_2122_end_0, end_mask = var_2122_end_mask_0, x = var_1968_cast_fp16)[name = tensor("op_2122_cast_fp16")]; tensor var_2129_begin_0 = const()[name = tensor("op_2129_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2129_end_0 = const()[name = tensor("op_2129_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2129_end_mask_0 = const()[name = tensor("op_2129_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2129_cast_fp16 = slice_by_index(begin = var_2129_begin_0, end = var_2129_end_0, end_mask = var_2129_end_mask_0, x = var_1972_cast_fp16)[name = tensor("op_2129_cast_fp16")]; tensor var_2136_begin_0 = const()[name = tensor("op_2136_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2136_end_0 = const()[name = tensor("op_2136_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2136_end_mask_0 = const()[name = tensor("op_2136_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2136_cast_fp16 = slice_by_index(begin = var_2136_begin_0, end = var_2136_end_0, end_mask = var_2136_end_mask_0, x = var_1972_cast_fp16)[name = tensor("op_2136_cast_fp16")]; tensor var_2143_begin_0 = const()[name = tensor("op_2143_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2143_end_0 = const()[name = tensor("op_2143_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2143_end_mask_0 = const()[name = tensor("op_2143_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2143_cast_fp16 = slice_by_index(begin = var_2143_begin_0, end = var_2143_end_0, end_mask = var_2143_end_mask_0, x = var_1972_cast_fp16)[name = tensor("op_2143_cast_fp16")]; tensor var_2150_begin_0 = const()[name = tensor("op_2150_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2150_end_0 = const()[name = tensor("op_2150_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2150_end_mask_0 = const()[name = tensor("op_2150_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2150_cast_fp16 = slice_by_index(begin = var_2150_begin_0, end = var_2150_end_0, end_mask = var_2150_end_mask_0, x = var_1972_cast_fp16)[name = tensor("op_2150_cast_fp16")]; tensor var_2157_begin_0 = const()[name = tensor("op_2157_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2157_end_0 = const()[name = tensor("op_2157_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2157_end_mask_0 = const()[name = tensor("op_2157_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2157_cast_fp16 = slice_by_index(begin = var_2157_begin_0, end = var_2157_end_0, end_mask = var_2157_end_mask_0, x = var_1976_cast_fp16)[name = tensor("op_2157_cast_fp16")]; tensor var_2164_begin_0 = const()[name = tensor("op_2164_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2164_end_0 = const()[name = tensor("op_2164_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2164_end_mask_0 = const()[name = tensor("op_2164_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2164_cast_fp16 = slice_by_index(begin = var_2164_begin_0, end = var_2164_end_0, end_mask = var_2164_end_mask_0, x = var_1976_cast_fp16)[name = tensor("op_2164_cast_fp16")]; tensor var_2171_begin_0 = const()[name = tensor("op_2171_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2171_end_0 = const()[name = tensor("op_2171_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2171_end_mask_0 = const()[name = tensor("op_2171_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2171_cast_fp16 = slice_by_index(begin = var_2171_begin_0, end = var_2171_end_0, end_mask = var_2171_end_mask_0, x = var_1976_cast_fp16)[name = tensor("op_2171_cast_fp16")]; tensor var_2178_begin_0 = const()[name = tensor("op_2178_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2178_end_0 = const()[name = tensor("op_2178_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2178_end_mask_0 = const()[name = tensor("op_2178_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2178_cast_fp16 = slice_by_index(begin = var_2178_begin_0, end = var_2178_end_0, end_mask = var_2178_end_mask_0, x = var_1976_cast_fp16)[name = tensor("op_2178_cast_fp16")]; tensor var_2185_begin_0 = const()[name = tensor("op_2185_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2185_end_0 = const()[name = tensor("op_2185_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2185_end_mask_0 = const()[name = tensor("op_2185_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2185_cast_fp16 = slice_by_index(begin = var_2185_begin_0, end = var_2185_end_0, end_mask = var_2185_end_mask_0, x = var_1980_cast_fp16)[name = tensor("op_2185_cast_fp16")]; tensor var_2192_begin_0 = const()[name = tensor("op_2192_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2192_end_0 = const()[name = tensor("op_2192_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2192_end_mask_0 = const()[name = tensor("op_2192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2192_cast_fp16 = slice_by_index(begin = var_2192_begin_0, end = var_2192_end_0, end_mask = var_2192_end_mask_0, x = var_1980_cast_fp16)[name = tensor("op_2192_cast_fp16")]; tensor var_2199_begin_0 = const()[name = tensor("op_2199_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2199_end_0 = const()[name = tensor("op_2199_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2199_end_mask_0 = const()[name = tensor("op_2199_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2199_cast_fp16 = slice_by_index(begin = var_2199_begin_0, end = var_2199_end_0, end_mask = var_2199_end_mask_0, x = var_1980_cast_fp16)[name = tensor("op_2199_cast_fp16")]; tensor var_2206_begin_0 = const()[name = tensor("op_2206_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2206_end_0 = const()[name = tensor("op_2206_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2206_end_mask_0 = const()[name = tensor("op_2206_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2206_cast_fp16 = slice_by_index(begin = var_2206_begin_0, end = var_2206_end_0, end_mask = var_2206_end_mask_0, x = var_1980_cast_fp16)[name = tensor("op_2206_cast_fp16")]; tensor var_2213_begin_0 = const()[name = tensor("op_2213_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2213_end_0 = const()[name = tensor("op_2213_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2213_end_mask_0 = const()[name = tensor("op_2213_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2213_cast_fp16 = slice_by_index(begin = var_2213_begin_0, end = var_2213_end_0, end_mask = var_2213_end_mask_0, x = var_1984_cast_fp16)[name = tensor("op_2213_cast_fp16")]; tensor var_2220_begin_0 = const()[name = tensor("op_2220_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2220_end_0 = const()[name = tensor("op_2220_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2220_end_mask_0 = const()[name = tensor("op_2220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2220_cast_fp16 = slice_by_index(begin = var_2220_begin_0, end = var_2220_end_0, end_mask = var_2220_end_mask_0, x = var_1984_cast_fp16)[name = tensor("op_2220_cast_fp16")]; tensor var_2227_begin_0 = const()[name = tensor("op_2227_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2227_end_0 = const()[name = tensor("op_2227_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2227_end_mask_0 = const()[name = tensor("op_2227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2227_cast_fp16 = slice_by_index(begin = var_2227_begin_0, end = var_2227_end_0, end_mask = var_2227_end_mask_0, x = var_1984_cast_fp16)[name = tensor("op_2227_cast_fp16")]; tensor var_2234_begin_0 = const()[name = tensor("op_2234_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2234_end_0 = const()[name = tensor("op_2234_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2234_end_mask_0 = const()[name = tensor("op_2234_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2234_cast_fp16 = slice_by_index(begin = var_2234_begin_0, end = var_2234_end_0, end_mask = var_2234_end_mask_0, x = var_1984_cast_fp16)[name = tensor("op_2234_cast_fp16")]; tensor var_2241_begin_0 = const()[name = tensor("op_2241_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2241_end_0 = const()[name = tensor("op_2241_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2241_end_mask_0 = const()[name = tensor("op_2241_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2241_cast_fp16 = slice_by_index(begin = var_2241_begin_0, end = var_2241_end_0, end_mask = var_2241_end_mask_0, x = var_1988_cast_fp16)[name = tensor("op_2241_cast_fp16")]; tensor var_2248_begin_0 = const()[name = tensor("op_2248_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2248_end_0 = const()[name = tensor("op_2248_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2248_end_mask_0 = const()[name = tensor("op_2248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2248_cast_fp16 = slice_by_index(begin = var_2248_begin_0, end = var_2248_end_0, end_mask = var_2248_end_mask_0, x = var_1988_cast_fp16)[name = tensor("op_2248_cast_fp16")]; tensor var_2255_begin_0 = const()[name = tensor("op_2255_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2255_end_0 = const()[name = tensor("op_2255_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2255_end_mask_0 = const()[name = tensor("op_2255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2255_cast_fp16 = slice_by_index(begin = var_2255_begin_0, end = var_2255_end_0, end_mask = var_2255_end_mask_0, x = var_1988_cast_fp16)[name = tensor("op_2255_cast_fp16")]; tensor var_2262_begin_0 = const()[name = tensor("op_2262_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2262_end_0 = const()[name = tensor("op_2262_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2262_end_mask_0 = const()[name = tensor("op_2262_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2262_cast_fp16 = slice_by_index(begin = var_2262_begin_0, end = var_2262_end_0, end_mask = var_2262_end_mask_0, x = var_1988_cast_fp16)[name = tensor("op_2262_cast_fp16")]; tensor var_2269_begin_0 = const()[name = tensor("op_2269_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2269_end_0 = const()[name = tensor("op_2269_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2269_end_mask_0 = const()[name = tensor("op_2269_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2269_cast_fp16 = slice_by_index(begin = var_2269_begin_0, end = var_2269_end_0, end_mask = var_2269_end_mask_0, x = var_1992_cast_fp16)[name = tensor("op_2269_cast_fp16")]; tensor var_2276_begin_0 = const()[name = tensor("op_2276_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2276_end_0 = const()[name = tensor("op_2276_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2276_end_mask_0 = const()[name = tensor("op_2276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2276_cast_fp16 = slice_by_index(begin = var_2276_begin_0, end = var_2276_end_0, end_mask = var_2276_end_mask_0, x = var_1992_cast_fp16)[name = tensor("op_2276_cast_fp16")]; tensor var_2283_begin_0 = const()[name = tensor("op_2283_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2283_end_0 = const()[name = tensor("op_2283_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2283_end_mask_0 = const()[name = tensor("op_2283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2283_cast_fp16 = slice_by_index(begin = var_2283_begin_0, end = var_2283_end_0, end_mask = var_2283_end_mask_0, x = var_1992_cast_fp16)[name = tensor("op_2283_cast_fp16")]; tensor var_2290_begin_0 = const()[name = tensor("op_2290_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2290_end_0 = const()[name = tensor("op_2290_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2290_end_mask_0 = const()[name = tensor("op_2290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2290_cast_fp16 = slice_by_index(begin = var_2290_begin_0, end = var_2290_end_0, end_mask = var_2290_end_mask_0, x = var_1992_cast_fp16)[name = tensor("op_2290_cast_fp16")]; tensor var_2297_begin_0 = const()[name = tensor("op_2297_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2297_end_0 = const()[name = tensor("op_2297_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2297_end_mask_0 = const()[name = tensor("op_2297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2297_cast_fp16 = slice_by_index(begin = var_2297_begin_0, end = var_2297_end_0, end_mask = var_2297_end_mask_0, x = var_1996_cast_fp16)[name = tensor("op_2297_cast_fp16")]; tensor var_2304_begin_0 = const()[name = tensor("op_2304_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2304_end_0 = const()[name = tensor("op_2304_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2304_end_mask_0 = const()[name = tensor("op_2304_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2304_cast_fp16 = slice_by_index(begin = var_2304_begin_0, end = var_2304_end_0, end_mask = var_2304_end_mask_0, x = var_1996_cast_fp16)[name = tensor("op_2304_cast_fp16")]; tensor var_2311_begin_0 = const()[name = tensor("op_2311_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2311_end_0 = const()[name = tensor("op_2311_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2311_end_mask_0 = const()[name = tensor("op_2311_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2311_cast_fp16 = slice_by_index(begin = var_2311_begin_0, end = var_2311_end_0, end_mask = var_2311_end_mask_0, x = var_1996_cast_fp16)[name = tensor("op_2311_cast_fp16")]; tensor var_2318_begin_0 = const()[name = tensor("op_2318_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2318_end_0 = const()[name = tensor("op_2318_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2318_end_mask_0 = const()[name = tensor("op_2318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2318_cast_fp16 = slice_by_index(begin = var_2318_begin_0, end = var_2318_end_0, end_mask = var_2318_end_mask_0, x = var_1996_cast_fp16)[name = tensor("op_2318_cast_fp16")]; tensor var_2325_begin_0 = const()[name = tensor("op_2325_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2325_end_0 = const()[name = tensor("op_2325_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2325_end_mask_0 = const()[name = tensor("op_2325_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2325_cast_fp16 = slice_by_index(begin = var_2325_begin_0, end = var_2325_end_0, end_mask = var_2325_end_mask_0, x = var_2000_cast_fp16)[name = tensor("op_2325_cast_fp16")]; tensor var_2332_begin_0 = const()[name = tensor("op_2332_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2332_end_0 = const()[name = tensor("op_2332_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2332_end_mask_0 = const()[name = tensor("op_2332_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2332_cast_fp16 = slice_by_index(begin = var_2332_begin_0, end = var_2332_end_0, end_mask = var_2332_end_mask_0, x = var_2000_cast_fp16)[name = tensor("op_2332_cast_fp16")]; tensor var_2339_begin_0 = const()[name = tensor("op_2339_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2339_end_0 = const()[name = tensor("op_2339_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2339_end_mask_0 = const()[name = tensor("op_2339_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2339_cast_fp16 = slice_by_index(begin = var_2339_begin_0, end = var_2339_end_0, end_mask = var_2339_end_mask_0, x = var_2000_cast_fp16)[name = tensor("op_2339_cast_fp16")]; tensor var_2346_begin_0 = const()[name = tensor("op_2346_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2346_end_0 = const()[name = tensor("op_2346_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2346_end_mask_0 = const()[name = tensor("op_2346_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2346_cast_fp16 = slice_by_index(begin = var_2346_begin_0, end = var_2346_end_0, end_mask = var_2346_end_mask_0, x = var_2000_cast_fp16)[name = tensor("op_2346_cast_fp16")]; tensor var_2353_begin_0 = const()[name = tensor("op_2353_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2353_end_0 = const()[name = tensor("op_2353_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2353_end_mask_0 = const()[name = tensor("op_2353_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2353_cast_fp16 = slice_by_index(begin = var_2353_begin_0, end = var_2353_end_0, end_mask = var_2353_end_mask_0, x = var_2004_cast_fp16)[name = tensor("op_2353_cast_fp16")]; tensor var_2360_begin_0 = const()[name = tensor("op_2360_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2360_end_0 = const()[name = tensor("op_2360_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2360_end_mask_0 = const()[name = tensor("op_2360_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2360_cast_fp16 = slice_by_index(begin = var_2360_begin_0, end = var_2360_end_0, end_mask = var_2360_end_mask_0, x = var_2004_cast_fp16)[name = tensor("op_2360_cast_fp16")]; tensor var_2367_begin_0 = const()[name = tensor("op_2367_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2367_end_0 = const()[name = tensor("op_2367_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2367_end_mask_0 = const()[name = tensor("op_2367_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2367_cast_fp16 = slice_by_index(begin = var_2367_begin_0, end = var_2367_end_0, end_mask = var_2367_end_mask_0, x = var_2004_cast_fp16)[name = tensor("op_2367_cast_fp16")]; tensor var_2374_begin_0 = const()[name = tensor("op_2374_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2374_end_0 = const()[name = tensor("op_2374_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2374_end_mask_0 = const()[name = tensor("op_2374_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2374_cast_fp16 = slice_by_index(begin = var_2374_begin_0, end = var_2374_end_0, end_mask = var_2374_end_mask_0, x = var_2004_cast_fp16)[name = tensor("op_2374_cast_fp16")]; tensor var_2381_begin_0 = const()[name = tensor("op_2381_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2381_end_0 = const()[name = tensor("op_2381_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2381_end_mask_0 = const()[name = tensor("op_2381_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2381_cast_fp16 = slice_by_index(begin = var_2381_begin_0, end = var_2381_end_0, end_mask = var_2381_end_mask_0, x = var_2008_cast_fp16)[name = tensor("op_2381_cast_fp16")]; tensor var_2388_begin_0 = const()[name = tensor("op_2388_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2388_end_0 = const()[name = tensor("op_2388_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2388_end_mask_0 = const()[name = tensor("op_2388_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2388_cast_fp16 = slice_by_index(begin = var_2388_begin_0, end = var_2388_end_0, end_mask = var_2388_end_mask_0, x = var_2008_cast_fp16)[name = tensor("op_2388_cast_fp16")]; tensor var_2395_begin_0 = const()[name = tensor("op_2395_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2395_end_0 = const()[name = tensor("op_2395_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2395_end_mask_0 = const()[name = tensor("op_2395_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2395_cast_fp16 = slice_by_index(begin = var_2395_begin_0, end = var_2395_end_0, end_mask = var_2395_end_mask_0, x = var_2008_cast_fp16)[name = tensor("op_2395_cast_fp16")]; tensor var_2402_begin_0 = const()[name = tensor("op_2402_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2402_end_0 = const()[name = tensor("op_2402_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2402_end_mask_0 = const()[name = tensor("op_2402_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2402_cast_fp16 = slice_by_index(begin = var_2402_begin_0, end = var_2402_end_0, end_mask = var_2402_end_mask_0, x = var_2008_cast_fp16)[name = tensor("op_2402_cast_fp16")]; tensor var_2409_begin_0 = const()[name = tensor("op_2409_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2409_end_0 = const()[name = tensor("op_2409_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2409_end_mask_0 = const()[name = tensor("op_2409_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2409_cast_fp16 = slice_by_index(begin = var_2409_begin_0, end = var_2409_end_0, end_mask = var_2409_end_mask_0, x = var_2012_cast_fp16)[name = tensor("op_2409_cast_fp16")]; tensor var_2416_begin_0 = const()[name = tensor("op_2416_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2416_end_0 = const()[name = tensor("op_2416_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2416_end_mask_0 = const()[name = tensor("op_2416_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2416_cast_fp16 = slice_by_index(begin = var_2416_begin_0, end = var_2416_end_0, end_mask = var_2416_end_mask_0, x = var_2012_cast_fp16)[name = tensor("op_2416_cast_fp16")]; tensor var_2423_begin_0 = const()[name = tensor("op_2423_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2423_end_0 = const()[name = tensor("op_2423_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2423_end_mask_0 = const()[name = tensor("op_2423_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2423_cast_fp16 = slice_by_index(begin = var_2423_begin_0, end = var_2423_end_0, end_mask = var_2423_end_mask_0, x = var_2012_cast_fp16)[name = tensor("op_2423_cast_fp16")]; tensor var_2430_begin_0 = const()[name = tensor("op_2430_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2430_end_0 = const()[name = tensor("op_2430_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2430_end_mask_0 = const()[name = tensor("op_2430_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2430_cast_fp16 = slice_by_index(begin = var_2430_begin_0, end = var_2430_end_0, end_mask = var_2430_end_mask_0, x = var_2012_cast_fp16)[name = tensor("op_2430_cast_fp16")]; tensor var_2437_begin_0 = const()[name = tensor("op_2437_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2437_end_0 = const()[name = tensor("op_2437_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2437_end_mask_0 = const()[name = tensor("op_2437_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2437_cast_fp16 = slice_by_index(begin = var_2437_begin_0, end = var_2437_end_0, end_mask = var_2437_end_mask_0, x = var_2016_cast_fp16)[name = tensor("op_2437_cast_fp16")]; tensor var_2444_begin_0 = const()[name = tensor("op_2444_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2444_end_0 = const()[name = tensor("op_2444_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2444_end_mask_0 = const()[name = tensor("op_2444_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2444_cast_fp16 = slice_by_index(begin = var_2444_begin_0, end = var_2444_end_0, end_mask = var_2444_end_mask_0, x = var_2016_cast_fp16)[name = tensor("op_2444_cast_fp16")]; tensor var_2451_begin_0 = const()[name = tensor("op_2451_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2451_end_0 = const()[name = tensor("op_2451_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2451_end_mask_0 = const()[name = tensor("op_2451_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2451_cast_fp16 = slice_by_index(begin = var_2451_begin_0, end = var_2451_end_0, end_mask = var_2451_end_mask_0, x = var_2016_cast_fp16)[name = tensor("op_2451_cast_fp16")]; tensor var_2458_begin_0 = const()[name = tensor("op_2458_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2458_end_0 = const()[name = tensor("op_2458_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2458_end_mask_0 = const()[name = tensor("op_2458_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2458_cast_fp16 = slice_by_index(begin = var_2458_begin_0, end = var_2458_end_0, end_mask = var_2458_end_mask_0, x = var_2016_cast_fp16)[name = tensor("op_2458_cast_fp16")]; tensor var_2465_begin_0 = const()[name = tensor("op_2465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2465_end_0 = const()[name = tensor("op_2465_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2465_end_mask_0 = const()[name = tensor("op_2465_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2465_cast_fp16 = slice_by_index(begin = var_2465_begin_0, end = var_2465_end_0, end_mask = var_2465_end_mask_0, x = var_2020_cast_fp16)[name = tensor("op_2465_cast_fp16")]; tensor var_2472_begin_0 = const()[name = tensor("op_2472_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2472_end_0 = const()[name = tensor("op_2472_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2472_end_mask_0 = const()[name = tensor("op_2472_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2472_cast_fp16 = slice_by_index(begin = var_2472_begin_0, end = var_2472_end_0, end_mask = var_2472_end_mask_0, x = var_2020_cast_fp16)[name = tensor("op_2472_cast_fp16")]; tensor var_2479_begin_0 = const()[name = tensor("op_2479_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2479_end_0 = const()[name = tensor("op_2479_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2479_end_mask_0 = const()[name = tensor("op_2479_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2479_cast_fp16 = slice_by_index(begin = var_2479_begin_0, end = var_2479_end_0, end_mask = var_2479_end_mask_0, x = var_2020_cast_fp16)[name = tensor("op_2479_cast_fp16")]; tensor var_2486_begin_0 = const()[name = tensor("op_2486_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2486_end_0 = const()[name = tensor("op_2486_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2486_end_mask_0 = const()[name = tensor("op_2486_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2486_cast_fp16 = slice_by_index(begin = var_2486_begin_0, end = var_2486_end_0, end_mask = var_2486_end_mask_0, x = var_2020_cast_fp16)[name = tensor("op_2486_cast_fp16")]; tensor var_2493_begin_0 = const()[name = tensor("op_2493_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2493_end_0 = const()[name = tensor("op_2493_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2493_end_mask_0 = const()[name = tensor("op_2493_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2493_cast_fp16 = slice_by_index(begin = var_2493_begin_0, end = var_2493_end_0, end_mask = var_2493_end_mask_0, x = var_2024_cast_fp16)[name = tensor("op_2493_cast_fp16")]; tensor var_2500_begin_0 = const()[name = tensor("op_2500_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2500_end_0 = const()[name = tensor("op_2500_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2500_end_mask_0 = const()[name = tensor("op_2500_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2500_cast_fp16 = slice_by_index(begin = var_2500_begin_0, end = var_2500_end_0, end_mask = var_2500_end_mask_0, x = var_2024_cast_fp16)[name = tensor("op_2500_cast_fp16")]; tensor var_2507_begin_0 = const()[name = tensor("op_2507_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2507_end_0 = const()[name = tensor("op_2507_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2507_end_mask_0 = const()[name = tensor("op_2507_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2507_cast_fp16 = slice_by_index(begin = var_2507_begin_0, end = var_2507_end_0, end_mask = var_2507_end_mask_0, x = var_2024_cast_fp16)[name = tensor("op_2507_cast_fp16")]; tensor var_2514_begin_0 = const()[name = tensor("op_2514_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2514_end_0 = const()[name = tensor("op_2514_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2514_end_mask_0 = const()[name = tensor("op_2514_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2514_cast_fp16 = slice_by_index(begin = var_2514_begin_0, end = var_2514_end_0, end_mask = var_2514_end_mask_0, x = var_2024_cast_fp16)[name = tensor("op_2514_cast_fp16")]; tensor var_2521_begin_0 = const()[name = tensor("op_2521_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2521_end_0 = const()[name = tensor("op_2521_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2521_end_mask_0 = const()[name = tensor("op_2521_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2521_cast_fp16 = slice_by_index(begin = var_2521_begin_0, end = var_2521_end_0, end_mask = var_2521_end_mask_0, x = var_2028_cast_fp16)[name = tensor("op_2521_cast_fp16")]; tensor var_2528_begin_0 = const()[name = tensor("op_2528_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2528_end_0 = const()[name = tensor("op_2528_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2528_end_mask_0 = const()[name = tensor("op_2528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2528_cast_fp16 = slice_by_index(begin = var_2528_begin_0, end = var_2528_end_0, end_mask = var_2528_end_mask_0, x = var_2028_cast_fp16)[name = tensor("op_2528_cast_fp16")]; tensor var_2535_begin_0 = const()[name = tensor("op_2535_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2535_end_0 = const()[name = tensor("op_2535_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2535_end_mask_0 = const()[name = tensor("op_2535_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2535_cast_fp16 = slice_by_index(begin = var_2535_begin_0, end = var_2535_end_0, end_mask = var_2535_end_mask_0, x = var_2028_cast_fp16)[name = tensor("op_2535_cast_fp16")]; tensor var_2542_begin_0 = const()[name = tensor("op_2542_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2542_end_0 = const()[name = tensor("op_2542_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2542_end_mask_0 = const()[name = tensor("op_2542_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2542_cast_fp16 = slice_by_index(begin = var_2542_begin_0, end = var_2542_end_0, end_mask = var_2542_end_mask_0, x = var_2028_cast_fp16)[name = tensor("op_2542_cast_fp16")]; tensor var_2549_begin_0 = const()[name = tensor("op_2549_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2549_end_0 = const()[name = tensor("op_2549_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2549_end_mask_0 = const()[name = tensor("op_2549_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2549_cast_fp16 = slice_by_index(begin = var_2549_begin_0, end = var_2549_end_0, end_mask = var_2549_end_mask_0, x = var_2032_cast_fp16)[name = tensor("op_2549_cast_fp16")]; tensor var_2556_begin_0 = const()[name = tensor("op_2556_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2556_end_0 = const()[name = tensor("op_2556_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2556_end_mask_0 = const()[name = tensor("op_2556_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2556_cast_fp16 = slice_by_index(begin = var_2556_begin_0, end = var_2556_end_0, end_mask = var_2556_end_mask_0, x = var_2032_cast_fp16)[name = tensor("op_2556_cast_fp16")]; tensor var_2563_begin_0 = const()[name = tensor("op_2563_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2563_end_0 = const()[name = tensor("op_2563_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2563_end_mask_0 = const()[name = tensor("op_2563_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2563_cast_fp16 = slice_by_index(begin = var_2563_begin_0, end = var_2563_end_0, end_mask = var_2563_end_mask_0, x = var_2032_cast_fp16)[name = tensor("op_2563_cast_fp16")]; tensor var_2570_begin_0 = const()[name = tensor("op_2570_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2570_end_0 = const()[name = tensor("op_2570_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2570_end_mask_0 = const()[name = tensor("op_2570_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2570_cast_fp16 = slice_by_index(begin = var_2570_begin_0, end = var_2570_end_0, end_mask = var_2570_end_mask_0, x = var_2032_cast_fp16)[name = tensor("op_2570_cast_fp16")]; tensor var_2577_begin_0 = const()[name = tensor("op_2577_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2577_end_0 = const()[name = tensor("op_2577_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_2577_end_mask_0 = const()[name = tensor("op_2577_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2577_cast_fp16 = slice_by_index(begin = var_2577_begin_0, end = var_2577_end_0, end_mask = var_2577_end_mask_0, x = var_2036_cast_fp16)[name = tensor("op_2577_cast_fp16")]; tensor var_2584_begin_0 = const()[name = tensor("op_2584_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_2584_end_0 = const()[name = tensor("op_2584_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_2584_end_mask_0 = const()[name = tensor("op_2584_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2584_cast_fp16 = slice_by_index(begin = var_2584_begin_0, end = var_2584_end_0, end_mask = var_2584_end_mask_0, x = var_2036_cast_fp16)[name = tensor("op_2584_cast_fp16")]; tensor var_2591_begin_0 = const()[name = tensor("op_2591_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_2591_end_0 = const()[name = tensor("op_2591_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_2591_end_mask_0 = const()[name = tensor("op_2591_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2591_cast_fp16 = slice_by_index(begin = var_2591_begin_0, end = var_2591_end_0, end_mask = var_2591_end_mask_0, x = var_2036_cast_fp16)[name = tensor("op_2591_cast_fp16")]; tensor var_2598_begin_0 = const()[name = tensor("op_2598_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_2598_end_0 = const()[name = tensor("op_2598_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2598_end_mask_0 = const()[name = tensor("op_2598_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2598_cast_fp16 = slice_by_index(begin = var_2598_begin_0, end = var_2598_end_0, end_mask = var_2598_end_mask_0, x = var_2036_cast_fp16)[name = tensor("op_2598_cast_fp16")]; tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_2603_begin_0 = const()[name = tensor("op_2603_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2603_end_0 = const()[name = tensor("op_2603_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_2603_end_mask_0 = const()[name = tensor("op_2603_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor("transpose_30")]; tensor var_2603_cast_fp16 = slice_by_index(begin = var_2603_begin_0, end = var_2603_end_0, end_mask = var_2603_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2603_cast_fp16")]; tensor var_2607_begin_0 = const()[name = tensor("op_2607_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_2607_end_0 = const()[name = tensor("op_2607_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_2607_end_mask_0 = const()[name = tensor("op_2607_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2607_cast_fp16 = slice_by_index(begin = var_2607_begin_0, end = var_2607_end_0, end_mask = var_2607_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2607_cast_fp16")]; tensor var_2611_begin_0 = const()[name = tensor("op_2611_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_2611_end_0 = const()[name = tensor("op_2611_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_2611_end_mask_0 = const()[name = tensor("op_2611_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2611_cast_fp16 = slice_by_index(begin = var_2611_begin_0, end = var_2611_end_0, end_mask = var_2611_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2611_cast_fp16")]; tensor var_2615_begin_0 = const()[name = tensor("op_2615_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_2615_end_0 = const()[name = tensor("op_2615_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_2615_end_mask_0 = const()[name = tensor("op_2615_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2615_cast_fp16 = slice_by_index(begin = var_2615_begin_0, end = var_2615_end_0, end_mask = var_2615_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2615_cast_fp16")]; tensor var_2619_begin_0 = const()[name = tensor("op_2619_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_2619_end_0 = const()[name = tensor("op_2619_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_2619_end_mask_0 = const()[name = tensor("op_2619_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2619_cast_fp16 = slice_by_index(begin = var_2619_begin_0, end = var_2619_end_0, end_mask = var_2619_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2619_cast_fp16")]; tensor var_2623_begin_0 = const()[name = tensor("op_2623_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_2623_end_0 = const()[name = tensor("op_2623_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_2623_end_mask_0 = const()[name = tensor("op_2623_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2623_cast_fp16 = slice_by_index(begin = var_2623_begin_0, end = var_2623_end_0, end_mask = var_2623_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2623_cast_fp16")]; tensor var_2627_begin_0 = const()[name = tensor("op_2627_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_2627_end_0 = const()[name = tensor("op_2627_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_2627_end_mask_0 = const()[name = tensor("op_2627_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2627_cast_fp16 = slice_by_index(begin = var_2627_begin_0, end = var_2627_end_0, end_mask = var_2627_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2627_cast_fp16")]; tensor var_2631_begin_0 = const()[name = tensor("op_2631_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_2631_end_0 = const()[name = tensor("op_2631_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_2631_end_mask_0 = const()[name = tensor("op_2631_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2631_cast_fp16 = slice_by_index(begin = var_2631_begin_0, end = var_2631_end_0, end_mask = var_2631_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2631_cast_fp16")]; tensor var_2635_begin_0 = const()[name = tensor("op_2635_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_2635_end_0 = const()[name = tensor("op_2635_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_2635_end_mask_0 = const()[name = tensor("op_2635_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2635_cast_fp16 = slice_by_index(begin = var_2635_begin_0, end = var_2635_end_0, end_mask = var_2635_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2635_cast_fp16")]; tensor var_2639_begin_0 = const()[name = tensor("op_2639_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_2639_end_0 = const()[name = tensor("op_2639_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_2639_end_mask_0 = const()[name = tensor("op_2639_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2639_cast_fp16 = slice_by_index(begin = var_2639_begin_0, end = var_2639_end_0, end_mask = var_2639_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2639_cast_fp16")]; tensor var_2643_begin_0 = const()[name = tensor("op_2643_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_2643_end_0 = const()[name = tensor("op_2643_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_2643_end_mask_0 = const()[name = tensor("op_2643_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2643_cast_fp16 = slice_by_index(begin = var_2643_begin_0, end = var_2643_end_0, end_mask = var_2643_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2643_cast_fp16")]; tensor var_2647_begin_0 = const()[name = tensor("op_2647_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_2647_end_0 = const()[name = tensor("op_2647_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_2647_end_mask_0 = const()[name = tensor("op_2647_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2647_cast_fp16 = slice_by_index(begin = var_2647_begin_0, end = var_2647_end_0, end_mask = var_2647_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2647_cast_fp16")]; tensor var_2651_begin_0 = const()[name = tensor("op_2651_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_2651_end_0 = const()[name = tensor("op_2651_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_2651_end_mask_0 = const()[name = tensor("op_2651_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2651_cast_fp16 = slice_by_index(begin = var_2651_begin_0, end = var_2651_end_0, end_mask = var_2651_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2651_cast_fp16")]; tensor var_2655_begin_0 = const()[name = tensor("op_2655_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_2655_end_0 = const()[name = tensor("op_2655_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_2655_end_mask_0 = const()[name = tensor("op_2655_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2655_cast_fp16 = slice_by_index(begin = var_2655_begin_0, end = var_2655_end_0, end_mask = var_2655_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2655_cast_fp16")]; tensor var_2659_begin_0 = const()[name = tensor("op_2659_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_2659_end_0 = const()[name = tensor("op_2659_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_2659_end_mask_0 = const()[name = tensor("op_2659_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2659_cast_fp16 = slice_by_index(begin = var_2659_begin_0, end = var_2659_end_0, end_mask = var_2659_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2659_cast_fp16")]; tensor var_2663_begin_0 = const()[name = tensor("op_2663_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_2663_end_0 = const()[name = tensor("op_2663_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_2663_end_mask_0 = const()[name = tensor("op_2663_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2663_cast_fp16 = slice_by_index(begin = var_2663_begin_0, end = var_2663_end_0, end_mask = var_2663_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2663_cast_fp16")]; tensor var_2667_begin_0 = const()[name = tensor("op_2667_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_2667_end_0 = const()[name = tensor("op_2667_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_2667_end_mask_0 = const()[name = tensor("op_2667_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2667_cast_fp16 = slice_by_index(begin = var_2667_begin_0, end = var_2667_end_0, end_mask = var_2667_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2667_cast_fp16")]; tensor var_2671_begin_0 = const()[name = tensor("op_2671_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_2671_end_0 = const()[name = tensor("op_2671_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_2671_end_mask_0 = const()[name = tensor("op_2671_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2671_cast_fp16 = slice_by_index(begin = var_2671_begin_0, end = var_2671_end_0, end_mask = var_2671_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2671_cast_fp16")]; tensor var_2675_begin_0 = const()[name = tensor("op_2675_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_2675_end_0 = const()[name = tensor("op_2675_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_2675_end_mask_0 = const()[name = tensor("op_2675_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2675_cast_fp16 = slice_by_index(begin = var_2675_begin_0, end = var_2675_end_0, end_mask = var_2675_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2675_cast_fp16")]; tensor var_2679_begin_0 = const()[name = tensor("op_2679_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_2679_end_0 = const()[name = tensor("op_2679_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_2679_end_mask_0 = const()[name = tensor("op_2679_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_2679_cast_fp16 = slice_by_index(begin = var_2679_begin_0, end = var_2679_end_0, end_mask = var_2679_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_2679_cast_fp16")]; tensor var_2681_begin_0 = const()[name = tensor("op_2681_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2681_end_0 = const()[name = tensor("op_2681_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_2681_end_mask_0 = const()[name = tensor("op_2681_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2681_cast_fp16 = slice_by_index(begin = var_2681_begin_0, end = var_2681_end_0, end_mask = var_2681_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2681_cast_fp16")]; tensor var_2685_begin_0 = const()[name = tensor("op_2685_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_2685_end_0 = const()[name = tensor("op_2685_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_2685_end_mask_0 = const()[name = tensor("op_2685_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2685_cast_fp16 = slice_by_index(begin = var_2685_begin_0, end = var_2685_end_0, end_mask = var_2685_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2685_cast_fp16")]; tensor var_2689_begin_0 = const()[name = tensor("op_2689_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_2689_end_0 = const()[name = tensor("op_2689_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_2689_end_mask_0 = const()[name = tensor("op_2689_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2689_cast_fp16 = slice_by_index(begin = var_2689_begin_0, end = var_2689_end_0, end_mask = var_2689_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2689_cast_fp16")]; tensor var_2693_begin_0 = const()[name = tensor("op_2693_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_2693_end_0 = const()[name = tensor("op_2693_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_2693_end_mask_0 = const()[name = tensor("op_2693_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2693_cast_fp16 = slice_by_index(begin = var_2693_begin_0, end = var_2693_end_0, end_mask = var_2693_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2693_cast_fp16")]; tensor var_2697_begin_0 = const()[name = tensor("op_2697_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_2697_end_0 = const()[name = tensor("op_2697_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_2697_end_mask_0 = const()[name = tensor("op_2697_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2697_cast_fp16 = slice_by_index(begin = var_2697_begin_0, end = var_2697_end_0, end_mask = var_2697_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2697_cast_fp16")]; tensor var_2701_begin_0 = const()[name = tensor("op_2701_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_2701_end_0 = const()[name = tensor("op_2701_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_2701_end_mask_0 = const()[name = tensor("op_2701_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2701_cast_fp16 = slice_by_index(begin = var_2701_begin_0, end = var_2701_end_0, end_mask = var_2701_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2701_cast_fp16")]; tensor var_2705_begin_0 = const()[name = tensor("op_2705_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_2705_end_0 = const()[name = tensor("op_2705_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_2705_end_mask_0 = const()[name = tensor("op_2705_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2705_cast_fp16 = slice_by_index(begin = var_2705_begin_0, end = var_2705_end_0, end_mask = var_2705_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2705_cast_fp16")]; tensor var_2709_begin_0 = const()[name = tensor("op_2709_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_2709_end_0 = const()[name = tensor("op_2709_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_2709_end_mask_0 = const()[name = tensor("op_2709_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2709_cast_fp16 = slice_by_index(begin = var_2709_begin_0, end = var_2709_end_0, end_mask = var_2709_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2709_cast_fp16")]; tensor var_2713_begin_0 = const()[name = tensor("op_2713_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_2713_end_0 = const()[name = tensor("op_2713_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_2713_end_mask_0 = const()[name = tensor("op_2713_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2713_cast_fp16 = slice_by_index(begin = var_2713_begin_0, end = var_2713_end_0, end_mask = var_2713_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2713_cast_fp16")]; tensor var_2717_begin_0 = const()[name = tensor("op_2717_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_2717_end_0 = const()[name = tensor("op_2717_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_2717_end_mask_0 = const()[name = tensor("op_2717_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2717_cast_fp16 = slice_by_index(begin = var_2717_begin_0, end = var_2717_end_0, end_mask = var_2717_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2717_cast_fp16")]; tensor var_2721_begin_0 = const()[name = tensor("op_2721_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_2721_end_0 = const()[name = tensor("op_2721_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_2721_end_mask_0 = const()[name = tensor("op_2721_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2721_cast_fp16 = slice_by_index(begin = var_2721_begin_0, end = var_2721_end_0, end_mask = var_2721_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2721_cast_fp16")]; tensor var_2725_begin_0 = const()[name = tensor("op_2725_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_2725_end_0 = const()[name = tensor("op_2725_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_2725_end_mask_0 = const()[name = tensor("op_2725_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2725_cast_fp16 = slice_by_index(begin = var_2725_begin_0, end = var_2725_end_0, end_mask = var_2725_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2725_cast_fp16")]; tensor var_2729_begin_0 = const()[name = tensor("op_2729_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_2729_end_0 = const()[name = tensor("op_2729_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_2729_end_mask_0 = const()[name = tensor("op_2729_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2729_cast_fp16 = slice_by_index(begin = var_2729_begin_0, end = var_2729_end_0, end_mask = var_2729_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2729_cast_fp16")]; tensor var_2733_begin_0 = const()[name = tensor("op_2733_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_2733_end_0 = const()[name = tensor("op_2733_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_2733_end_mask_0 = const()[name = tensor("op_2733_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2733_cast_fp16 = slice_by_index(begin = var_2733_begin_0, end = var_2733_end_0, end_mask = var_2733_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2733_cast_fp16")]; tensor var_2737_begin_0 = const()[name = tensor("op_2737_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_2737_end_0 = const()[name = tensor("op_2737_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_2737_end_mask_0 = const()[name = tensor("op_2737_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2737_cast_fp16 = slice_by_index(begin = var_2737_begin_0, end = var_2737_end_0, end_mask = var_2737_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2737_cast_fp16")]; tensor var_2741_begin_0 = const()[name = tensor("op_2741_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_2741_end_0 = const()[name = tensor("op_2741_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_2741_end_mask_0 = const()[name = tensor("op_2741_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2741_cast_fp16 = slice_by_index(begin = var_2741_begin_0, end = var_2741_end_0, end_mask = var_2741_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2741_cast_fp16")]; tensor var_2745_begin_0 = const()[name = tensor("op_2745_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_2745_end_0 = const()[name = tensor("op_2745_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_2745_end_mask_0 = const()[name = tensor("op_2745_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2745_cast_fp16 = slice_by_index(begin = var_2745_begin_0, end = var_2745_end_0, end_mask = var_2745_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2745_cast_fp16")]; tensor var_2749_begin_0 = const()[name = tensor("op_2749_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_2749_end_0 = const()[name = tensor("op_2749_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_2749_end_mask_0 = const()[name = tensor("op_2749_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2749_cast_fp16 = slice_by_index(begin = var_2749_begin_0, end = var_2749_end_0, end_mask = var_2749_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2749_cast_fp16")]; tensor var_2753_begin_0 = const()[name = tensor("op_2753_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_2753_end_0 = const()[name = tensor("op_2753_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_2753_end_mask_0 = const()[name = tensor("op_2753_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2753_cast_fp16 = slice_by_index(begin = var_2753_begin_0, end = var_2753_end_0, end_mask = var_2753_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2753_cast_fp16")]; tensor var_2757_begin_0 = const()[name = tensor("op_2757_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_2757_end_0 = const()[name = tensor("op_2757_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_2757_end_mask_0 = const()[name = tensor("op_2757_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_2757_cast_fp16 = slice_by_index(begin = var_2757_begin_0, end = var_2757_end_0, end_mask = var_2757_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2757_cast_fp16")]; tensor _SplitHeadsQ__mh_w_161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_161_equation_0, values = (var_2603_cast_fp16, var_2045_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_163_equation_0, values = (var_2603_cast_fp16, var_2052_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_165_equation_0, values = (var_2603_cast_fp16, var_2059_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_167_equation_0, values = (var_2603_cast_fp16, var_2066_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_169_equation_0, values = (var_2607_cast_fp16, var_2073_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_171_equation_0, values = (var_2607_cast_fp16, var_2080_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_173_equation_0, values = (var_2607_cast_fp16, var_2087_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_175_equation_0, values = (var_2607_cast_fp16, var_2094_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_177_equation_0, values = (var_2611_cast_fp16, var_2101_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_179_equation_0, values = (var_2611_cast_fp16, var_2108_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_181_equation_0, values = (var_2611_cast_fp16, var_2115_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_183_equation_0, values = (var_2611_cast_fp16, var_2122_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_185_equation_0, values = (var_2615_cast_fp16, var_2129_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_187_equation_0, values = (var_2615_cast_fp16, var_2136_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_189_equation_0, values = (var_2615_cast_fp16, var_2143_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_191_equation_0, values = (var_2615_cast_fp16, var_2150_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_193_equation_0, values = (var_2619_cast_fp16, var_2157_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_195_equation_0, values = (var_2619_cast_fp16, var_2164_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_197_equation_0, values = (var_2619_cast_fp16, var_2171_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_199_equation_0, values = (var_2619_cast_fp16, var_2178_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_201_equation_0, values = (var_2623_cast_fp16, var_2185_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_203_equation_0, values = (var_2623_cast_fp16, var_2192_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_205_equation_0, values = (var_2623_cast_fp16, var_2199_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_207_equation_0, values = (var_2623_cast_fp16, var_2206_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_209_equation_0, values = (var_2627_cast_fp16, var_2213_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_211_equation_0, values = (var_2627_cast_fp16, var_2220_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_213_equation_0, values = (var_2627_cast_fp16, var_2227_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_215_equation_0, values = (var_2627_cast_fp16, var_2234_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_217_equation_0, values = (var_2631_cast_fp16, var_2241_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_219_equation_0, values = (var_2631_cast_fp16, var_2248_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_221_equation_0, values = (var_2631_cast_fp16, var_2255_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_223_equation_0, values = (var_2631_cast_fp16, var_2262_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_225_equation_0, values = (var_2635_cast_fp16, var_2269_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_227_equation_0, values = (var_2635_cast_fp16, var_2276_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_229_equation_0, values = (var_2635_cast_fp16, var_2283_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_231_equation_0, values = (var_2635_cast_fp16, var_2290_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_233_equation_0, values = (var_2639_cast_fp16, var_2297_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_235_equation_0, values = (var_2639_cast_fp16, var_2304_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_237_equation_0, values = (var_2639_cast_fp16, var_2311_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_239_equation_0, values = (var_2639_cast_fp16, var_2318_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_241_equation_0, values = (var_2643_cast_fp16, var_2325_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_243_equation_0, values = (var_2643_cast_fp16, var_2332_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_245_equation_0, values = (var_2643_cast_fp16, var_2339_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_247_equation_0, values = (var_2643_cast_fp16, var_2346_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_249_equation_0, values = (var_2647_cast_fp16, var_2353_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_251_equation_0, values = (var_2647_cast_fp16, var_2360_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_253_equation_0, values = (var_2647_cast_fp16, var_2367_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_255_equation_0, values = (var_2647_cast_fp16, var_2374_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_257_equation_0, values = (var_2651_cast_fp16, var_2381_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_259_equation_0, values = (var_2651_cast_fp16, var_2388_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_261_equation_0, values = (var_2651_cast_fp16, var_2395_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_263_equation_0, values = (var_2651_cast_fp16, var_2402_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_265_equation_0, values = (var_2655_cast_fp16, var_2409_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_267_equation_0, values = (var_2655_cast_fp16, var_2416_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_269_equation_0, values = (var_2655_cast_fp16, var_2423_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_271_equation_0, values = (var_2655_cast_fp16, var_2430_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_273_equation_0, values = (var_2659_cast_fp16, var_2437_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_275_equation_0, values = (var_2659_cast_fp16, var_2444_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_277_equation_0, values = (var_2659_cast_fp16, var_2451_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_279_equation_0, values = (var_2659_cast_fp16, var_2458_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_281_equation_0, values = (var_2663_cast_fp16, var_2465_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_283_equation_0, values = (var_2663_cast_fp16, var_2472_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_285_equation_0, values = (var_2663_cast_fp16, var_2479_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_287_equation_0, values = (var_2663_cast_fp16, var_2486_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_287_cast_fp16")]; tensor _SplitHeadsQ__mh_w_289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_289_equation_0, values = (var_2667_cast_fp16, var_2493_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_291_equation_0, values = (var_2667_cast_fp16, var_2500_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_293_equation_0, values = (var_2667_cast_fp16, var_2507_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_295_equation_0, values = (var_2667_cast_fp16, var_2514_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_295_cast_fp16")]; tensor _SplitHeadsQ__mh_w_297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_297_equation_0, values = (var_2671_cast_fp16, var_2521_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_299_equation_0, values = (var_2671_cast_fp16, var_2528_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_301_equation_0, values = (var_2671_cast_fp16, var_2535_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_303_equation_0, values = (var_2671_cast_fp16, var_2542_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_305_equation_0, values = (var_2675_cast_fp16, var_2549_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_307_equation_0, values = (var_2675_cast_fp16, var_2556_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_309_equation_0, values = (var_2675_cast_fp16, var_2563_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_311_equation_0, values = (var_2675_cast_fp16, var_2570_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_313_equation_0, values = (var_2679_cast_fp16, var_2577_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_315_equation_0, values = (var_2679_cast_fp16, var_2584_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_317_equation_0, values = (var_2679_cast_fp16, var_2591_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_319_equation_0, values = (var_2679_cast_fp16, var_2598_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_319_cast_fp16")]; tensor var_2920_to_fp16 = const()[name = tensor("op_2920_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_161_cast_fp16, y = var_2920_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; tensor var_2922_to_fp16 = const()[name = tensor("op_2922_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_163_cast_fp16, y = var_2922_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; tensor var_2924_to_fp16 = const()[name = tensor("op_2924_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_165_cast_fp16, y = var_2924_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; tensor var_2926_to_fp16 = const()[name = tensor("op_2926_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_167_cast_fp16, y = var_2926_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; tensor var_2928_to_fp16 = const()[name = tensor("op_2928_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_169_cast_fp16, y = var_2928_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; tensor var_2930_to_fp16 = const()[name = tensor("op_2930_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_171_cast_fp16, y = var_2930_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; tensor var_2932_to_fp16 = const()[name = tensor("op_2932_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_173_cast_fp16, y = var_2932_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; tensor var_2934_to_fp16 = const()[name = tensor("op_2934_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_175_cast_fp16, y = var_2934_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; tensor var_2936_to_fp16 = const()[name = tensor("op_2936_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_177_cast_fp16, y = var_2936_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; tensor var_2938_to_fp16 = const()[name = tensor("op_2938_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_179_cast_fp16, y = var_2938_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; tensor var_2940_to_fp16 = const()[name = tensor("op_2940_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_181_cast_fp16, y = var_2940_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; tensor var_2942_to_fp16 = const()[name = tensor("op_2942_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_183_cast_fp16, y = var_2942_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; tensor var_2944_to_fp16 = const()[name = tensor("op_2944_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_185_cast_fp16, y = var_2944_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; tensor var_2946_to_fp16 = const()[name = tensor("op_2946_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_187_cast_fp16, y = var_2946_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; tensor var_2948_to_fp16 = const()[name = tensor("op_2948_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_189_cast_fp16, y = var_2948_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; tensor var_2950_to_fp16 = const()[name = tensor("op_2950_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_191_cast_fp16, y = var_2950_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; tensor var_2952_to_fp16 = const()[name = tensor("op_2952_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_193_cast_fp16, y = var_2952_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; tensor var_2954_to_fp16 = const()[name = tensor("op_2954_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_195_cast_fp16, y = var_2954_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; tensor var_2956_to_fp16 = const()[name = tensor("op_2956_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_197_cast_fp16, y = var_2956_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; tensor var_2958_to_fp16 = const()[name = tensor("op_2958_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_199_cast_fp16, y = var_2958_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; tensor var_2960_to_fp16 = const()[name = tensor("op_2960_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_201_cast_fp16, y = var_2960_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; tensor var_2962_to_fp16 = const()[name = tensor("op_2962_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_203_cast_fp16, y = var_2962_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; tensor var_2964_to_fp16 = const()[name = tensor("op_2964_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_205_cast_fp16, y = var_2964_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; tensor var_2966_to_fp16 = const()[name = tensor("op_2966_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_207_cast_fp16, y = var_2966_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; tensor var_2968_to_fp16 = const()[name = tensor("op_2968_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_209_cast_fp16, y = var_2968_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; tensor var_2970_to_fp16 = const()[name = tensor("op_2970_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_211_cast_fp16, y = var_2970_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; tensor var_2972_to_fp16 = const()[name = tensor("op_2972_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_213_cast_fp16, y = var_2972_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; tensor var_2974_to_fp16 = const()[name = tensor("op_2974_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_215_cast_fp16, y = var_2974_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; tensor var_2976_to_fp16 = const()[name = tensor("op_2976_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_217_cast_fp16, y = var_2976_to_fp16)[name = tensor("aw_chunk_217_cast_fp16")]; tensor var_2978_to_fp16 = const()[name = tensor("op_2978_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_219_cast_fp16, y = var_2978_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; tensor var_2980_to_fp16 = const()[name = tensor("op_2980_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_221_cast_fp16, y = var_2980_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; tensor var_2982_to_fp16 = const()[name = tensor("op_2982_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_223_cast_fp16, y = var_2982_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; tensor var_2984_to_fp16 = const()[name = tensor("op_2984_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_225_cast_fp16, y = var_2984_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; tensor var_2986_to_fp16 = const()[name = tensor("op_2986_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_227_cast_fp16, y = var_2986_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; tensor var_2988_to_fp16 = const()[name = tensor("op_2988_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_229_cast_fp16, y = var_2988_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; tensor var_2990_to_fp16 = const()[name = tensor("op_2990_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_231_cast_fp16, y = var_2990_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; tensor var_2992_to_fp16 = const()[name = tensor("op_2992_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_233_cast_fp16, y = var_2992_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; tensor var_2994_to_fp16 = const()[name = tensor("op_2994_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_235_cast_fp16, y = var_2994_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; tensor var_2996_to_fp16 = const()[name = tensor("op_2996_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_237_cast_fp16, y = var_2996_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; tensor var_2998_to_fp16 = const()[name = tensor("op_2998_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_239_cast_fp16, y = var_2998_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; tensor var_3000_to_fp16 = const()[name = tensor("op_3000_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_241_cast_fp16, y = var_3000_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; tensor var_3002_to_fp16 = const()[name = tensor("op_3002_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_243_cast_fp16, y = var_3002_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; tensor var_3004_to_fp16 = const()[name = tensor("op_3004_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_245_cast_fp16, y = var_3004_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; tensor var_3006_to_fp16 = const()[name = tensor("op_3006_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_247_cast_fp16, y = var_3006_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; tensor var_3008_to_fp16 = const()[name = tensor("op_3008_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_249_cast_fp16, y = var_3008_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; tensor var_3010_to_fp16 = const()[name = tensor("op_3010_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_251_cast_fp16, y = var_3010_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; tensor var_3012_to_fp16 = const()[name = tensor("op_3012_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_253_cast_fp16, y = var_3012_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; tensor var_3014_to_fp16 = const()[name = tensor("op_3014_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_255_cast_fp16, y = var_3014_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; tensor var_3016_to_fp16 = const()[name = tensor("op_3016_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_257_cast_fp16, y = var_3016_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; tensor var_3018_to_fp16 = const()[name = tensor("op_3018_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_259_cast_fp16, y = var_3018_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; tensor var_3020_to_fp16 = const()[name = tensor("op_3020_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_261_cast_fp16, y = var_3020_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; tensor var_3022_to_fp16 = const()[name = tensor("op_3022_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_263_cast_fp16, y = var_3022_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; tensor var_3024_to_fp16 = const()[name = tensor("op_3024_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_265_cast_fp16, y = var_3024_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; tensor var_3026_to_fp16 = const()[name = tensor("op_3026_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_267_cast_fp16, y = var_3026_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; tensor var_3028_to_fp16 = const()[name = tensor("op_3028_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_269_cast_fp16, y = var_3028_to_fp16)[name = tensor("aw_chunk_269_cast_fp16")]; tensor var_3030_to_fp16 = const()[name = tensor("op_3030_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_271_cast_fp16, y = var_3030_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; tensor var_3032_to_fp16 = const()[name = tensor("op_3032_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_273_cast_fp16, y = var_3032_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; tensor var_3034_to_fp16 = const()[name = tensor("op_3034_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_275_cast_fp16, y = var_3034_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; tensor var_3036_to_fp16 = const()[name = tensor("op_3036_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_277_cast_fp16, y = var_3036_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; tensor var_3038_to_fp16 = const()[name = tensor("op_3038_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_279_cast_fp16, y = var_3038_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; tensor var_3040_to_fp16 = const()[name = tensor("op_3040_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_281_cast_fp16, y = var_3040_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; tensor var_3042_to_fp16 = const()[name = tensor("op_3042_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_283_cast_fp16, y = var_3042_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; tensor var_3044_to_fp16 = const()[name = tensor("op_3044_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_285_cast_fp16, y = var_3044_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; tensor var_3046_to_fp16 = const()[name = tensor("op_3046_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_287_cast_fp16, y = var_3046_to_fp16)[name = tensor("aw_chunk_287_cast_fp16")]; tensor var_3048_to_fp16 = const()[name = tensor("op_3048_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_289_cast_fp16, y = var_3048_to_fp16)[name = tensor("aw_chunk_289_cast_fp16")]; tensor var_3050_to_fp16 = const()[name = tensor("op_3050_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_291_cast_fp16, y = var_3050_to_fp16)[name = tensor("aw_chunk_291_cast_fp16")]; tensor var_3052_to_fp16 = const()[name = tensor("op_3052_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_293_cast_fp16, y = var_3052_to_fp16)[name = tensor("aw_chunk_293_cast_fp16")]; tensor var_3054_to_fp16 = const()[name = tensor("op_3054_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_295_cast_fp16, y = var_3054_to_fp16)[name = tensor("aw_chunk_295_cast_fp16")]; tensor var_3056_to_fp16 = const()[name = tensor("op_3056_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_297_cast_fp16, y = var_3056_to_fp16)[name = tensor("aw_chunk_297_cast_fp16")]; tensor var_3058_to_fp16 = const()[name = tensor("op_3058_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_299_cast_fp16, y = var_3058_to_fp16)[name = tensor("aw_chunk_299_cast_fp16")]; tensor var_3060_to_fp16 = const()[name = tensor("op_3060_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_301_cast_fp16, y = var_3060_to_fp16)[name = tensor("aw_chunk_301_cast_fp16")]; tensor var_3062_to_fp16 = const()[name = tensor("op_3062_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_303_cast_fp16, y = var_3062_to_fp16)[name = tensor("aw_chunk_303_cast_fp16")]; tensor var_3064_to_fp16 = const()[name = tensor("op_3064_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_305_cast_fp16, y = var_3064_to_fp16)[name = tensor("aw_chunk_305_cast_fp16")]; tensor var_3066_to_fp16 = const()[name = tensor("op_3066_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_307_cast_fp16, y = var_3066_to_fp16)[name = tensor("aw_chunk_307_cast_fp16")]; tensor var_3068_to_fp16 = const()[name = tensor("op_3068_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_309_cast_fp16, y = var_3068_to_fp16)[name = tensor("aw_chunk_309_cast_fp16")]; tensor var_3070_to_fp16 = const()[name = tensor("op_3070_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_311_cast_fp16, y = var_3070_to_fp16)[name = tensor("aw_chunk_311_cast_fp16")]; tensor var_3072_to_fp16 = const()[name = tensor("op_3072_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_313_cast_fp16, y = var_3072_to_fp16)[name = tensor("aw_chunk_313_cast_fp16")]; tensor var_3074_to_fp16 = const()[name = tensor("op_3074_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_315_cast_fp16, y = var_3074_to_fp16)[name = tensor("aw_chunk_315_cast_fp16")]; tensor var_3076_to_fp16 = const()[name = tensor("op_3076_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_317_cast_fp16, y = var_3076_to_fp16)[name = tensor("aw_chunk_317_cast_fp16")]; tensor var_3078_to_fp16 = const()[name = tensor("op_3078_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_319_cast_fp16, y = var_3078_to_fp16)[name = tensor("aw_chunk_319_cast_fp16")]; tensor var_3080_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_161_cast_fp16)[name = tensor("op_3080_cast_fp16")]; tensor var_3081_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_163_cast_fp16)[name = tensor("op_3081_cast_fp16")]; tensor var_3082_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_165_cast_fp16)[name = tensor("op_3082_cast_fp16")]; tensor var_3083_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_167_cast_fp16)[name = tensor("op_3083_cast_fp16")]; tensor var_3084_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_169_cast_fp16)[name = tensor("op_3084_cast_fp16")]; tensor var_3085_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_171_cast_fp16)[name = tensor("op_3085_cast_fp16")]; tensor var_3086_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_173_cast_fp16)[name = tensor("op_3086_cast_fp16")]; tensor var_3087_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_175_cast_fp16)[name = tensor("op_3087_cast_fp16")]; tensor var_3088_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_177_cast_fp16)[name = tensor("op_3088_cast_fp16")]; tensor var_3089_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_179_cast_fp16)[name = tensor("op_3089_cast_fp16")]; tensor var_3090_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_181_cast_fp16)[name = tensor("op_3090_cast_fp16")]; tensor var_3091_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_183_cast_fp16)[name = tensor("op_3091_cast_fp16")]; tensor var_3092_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_185_cast_fp16)[name = tensor("op_3092_cast_fp16")]; tensor var_3093_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_187_cast_fp16)[name = tensor("op_3093_cast_fp16")]; tensor var_3094_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_189_cast_fp16)[name = tensor("op_3094_cast_fp16")]; tensor var_3095_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_191_cast_fp16)[name = tensor("op_3095_cast_fp16")]; tensor var_3096_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_193_cast_fp16)[name = tensor("op_3096_cast_fp16")]; tensor var_3097_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_195_cast_fp16)[name = tensor("op_3097_cast_fp16")]; tensor var_3098_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_197_cast_fp16)[name = tensor("op_3098_cast_fp16")]; tensor var_3099_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_199_cast_fp16)[name = tensor("op_3099_cast_fp16")]; tensor var_3100_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_201_cast_fp16)[name = tensor("op_3100_cast_fp16")]; tensor var_3101_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_203_cast_fp16)[name = tensor("op_3101_cast_fp16")]; tensor var_3102_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_205_cast_fp16)[name = tensor("op_3102_cast_fp16")]; tensor var_3103_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_207_cast_fp16)[name = tensor("op_3103_cast_fp16")]; tensor var_3104_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_209_cast_fp16)[name = tensor("op_3104_cast_fp16")]; tensor var_3105_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_211_cast_fp16)[name = tensor("op_3105_cast_fp16")]; tensor var_3106_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_213_cast_fp16)[name = tensor("op_3106_cast_fp16")]; tensor var_3107_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_215_cast_fp16)[name = tensor("op_3107_cast_fp16")]; tensor var_3108_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_217_cast_fp16)[name = tensor("op_3108_cast_fp16")]; tensor var_3109_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_219_cast_fp16)[name = tensor("op_3109_cast_fp16")]; tensor var_3110_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_221_cast_fp16)[name = tensor("op_3110_cast_fp16")]; tensor var_3111_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_223_cast_fp16)[name = tensor("op_3111_cast_fp16")]; tensor var_3112_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_225_cast_fp16)[name = tensor("op_3112_cast_fp16")]; tensor var_3113_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_227_cast_fp16)[name = tensor("op_3113_cast_fp16")]; tensor var_3114_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_229_cast_fp16)[name = tensor("op_3114_cast_fp16")]; tensor var_3115_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_231_cast_fp16)[name = tensor("op_3115_cast_fp16")]; tensor var_3116_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_233_cast_fp16)[name = tensor("op_3116_cast_fp16")]; tensor var_3117_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_235_cast_fp16)[name = tensor("op_3117_cast_fp16")]; tensor var_3118_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_237_cast_fp16)[name = tensor("op_3118_cast_fp16")]; tensor var_3119_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_239_cast_fp16)[name = tensor("op_3119_cast_fp16")]; tensor var_3120_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_241_cast_fp16)[name = tensor("op_3120_cast_fp16")]; tensor var_3121_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_243_cast_fp16)[name = tensor("op_3121_cast_fp16")]; tensor var_3122_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_245_cast_fp16)[name = tensor("op_3122_cast_fp16")]; tensor var_3123_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_247_cast_fp16)[name = tensor("op_3123_cast_fp16")]; tensor var_3124_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_249_cast_fp16)[name = tensor("op_3124_cast_fp16")]; tensor var_3125_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_251_cast_fp16)[name = tensor("op_3125_cast_fp16")]; tensor var_3126_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_253_cast_fp16)[name = tensor("op_3126_cast_fp16")]; tensor var_3127_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_255_cast_fp16)[name = tensor("op_3127_cast_fp16")]; tensor var_3128_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_257_cast_fp16)[name = tensor("op_3128_cast_fp16")]; tensor var_3129_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_259_cast_fp16)[name = tensor("op_3129_cast_fp16")]; tensor var_3130_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_261_cast_fp16)[name = tensor("op_3130_cast_fp16")]; tensor var_3131_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_263_cast_fp16)[name = tensor("op_3131_cast_fp16")]; tensor var_3132_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_265_cast_fp16)[name = tensor("op_3132_cast_fp16")]; tensor var_3133_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_267_cast_fp16)[name = tensor("op_3133_cast_fp16")]; tensor var_3134_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_269_cast_fp16)[name = tensor("op_3134_cast_fp16")]; tensor var_3135_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_271_cast_fp16)[name = tensor("op_3135_cast_fp16")]; tensor var_3136_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_273_cast_fp16)[name = tensor("op_3136_cast_fp16")]; tensor var_3137_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_275_cast_fp16)[name = tensor("op_3137_cast_fp16")]; tensor var_3138_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_277_cast_fp16)[name = tensor("op_3138_cast_fp16")]; tensor var_3139_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_279_cast_fp16)[name = tensor("op_3139_cast_fp16")]; tensor var_3140_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_281_cast_fp16)[name = tensor("op_3140_cast_fp16")]; tensor var_3141_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_283_cast_fp16)[name = tensor("op_3141_cast_fp16")]; tensor var_3142_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_285_cast_fp16)[name = tensor("op_3142_cast_fp16")]; tensor var_3143_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_287_cast_fp16)[name = tensor("op_3143_cast_fp16")]; tensor var_3144_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_289_cast_fp16)[name = tensor("op_3144_cast_fp16")]; tensor var_3145_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_291_cast_fp16)[name = tensor("op_3145_cast_fp16")]; tensor var_3146_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_293_cast_fp16)[name = tensor("op_3146_cast_fp16")]; tensor var_3147_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_295_cast_fp16)[name = tensor("op_3147_cast_fp16")]; tensor var_3148_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_297_cast_fp16)[name = tensor("op_3148_cast_fp16")]; tensor var_3149_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_299_cast_fp16)[name = tensor("op_3149_cast_fp16")]; tensor var_3150_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_301_cast_fp16)[name = tensor("op_3150_cast_fp16")]; tensor var_3151_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_303_cast_fp16)[name = tensor("op_3151_cast_fp16")]; tensor var_3152_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_305_cast_fp16)[name = tensor("op_3152_cast_fp16")]; tensor var_3153_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_307_cast_fp16)[name = tensor("op_3153_cast_fp16")]; tensor var_3154_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_309_cast_fp16)[name = tensor("op_3154_cast_fp16")]; tensor var_3155_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_311_cast_fp16)[name = tensor("op_3155_cast_fp16")]; tensor var_3156_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_313_cast_fp16)[name = tensor("op_3156_cast_fp16")]; tensor var_3157_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_315_cast_fp16)[name = tensor("op_3157_cast_fp16")]; tensor var_3158_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_317_cast_fp16)[name = tensor("op_3158_cast_fp16")]; tensor var_3159_cast_fp16 = softmax(axis = var_1878, x = aw_chunk_319_cast_fp16)[name = tensor("op_3159_cast_fp16")]; tensor var_3161_equation_0 = const()[name = tensor("op_3161_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3161_cast_fp16 = einsum(equation = var_3161_equation_0, values = (var_2681_cast_fp16, var_3080_cast_fp16))[name = tensor("op_3161_cast_fp16")]; tensor var_3163_equation_0 = const()[name = tensor("op_3163_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3163_cast_fp16 = einsum(equation = var_3163_equation_0, values = (var_2681_cast_fp16, var_3081_cast_fp16))[name = tensor("op_3163_cast_fp16")]; tensor var_3165_equation_0 = const()[name = tensor("op_3165_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3165_cast_fp16 = einsum(equation = var_3165_equation_0, values = (var_2681_cast_fp16, var_3082_cast_fp16))[name = tensor("op_3165_cast_fp16")]; tensor var_3167_equation_0 = const()[name = tensor("op_3167_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3167_cast_fp16 = einsum(equation = var_3167_equation_0, values = (var_2681_cast_fp16, var_3083_cast_fp16))[name = tensor("op_3167_cast_fp16")]; tensor var_3169_equation_0 = const()[name = tensor("op_3169_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3169_cast_fp16 = einsum(equation = var_3169_equation_0, values = (var_2685_cast_fp16, var_3084_cast_fp16))[name = tensor("op_3169_cast_fp16")]; tensor var_3171_equation_0 = const()[name = tensor("op_3171_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3171_cast_fp16 = einsum(equation = var_3171_equation_0, values = (var_2685_cast_fp16, var_3085_cast_fp16))[name = tensor("op_3171_cast_fp16")]; tensor var_3173_equation_0 = const()[name = tensor("op_3173_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3173_cast_fp16 = einsum(equation = var_3173_equation_0, values = (var_2685_cast_fp16, var_3086_cast_fp16))[name = tensor("op_3173_cast_fp16")]; tensor var_3175_equation_0 = const()[name = tensor("op_3175_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3175_cast_fp16 = einsum(equation = var_3175_equation_0, values = (var_2685_cast_fp16, var_3087_cast_fp16))[name = tensor("op_3175_cast_fp16")]; tensor var_3177_equation_0 = const()[name = tensor("op_3177_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3177_cast_fp16 = einsum(equation = var_3177_equation_0, values = (var_2689_cast_fp16, var_3088_cast_fp16))[name = tensor("op_3177_cast_fp16")]; tensor var_3179_equation_0 = const()[name = tensor("op_3179_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3179_cast_fp16 = einsum(equation = var_3179_equation_0, values = (var_2689_cast_fp16, var_3089_cast_fp16))[name = tensor("op_3179_cast_fp16")]; tensor var_3181_equation_0 = const()[name = tensor("op_3181_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3181_cast_fp16 = einsum(equation = var_3181_equation_0, values = (var_2689_cast_fp16, var_3090_cast_fp16))[name = tensor("op_3181_cast_fp16")]; tensor var_3183_equation_0 = const()[name = tensor("op_3183_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3183_cast_fp16 = einsum(equation = var_3183_equation_0, values = (var_2689_cast_fp16, var_3091_cast_fp16))[name = tensor("op_3183_cast_fp16")]; tensor var_3185_equation_0 = const()[name = tensor("op_3185_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3185_cast_fp16 = einsum(equation = var_3185_equation_0, values = (var_2693_cast_fp16, var_3092_cast_fp16))[name = tensor("op_3185_cast_fp16")]; tensor var_3187_equation_0 = const()[name = tensor("op_3187_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3187_cast_fp16 = einsum(equation = var_3187_equation_0, values = (var_2693_cast_fp16, var_3093_cast_fp16))[name = tensor("op_3187_cast_fp16")]; tensor var_3189_equation_0 = const()[name = tensor("op_3189_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3189_cast_fp16 = einsum(equation = var_3189_equation_0, values = (var_2693_cast_fp16, var_3094_cast_fp16))[name = tensor("op_3189_cast_fp16")]; tensor var_3191_equation_0 = const()[name = tensor("op_3191_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3191_cast_fp16 = einsum(equation = var_3191_equation_0, values = (var_2693_cast_fp16, var_3095_cast_fp16))[name = tensor("op_3191_cast_fp16")]; tensor var_3193_equation_0 = const()[name = tensor("op_3193_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3193_cast_fp16 = einsum(equation = var_3193_equation_0, values = (var_2697_cast_fp16, var_3096_cast_fp16))[name = tensor("op_3193_cast_fp16")]; tensor var_3195_equation_0 = const()[name = tensor("op_3195_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3195_cast_fp16 = einsum(equation = var_3195_equation_0, values = (var_2697_cast_fp16, var_3097_cast_fp16))[name = tensor("op_3195_cast_fp16")]; tensor var_3197_equation_0 = const()[name = tensor("op_3197_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3197_cast_fp16 = einsum(equation = var_3197_equation_0, values = (var_2697_cast_fp16, var_3098_cast_fp16))[name = tensor("op_3197_cast_fp16")]; tensor var_3199_equation_0 = const()[name = tensor("op_3199_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3199_cast_fp16 = einsum(equation = var_3199_equation_0, values = (var_2697_cast_fp16, var_3099_cast_fp16))[name = tensor("op_3199_cast_fp16")]; tensor var_3201_equation_0 = const()[name = tensor("op_3201_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3201_cast_fp16 = einsum(equation = var_3201_equation_0, values = (var_2701_cast_fp16, var_3100_cast_fp16))[name = tensor("op_3201_cast_fp16")]; tensor var_3203_equation_0 = const()[name = tensor("op_3203_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3203_cast_fp16 = einsum(equation = var_3203_equation_0, values = (var_2701_cast_fp16, var_3101_cast_fp16))[name = tensor("op_3203_cast_fp16")]; tensor var_3205_equation_0 = const()[name = tensor("op_3205_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3205_cast_fp16 = einsum(equation = var_3205_equation_0, values = (var_2701_cast_fp16, var_3102_cast_fp16))[name = tensor("op_3205_cast_fp16")]; tensor var_3207_equation_0 = const()[name = tensor("op_3207_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3207_cast_fp16 = einsum(equation = var_3207_equation_0, values = (var_2701_cast_fp16, var_3103_cast_fp16))[name = tensor("op_3207_cast_fp16")]; tensor var_3209_equation_0 = const()[name = tensor("op_3209_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3209_cast_fp16 = einsum(equation = var_3209_equation_0, values = (var_2705_cast_fp16, var_3104_cast_fp16))[name = tensor("op_3209_cast_fp16")]; tensor var_3211_equation_0 = const()[name = tensor("op_3211_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3211_cast_fp16 = einsum(equation = var_3211_equation_0, values = (var_2705_cast_fp16, var_3105_cast_fp16))[name = tensor("op_3211_cast_fp16")]; tensor var_3213_equation_0 = const()[name = tensor("op_3213_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3213_cast_fp16 = einsum(equation = var_3213_equation_0, values = (var_2705_cast_fp16, var_3106_cast_fp16))[name = tensor("op_3213_cast_fp16")]; tensor var_3215_equation_0 = const()[name = tensor("op_3215_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3215_cast_fp16 = einsum(equation = var_3215_equation_0, values = (var_2705_cast_fp16, var_3107_cast_fp16))[name = tensor("op_3215_cast_fp16")]; tensor var_3217_equation_0 = const()[name = tensor("op_3217_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3217_cast_fp16 = einsum(equation = var_3217_equation_0, values = (var_2709_cast_fp16, var_3108_cast_fp16))[name = tensor("op_3217_cast_fp16")]; tensor var_3219_equation_0 = const()[name = tensor("op_3219_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3219_cast_fp16 = einsum(equation = var_3219_equation_0, values = (var_2709_cast_fp16, var_3109_cast_fp16))[name = tensor("op_3219_cast_fp16")]; tensor var_3221_equation_0 = const()[name = tensor("op_3221_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3221_cast_fp16 = einsum(equation = var_3221_equation_0, values = (var_2709_cast_fp16, var_3110_cast_fp16))[name = tensor("op_3221_cast_fp16")]; tensor var_3223_equation_0 = const()[name = tensor("op_3223_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3223_cast_fp16 = einsum(equation = var_3223_equation_0, values = (var_2709_cast_fp16, var_3111_cast_fp16))[name = tensor("op_3223_cast_fp16")]; tensor var_3225_equation_0 = const()[name = tensor("op_3225_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3225_cast_fp16 = einsum(equation = var_3225_equation_0, values = (var_2713_cast_fp16, var_3112_cast_fp16))[name = tensor("op_3225_cast_fp16")]; tensor var_3227_equation_0 = const()[name = tensor("op_3227_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3227_cast_fp16 = einsum(equation = var_3227_equation_0, values = (var_2713_cast_fp16, var_3113_cast_fp16))[name = tensor("op_3227_cast_fp16")]; tensor var_3229_equation_0 = const()[name = tensor("op_3229_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3229_cast_fp16 = einsum(equation = var_3229_equation_0, values = (var_2713_cast_fp16, var_3114_cast_fp16))[name = tensor("op_3229_cast_fp16")]; tensor var_3231_equation_0 = const()[name = tensor("op_3231_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3231_cast_fp16 = einsum(equation = var_3231_equation_0, values = (var_2713_cast_fp16, var_3115_cast_fp16))[name = tensor("op_3231_cast_fp16")]; tensor var_3233_equation_0 = const()[name = tensor("op_3233_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3233_cast_fp16 = einsum(equation = var_3233_equation_0, values = (var_2717_cast_fp16, var_3116_cast_fp16))[name = tensor("op_3233_cast_fp16")]; tensor var_3235_equation_0 = const()[name = tensor("op_3235_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3235_cast_fp16 = einsum(equation = var_3235_equation_0, values = (var_2717_cast_fp16, var_3117_cast_fp16))[name = tensor("op_3235_cast_fp16")]; tensor var_3237_equation_0 = const()[name = tensor("op_3237_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3237_cast_fp16 = einsum(equation = var_3237_equation_0, values = (var_2717_cast_fp16, var_3118_cast_fp16))[name = tensor("op_3237_cast_fp16")]; tensor var_3239_equation_0 = const()[name = tensor("op_3239_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3239_cast_fp16 = einsum(equation = var_3239_equation_0, values = (var_2717_cast_fp16, var_3119_cast_fp16))[name = tensor("op_3239_cast_fp16")]; tensor var_3241_equation_0 = const()[name = tensor("op_3241_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3241_cast_fp16 = einsum(equation = var_3241_equation_0, values = (var_2721_cast_fp16, var_3120_cast_fp16))[name = tensor("op_3241_cast_fp16")]; tensor var_3243_equation_0 = const()[name = tensor("op_3243_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3243_cast_fp16 = einsum(equation = var_3243_equation_0, values = (var_2721_cast_fp16, var_3121_cast_fp16))[name = tensor("op_3243_cast_fp16")]; tensor var_3245_equation_0 = const()[name = tensor("op_3245_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3245_cast_fp16 = einsum(equation = var_3245_equation_0, values = (var_2721_cast_fp16, var_3122_cast_fp16))[name = tensor("op_3245_cast_fp16")]; tensor var_3247_equation_0 = const()[name = tensor("op_3247_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3247_cast_fp16 = einsum(equation = var_3247_equation_0, values = (var_2721_cast_fp16, var_3123_cast_fp16))[name = tensor("op_3247_cast_fp16")]; tensor var_3249_equation_0 = const()[name = tensor("op_3249_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3249_cast_fp16 = einsum(equation = var_3249_equation_0, values = (var_2725_cast_fp16, var_3124_cast_fp16))[name = tensor("op_3249_cast_fp16")]; tensor var_3251_equation_0 = const()[name = tensor("op_3251_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3251_cast_fp16 = einsum(equation = var_3251_equation_0, values = (var_2725_cast_fp16, var_3125_cast_fp16))[name = tensor("op_3251_cast_fp16")]; tensor var_3253_equation_0 = const()[name = tensor("op_3253_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3253_cast_fp16 = einsum(equation = var_3253_equation_0, values = (var_2725_cast_fp16, var_3126_cast_fp16))[name = tensor("op_3253_cast_fp16")]; tensor var_3255_equation_0 = const()[name = tensor("op_3255_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3255_cast_fp16 = einsum(equation = var_3255_equation_0, values = (var_2725_cast_fp16, var_3127_cast_fp16))[name = tensor("op_3255_cast_fp16")]; tensor var_3257_equation_0 = const()[name = tensor("op_3257_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3257_cast_fp16 = einsum(equation = var_3257_equation_0, values = (var_2729_cast_fp16, var_3128_cast_fp16))[name = tensor("op_3257_cast_fp16")]; tensor var_3259_equation_0 = const()[name = tensor("op_3259_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3259_cast_fp16 = einsum(equation = var_3259_equation_0, values = (var_2729_cast_fp16, var_3129_cast_fp16))[name = tensor("op_3259_cast_fp16")]; tensor var_3261_equation_0 = const()[name = tensor("op_3261_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3261_cast_fp16 = einsum(equation = var_3261_equation_0, values = (var_2729_cast_fp16, var_3130_cast_fp16))[name = tensor("op_3261_cast_fp16")]; tensor var_3263_equation_0 = const()[name = tensor("op_3263_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3263_cast_fp16 = einsum(equation = var_3263_equation_0, values = (var_2729_cast_fp16, var_3131_cast_fp16))[name = tensor("op_3263_cast_fp16")]; tensor var_3265_equation_0 = const()[name = tensor("op_3265_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3265_cast_fp16 = einsum(equation = var_3265_equation_0, values = (var_2733_cast_fp16, var_3132_cast_fp16))[name = tensor("op_3265_cast_fp16")]; tensor var_3267_equation_0 = const()[name = tensor("op_3267_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3267_cast_fp16 = einsum(equation = var_3267_equation_0, values = (var_2733_cast_fp16, var_3133_cast_fp16))[name = tensor("op_3267_cast_fp16")]; tensor var_3269_equation_0 = const()[name = tensor("op_3269_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3269_cast_fp16 = einsum(equation = var_3269_equation_0, values = (var_2733_cast_fp16, var_3134_cast_fp16))[name = tensor("op_3269_cast_fp16")]; tensor var_3271_equation_0 = const()[name = tensor("op_3271_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3271_cast_fp16 = einsum(equation = var_3271_equation_0, values = (var_2733_cast_fp16, var_3135_cast_fp16))[name = tensor("op_3271_cast_fp16")]; tensor var_3273_equation_0 = const()[name = tensor("op_3273_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3273_cast_fp16 = einsum(equation = var_3273_equation_0, values = (var_2737_cast_fp16, var_3136_cast_fp16))[name = tensor("op_3273_cast_fp16")]; tensor var_3275_equation_0 = const()[name = tensor("op_3275_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3275_cast_fp16 = einsum(equation = var_3275_equation_0, values = (var_2737_cast_fp16, var_3137_cast_fp16))[name = tensor("op_3275_cast_fp16")]; tensor var_3277_equation_0 = const()[name = tensor("op_3277_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3277_cast_fp16 = einsum(equation = var_3277_equation_0, values = (var_2737_cast_fp16, var_3138_cast_fp16))[name = tensor("op_3277_cast_fp16")]; tensor var_3279_equation_0 = const()[name = tensor("op_3279_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3279_cast_fp16 = einsum(equation = var_3279_equation_0, values = (var_2737_cast_fp16, var_3139_cast_fp16))[name = tensor("op_3279_cast_fp16")]; tensor var_3281_equation_0 = const()[name = tensor("op_3281_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3281_cast_fp16 = einsum(equation = var_3281_equation_0, values = (var_2741_cast_fp16, var_3140_cast_fp16))[name = tensor("op_3281_cast_fp16")]; tensor var_3283_equation_0 = const()[name = tensor("op_3283_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3283_cast_fp16 = einsum(equation = var_3283_equation_0, values = (var_2741_cast_fp16, var_3141_cast_fp16))[name = tensor("op_3283_cast_fp16")]; tensor var_3285_equation_0 = const()[name = tensor("op_3285_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3285_cast_fp16 = einsum(equation = var_3285_equation_0, values = (var_2741_cast_fp16, var_3142_cast_fp16))[name = tensor("op_3285_cast_fp16")]; tensor var_3287_equation_0 = const()[name = tensor("op_3287_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3287_cast_fp16 = einsum(equation = var_3287_equation_0, values = (var_2741_cast_fp16, var_3143_cast_fp16))[name = tensor("op_3287_cast_fp16")]; tensor var_3289_equation_0 = const()[name = tensor("op_3289_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3289_cast_fp16 = einsum(equation = var_3289_equation_0, values = (var_2745_cast_fp16, var_3144_cast_fp16))[name = tensor("op_3289_cast_fp16")]; tensor var_3291_equation_0 = const()[name = tensor("op_3291_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3291_cast_fp16 = einsum(equation = var_3291_equation_0, values = (var_2745_cast_fp16, var_3145_cast_fp16))[name = tensor("op_3291_cast_fp16")]; tensor var_3293_equation_0 = const()[name = tensor("op_3293_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3293_cast_fp16 = einsum(equation = var_3293_equation_0, values = (var_2745_cast_fp16, var_3146_cast_fp16))[name = tensor("op_3293_cast_fp16")]; tensor var_3295_equation_0 = const()[name = tensor("op_3295_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3295_cast_fp16 = einsum(equation = var_3295_equation_0, values = (var_2745_cast_fp16, var_3147_cast_fp16))[name = tensor("op_3295_cast_fp16")]; tensor var_3297_equation_0 = const()[name = tensor("op_3297_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3297_cast_fp16 = einsum(equation = var_3297_equation_0, values = (var_2749_cast_fp16, var_3148_cast_fp16))[name = tensor("op_3297_cast_fp16")]; tensor var_3299_equation_0 = const()[name = tensor("op_3299_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3299_cast_fp16 = einsum(equation = var_3299_equation_0, values = (var_2749_cast_fp16, var_3149_cast_fp16))[name = tensor("op_3299_cast_fp16")]; tensor var_3301_equation_0 = const()[name = tensor("op_3301_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3301_cast_fp16 = einsum(equation = var_3301_equation_0, values = (var_2749_cast_fp16, var_3150_cast_fp16))[name = tensor("op_3301_cast_fp16")]; tensor var_3303_equation_0 = const()[name = tensor("op_3303_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3303_cast_fp16 = einsum(equation = var_3303_equation_0, values = (var_2749_cast_fp16, var_3151_cast_fp16))[name = tensor("op_3303_cast_fp16")]; tensor var_3305_equation_0 = const()[name = tensor("op_3305_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3305_cast_fp16 = einsum(equation = var_3305_equation_0, values = (var_2753_cast_fp16, var_3152_cast_fp16))[name = tensor("op_3305_cast_fp16")]; tensor var_3307_equation_0 = const()[name = tensor("op_3307_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3307_cast_fp16 = einsum(equation = var_3307_equation_0, values = (var_2753_cast_fp16, var_3153_cast_fp16))[name = tensor("op_3307_cast_fp16")]; tensor var_3309_equation_0 = const()[name = tensor("op_3309_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3309_cast_fp16 = einsum(equation = var_3309_equation_0, values = (var_2753_cast_fp16, var_3154_cast_fp16))[name = tensor("op_3309_cast_fp16")]; tensor var_3311_equation_0 = const()[name = tensor("op_3311_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3311_cast_fp16 = einsum(equation = var_3311_equation_0, values = (var_2753_cast_fp16, var_3155_cast_fp16))[name = tensor("op_3311_cast_fp16")]; tensor var_3313_equation_0 = const()[name = tensor("op_3313_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3313_cast_fp16 = einsum(equation = var_3313_equation_0, values = (var_2757_cast_fp16, var_3156_cast_fp16))[name = tensor("op_3313_cast_fp16")]; tensor var_3315_equation_0 = const()[name = tensor("op_3315_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3315_cast_fp16 = einsum(equation = var_3315_equation_0, values = (var_2757_cast_fp16, var_3157_cast_fp16))[name = tensor("op_3315_cast_fp16")]; tensor var_3317_equation_0 = const()[name = tensor("op_3317_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3317_cast_fp16 = einsum(equation = var_3317_equation_0, values = (var_2757_cast_fp16, var_3158_cast_fp16))[name = tensor("op_3317_cast_fp16")]; tensor var_3319_equation_0 = const()[name = tensor("op_3319_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_3319_cast_fp16 = einsum(equation = var_3319_equation_0, values = (var_2757_cast_fp16, var_3159_cast_fp16))[name = tensor("op_3319_cast_fp16")]; tensor var_3321_interleave_0 = const()[name = tensor("op_3321_interleave_0"), val = tensor(false)]; tensor var_3321_cast_fp16 = concat(axis = var_1853, interleave = var_3321_interleave_0, values = (var_3161_cast_fp16, var_3163_cast_fp16, var_3165_cast_fp16, var_3167_cast_fp16))[name = tensor("op_3321_cast_fp16")]; tensor var_3323_interleave_0 = const()[name = tensor("op_3323_interleave_0"), val = tensor(false)]; tensor var_3323_cast_fp16 = concat(axis = var_1853, interleave = var_3323_interleave_0, values = (var_3169_cast_fp16, var_3171_cast_fp16, var_3173_cast_fp16, var_3175_cast_fp16))[name = tensor("op_3323_cast_fp16")]; tensor var_3325_interleave_0 = const()[name = tensor("op_3325_interleave_0"), val = tensor(false)]; tensor var_3325_cast_fp16 = concat(axis = var_1853, interleave = var_3325_interleave_0, values = (var_3177_cast_fp16, var_3179_cast_fp16, var_3181_cast_fp16, var_3183_cast_fp16))[name = tensor("op_3325_cast_fp16")]; tensor var_3327_interleave_0 = const()[name = tensor("op_3327_interleave_0"), val = tensor(false)]; tensor var_3327_cast_fp16 = concat(axis = var_1853, interleave = var_3327_interleave_0, values = (var_3185_cast_fp16, var_3187_cast_fp16, var_3189_cast_fp16, var_3191_cast_fp16))[name = tensor("op_3327_cast_fp16")]; tensor var_3329_interleave_0 = const()[name = tensor("op_3329_interleave_0"), val = tensor(false)]; tensor var_3329_cast_fp16 = concat(axis = var_1853, interleave = var_3329_interleave_0, values = (var_3193_cast_fp16, var_3195_cast_fp16, var_3197_cast_fp16, var_3199_cast_fp16))[name = tensor("op_3329_cast_fp16")]; tensor var_3331_interleave_0 = const()[name = tensor("op_3331_interleave_0"), val = tensor(false)]; tensor var_3331_cast_fp16 = concat(axis = var_1853, interleave = var_3331_interleave_0, values = (var_3201_cast_fp16, var_3203_cast_fp16, var_3205_cast_fp16, var_3207_cast_fp16))[name = tensor("op_3331_cast_fp16")]; tensor var_3333_interleave_0 = const()[name = tensor("op_3333_interleave_0"), val = tensor(false)]; tensor var_3333_cast_fp16 = concat(axis = var_1853, interleave = var_3333_interleave_0, values = (var_3209_cast_fp16, var_3211_cast_fp16, var_3213_cast_fp16, var_3215_cast_fp16))[name = tensor("op_3333_cast_fp16")]; tensor var_3335_interleave_0 = const()[name = tensor("op_3335_interleave_0"), val = tensor(false)]; tensor var_3335_cast_fp16 = concat(axis = var_1853, interleave = var_3335_interleave_0, values = (var_3217_cast_fp16, var_3219_cast_fp16, var_3221_cast_fp16, var_3223_cast_fp16))[name = tensor("op_3335_cast_fp16")]; tensor var_3337_interleave_0 = const()[name = tensor("op_3337_interleave_0"), val = tensor(false)]; tensor var_3337_cast_fp16 = concat(axis = var_1853, interleave = var_3337_interleave_0, values = (var_3225_cast_fp16, var_3227_cast_fp16, var_3229_cast_fp16, var_3231_cast_fp16))[name = tensor("op_3337_cast_fp16")]; tensor var_3339_interleave_0 = const()[name = tensor("op_3339_interleave_0"), val = tensor(false)]; tensor var_3339_cast_fp16 = concat(axis = var_1853, interleave = var_3339_interleave_0, values = (var_3233_cast_fp16, var_3235_cast_fp16, var_3237_cast_fp16, var_3239_cast_fp16))[name = tensor("op_3339_cast_fp16")]; tensor var_3341_interleave_0 = const()[name = tensor("op_3341_interleave_0"), val = tensor(false)]; tensor var_3341_cast_fp16 = concat(axis = var_1853, interleave = var_3341_interleave_0, values = (var_3241_cast_fp16, var_3243_cast_fp16, var_3245_cast_fp16, var_3247_cast_fp16))[name = tensor("op_3341_cast_fp16")]; tensor var_3343_interleave_0 = const()[name = tensor("op_3343_interleave_0"), val = tensor(false)]; tensor var_3343_cast_fp16 = concat(axis = var_1853, interleave = var_3343_interleave_0, values = (var_3249_cast_fp16, var_3251_cast_fp16, var_3253_cast_fp16, var_3255_cast_fp16))[name = tensor("op_3343_cast_fp16")]; tensor var_3345_interleave_0 = const()[name = tensor("op_3345_interleave_0"), val = tensor(false)]; tensor var_3345_cast_fp16 = concat(axis = var_1853, interleave = var_3345_interleave_0, values = (var_3257_cast_fp16, var_3259_cast_fp16, var_3261_cast_fp16, var_3263_cast_fp16))[name = tensor("op_3345_cast_fp16")]; tensor var_3347_interleave_0 = const()[name = tensor("op_3347_interleave_0"), val = tensor(false)]; tensor var_3347_cast_fp16 = concat(axis = var_1853, interleave = var_3347_interleave_0, values = (var_3265_cast_fp16, var_3267_cast_fp16, var_3269_cast_fp16, var_3271_cast_fp16))[name = tensor("op_3347_cast_fp16")]; tensor var_3349_interleave_0 = const()[name = tensor("op_3349_interleave_0"), val = tensor(false)]; tensor var_3349_cast_fp16 = concat(axis = var_1853, interleave = var_3349_interleave_0, values = (var_3273_cast_fp16, var_3275_cast_fp16, var_3277_cast_fp16, var_3279_cast_fp16))[name = tensor("op_3349_cast_fp16")]; tensor var_3351_interleave_0 = const()[name = tensor("op_3351_interleave_0"), val = tensor(false)]; tensor var_3351_cast_fp16 = concat(axis = var_1853, interleave = var_3351_interleave_0, values = (var_3281_cast_fp16, var_3283_cast_fp16, var_3285_cast_fp16, var_3287_cast_fp16))[name = tensor("op_3351_cast_fp16")]; tensor var_3353_interleave_0 = const()[name = tensor("op_3353_interleave_0"), val = tensor(false)]; tensor var_3353_cast_fp16 = concat(axis = var_1853, interleave = var_3353_interleave_0, values = (var_3289_cast_fp16, var_3291_cast_fp16, var_3293_cast_fp16, var_3295_cast_fp16))[name = tensor("op_3353_cast_fp16")]; tensor var_3355_interleave_0 = const()[name = tensor("op_3355_interleave_0"), val = tensor(false)]; tensor var_3355_cast_fp16 = concat(axis = var_1853, interleave = var_3355_interleave_0, values = (var_3297_cast_fp16, var_3299_cast_fp16, var_3301_cast_fp16, var_3303_cast_fp16))[name = tensor("op_3355_cast_fp16")]; tensor var_3357_interleave_0 = const()[name = tensor("op_3357_interleave_0"), val = tensor(false)]; tensor var_3357_cast_fp16 = concat(axis = var_1853, interleave = var_3357_interleave_0, values = (var_3305_cast_fp16, var_3307_cast_fp16, var_3309_cast_fp16, var_3311_cast_fp16))[name = tensor("op_3357_cast_fp16")]; tensor var_3359_interleave_0 = const()[name = tensor("op_3359_interleave_0"), val = tensor(false)]; tensor var_3359_cast_fp16 = concat(axis = var_1853, interleave = var_3359_interleave_0, values = (var_3313_cast_fp16, var_3315_cast_fp16, var_3317_cast_fp16, var_3319_cast_fp16))[name = tensor("op_3359_cast_fp16")]; tensor input_9_interleave_0 = const()[name = tensor("input_9_interleave_0"), val = tensor(false)]; tensor input_9_cast_fp16 = concat(axis = var_1878, interleave = input_9_interleave_0, values = (var_3321_cast_fp16, var_3323_cast_fp16, var_3325_cast_fp16, var_3327_cast_fp16, var_3329_cast_fp16, var_3331_cast_fp16, var_3333_cast_fp16, var_3335_cast_fp16, var_3337_cast_fp16, var_3339_cast_fp16, var_3341_cast_fp16, var_3343_cast_fp16, var_3345_cast_fp16, var_3347_cast_fp16, var_3349_cast_fp16, var_3351_cast_fp16, var_3353_cast_fp16, var_3355_cast_fp16, var_3357_cast_fp16, var_3359_cast_fp16))[name = tensor("input_9_cast_fp16")]; tensor var_3370_pad_type_0 = const()[name = tensor("op_3370_pad_type_0"), val = tensor("valid")]; tensor var_3370_strides_0 = const()[name = tensor("op_3370_strides_0"), val = tensor([1, 1])]; tensor var_3370_pad_0 = const()[name = tensor("op_3370_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3370_dilations_0 = const()[name = tensor("op_3370_dilations_0"), val = tensor([1, 1])]; tensor var_3370_groups_0 = const()[name = tensor("op_3370_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(33754496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34573760))), name = tensor("layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_1_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34573888)))]; tensor var_3370_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_3370_dilations_0, groups = var_3370_groups_0, pad = var_3370_pad_0, pad_type = var_3370_pad_type_0, strides = var_3370_strides_0, weight = layers_1_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_9_cast_fp16)[name = tensor("op_3370_cast_fp16")]; tensor var_3376_pad_type_0 = const()[name = tensor("op_3376_pad_type_0"), val = tensor("valid")]; tensor var_3376_strides_0 = const()[name = tensor("op_3376_strides_0"), val = tensor([1, 1])]; tensor var_3376_pad_0 = const()[name = tensor("op_3376_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3376_dilations_0 = const()[name = tensor("op_3376_dilations_0"), val = tensor([1, 1])]; tensor var_3376_groups_0 = const()[name = tensor("op_3376_groups_0"), val = tensor(1)]; tensor layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34612736))), name = tensor("layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34576512))), shape = tensor([1280, 1280, 1, 1])]; tensor var_3376_cast_fp16 = conv(dilations = var_3376_dilations_0, groups = var_3376_groups_0, pad = var_3376_pad_0, pad_type = var_3376_pad_type_0, strides = var_3376_strides_0, weight = layers_1_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_9_cast_fp16)[name = tensor("op_3376_cast_fp16")]; tensor obj_7_cast_fp16 = add(x = var_3370_cast_fp16, y = var_3376_cast_fp16)[name = tensor("obj_7_cast_fp16")]; tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; tensor out_7_axes_0 = const()[name = tensor("out_7_axes_0"), val = tensor([1])]; tensor var_3387_to_fp16 = const()[name = tensor("op_3387_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_7_cast_fp16 = layer_norm(axes = out_7_axes_0, epsilon = var_3387_to_fp16, x = inputs_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34817600)))]; tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34820224)))]; tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; tensor var_3405_pad_type_0 = const()[name = tensor("op_3405_pad_type_0"), val = tensor("valid")]; tensor var_3405_strides_0 = const()[name = tensor("op_3405_strides_0"), val = tensor([1, 1])]; tensor var_3405_pad_0 = const()[name = tensor("op_3405_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3405_dilations_0 = const()[name = tensor("op_3405_dilations_0"), val = tensor([1, 1])]; tensor var_3405_groups_0 = const()[name = tensor("op_3405_groups_0"), val = tensor(1)]; tensor layers_1_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34822848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38099712))), name = tensor("layers_1_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_1_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38099840)))]; tensor var_3405_cast_fp16 = conv(bias = layers_1_fc1_inlier_module_bias_to_fp16, dilations = var_3405_dilations_0, groups = var_3405_groups_0, pad = var_3405_pad_0, pad_type = var_3405_pad_type_0, strides = var_3405_strides_0, weight = layers_1_fc1_inlier_module_weight_to_fp16_palettized, x = input_11_cast_fp16)[name = tensor("op_3405_cast_fp16")]; tensor var_3411_pad_type_0 = const()[name = tensor("op_3411_pad_type_0"), val = tensor("valid")]; tensor var_3411_strides_0 = const()[name = tensor("op_3411_strides_0"), val = tensor([1, 1])]; tensor var_3411_pad_0 = const()[name = tensor("op_3411_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3411_dilations_0 = const()[name = tensor("op_3411_dilations_0"), val = tensor([1, 1])]; tensor var_3411_groups_0 = const()[name = tensor("op_3411_groups_0"), val = tensor(1)]; tensor layers_1_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38256320))), name = tensor("layers_1_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38110144))), shape = tensor([5120, 1280, 1, 1])]; tensor var_3411_cast_fp16 = conv(dilations = var_3411_dilations_0, groups = var_3411_groups_0, pad = var_3411_pad_0, pad_type = var_3411_pad_type_0, strides = var_3411_strides_0, weight = layers_1_fc1_outlier_module_weight_to_fp16_sparsified, x = input_11_cast_fp16)[name = tensor("op_3411_cast_fp16")]; tensor input_13_cast_fp16 = add(x = var_3405_cast_fp16, y = var_3411_cast_fp16)[name = tensor("input_13_cast_fp16")]; tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; tensor var_3422_pad_type_0 = const()[name = tensor("op_3422_pad_type_0"), val = tensor("valid")]; tensor var_3422_strides_0 = const()[name = tensor("op_3422_strides_0"), val = tensor([1, 1])]; tensor var_3422_pad_0 = const()[name = tensor("op_3422_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3422_dilations_0 = const()[name = tensor("op_3422_dilations_0"), val = tensor([1, 1])]; tensor var_3422_groups_0 = const()[name = tensor("op_3422_groups_0"), val = tensor(1)]; tensor layers_1_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39075584))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42352448))), name = tensor("layers_1_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_1_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_1_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42352576)))]; tensor var_3422_cast_fp16 = conv(bias = layers_1_fc2_inlier_module_bias_to_fp16, dilations = var_3422_dilations_0, groups = var_3422_groups_0, pad = var_3422_pad_0, pad_type = var_3422_pad_type_0, strides = var_3422_strides_0, weight = layers_1_fc2_inlier_module_weight_to_fp16_palettized, x = input_15_cast_fp16)[name = tensor("op_3422_cast_fp16")]; tensor var_3428_pad_type_0 = const()[name = tensor("op_3428_pad_type_0"), val = tensor("valid")]; tensor var_3428_strides_0 = const()[name = tensor("op_3428_strides_0"), val = tensor([1, 1])]; tensor var_3428_pad_0 = const()[name = tensor("op_3428_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3428_dilations_0 = const()[name = tensor("op_3428_dilations_0"), val = tensor([1, 1])]; tensor var_3428_groups_0 = const()[name = tensor("op_3428_groups_0"), val = tensor(1)]; tensor layers_1_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42567552))), name = tensor("layers_1_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(42355200))), shape = tensor([1280, 5120, 1, 1])]; tensor var_3428_cast_fp16 = conv(dilations = var_3428_dilations_0, groups = var_3428_groups_0, pad = var_3428_pad_0, pad_type = var_3428_pad_type_0, strides = var_3428_strides_0, weight = layers_1_fc2_outlier_module_weight_to_fp16_sparsified, x = input_15_cast_fp16)[name = tensor("op_3428_cast_fp16")]; tensor hidden_states_7_cast_fp16 = add(x = var_3422_cast_fp16, y = var_3428_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; tensor var_3434 = const()[name = tensor("op_3434"), val = tensor(3)]; tensor var_3459 = const()[name = tensor("op_3459"), val = tensor(1)]; tensor out_9_axes_0 = const()[name = tensor("out_9_axes_0"), val = tensor([1])]; tensor var_3476_to_fp16 = const()[name = tensor("op_3476_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_9_cast_fp16 = layer_norm(axes = out_9_axes_0, epsilon = var_3476_to_fp16, x = inputs_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43386816)))]; tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43389440)))]; tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; tensor var_3498_pad_type_0 = const()[name = tensor("op_3498_pad_type_0"), val = tensor("valid")]; tensor var_3498_strides_0 = const()[name = tensor("op_3498_strides_0"), val = tensor([1, 1])]; tensor var_3498_pad_0 = const()[name = tensor("op_3498_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3498_dilations_0 = const()[name = tensor("op_3498_dilations_0"), val = tensor([1, 1])]; tensor var_3498_groups_0 = const()[name = tensor("op_3498_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(43392064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44211328))), name = tensor("layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_2_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44211456)))]; tensor var_3498_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_3498_dilations_0, groups = var_3498_groups_0, pad = var_3498_pad_0, pad_type = var_3498_pad_type_0, strides = var_3498_strides_0, weight = layers_2_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_3498_cast_fp16")]; tensor var_3504_pad_type_0 = const()[name = tensor("op_3504_pad_type_0"), val = tensor("valid")]; tensor var_3504_strides_0 = const()[name = tensor("op_3504_strides_0"), val = tensor([1, 1])]; tensor var_3504_pad_0 = const()[name = tensor("op_3504_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3504_dilations_0 = const()[name = tensor("op_3504_dilations_0"), val = tensor([1, 1])]; tensor var_3504_groups_0 = const()[name = tensor("op_3504_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44271552))), name = tensor("layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44214080))), shape = tensor([1280, 1280, 1, 1])]; tensor var_3504_cast_fp16 = conv(dilations = var_3504_dilations_0, groups = var_3504_groups_0, pad = var_3504_pad_0, pad_type = var_3504_pad_type_0, strides = var_3504_strides_0, weight = layers_2_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_3504_cast_fp16")]; tensor query_5_cast_fp16 = add(x = var_3498_cast_fp16, y = var_3504_cast_fp16)[name = tensor("query_5_cast_fp16")]; tensor var_3513_pad_type_0 = const()[name = tensor("op_3513_pad_type_0"), val = tensor("valid")]; tensor var_3513_strides_0 = const()[name = tensor("op_3513_strides_0"), val = tensor([1, 1])]; tensor var_3513_pad_0 = const()[name = tensor("op_3513_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3513_dilations_0 = const()[name = tensor("op_3513_dilations_0"), val = tensor([1, 1])]; tensor var_3513_groups_0 = const()[name = tensor("op_3513_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44476416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45295680))), name = tensor("layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_3513_cast_fp16 = conv(dilations = var_3513_dilations_0, groups = var_3513_groups_0, pad = var_3513_pad_0, pad_type = var_3513_pad_type_0, strides = var_3513_strides_0, weight = layers_2_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_3513_cast_fp16")]; tensor var_3519_pad_type_0 = const()[name = tensor("op_3519_pad_type_0"), val = tensor("valid")]; tensor var_3519_strides_0 = const()[name = tensor("op_3519_strides_0"), val = tensor([1, 1])]; tensor var_3519_pad_0 = const()[name = tensor("op_3519_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3519_dilations_0 = const()[name = tensor("op_3519_dilations_0"), val = tensor([1, 1])]; tensor var_3519_groups_0 = const()[name = tensor("op_3519_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45344832))), name = tensor("layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45295808))), shape = tensor([1280, 1280, 1, 1])]; tensor var_3519_cast_fp16 = conv(dilations = var_3519_dilations_0, groups = var_3519_groups_0, pad = var_3519_pad_0, pad_type = var_3519_pad_type_0, strides = var_3519_strides_0, weight = layers_2_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_3519_cast_fp16")]; tensor key_5_cast_fp16 = add(x = var_3513_cast_fp16, y = var_3519_cast_fp16)[name = tensor("key_5_cast_fp16")]; tensor var_3529_pad_type_0 = const()[name = tensor("op_3529_pad_type_0"), val = tensor("valid")]; tensor var_3529_strides_0 = const()[name = tensor("op_3529_strides_0"), val = tensor([1, 1])]; tensor var_3529_pad_0 = const()[name = tensor("op_3529_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3529_dilations_0 = const()[name = tensor("op_3529_dilations_0"), val = tensor([1, 1])]; tensor var_3529_groups_0 = const()[name = tensor("op_3529_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(45549696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46368960))), name = tensor("layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_2_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46369088)))]; tensor var_3529_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_3529_dilations_0, groups = var_3529_groups_0, pad = var_3529_pad_0, pad_type = var_3529_pad_type_0, strides = var_3529_strides_0, weight = layers_2_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_9_cast_fp16)[name = tensor("op_3529_cast_fp16")]; tensor var_3535_pad_type_0 = const()[name = tensor("op_3535_pad_type_0"), val = tensor("valid")]; tensor var_3535_strides_0 = const()[name = tensor("op_3535_strides_0"), val = tensor([1, 1])]; tensor var_3535_pad_0 = const()[name = tensor("op_3535_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_3535_dilations_0 = const()[name = tensor("op_3535_dilations_0"), val = tensor([1, 1])]; tensor var_3535_groups_0 = const()[name = tensor("op_3535_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46410240))), name = tensor("layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46371712))), shape = tensor([1280, 1280, 1, 1])]; tensor var_3535_cast_fp16 = conv(dilations = var_3535_dilations_0, groups = var_3535_groups_0, pad = var_3535_pad_0, pad_type = var_3535_pad_type_0, strides = var_3535_strides_0, weight = layers_2_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_9_cast_fp16)[name = tensor("op_3535_cast_fp16")]; tensor value_5_cast_fp16 = add(x = var_3529_cast_fp16, y = var_3535_cast_fp16)[name = tensor("value_5_cast_fp16")]; tensor var_3541_begin_0 = const()[name = tensor("op_3541_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3541_end_0 = const()[name = tensor("op_3541_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3541_end_mask_0 = const()[name = tensor("op_3541_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3541_cast_fp16 = slice_by_index(begin = var_3541_begin_0, end = var_3541_end_0, end_mask = var_3541_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3541_cast_fp16")]; tensor var_3545_begin_0 = const()[name = tensor("op_3545_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_3545_end_0 = const()[name = tensor("op_3545_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_3545_end_mask_0 = const()[name = tensor("op_3545_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3545_cast_fp16 = slice_by_index(begin = var_3545_begin_0, end = var_3545_end_0, end_mask = var_3545_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3545_cast_fp16")]; tensor var_3549_begin_0 = const()[name = tensor("op_3549_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_3549_end_0 = const()[name = tensor("op_3549_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_3549_end_mask_0 = const()[name = tensor("op_3549_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3549_cast_fp16 = slice_by_index(begin = var_3549_begin_0, end = var_3549_end_0, end_mask = var_3549_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3549_cast_fp16")]; tensor var_3553_begin_0 = const()[name = tensor("op_3553_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_3553_end_0 = const()[name = tensor("op_3553_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_3553_end_mask_0 = const()[name = tensor("op_3553_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3553_cast_fp16 = slice_by_index(begin = var_3553_begin_0, end = var_3553_end_0, end_mask = var_3553_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3553_cast_fp16")]; tensor var_3557_begin_0 = const()[name = tensor("op_3557_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_3557_end_0 = const()[name = tensor("op_3557_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_3557_end_mask_0 = const()[name = tensor("op_3557_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3557_cast_fp16 = slice_by_index(begin = var_3557_begin_0, end = var_3557_end_0, end_mask = var_3557_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3557_cast_fp16")]; tensor var_3561_begin_0 = const()[name = tensor("op_3561_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_3561_end_0 = const()[name = tensor("op_3561_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_3561_end_mask_0 = const()[name = tensor("op_3561_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3561_cast_fp16 = slice_by_index(begin = var_3561_begin_0, end = var_3561_end_0, end_mask = var_3561_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3561_cast_fp16")]; tensor var_3565_begin_0 = const()[name = tensor("op_3565_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_3565_end_0 = const()[name = tensor("op_3565_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_3565_end_mask_0 = const()[name = tensor("op_3565_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3565_cast_fp16 = slice_by_index(begin = var_3565_begin_0, end = var_3565_end_0, end_mask = var_3565_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3565_cast_fp16")]; tensor var_3569_begin_0 = const()[name = tensor("op_3569_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_3569_end_0 = const()[name = tensor("op_3569_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_3569_end_mask_0 = const()[name = tensor("op_3569_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3569_cast_fp16 = slice_by_index(begin = var_3569_begin_0, end = var_3569_end_0, end_mask = var_3569_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3569_cast_fp16")]; tensor var_3573_begin_0 = const()[name = tensor("op_3573_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_3573_end_0 = const()[name = tensor("op_3573_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_3573_end_mask_0 = const()[name = tensor("op_3573_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3573_cast_fp16 = slice_by_index(begin = var_3573_begin_0, end = var_3573_end_0, end_mask = var_3573_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3573_cast_fp16")]; tensor var_3577_begin_0 = const()[name = tensor("op_3577_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_3577_end_0 = const()[name = tensor("op_3577_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_3577_end_mask_0 = const()[name = tensor("op_3577_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3577_cast_fp16 = slice_by_index(begin = var_3577_begin_0, end = var_3577_end_0, end_mask = var_3577_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3577_cast_fp16")]; tensor var_3581_begin_0 = const()[name = tensor("op_3581_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_3581_end_0 = const()[name = tensor("op_3581_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_3581_end_mask_0 = const()[name = tensor("op_3581_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3581_cast_fp16 = slice_by_index(begin = var_3581_begin_0, end = var_3581_end_0, end_mask = var_3581_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3581_cast_fp16")]; tensor var_3585_begin_0 = const()[name = tensor("op_3585_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_3585_end_0 = const()[name = tensor("op_3585_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_3585_end_mask_0 = const()[name = tensor("op_3585_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3585_cast_fp16 = slice_by_index(begin = var_3585_begin_0, end = var_3585_end_0, end_mask = var_3585_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3585_cast_fp16")]; tensor var_3589_begin_0 = const()[name = tensor("op_3589_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_3589_end_0 = const()[name = tensor("op_3589_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_3589_end_mask_0 = const()[name = tensor("op_3589_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3589_cast_fp16 = slice_by_index(begin = var_3589_begin_0, end = var_3589_end_0, end_mask = var_3589_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3589_cast_fp16")]; tensor var_3593_begin_0 = const()[name = tensor("op_3593_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_3593_end_0 = const()[name = tensor("op_3593_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_3593_end_mask_0 = const()[name = tensor("op_3593_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3593_cast_fp16 = slice_by_index(begin = var_3593_begin_0, end = var_3593_end_0, end_mask = var_3593_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3593_cast_fp16")]; tensor var_3597_begin_0 = const()[name = tensor("op_3597_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_3597_end_0 = const()[name = tensor("op_3597_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_3597_end_mask_0 = const()[name = tensor("op_3597_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3597_cast_fp16 = slice_by_index(begin = var_3597_begin_0, end = var_3597_end_0, end_mask = var_3597_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3597_cast_fp16")]; tensor var_3601_begin_0 = const()[name = tensor("op_3601_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_3601_end_0 = const()[name = tensor("op_3601_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_3601_end_mask_0 = const()[name = tensor("op_3601_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3601_cast_fp16 = slice_by_index(begin = var_3601_begin_0, end = var_3601_end_0, end_mask = var_3601_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3601_cast_fp16")]; tensor var_3605_begin_0 = const()[name = tensor("op_3605_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_3605_end_0 = const()[name = tensor("op_3605_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_3605_end_mask_0 = const()[name = tensor("op_3605_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3605_cast_fp16 = slice_by_index(begin = var_3605_begin_0, end = var_3605_end_0, end_mask = var_3605_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3605_cast_fp16")]; tensor var_3609_begin_0 = const()[name = tensor("op_3609_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_3609_end_0 = const()[name = tensor("op_3609_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_3609_end_mask_0 = const()[name = tensor("op_3609_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3609_cast_fp16 = slice_by_index(begin = var_3609_begin_0, end = var_3609_end_0, end_mask = var_3609_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3609_cast_fp16")]; tensor var_3613_begin_0 = const()[name = tensor("op_3613_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_3613_end_0 = const()[name = tensor("op_3613_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_3613_end_mask_0 = const()[name = tensor("op_3613_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3613_cast_fp16 = slice_by_index(begin = var_3613_begin_0, end = var_3613_end_0, end_mask = var_3613_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3613_cast_fp16")]; tensor var_3617_begin_0 = const()[name = tensor("op_3617_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_3617_end_0 = const()[name = tensor("op_3617_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_3617_end_mask_0 = const()[name = tensor("op_3617_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_3617_cast_fp16 = slice_by_index(begin = var_3617_begin_0, end = var_3617_end_0, end_mask = var_3617_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3617_cast_fp16")]; tensor var_3626_begin_0 = const()[name = tensor("op_3626_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3626_end_0 = const()[name = tensor("op_3626_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3626_end_mask_0 = const()[name = tensor("op_3626_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3626_cast_fp16 = slice_by_index(begin = var_3626_begin_0, end = var_3626_end_0, end_mask = var_3626_end_mask_0, x = var_3541_cast_fp16)[name = tensor("op_3626_cast_fp16")]; tensor var_3633_begin_0 = const()[name = tensor("op_3633_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3633_end_0 = const()[name = tensor("op_3633_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3633_end_mask_0 = const()[name = tensor("op_3633_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3633_cast_fp16 = slice_by_index(begin = var_3633_begin_0, end = var_3633_end_0, end_mask = var_3633_end_mask_0, x = var_3541_cast_fp16)[name = tensor("op_3633_cast_fp16")]; tensor var_3640_begin_0 = const()[name = tensor("op_3640_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3640_end_0 = const()[name = tensor("op_3640_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3640_end_mask_0 = const()[name = tensor("op_3640_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3640_cast_fp16 = slice_by_index(begin = var_3640_begin_0, end = var_3640_end_0, end_mask = var_3640_end_mask_0, x = var_3541_cast_fp16)[name = tensor("op_3640_cast_fp16")]; tensor var_3647_begin_0 = const()[name = tensor("op_3647_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3647_end_0 = const()[name = tensor("op_3647_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3647_end_mask_0 = const()[name = tensor("op_3647_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3647_cast_fp16 = slice_by_index(begin = var_3647_begin_0, end = var_3647_end_0, end_mask = var_3647_end_mask_0, x = var_3541_cast_fp16)[name = tensor("op_3647_cast_fp16")]; tensor var_3654_begin_0 = const()[name = tensor("op_3654_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3654_end_0 = const()[name = tensor("op_3654_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3654_end_mask_0 = const()[name = tensor("op_3654_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3654_cast_fp16 = slice_by_index(begin = var_3654_begin_0, end = var_3654_end_0, end_mask = var_3654_end_mask_0, x = var_3545_cast_fp16)[name = tensor("op_3654_cast_fp16")]; tensor var_3661_begin_0 = const()[name = tensor("op_3661_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3661_end_0 = const()[name = tensor("op_3661_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3661_end_mask_0 = const()[name = tensor("op_3661_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3661_cast_fp16 = slice_by_index(begin = var_3661_begin_0, end = var_3661_end_0, end_mask = var_3661_end_mask_0, x = var_3545_cast_fp16)[name = tensor("op_3661_cast_fp16")]; tensor var_3668_begin_0 = const()[name = tensor("op_3668_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3668_end_0 = const()[name = tensor("op_3668_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3668_end_mask_0 = const()[name = tensor("op_3668_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3668_cast_fp16 = slice_by_index(begin = var_3668_begin_0, end = var_3668_end_0, end_mask = var_3668_end_mask_0, x = var_3545_cast_fp16)[name = tensor("op_3668_cast_fp16")]; tensor var_3675_begin_0 = const()[name = tensor("op_3675_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3675_end_0 = const()[name = tensor("op_3675_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3675_end_mask_0 = const()[name = tensor("op_3675_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3675_cast_fp16 = slice_by_index(begin = var_3675_begin_0, end = var_3675_end_0, end_mask = var_3675_end_mask_0, x = var_3545_cast_fp16)[name = tensor("op_3675_cast_fp16")]; tensor var_3682_begin_0 = const()[name = tensor("op_3682_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3682_end_0 = const()[name = tensor("op_3682_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3682_end_mask_0 = const()[name = tensor("op_3682_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3682_cast_fp16 = slice_by_index(begin = var_3682_begin_0, end = var_3682_end_0, end_mask = var_3682_end_mask_0, x = var_3549_cast_fp16)[name = tensor("op_3682_cast_fp16")]; tensor var_3689_begin_0 = const()[name = tensor("op_3689_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3689_end_0 = const()[name = tensor("op_3689_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3689_end_mask_0 = const()[name = tensor("op_3689_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3689_cast_fp16 = slice_by_index(begin = var_3689_begin_0, end = var_3689_end_0, end_mask = var_3689_end_mask_0, x = var_3549_cast_fp16)[name = tensor("op_3689_cast_fp16")]; tensor var_3696_begin_0 = const()[name = tensor("op_3696_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3696_end_0 = const()[name = tensor("op_3696_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3696_end_mask_0 = const()[name = tensor("op_3696_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3696_cast_fp16 = slice_by_index(begin = var_3696_begin_0, end = var_3696_end_0, end_mask = var_3696_end_mask_0, x = var_3549_cast_fp16)[name = tensor("op_3696_cast_fp16")]; tensor var_3703_begin_0 = const()[name = tensor("op_3703_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3703_end_0 = const()[name = tensor("op_3703_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3703_end_mask_0 = const()[name = tensor("op_3703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3703_cast_fp16 = slice_by_index(begin = var_3703_begin_0, end = var_3703_end_0, end_mask = var_3703_end_mask_0, x = var_3549_cast_fp16)[name = tensor("op_3703_cast_fp16")]; tensor var_3710_begin_0 = const()[name = tensor("op_3710_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3710_end_0 = const()[name = tensor("op_3710_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3710_end_mask_0 = const()[name = tensor("op_3710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3710_cast_fp16 = slice_by_index(begin = var_3710_begin_0, end = var_3710_end_0, end_mask = var_3710_end_mask_0, x = var_3553_cast_fp16)[name = tensor("op_3710_cast_fp16")]; tensor var_3717_begin_0 = const()[name = tensor("op_3717_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3717_end_0 = const()[name = tensor("op_3717_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3717_end_mask_0 = const()[name = tensor("op_3717_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3717_cast_fp16 = slice_by_index(begin = var_3717_begin_0, end = var_3717_end_0, end_mask = var_3717_end_mask_0, x = var_3553_cast_fp16)[name = tensor("op_3717_cast_fp16")]; tensor var_3724_begin_0 = const()[name = tensor("op_3724_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3724_end_0 = const()[name = tensor("op_3724_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3724_end_mask_0 = const()[name = tensor("op_3724_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3724_cast_fp16 = slice_by_index(begin = var_3724_begin_0, end = var_3724_end_0, end_mask = var_3724_end_mask_0, x = var_3553_cast_fp16)[name = tensor("op_3724_cast_fp16")]; tensor var_3731_begin_0 = const()[name = tensor("op_3731_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3731_end_0 = const()[name = tensor("op_3731_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3731_end_mask_0 = const()[name = tensor("op_3731_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3731_cast_fp16 = slice_by_index(begin = var_3731_begin_0, end = var_3731_end_0, end_mask = var_3731_end_mask_0, x = var_3553_cast_fp16)[name = tensor("op_3731_cast_fp16")]; tensor var_3738_begin_0 = const()[name = tensor("op_3738_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3738_end_0 = const()[name = tensor("op_3738_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3738_end_mask_0 = const()[name = tensor("op_3738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3738_cast_fp16 = slice_by_index(begin = var_3738_begin_0, end = var_3738_end_0, end_mask = var_3738_end_mask_0, x = var_3557_cast_fp16)[name = tensor("op_3738_cast_fp16")]; tensor var_3745_begin_0 = const()[name = tensor("op_3745_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3745_end_0 = const()[name = tensor("op_3745_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3745_end_mask_0 = const()[name = tensor("op_3745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3745_cast_fp16 = slice_by_index(begin = var_3745_begin_0, end = var_3745_end_0, end_mask = var_3745_end_mask_0, x = var_3557_cast_fp16)[name = tensor("op_3745_cast_fp16")]; tensor var_3752_begin_0 = const()[name = tensor("op_3752_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3752_end_0 = const()[name = tensor("op_3752_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3752_end_mask_0 = const()[name = tensor("op_3752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3752_cast_fp16 = slice_by_index(begin = var_3752_begin_0, end = var_3752_end_0, end_mask = var_3752_end_mask_0, x = var_3557_cast_fp16)[name = tensor("op_3752_cast_fp16")]; tensor var_3759_begin_0 = const()[name = tensor("op_3759_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3759_end_0 = const()[name = tensor("op_3759_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3759_end_mask_0 = const()[name = tensor("op_3759_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3759_cast_fp16 = slice_by_index(begin = var_3759_begin_0, end = var_3759_end_0, end_mask = var_3759_end_mask_0, x = var_3557_cast_fp16)[name = tensor("op_3759_cast_fp16")]; tensor var_3766_begin_0 = const()[name = tensor("op_3766_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3766_end_0 = const()[name = tensor("op_3766_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3766_end_mask_0 = const()[name = tensor("op_3766_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3766_cast_fp16 = slice_by_index(begin = var_3766_begin_0, end = var_3766_end_0, end_mask = var_3766_end_mask_0, x = var_3561_cast_fp16)[name = tensor("op_3766_cast_fp16")]; tensor var_3773_begin_0 = const()[name = tensor("op_3773_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3773_end_0 = const()[name = tensor("op_3773_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3773_end_mask_0 = const()[name = tensor("op_3773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3773_cast_fp16 = slice_by_index(begin = var_3773_begin_0, end = var_3773_end_0, end_mask = var_3773_end_mask_0, x = var_3561_cast_fp16)[name = tensor("op_3773_cast_fp16")]; tensor var_3780_begin_0 = const()[name = tensor("op_3780_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3780_end_0 = const()[name = tensor("op_3780_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3780_end_mask_0 = const()[name = tensor("op_3780_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3780_cast_fp16 = slice_by_index(begin = var_3780_begin_0, end = var_3780_end_0, end_mask = var_3780_end_mask_0, x = var_3561_cast_fp16)[name = tensor("op_3780_cast_fp16")]; tensor var_3787_begin_0 = const()[name = tensor("op_3787_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3787_end_0 = const()[name = tensor("op_3787_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3787_end_mask_0 = const()[name = tensor("op_3787_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3787_cast_fp16 = slice_by_index(begin = var_3787_begin_0, end = var_3787_end_0, end_mask = var_3787_end_mask_0, x = var_3561_cast_fp16)[name = tensor("op_3787_cast_fp16")]; tensor var_3794_begin_0 = const()[name = tensor("op_3794_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3794_end_0 = const()[name = tensor("op_3794_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3794_end_mask_0 = const()[name = tensor("op_3794_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3794_cast_fp16 = slice_by_index(begin = var_3794_begin_0, end = var_3794_end_0, end_mask = var_3794_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3794_cast_fp16")]; tensor var_3801_begin_0 = const()[name = tensor("op_3801_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3801_end_0 = const()[name = tensor("op_3801_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3801_end_mask_0 = const()[name = tensor("op_3801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3801_cast_fp16 = slice_by_index(begin = var_3801_begin_0, end = var_3801_end_0, end_mask = var_3801_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3801_cast_fp16")]; tensor var_3808_begin_0 = const()[name = tensor("op_3808_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3808_end_0 = const()[name = tensor("op_3808_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3808_end_mask_0 = const()[name = tensor("op_3808_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3808_cast_fp16 = slice_by_index(begin = var_3808_begin_0, end = var_3808_end_0, end_mask = var_3808_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3808_cast_fp16")]; tensor var_3815_begin_0 = const()[name = tensor("op_3815_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3815_end_0 = const()[name = tensor("op_3815_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3815_end_mask_0 = const()[name = tensor("op_3815_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3815_cast_fp16 = slice_by_index(begin = var_3815_begin_0, end = var_3815_end_0, end_mask = var_3815_end_mask_0, x = var_3565_cast_fp16)[name = tensor("op_3815_cast_fp16")]; tensor var_3822_begin_0 = const()[name = tensor("op_3822_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3822_end_0 = const()[name = tensor("op_3822_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3822_end_mask_0 = const()[name = tensor("op_3822_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3822_cast_fp16 = slice_by_index(begin = var_3822_begin_0, end = var_3822_end_0, end_mask = var_3822_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3822_cast_fp16")]; tensor var_3829_begin_0 = const()[name = tensor("op_3829_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3829_end_0 = const()[name = tensor("op_3829_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3829_end_mask_0 = const()[name = tensor("op_3829_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3829_cast_fp16 = slice_by_index(begin = var_3829_begin_0, end = var_3829_end_0, end_mask = var_3829_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3829_cast_fp16")]; tensor var_3836_begin_0 = const()[name = tensor("op_3836_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3836_end_0 = const()[name = tensor("op_3836_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3836_end_mask_0 = const()[name = tensor("op_3836_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3836_cast_fp16 = slice_by_index(begin = var_3836_begin_0, end = var_3836_end_0, end_mask = var_3836_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3836_cast_fp16")]; tensor var_3843_begin_0 = const()[name = tensor("op_3843_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3843_end_0 = const()[name = tensor("op_3843_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3843_end_mask_0 = const()[name = tensor("op_3843_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3843_cast_fp16 = slice_by_index(begin = var_3843_begin_0, end = var_3843_end_0, end_mask = var_3843_end_mask_0, x = var_3569_cast_fp16)[name = tensor("op_3843_cast_fp16")]; tensor var_3850_begin_0 = const()[name = tensor("op_3850_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3850_end_0 = const()[name = tensor("op_3850_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3850_end_mask_0 = const()[name = tensor("op_3850_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3850_cast_fp16 = slice_by_index(begin = var_3850_begin_0, end = var_3850_end_0, end_mask = var_3850_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3850_cast_fp16")]; tensor var_3857_begin_0 = const()[name = tensor("op_3857_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3857_end_0 = const()[name = tensor("op_3857_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3857_end_mask_0 = const()[name = tensor("op_3857_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3857_cast_fp16 = slice_by_index(begin = var_3857_begin_0, end = var_3857_end_0, end_mask = var_3857_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3857_cast_fp16")]; tensor var_3864_begin_0 = const()[name = tensor("op_3864_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3864_end_0 = const()[name = tensor("op_3864_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3864_end_mask_0 = const()[name = tensor("op_3864_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3864_cast_fp16 = slice_by_index(begin = var_3864_begin_0, end = var_3864_end_0, end_mask = var_3864_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3864_cast_fp16")]; tensor var_3871_begin_0 = const()[name = tensor("op_3871_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3871_end_0 = const()[name = tensor("op_3871_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3871_end_mask_0 = const()[name = tensor("op_3871_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3871_cast_fp16 = slice_by_index(begin = var_3871_begin_0, end = var_3871_end_0, end_mask = var_3871_end_mask_0, x = var_3573_cast_fp16)[name = tensor("op_3871_cast_fp16")]; tensor var_3878_begin_0 = const()[name = tensor("op_3878_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3878_end_0 = const()[name = tensor("op_3878_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3878_end_mask_0 = const()[name = tensor("op_3878_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3878_cast_fp16 = slice_by_index(begin = var_3878_begin_0, end = var_3878_end_0, end_mask = var_3878_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3878_cast_fp16")]; tensor var_3885_begin_0 = const()[name = tensor("op_3885_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3885_end_0 = const()[name = tensor("op_3885_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3885_end_mask_0 = const()[name = tensor("op_3885_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3885_cast_fp16 = slice_by_index(begin = var_3885_begin_0, end = var_3885_end_0, end_mask = var_3885_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3885_cast_fp16")]; tensor var_3892_begin_0 = const()[name = tensor("op_3892_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3892_end_0 = const()[name = tensor("op_3892_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3892_end_mask_0 = const()[name = tensor("op_3892_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3892_cast_fp16 = slice_by_index(begin = var_3892_begin_0, end = var_3892_end_0, end_mask = var_3892_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3892_cast_fp16")]; tensor var_3899_begin_0 = const()[name = tensor("op_3899_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3899_end_0 = const()[name = tensor("op_3899_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3899_end_mask_0 = const()[name = tensor("op_3899_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3899_cast_fp16 = slice_by_index(begin = var_3899_begin_0, end = var_3899_end_0, end_mask = var_3899_end_mask_0, x = var_3577_cast_fp16)[name = tensor("op_3899_cast_fp16")]; tensor var_3906_begin_0 = const()[name = tensor("op_3906_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3906_end_0 = const()[name = tensor("op_3906_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3906_end_mask_0 = const()[name = tensor("op_3906_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3906_cast_fp16 = slice_by_index(begin = var_3906_begin_0, end = var_3906_end_0, end_mask = var_3906_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3906_cast_fp16")]; tensor var_3913_begin_0 = const()[name = tensor("op_3913_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3913_end_0 = const()[name = tensor("op_3913_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3913_end_mask_0 = const()[name = tensor("op_3913_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3913_cast_fp16 = slice_by_index(begin = var_3913_begin_0, end = var_3913_end_0, end_mask = var_3913_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3913_cast_fp16")]; tensor var_3920_begin_0 = const()[name = tensor("op_3920_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3920_end_0 = const()[name = tensor("op_3920_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3920_end_mask_0 = const()[name = tensor("op_3920_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3920_cast_fp16 = slice_by_index(begin = var_3920_begin_0, end = var_3920_end_0, end_mask = var_3920_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3920_cast_fp16")]; tensor var_3927_begin_0 = const()[name = tensor("op_3927_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3927_end_0 = const()[name = tensor("op_3927_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3927_end_mask_0 = const()[name = tensor("op_3927_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3927_cast_fp16 = slice_by_index(begin = var_3927_begin_0, end = var_3927_end_0, end_mask = var_3927_end_mask_0, x = var_3581_cast_fp16)[name = tensor("op_3927_cast_fp16")]; tensor var_3934_begin_0 = const()[name = tensor("op_3934_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3934_end_0 = const()[name = tensor("op_3934_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3934_end_mask_0 = const()[name = tensor("op_3934_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3934_cast_fp16 = slice_by_index(begin = var_3934_begin_0, end = var_3934_end_0, end_mask = var_3934_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3934_cast_fp16")]; tensor var_3941_begin_0 = const()[name = tensor("op_3941_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3941_end_0 = const()[name = tensor("op_3941_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3941_end_mask_0 = const()[name = tensor("op_3941_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3941_cast_fp16 = slice_by_index(begin = var_3941_begin_0, end = var_3941_end_0, end_mask = var_3941_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3941_cast_fp16")]; tensor var_3948_begin_0 = const()[name = tensor("op_3948_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3948_end_0 = const()[name = tensor("op_3948_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3948_end_mask_0 = const()[name = tensor("op_3948_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3948_cast_fp16 = slice_by_index(begin = var_3948_begin_0, end = var_3948_end_0, end_mask = var_3948_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3948_cast_fp16")]; tensor var_3955_begin_0 = const()[name = tensor("op_3955_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3955_end_0 = const()[name = tensor("op_3955_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3955_end_mask_0 = const()[name = tensor("op_3955_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3955_cast_fp16 = slice_by_index(begin = var_3955_begin_0, end = var_3955_end_0, end_mask = var_3955_end_mask_0, x = var_3585_cast_fp16)[name = tensor("op_3955_cast_fp16")]; tensor var_3962_begin_0 = const()[name = tensor("op_3962_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3962_end_0 = const()[name = tensor("op_3962_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3962_end_mask_0 = const()[name = tensor("op_3962_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3962_cast_fp16 = slice_by_index(begin = var_3962_begin_0, end = var_3962_end_0, end_mask = var_3962_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3962_cast_fp16")]; tensor var_3969_begin_0 = const()[name = tensor("op_3969_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3969_end_0 = const()[name = tensor("op_3969_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3969_end_mask_0 = const()[name = tensor("op_3969_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3969_cast_fp16 = slice_by_index(begin = var_3969_begin_0, end = var_3969_end_0, end_mask = var_3969_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3969_cast_fp16")]; tensor var_3976_begin_0 = const()[name = tensor("op_3976_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_3976_end_0 = const()[name = tensor("op_3976_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_3976_end_mask_0 = const()[name = tensor("op_3976_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3976_cast_fp16 = slice_by_index(begin = var_3976_begin_0, end = var_3976_end_0, end_mask = var_3976_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3976_cast_fp16")]; tensor var_3983_begin_0 = const()[name = tensor("op_3983_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_3983_end_0 = const()[name = tensor("op_3983_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_3983_end_mask_0 = const()[name = tensor("op_3983_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3983_cast_fp16 = slice_by_index(begin = var_3983_begin_0, end = var_3983_end_0, end_mask = var_3983_end_mask_0, x = var_3589_cast_fp16)[name = tensor("op_3983_cast_fp16")]; tensor var_3990_begin_0 = const()[name = tensor("op_3990_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3990_end_0 = const()[name = tensor("op_3990_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_3990_end_mask_0 = const()[name = tensor("op_3990_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3990_cast_fp16 = slice_by_index(begin = var_3990_begin_0, end = var_3990_end_0, end_mask = var_3990_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_3990_cast_fp16")]; tensor var_3997_begin_0 = const()[name = tensor("op_3997_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_3997_end_0 = const()[name = tensor("op_3997_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_3997_end_mask_0 = const()[name = tensor("op_3997_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_3997_cast_fp16 = slice_by_index(begin = var_3997_begin_0, end = var_3997_end_0, end_mask = var_3997_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_3997_cast_fp16")]; tensor var_4004_begin_0 = const()[name = tensor("op_4004_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_4004_end_0 = const()[name = tensor("op_4004_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_4004_end_mask_0 = const()[name = tensor("op_4004_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4004_cast_fp16 = slice_by_index(begin = var_4004_begin_0, end = var_4004_end_0, end_mask = var_4004_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_4004_cast_fp16")]; tensor var_4011_begin_0 = const()[name = tensor("op_4011_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_4011_end_0 = const()[name = tensor("op_4011_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4011_end_mask_0 = const()[name = tensor("op_4011_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4011_cast_fp16 = slice_by_index(begin = var_4011_begin_0, end = var_4011_end_0, end_mask = var_4011_end_mask_0, x = var_3593_cast_fp16)[name = tensor("op_4011_cast_fp16")]; tensor var_4018_begin_0 = const()[name = tensor("op_4018_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4018_end_0 = const()[name = tensor("op_4018_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_4018_end_mask_0 = const()[name = tensor("op_4018_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4018_cast_fp16 = slice_by_index(begin = var_4018_begin_0, end = var_4018_end_0, end_mask = var_4018_end_mask_0, x = var_3597_cast_fp16)[name = tensor("op_4018_cast_fp16")]; tensor var_4025_begin_0 = const()[name = tensor("op_4025_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_4025_end_0 = const()[name = tensor("op_4025_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_4025_end_mask_0 = const()[name = tensor("op_4025_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4025_cast_fp16 = slice_by_index(begin = var_4025_begin_0, end = var_4025_end_0, end_mask = var_4025_end_mask_0, x = var_3597_cast_fp16)[name = tensor("op_4025_cast_fp16")]; tensor var_4032_begin_0 = const()[name = tensor("op_4032_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_4032_end_0 = const()[name = tensor("op_4032_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_4032_end_mask_0 = const()[name = tensor("op_4032_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4032_cast_fp16 = slice_by_index(begin = var_4032_begin_0, end = var_4032_end_0, end_mask = var_4032_end_mask_0, x = var_3597_cast_fp16)[name = tensor("op_4032_cast_fp16")]; tensor var_4039_begin_0 = const()[name = tensor("op_4039_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_4039_end_0 = const()[name = tensor("op_4039_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4039_end_mask_0 = const()[name = tensor("op_4039_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4039_cast_fp16 = slice_by_index(begin = var_4039_begin_0, end = var_4039_end_0, end_mask = var_4039_end_mask_0, x = var_3597_cast_fp16)[name = tensor("op_4039_cast_fp16")]; tensor var_4046_begin_0 = const()[name = tensor("op_4046_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4046_end_0 = const()[name = tensor("op_4046_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_4046_end_mask_0 = const()[name = tensor("op_4046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4046_cast_fp16 = slice_by_index(begin = var_4046_begin_0, end = var_4046_end_0, end_mask = var_4046_end_mask_0, x = var_3601_cast_fp16)[name = tensor("op_4046_cast_fp16")]; tensor var_4053_begin_0 = const()[name = tensor("op_4053_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_4053_end_0 = const()[name = tensor("op_4053_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_4053_end_mask_0 = const()[name = tensor("op_4053_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4053_cast_fp16 = slice_by_index(begin = var_4053_begin_0, end = var_4053_end_0, end_mask = var_4053_end_mask_0, x = var_3601_cast_fp16)[name = tensor("op_4053_cast_fp16")]; tensor var_4060_begin_0 = const()[name = tensor("op_4060_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_4060_end_0 = const()[name = tensor("op_4060_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_4060_end_mask_0 = const()[name = tensor("op_4060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4060_cast_fp16 = slice_by_index(begin = var_4060_begin_0, end = var_4060_end_0, end_mask = var_4060_end_mask_0, x = var_3601_cast_fp16)[name = tensor("op_4060_cast_fp16")]; tensor var_4067_begin_0 = const()[name = tensor("op_4067_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_4067_end_0 = const()[name = tensor("op_4067_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4067_end_mask_0 = const()[name = tensor("op_4067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4067_cast_fp16 = slice_by_index(begin = var_4067_begin_0, end = var_4067_end_0, end_mask = var_4067_end_mask_0, x = var_3601_cast_fp16)[name = tensor("op_4067_cast_fp16")]; tensor var_4074_begin_0 = const()[name = tensor("op_4074_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4074_end_0 = const()[name = tensor("op_4074_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_4074_end_mask_0 = const()[name = tensor("op_4074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4074_cast_fp16 = slice_by_index(begin = var_4074_begin_0, end = var_4074_end_0, end_mask = var_4074_end_mask_0, x = var_3605_cast_fp16)[name = tensor("op_4074_cast_fp16")]; tensor var_4081_begin_0 = const()[name = tensor("op_4081_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_4081_end_0 = const()[name = tensor("op_4081_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_4081_end_mask_0 = const()[name = tensor("op_4081_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4081_cast_fp16 = slice_by_index(begin = var_4081_begin_0, end = var_4081_end_0, end_mask = var_4081_end_mask_0, x = var_3605_cast_fp16)[name = tensor("op_4081_cast_fp16")]; tensor var_4088_begin_0 = const()[name = tensor("op_4088_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_4088_end_0 = const()[name = tensor("op_4088_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_4088_end_mask_0 = const()[name = tensor("op_4088_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4088_cast_fp16 = slice_by_index(begin = var_4088_begin_0, end = var_4088_end_0, end_mask = var_4088_end_mask_0, x = var_3605_cast_fp16)[name = tensor("op_4088_cast_fp16")]; tensor var_4095_begin_0 = const()[name = tensor("op_4095_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_4095_end_0 = const()[name = tensor("op_4095_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4095_end_mask_0 = const()[name = tensor("op_4095_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4095_cast_fp16 = slice_by_index(begin = var_4095_begin_0, end = var_4095_end_0, end_mask = var_4095_end_mask_0, x = var_3605_cast_fp16)[name = tensor("op_4095_cast_fp16")]; tensor var_4102_begin_0 = const()[name = tensor("op_4102_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4102_end_0 = const()[name = tensor("op_4102_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_4102_end_mask_0 = const()[name = tensor("op_4102_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4102_cast_fp16 = slice_by_index(begin = var_4102_begin_0, end = var_4102_end_0, end_mask = var_4102_end_mask_0, x = var_3609_cast_fp16)[name = tensor("op_4102_cast_fp16")]; tensor var_4109_begin_0 = const()[name = tensor("op_4109_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_4109_end_0 = const()[name = tensor("op_4109_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_4109_end_mask_0 = const()[name = tensor("op_4109_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4109_cast_fp16 = slice_by_index(begin = var_4109_begin_0, end = var_4109_end_0, end_mask = var_4109_end_mask_0, x = var_3609_cast_fp16)[name = tensor("op_4109_cast_fp16")]; tensor var_4116_begin_0 = const()[name = tensor("op_4116_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_4116_end_0 = const()[name = tensor("op_4116_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_4116_end_mask_0 = const()[name = tensor("op_4116_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4116_cast_fp16 = slice_by_index(begin = var_4116_begin_0, end = var_4116_end_0, end_mask = var_4116_end_mask_0, x = var_3609_cast_fp16)[name = tensor("op_4116_cast_fp16")]; tensor var_4123_begin_0 = const()[name = tensor("op_4123_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_4123_end_0 = const()[name = tensor("op_4123_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4123_end_mask_0 = const()[name = tensor("op_4123_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4123_cast_fp16 = slice_by_index(begin = var_4123_begin_0, end = var_4123_end_0, end_mask = var_4123_end_mask_0, x = var_3609_cast_fp16)[name = tensor("op_4123_cast_fp16")]; tensor var_4130_begin_0 = const()[name = tensor("op_4130_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4130_end_0 = const()[name = tensor("op_4130_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_4130_end_mask_0 = const()[name = tensor("op_4130_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4130_cast_fp16 = slice_by_index(begin = var_4130_begin_0, end = var_4130_end_0, end_mask = var_4130_end_mask_0, x = var_3613_cast_fp16)[name = tensor("op_4130_cast_fp16")]; tensor var_4137_begin_0 = const()[name = tensor("op_4137_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_4137_end_0 = const()[name = tensor("op_4137_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_4137_end_mask_0 = const()[name = tensor("op_4137_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4137_cast_fp16 = slice_by_index(begin = var_4137_begin_0, end = var_4137_end_0, end_mask = var_4137_end_mask_0, x = var_3613_cast_fp16)[name = tensor("op_4137_cast_fp16")]; tensor var_4144_begin_0 = const()[name = tensor("op_4144_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_4144_end_0 = const()[name = tensor("op_4144_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_4144_end_mask_0 = const()[name = tensor("op_4144_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4144_cast_fp16 = slice_by_index(begin = var_4144_begin_0, end = var_4144_end_0, end_mask = var_4144_end_mask_0, x = var_3613_cast_fp16)[name = tensor("op_4144_cast_fp16")]; tensor var_4151_begin_0 = const()[name = tensor("op_4151_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_4151_end_0 = const()[name = tensor("op_4151_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4151_end_mask_0 = const()[name = tensor("op_4151_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4151_cast_fp16 = slice_by_index(begin = var_4151_begin_0, end = var_4151_end_0, end_mask = var_4151_end_mask_0, x = var_3613_cast_fp16)[name = tensor("op_4151_cast_fp16")]; tensor var_4158_begin_0 = const()[name = tensor("op_4158_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4158_end_0 = const()[name = tensor("op_4158_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_4158_end_mask_0 = const()[name = tensor("op_4158_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4158_cast_fp16 = slice_by_index(begin = var_4158_begin_0, end = var_4158_end_0, end_mask = var_4158_end_mask_0, x = var_3617_cast_fp16)[name = tensor("op_4158_cast_fp16")]; tensor var_4165_begin_0 = const()[name = tensor("op_4165_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_4165_end_0 = const()[name = tensor("op_4165_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_4165_end_mask_0 = const()[name = tensor("op_4165_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4165_cast_fp16 = slice_by_index(begin = var_4165_begin_0, end = var_4165_end_0, end_mask = var_4165_end_mask_0, x = var_3617_cast_fp16)[name = tensor("op_4165_cast_fp16")]; tensor var_4172_begin_0 = const()[name = tensor("op_4172_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_4172_end_0 = const()[name = tensor("op_4172_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_4172_end_mask_0 = const()[name = tensor("op_4172_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4172_cast_fp16 = slice_by_index(begin = var_4172_begin_0, end = var_4172_end_0, end_mask = var_4172_end_mask_0, x = var_3617_cast_fp16)[name = tensor("op_4172_cast_fp16")]; tensor var_4179_begin_0 = const()[name = tensor("op_4179_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_4179_end_0 = const()[name = tensor("op_4179_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4179_end_mask_0 = const()[name = tensor("op_4179_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4179_cast_fp16 = slice_by_index(begin = var_4179_begin_0, end = var_4179_end_0, end_mask = var_4179_end_mask_0, x = var_3617_cast_fp16)[name = tensor("op_4179_cast_fp16")]; tensor k_5_perm_0 = const()[name = tensor("k_5_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_4184_begin_0 = const()[name = tensor("op_4184_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4184_end_0 = const()[name = tensor("op_4184_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_4184_end_mask_0 = const()[name = tensor("op_4184_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor("transpose_29")]; tensor var_4184_cast_fp16 = slice_by_index(begin = var_4184_begin_0, end = var_4184_end_0, end_mask = var_4184_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4184_cast_fp16")]; tensor var_4188_begin_0 = const()[name = tensor("op_4188_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_4188_end_0 = const()[name = tensor("op_4188_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_4188_end_mask_0 = const()[name = tensor("op_4188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4188_cast_fp16 = slice_by_index(begin = var_4188_begin_0, end = var_4188_end_0, end_mask = var_4188_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4188_cast_fp16")]; tensor var_4192_begin_0 = const()[name = tensor("op_4192_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_4192_end_0 = const()[name = tensor("op_4192_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_4192_end_mask_0 = const()[name = tensor("op_4192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4192_cast_fp16 = slice_by_index(begin = var_4192_begin_0, end = var_4192_end_0, end_mask = var_4192_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4192_cast_fp16")]; tensor var_4196_begin_0 = const()[name = tensor("op_4196_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_4196_end_0 = const()[name = tensor("op_4196_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_4196_end_mask_0 = const()[name = tensor("op_4196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4196_cast_fp16 = slice_by_index(begin = var_4196_begin_0, end = var_4196_end_0, end_mask = var_4196_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4196_cast_fp16")]; tensor var_4200_begin_0 = const()[name = tensor("op_4200_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_4200_end_0 = const()[name = tensor("op_4200_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_4200_end_mask_0 = const()[name = tensor("op_4200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4200_cast_fp16 = slice_by_index(begin = var_4200_begin_0, end = var_4200_end_0, end_mask = var_4200_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4200_cast_fp16")]; tensor var_4204_begin_0 = const()[name = tensor("op_4204_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_4204_end_0 = const()[name = tensor("op_4204_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_4204_end_mask_0 = const()[name = tensor("op_4204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4204_cast_fp16 = slice_by_index(begin = var_4204_begin_0, end = var_4204_end_0, end_mask = var_4204_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4204_cast_fp16")]; tensor var_4208_begin_0 = const()[name = tensor("op_4208_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_4208_end_0 = const()[name = tensor("op_4208_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_4208_end_mask_0 = const()[name = tensor("op_4208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4208_cast_fp16 = slice_by_index(begin = var_4208_begin_0, end = var_4208_end_0, end_mask = var_4208_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4208_cast_fp16")]; tensor var_4212_begin_0 = const()[name = tensor("op_4212_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_4212_end_0 = const()[name = tensor("op_4212_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_4212_end_mask_0 = const()[name = tensor("op_4212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4212_cast_fp16 = slice_by_index(begin = var_4212_begin_0, end = var_4212_end_0, end_mask = var_4212_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4212_cast_fp16")]; tensor var_4216_begin_0 = const()[name = tensor("op_4216_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_4216_end_0 = const()[name = tensor("op_4216_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_4216_end_mask_0 = const()[name = tensor("op_4216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4216_cast_fp16 = slice_by_index(begin = var_4216_begin_0, end = var_4216_end_0, end_mask = var_4216_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4216_cast_fp16")]; tensor var_4220_begin_0 = const()[name = tensor("op_4220_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_4220_end_0 = const()[name = tensor("op_4220_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_4220_end_mask_0 = const()[name = tensor("op_4220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4220_cast_fp16 = slice_by_index(begin = var_4220_begin_0, end = var_4220_end_0, end_mask = var_4220_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4220_cast_fp16")]; tensor var_4224_begin_0 = const()[name = tensor("op_4224_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_4224_end_0 = const()[name = tensor("op_4224_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_4224_end_mask_0 = const()[name = tensor("op_4224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4224_cast_fp16 = slice_by_index(begin = var_4224_begin_0, end = var_4224_end_0, end_mask = var_4224_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4224_cast_fp16")]; tensor var_4228_begin_0 = const()[name = tensor("op_4228_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_4228_end_0 = const()[name = tensor("op_4228_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_4228_end_mask_0 = const()[name = tensor("op_4228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4228_cast_fp16 = slice_by_index(begin = var_4228_begin_0, end = var_4228_end_0, end_mask = var_4228_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4228_cast_fp16")]; tensor var_4232_begin_0 = const()[name = tensor("op_4232_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_4232_end_0 = const()[name = tensor("op_4232_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_4232_end_mask_0 = const()[name = tensor("op_4232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4232_cast_fp16 = slice_by_index(begin = var_4232_begin_0, end = var_4232_end_0, end_mask = var_4232_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4232_cast_fp16")]; tensor var_4236_begin_0 = const()[name = tensor("op_4236_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_4236_end_0 = const()[name = tensor("op_4236_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_4236_end_mask_0 = const()[name = tensor("op_4236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4236_cast_fp16 = slice_by_index(begin = var_4236_begin_0, end = var_4236_end_0, end_mask = var_4236_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4236_cast_fp16")]; tensor var_4240_begin_0 = const()[name = tensor("op_4240_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_4240_end_0 = const()[name = tensor("op_4240_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_4240_end_mask_0 = const()[name = tensor("op_4240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4240_cast_fp16 = slice_by_index(begin = var_4240_begin_0, end = var_4240_end_0, end_mask = var_4240_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4240_cast_fp16")]; tensor var_4244_begin_0 = const()[name = tensor("op_4244_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_4244_end_0 = const()[name = tensor("op_4244_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_4244_end_mask_0 = const()[name = tensor("op_4244_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4244_cast_fp16 = slice_by_index(begin = var_4244_begin_0, end = var_4244_end_0, end_mask = var_4244_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4244_cast_fp16")]; tensor var_4248_begin_0 = const()[name = tensor("op_4248_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_4248_end_0 = const()[name = tensor("op_4248_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_4248_end_mask_0 = const()[name = tensor("op_4248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4248_cast_fp16 = slice_by_index(begin = var_4248_begin_0, end = var_4248_end_0, end_mask = var_4248_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4248_cast_fp16")]; tensor var_4252_begin_0 = const()[name = tensor("op_4252_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_4252_end_0 = const()[name = tensor("op_4252_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_4252_end_mask_0 = const()[name = tensor("op_4252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4252_cast_fp16 = slice_by_index(begin = var_4252_begin_0, end = var_4252_end_0, end_mask = var_4252_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4252_cast_fp16")]; tensor var_4256_begin_0 = const()[name = tensor("op_4256_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_4256_end_0 = const()[name = tensor("op_4256_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_4256_end_mask_0 = const()[name = tensor("op_4256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4256_cast_fp16 = slice_by_index(begin = var_4256_begin_0, end = var_4256_end_0, end_mask = var_4256_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4256_cast_fp16")]; tensor var_4260_begin_0 = const()[name = tensor("op_4260_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_4260_end_0 = const()[name = tensor("op_4260_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_4260_end_mask_0 = const()[name = tensor("op_4260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_4260_cast_fp16 = slice_by_index(begin = var_4260_begin_0, end = var_4260_end_0, end_mask = var_4260_end_mask_0, x = k_5_cast_fp16)[name = tensor("op_4260_cast_fp16")]; tensor var_4262_begin_0 = const()[name = tensor("op_4262_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4262_end_0 = const()[name = tensor("op_4262_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_4262_end_mask_0 = const()[name = tensor("op_4262_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4262_cast_fp16 = slice_by_index(begin = var_4262_begin_0, end = var_4262_end_0, end_mask = var_4262_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4262_cast_fp16")]; tensor var_4266_begin_0 = const()[name = tensor("op_4266_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_4266_end_0 = const()[name = tensor("op_4266_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_4266_end_mask_0 = const()[name = tensor("op_4266_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4266_cast_fp16 = slice_by_index(begin = var_4266_begin_0, end = var_4266_end_0, end_mask = var_4266_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4266_cast_fp16")]; tensor var_4270_begin_0 = const()[name = tensor("op_4270_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_4270_end_0 = const()[name = tensor("op_4270_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_4270_end_mask_0 = const()[name = tensor("op_4270_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4270_cast_fp16 = slice_by_index(begin = var_4270_begin_0, end = var_4270_end_0, end_mask = var_4270_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4270_cast_fp16")]; tensor var_4274_begin_0 = const()[name = tensor("op_4274_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_4274_end_0 = const()[name = tensor("op_4274_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_4274_end_mask_0 = const()[name = tensor("op_4274_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4274_cast_fp16 = slice_by_index(begin = var_4274_begin_0, end = var_4274_end_0, end_mask = var_4274_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4274_cast_fp16")]; tensor var_4278_begin_0 = const()[name = tensor("op_4278_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_4278_end_0 = const()[name = tensor("op_4278_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_4278_end_mask_0 = const()[name = tensor("op_4278_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4278_cast_fp16 = slice_by_index(begin = var_4278_begin_0, end = var_4278_end_0, end_mask = var_4278_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4278_cast_fp16")]; tensor var_4282_begin_0 = const()[name = tensor("op_4282_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_4282_end_0 = const()[name = tensor("op_4282_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_4282_end_mask_0 = const()[name = tensor("op_4282_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4282_cast_fp16 = slice_by_index(begin = var_4282_begin_0, end = var_4282_end_0, end_mask = var_4282_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4282_cast_fp16")]; tensor var_4286_begin_0 = const()[name = tensor("op_4286_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_4286_end_0 = const()[name = tensor("op_4286_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_4286_end_mask_0 = const()[name = tensor("op_4286_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4286_cast_fp16 = slice_by_index(begin = var_4286_begin_0, end = var_4286_end_0, end_mask = var_4286_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4286_cast_fp16")]; tensor var_4290_begin_0 = const()[name = tensor("op_4290_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_4290_end_0 = const()[name = tensor("op_4290_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_4290_end_mask_0 = const()[name = tensor("op_4290_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4290_cast_fp16 = slice_by_index(begin = var_4290_begin_0, end = var_4290_end_0, end_mask = var_4290_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4290_cast_fp16")]; tensor var_4294_begin_0 = const()[name = tensor("op_4294_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_4294_end_0 = const()[name = tensor("op_4294_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_4294_end_mask_0 = const()[name = tensor("op_4294_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4294_cast_fp16 = slice_by_index(begin = var_4294_begin_0, end = var_4294_end_0, end_mask = var_4294_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4294_cast_fp16")]; tensor var_4298_begin_0 = const()[name = tensor("op_4298_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_4298_end_0 = const()[name = tensor("op_4298_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_4298_end_mask_0 = const()[name = tensor("op_4298_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4298_cast_fp16 = slice_by_index(begin = var_4298_begin_0, end = var_4298_end_0, end_mask = var_4298_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4298_cast_fp16")]; tensor var_4302_begin_0 = const()[name = tensor("op_4302_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_4302_end_0 = const()[name = tensor("op_4302_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_4302_end_mask_0 = const()[name = tensor("op_4302_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4302_cast_fp16 = slice_by_index(begin = var_4302_begin_0, end = var_4302_end_0, end_mask = var_4302_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4302_cast_fp16")]; tensor var_4306_begin_0 = const()[name = tensor("op_4306_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_4306_end_0 = const()[name = tensor("op_4306_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_4306_end_mask_0 = const()[name = tensor("op_4306_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4306_cast_fp16 = slice_by_index(begin = var_4306_begin_0, end = var_4306_end_0, end_mask = var_4306_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4306_cast_fp16")]; tensor var_4310_begin_0 = const()[name = tensor("op_4310_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_4310_end_0 = const()[name = tensor("op_4310_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_4310_end_mask_0 = const()[name = tensor("op_4310_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4310_cast_fp16 = slice_by_index(begin = var_4310_begin_0, end = var_4310_end_0, end_mask = var_4310_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4310_cast_fp16")]; tensor var_4314_begin_0 = const()[name = tensor("op_4314_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_4314_end_0 = const()[name = tensor("op_4314_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_4314_end_mask_0 = const()[name = tensor("op_4314_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4314_cast_fp16 = slice_by_index(begin = var_4314_begin_0, end = var_4314_end_0, end_mask = var_4314_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4314_cast_fp16")]; tensor var_4318_begin_0 = const()[name = tensor("op_4318_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_4318_end_0 = const()[name = tensor("op_4318_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_4318_end_mask_0 = const()[name = tensor("op_4318_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4318_cast_fp16 = slice_by_index(begin = var_4318_begin_0, end = var_4318_end_0, end_mask = var_4318_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4318_cast_fp16")]; tensor var_4322_begin_0 = const()[name = tensor("op_4322_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_4322_end_0 = const()[name = tensor("op_4322_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_4322_end_mask_0 = const()[name = tensor("op_4322_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4322_cast_fp16 = slice_by_index(begin = var_4322_begin_0, end = var_4322_end_0, end_mask = var_4322_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4322_cast_fp16")]; tensor var_4326_begin_0 = const()[name = tensor("op_4326_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_4326_end_0 = const()[name = tensor("op_4326_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_4326_end_mask_0 = const()[name = tensor("op_4326_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4326_cast_fp16 = slice_by_index(begin = var_4326_begin_0, end = var_4326_end_0, end_mask = var_4326_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4326_cast_fp16")]; tensor var_4330_begin_0 = const()[name = tensor("op_4330_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_4330_end_0 = const()[name = tensor("op_4330_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_4330_end_mask_0 = const()[name = tensor("op_4330_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4330_cast_fp16 = slice_by_index(begin = var_4330_begin_0, end = var_4330_end_0, end_mask = var_4330_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4330_cast_fp16")]; tensor var_4334_begin_0 = const()[name = tensor("op_4334_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_4334_end_0 = const()[name = tensor("op_4334_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_4334_end_mask_0 = const()[name = tensor("op_4334_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4334_cast_fp16 = slice_by_index(begin = var_4334_begin_0, end = var_4334_end_0, end_mask = var_4334_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4334_cast_fp16")]; tensor var_4338_begin_0 = const()[name = tensor("op_4338_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_4338_end_0 = const()[name = tensor("op_4338_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_4338_end_mask_0 = const()[name = tensor("op_4338_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_4338_cast_fp16 = slice_by_index(begin = var_4338_begin_0, end = var_4338_end_0, end_mask = var_4338_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4338_cast_fp16")]; tensor _SplitHeadsQ__mh_w_321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_321_equation_0, values = (var_4184_cast_fp16, var_3626_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_323_equation_0, values = (var_4184_cast_fp16, var_3633_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_325_equation_0, values = (var_4184_cast_fp16, var_3640_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_327_equation_0, values = (var_4184_cast_fp16, var_3647_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_329_equation_0, values = (var_4188_cast_fp16, var_3654_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_331_equation_0, values = (var_4188_cast_fp16, var_3661_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_333_equation_0, values = (var_4188_cast_fp16, var_3668_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_335_equation_0, values = (var_4188_cast_fp16, var_3675_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_337_equation_0, values = (var_4192_cast_fp16, var_3682_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_339_equation_0, values = (var_4192_cast_fp16, var_3689_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_341_equation_0, values = (var_4192_cast_fp16, var_3696_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_343_equation_0, values = (var_4192_cast_fp16, var_3703_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_345_equation_0, values = (var_4196_cast_fp16, var_3710_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_347_equation_0, values = (var_4196_cast_fp16, var_3717_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_349_equation_0, values = (var_4196_cast_fp16, var_3724_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_351_equation_0, values = (var_4196_cast_fp16, var_3731_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_353_equation_0, values = (var_4200_cast_fp16, var_3738_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_355_equation_0, values = (var_4200_cast_fp16, var_3745_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_357_equation_0, values = (var_4200_cast_fp16, var_3752_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_359_equation_0, values = (var_4200_cast_fp16, var_3759_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_359_cast_fp16")]; tensor _SplitHeadsQ__mh_w_361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_361_equation_0, values = (var_4204_cast_fp16, var_3766_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_363_equation_0, values = (var_4204_cast_fp16, var_3773_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_365_equation_0, values = (var_4204_cast_fp16, var_3780_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_367_equation_0, values = (var_4204_cast_fp16, var_3787_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_369_equation_0, values = (var_4208_cast_fp16, var_3794_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_371_equation_0, values = (var_4208_cast_fp16, var_3801_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_373_equation_0, values = (var_4208_cast_fp16, var_3808_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_375_equation_0, values = (var_4208_cast_fp16, var_3815_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_377_equation_0, values = (var_4212_cast_fp16, var_3822_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_379_equation_0, values = (var_4212_cast_fp16, var_3829_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_381_equation_0, values = (var_4212_cast_fp16, var_3836_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_383_equation_0, values = (var_4212_cast_fp16, var_3843_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_383_cast_fp16")]; tensor _SplitHeadsQ__mh_w_385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_385_equation_0, values = (var_4216_cast_fp16, var_3850_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_387_equation_0, values = (var_4216_cast_fp16, var_3857_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_389_equation_0, values = (var_4216_cast_fp16, var_3864_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_391_equation_0, values = (var_4216_cast_fp16, var_3871_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_393_equation_0, values = (var_4220_cast_fp16, var_3878_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_395_equation_0, values = (var_4220_cast_fp16, var_3885_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_397_equation_0, values = (var_4220_cast_fp16, var_3892_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_399_equation_0, values = (var_4220_cast_fp16, var_3899_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_399_cast_fp16")]; tensor _SplitHeadsQ__mh_w_401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_401_equation_0, values = (var_4224_cast_fp16, var_3906_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_403_equation_0, values = (var_4224_cast_fp16, var_3913_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_405_equation_0, values = (var_4224_cast_fp16, var_3920_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_407_equation_0, values = (var_4224_cast_fp16, var_3927_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_409_equation_0, values = (var_4228_cast_fp16, var_3934_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_411_equation_0, values = (var_4228_cast_fp16, var_3941_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_413_equation_0, values = (var_4228_cast_fp16, var_3948_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_415_equation_0, values = (var_4228_cast_fp16, var_3955_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_417_equation_0, values = (var_4232_cast_fp16, var_3962_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_419_equation_0, values = (var_4232_cast_fp16, var_3969_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_421_equation_0, values = (var_4232_cast_fp16, var_3976_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_423_equation_0, values = (var_4232_cast_fp16, var_3983_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_425_equation_0, values = (var_4236_cast_fp16, var_3990_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_427_equation_0, values = (var_4236_cast_fp16, var_3997_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_429_equation_0, values = (var_4236_cast_fp16, var_4004_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_431_equation_0, values = (var_4236_cast_fp16, var_4011_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_431_cast_fp16")]; tensor _SplitHeadsQ__mh_w_433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_433_equation_0, values = (var_4240_cast_fp16, var_4018_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_435_equation_0, values = (var_4240_cast_fp16, var_4025_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_437_equation_0, values = (var_4240_cast_fp16, var_4032_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_439_equation_0, values = (var_4240_cast_fp16, var_4039_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_439_cast_fp16")]; tensor _SplitHeadsQ__mh_w_441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_441_equation_0, values = (var_4244_cast_fp16, var_4046_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_443_equation_0, values = (var_4244_cast_fp16, var_4053_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_445_equation_0, values = (var_4244_cast_fp16, var_4060_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_447_equation_0, values = (var_4244_cast_fp16, var_4067_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_449_equation_0, values = (var_4248_cast_fp16, var_4074_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_451_equation_0, values = (var_4248_cast_fp16, var_4081_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_453_equation_0, values = (var_4248_cast_fp16, var_4088_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_455_equation_0, values = (var_4248_cast_fp16, var_4095_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_457_equation_0, values = (var_4252_cast_fp16, var_4102_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_459_equation_0, values = (var_4252_cast_fp16, var_4109_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_461_equation_0, values = (var_4252_cast_fp16, var_4116_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_463_equation_0, values = (var_4252_cast_fp16, var_4123_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_465_equation_0, values = (var_4256_cast_fp16, var_4130_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_467_equation_0, values = (var_4256_cast_fp16, var_4137_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_469_equation_0, values = (var_4256_cast_fp16, var_4144_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_471_equation_0, values = (var_4256_cast_fp16, var_4151_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_473_equation_0, values = (var_4260_cast_fp16, var_4158_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_475_equation_0, values = (var_4260_cast_fp16, var_4165_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_477_equation_0, values = (var_4260_cast_fp16, var_4172_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_479_equation_0, values = (var_4260_cast_fp16, var_4179_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_479_cast_fp16")]; tensor var_4501_to_fp16 = const()[name = tensor("op_4501_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_321_cast_fp16, y = var_4501_to_fp16)[name = tensor("aw_chunk_321_cast_fp16")]; tensor var_4503_to_fp16 = const()[name = tensor("op_4503_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_323_cast_fp16, y = var_4503_to_fp16)[name = tensor("aw_chunk_323_cast_fp16")]; tensor var_4505_to_fp16 = const()[name = tensor("op_4505_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_325_cast_fp16, y = var_4505_to_fp16)[name = tensor("aw_chunk_325_cast_fp16")]; tensor var_4507_to_fp16 = const()[name = tensor("op_4507_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_327_cast_fp16, y = var_4507_to_fp16)[name = tensor("aw_chunk_327_cast_fp16")]; tensor var_4509_to_fp16 = const()[name = tensor("op_4509_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_329_cast_fp16, y = var_4509_to_fp16)[name = tensor("aw_chunk_329_cast_fp16")]; tensor var_4511_to_fp16 = const()[name = tensor("op_4511_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_331_cast_fp16, y = var_4511_to_fp16)[name = tensor("aw_chunk_331_cast_fp16")]; tensor var_4513_to_fp16 = const()[name = tensor("op_4513_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_333_cast_fp16, y = var_4513_to_fp16)[name = tensor("aw_chunk_333_cast_fp16")]; tensor var_4515_to_fp16 = const()[name = tensor("op_4515_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_335_cast_fp16, y = var_4515_to_fp16)[name = tensor("aw_chunk_335_cast_fp16")]; tensor var_4517_to_fp16 = const()[name = tensor("op_4517_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_337_cast_fp16, y = var_4517_to_fp16)[name = tensor("aw_chunk_337_cast_fp16")]; tensor var_4519_to_fp16 = const()[name = tensor("op_4519_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_339_cast_fp16, y = var_4519_to_fp16)[name = tensor("aw_chunk_339_cast_fp16")]; tensor var_4521_to_fp16 = const()[name = tensor("op_4521_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_341_cast_fp16, y = var_4521_to_fp16)[name = tensor("aw_chunk_341_cast_fp16")]; tensor var_4523_to_fp16 = const()[name = tensor("op_4523_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_343_cast_fp16, y = var_4523_to_fp16)[name = tensor("aw_chunk_343_cast_fp16")]; tensor var_4525_to_fp16 = const()[name = tensor("op_4525_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_345_cast_fp16, y = var_4525_to_fp16)[name = tensor("aw_chunk_345_cast_fp16")]; tensor var_4527_to_fp16 = const()[name = tensor("op_4527_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_347_cast_fp16, y = var_4527_to_fp16)[name = tensor("aw_chunk_347_cast_fp16")]; tensor var_4529_to_fp16 = const()[name = tensor("op_4529_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_349_cast_fp16, y = var_4529_to_fp16)[name = tensor("aw_chunk_349_cast_fp16")]; tensor var_4531_to_fp16 = const()[name = tensor("op_4531_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_351_cast_fp16, y = var_4531_to_fp16)[name = tensor("aw_chunk_351_cast_fp16")]; tensor var_4533_to_fp16 = const()[name = tensor("op_4533_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_353_cast_fp16, y = var_4533_to_fp16)[name = tensor("aw_chunk_353_cast_fp16")]; tensor var_4535_to_fp16 = const()[name = tensor("op_4535_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_355_cast_fp16, y = var_4535_to_fp16)[name = tensor("aw_chunk_355_cast_fp16")]; tensor var_4537_to_fp16 = const()[name = tensor("op_4537_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_357_cast_fp16, y = var_4537_to_fp16)[name = tensor("aw_chunk_357_cast_fp16")]; tensor var_4539_to_fp16 = const()[name = tensor("op_4539_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_359_cast_fp16, y = var_4539_to_fp16)[name = tensor("aw_chunk_359_cast_fp16")]; tensor var_4541_to_fp16 = const()[name = tensor("op_4541_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_361_cast_fp16, y = var_4541_to_fp16)[name = tensor("aw_chunk_361_cast_fp16")]; tensor var_4543_to_fp16 = const()[name = tensor("op_4543_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_363_cast_fp16, y = var_4543_to_fp16)[name = tensor("aw_chunk_363_cast_fp16")]; tensor var_4545_to_fp16 = const()[name = tensor("op_4545_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_365_cast_fp16, y = var_4545_to_fp16)[name = tensor("aw_chunk_365_cast_fp16")]; tensor var_4547_to_fp16 = const()[name = tensor("op_4547_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_367_cast_fp16, y = var_4547_to_fp16)[name = tensor("aw_chunk_367_cast_fp16")]; tensor var_4549_to_fp16 = const()[name = tensor("op_4549_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_369_cast_fp16, y = var_4549_to_fp16)[name = tensor("aw_chunk_369_cast_fp16")]; tensor var_4551_to_fp16 = const()[name = tensor("op_4551_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_371_cast_fp16, y = var_4551_to_fp16)[name = tensor("aw_chunk_371_cast_fp16")]; tensor var_4553_to_fp16 = const()[name = tensor("op_4553_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_373_cast_fp16, y = var_4553_to_fp16)[name = tensor("aw_chunk_373_cast_fp16")]; tensor var_4555_to_fp16 = const()[name = tensor("op_4555_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_375_cast_fp16, y = var_4555_to_fp16)[name = tensor("aw_chunk_375_cast_fp16")]; tensor var_4557_to_fp16 = const()[name = tensor("op_4557_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_377_cast_fp16, y = var_4557_to_fp16)[name = tensor("aw_chunk_377_cast_fp16")]; tensor var_4559_to_fp16 = const()[name = tensor("op_4559_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_379_cast_fp16, y = var_4559_to_fp16)[name = tensor("aw_chunk_379_cast_fp16")]; tensor var_4561_to_fp16 = const()[name = tensor("op_4561_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_381_cast_fp16, y = var_4561_to_fp16)[name = tensor("aw_chunk_381_cast_fp16")]; tensor var_4563_to_fp16 = const()[name = tensor("op_4563_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_383_cast_fp16, y = var_4563_to_fp16)[name = tensor("aw_chunk_383_cast_fp16")]; tensor var_4565_to_fp16 = const()[name = tensor("op_4565_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_385_cast_fp16, y = var_4565_to_fp16)[name = tensor("aw_chunk_385_cast_fp16")]; tensor var_4567_to_fp16 = const()[name = tensor("op_4567_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_387_cast_fp16, y = var_4567_to_fp16)[name = tensor("aw_chunk_387_cast_fp16")]; tensor var_4569_to_fp16 = const()[name = tensor("op_4569_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_389_cast_fp16, y = var_4569_to_fp16)[name = tensor("aw_chunk_389_cast_fp16")]; tensor var_4571_to_fp16 = const()[name = tensor("op_4571_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_391_cast_fp16, y = var_4571_to_fp16)[name = tensor("aw_chunk_391_cast_fp16")]; tensor var_4573_to_fp16 = const()[name = tensor("op_4573_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_393_cast_fp16, y = var_4573_to_fp16)[name = tensor("aw_chunk_393_cast_fp16")]; tensor var_4575_to_fp16 = const()[name = tensor("op_4575_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_395_cast_fp16, y = var_4575_to_fp16)[name = tensor("aw_chunk_395_cast_fp16")]; tensor var_4577_to_fp16 = const()[name = tensor("op_4577_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_397_cast_fp16, y = var_4577_to_fp16)[name = tensor("aw_chunk_397_cast_fp16")]; tensor var_4579_to_fp16 = const()[name = tensor("op_4579_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_399_cast_fp16, y = var_4579_to_fp16)[name = tensor("aw_chunk_399_cast_fp16")]; tensor var_4581_to_fp16 = const()[name = tensor("op_4581_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_401_cast_fp16, y = var_4581_to_fp16)[name = tensor("aw_chunk_401_cast_fp16")]; tensor var_4583_to_fp16 = const()[name = tensor("op_4583_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_403_cast_fp16, y = var_4583_to_fp16)[name = tensor("aw_chunk_403_cast_fp16")]; tensor var_4585_to_fp16 = const()[name = tensor("op_4585_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_405_cast_fp16, y = var_4585_to_fp16)[name = tensor("aw_chunk_405_cast_fp16")]; tensor var_4587_to_fp16 = const()[name = tensor("op_4587_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_407_cast_fp16, y = var_4587_to_fp16)[name = tensor("aw_chunk_407_cast_fp16")]; tensor var_4589_to_fp16 = const()[name = tensor("op_4589_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_409_cast_fp16, y = var_4589_to_fp16)[name = tensor("aw_chunk_409_cast_fp16")]; tensor var_4591_to_fp16 = const()[name = tensor("op_4591_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_411_cast_fp16, y = var_4591_to_fp16)[name = tensor("aw_chunk_411_cast_fp16")]; tensor var_4593_to_fp16 = const()[name = tensor("op_4593_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_413_cast_fp16, y = var_4593_to_fp16)[name = tensor("aw_chunk_413_cast_fp16")]; tensor var_4595_to_fp16 = const()[name = tensor("op_4595_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_415_cast_fp16, y = var_4595_to_fp16)[name = tensor("aw_chunk_415_cast_fp16")]; tensor var_4597_to_fp16 = const()[name = tensor("op_4597_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_417_cast_fp16, y = var_4597_to_fp16)[name = tensor("aw_chunk_417_cast_fp16")]; tensor var_4599_to_fp16 = const()[name = tensor("op_4599_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_419_cast_fp16, y = var_4599_to_fp16)[name = tensor("aw_chunk_419_cast_fp16")]; tensor var_4601_to_fp16 = const()[name = tensor("op_4601_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_421_cast_fp16, y = var_4601_to_fp16)[name = tensor("aw_chunk_421_cast_fp16")]; tensor var_4603_to_fp16 = const()[name = tensor("op_4603_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_423_cast_fp16, y = var_4603_to_fp16)[name = tensor("aw_chunk_423_cast_fp16")]; tensor var_4605_to_fp16 = const()[name = tensor("op_4605_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_425_cast_fp16, y = var_4605_to_fp16)[name = tensor("aw_chunk_425_cast_fp16")]; tensor var_4607_to_fp16 = const()[name = tensor("op_4607_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_427_cast_fp16, y = var_4607_to_fp16)[name = tensor("aw_chunk_427_cast_fp16")]; tensor var_4609_to_fp16 = const()[name = tensor("op_4609_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_429_cast_fp16, y = var_4609_to_fp16)[name = tensor("aw_chunk_429_cast_fp16")]; tensor var_4611_to_fp16 = const()[name = tensor("op_4611_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_431_cast_fp16, y = var_4611_to_fp16)[name = tensor("aw_chunk_431_cast_fp16")]; tensor var_4613_to_fp16 = const()[name = tensor("op_4613_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_433_cast_fp16, y = var_4613_to_fp16)[name = tensor("aw_chunk_433_cast_fp16")]; tensor var_4615_to_fp16 = const()[name = tensor("op_4615_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_435_cast_fp16, y = var_4615_to_fp16)[name = tensor("aw_chunk_435_cast_fp16")]; tensor var_4617_to_fp16 = const()[name = tensor("op_4617_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_437_cast_fp16, y = var_4617_to_fp16)[name = tensor("aw_chunk_437_cast_fp16")]; tensor var_4619_to_fp16 = const()[name = tensor("op_4619_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_439_cast_fp16, y = var_4619_to_fp16)[name = tensor("aw_chunk_439_cast_fp16")]; tensor var_4621_to_fp16 = const()[name = tensor("op_4621_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_441_cast_fp16, y = var_4621_to_fp16)[name = tensor("aw_chunk_441_cast_fp16")]; tensor var_4623_to_fp16 = const()[name = tensor("op_4623_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_443_cast_fp16, y = var_4623_to_fp16)[name = tensor("aw_chunk_443_cast_fp16")]; tensor var_4625_to_fp16 = const()[name = tensor("op_4625_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_445_cast_fp16, y = var_4625_to_fp16)[name = tensor("aw_chunk_445_cast_fp16")]; tensor var_4627_to_fp16 = const()[name = tensor("op_4627_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_447_cast_fp16, y = var_4627_to_fp16)[name = tensor("aw_chunk_447_cast_fp16")]; tensor var_4629_to_fp16 = const()[name = tensor("op_4629_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_449_cast_fp16, y = var_4629_to_fp16)[name = tensor("aw_chunk_449_cast_fp16")]; tensor var_4631_to_fp16 = const()[name = tensor("op_4631_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_451_cast_fp16, y = var_4631_to_fp16)[name = tensor("aw_chunk_451_cast_fp16")]; tensor var_4633_to_fp16 = const()[name = tensor("op_4633_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_453_cast_fp16, y = var_4633_to_fp16)[name = tensor("aw_chunk_453_cast_fp16")]; tensor var_4635_to_fp16 = const()[name = tensor("op_4635_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_455_cast_fp16, y = var_4635_to_fp16)[name = tensor("aw_chunk_455_cast_fp16")]; tensor var_4637_to_fp16 = const()[name = tensor("op_4637_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_457_cast_fp16, y = var_4637_to_fp16)[name = tensor("aw_chunk_457_cast_fp16")]; tensor var_4639_to_fp16 = const()[name = tensor("op_4639_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_459_cast_fp16, y = var_4639_to_fp16)[name = tensor("aw_chunk_459_cast_fp16")]; tensor var_4641_to_fp16 = const()[name = tensor("op_4641_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_461_cast_fp16, y = var_4641_to_fp16)[name = tensor("aw_chunk_461_cast_fp16")]; tensor var_4643_to_fp16 = const()[name = tensor("op_4643_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_463_cast_fp16, y = var_4643_to_fp16)[name = tensor("aw_chunk_463_cast_fp16")]; tensor var_4645_to_fp16 = const()[name = tensor("op_4645_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_465_cast_fp16, y = var_4645_to_fp16)[name = tensor("aw_chunk_465_cast_fp16")]; tensor var_4647_to_fp16 = const()[name = tensor("op_4647_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_467_cast_fp16, y = var_4647_to_fp16)[name = tensor("aw_chunk_467_cast_fp16")]; tensor var_4649_to_fp16 = const()[name = tensor("op_4649_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_469_cast_fp16, y = var_4649_to_fp16)[name = tensor("aw_chunk_469_cast_fp16")]; tensor var_4651_to_fp16 = const()[name = tensor("op_4651_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_471_cast_fp16, y = var_4651_to_fp16)[name = tensor("aw_chunk_471_cast_fp16")]; tensor var_4653_to_fp16 = const()[name = tensor("op_4653_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_473_cast_fp16, y = var_4653_to_fp16)[name = tensor("aw_chunk_473_cast_fp16")]; tensor var_4655_to_fp16 = const()[name = tensor("op_4655_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_475_cast_fp16, y = var_4655_to_fp16)[name = tensor("aw_chunk_475_cast_fp16")]; tensor var_4657_to_fp16 = const()[name = tensor("op_4657_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_477_cast_fp16, y = var_4657_to_fp16)[name = tensor("aw_chunk_477_cast_fp16")]; tensor var_4659_to_fp16 = const()[name = tensor("op_4659_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_479_cast_fp16, y = var_4659_to_fp16)[name = tensor("aw_chunk_479_cast_fp16")]; tensor var_4661_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_321_cast_fp16)[name = tensor("op_4661_cast_fp16")]; tensor var_4662_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_323_cast_fp16)[name = tensor("op_4662_cast_fp16")]; tensor var_4663_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_325_cast_fp16)[name = tensor("op_4663_cast_fp16")]; tensor var_4664_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_327_cast_fp16)[name = tensor("op_4664_cast_fp16")]; tensor var_4665_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_329_cast_fp16)[name = tensor("op_4665_cast_fp16")]; tensor var_4666_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_331_cast_fp16)[name = tensor("op_4666_cast_fp16")]; tensor var_4667_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_333_cast_fp16)[name = tensor("op_4667_cast_fp16")]; tensor var_4668_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_335_cast_fp16)[name = tensor("op_4668_cast_fp16")]; tensor var_4669_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_337_cast_fp16)[name = tensor("op_4669_cast_fp16")]; tensor var_4670_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_339_cast_fp16)[name = tensor("op_4670_cast_fp16")]; tensor var_4671_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_341_cast_fp16)[name = tensor("op_4671_cast_fp16")]; tensor var_4672_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_343_cast_fp16)[name = tensor("op_4672_cast_fp16")]; tensor var_4673_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_345_cast_fp16)[name = tensor("op_4673_cast_fp16")]; tensor var_4674_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_347_cast_fp16)[name = tensor("op_4674_cast_fp16")]; tensor var_4675_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_349_cast_fp16)[name = tensor("op_4675_cast_fp16")]; tensor var_4676_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_351_cast_fp16)[name = tensor("op_4676_cast_fp16")]; tensor var_4677_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_353_cast_fp16)[name = tensor("op_4677_cast_fp16")]; tensor var_4678_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_355_cast_fp16)[name = tensor("op_4678_cast_fp16")]; tensor var_4679_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_357_cast_fp16)[name = tensor("op_4679_cast_fp16")]; tensor var_4680_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_359_cast_fp16)[name = tensor("op_4680_cast_fp16")]; tensor var_4681_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_361_cast_fp16)[name = tensor("op_4681_cast_fp16")]; tensor var_4682_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_363_cast_fp16)[name = tensor("op_4682_cast_fp16")]; tensor var_4683_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_365_cast_fp16)[name = tensor("op_4683_cast_fp16")]; tensor var_4684_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_367_cast_fp16)[name = tensor("op_4684_cast_fp16")]; tensor var_4685_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_369_cast_fp16)[name = tensor("op_4685_cast_fp16")]; tensor var_4686_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_371_cast_fp16)[name = tensor("op_4686_cast_fp16")]; tensor var_4687_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_373_cast_fp16)[name = tensor("op_4687_cast_fp16")]; tensor var_4688_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_375_cast_fp16)[name = tensor("op_4688_cast_fp16")]; tensor var_4689_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_377_cast_fp16)[name = tensor("op_4689_cast_fp16")]; tensor var_4690_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_379_cast_fp16)[name = tensor("op_4690_cast_fp16")]; tensor var_4691_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_381_cast_fp16)[name = tensor("op_4691_cast_fp16")]; tensor var_4692_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_383_cast_fp16)[name = tensor("op_4692_cast_fp16")]; tensor var_4693_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_385_cast_fp16)[name = tensor("op_4693_cast_fp16")]; tensor var_4694_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_387_cast_fp16)[name = tensor("op_4694_cast_fp16")]; tensor var_4695_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_389_cast_fp16)[name = tensor("op_4695_cast_fp16")]; tensor var_4696_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_391_cast_fp16)[name = tensor("op_4696_cast_fp16")]; tensor var_4697_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_393_cast_fp16)[name = tensor("op_4697_cast_fp16")]; tensor var_4698_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_395_cast_fp16)[name = tensor("op_4698_cast_fp16")]; tensor var_4699_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_397_cast_fp16)[name = tensor("op_4699_cast_fp16")]; tensor var_4700_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_399_cast_fp16)[name = tensor("op_4700_cast_fp16")]; tensor var_4701_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_401_cast_fp16)[name = tensor("op_4701_cast_fp16")]; tensor var_4702_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_403_cast_fp16)[name = tensor("op_4702_cast_fp16")]; tensor var_4703_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_405_cast_fp16)[name = tensor("op_4703_cast_fp16")]; tensor var_4704_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_407_cast_fp16)[name = tensor("op_4704_cast_fp16")]; tensor var_4705_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_409_cast_fp16)[name = tensor("op_4705_cast_fp16")]; tensor var_4706_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_411_cast_fp16)[name = tensor("op_4706_cast_fp16")]; tensor var_4707_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_413_cast_fp16)[name = tensor("op_4707_cast_fp16")]; tensor var_4708_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_415_cast_fp16)[name = tensor("op_4708_cast_fp16")]; tensor var_4709_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_417_cast_fp16)[name = tensor("op_4709_cast_fp16")]; tensor var_4710_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_419_cast_fp16)[name = tensor("op_4710_cast_fp16")]; tensor var_4711_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_421_cast_fp16)[name = tensor("op_4711_cast_fp16")]; tensor var_4712_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_423_cast_fp16)[name = tensor("op_4712_cast_fp16")]; tensor var_4713_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_425_cast_fp16)[name = tensor("op_4713_cast_fp16")]; tensor var_4714_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_427_cast_fp16)[name = tensor("op_4714_cast_fp16")]; tensor var_4715_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_429_cast_fp16)[name = tensor("op_4715_cast_fp16")]; tensor var_4716_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_431_cast_fp16)[name = tensor("op_4716_cast_fp16")]; tensor var_4717_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_433_cast_fp16)[name = tensor("op_4717_cast_fp16")]; tensor var_4718_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_435_cast_fp16)[name = tensor("op_4718_cast_fp16")]; tensor var_4719_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_437_cast_fp16)[name = tensor("op_4719_cast_fp16")]; tensor var_4720_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_439_cast_fp16)[name = tensor("op_4720_cast_fp16")]; tensor var_4721_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_441_cast_fp16)[name = tensor("op_4721_cast_fp16")]; tensor var_4722_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_443_cast_fp16)[name = tensor("op_4722_cast_fp16")]; tensor var_4723_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_445_cast_fp16)[name = tensor("op_4723_cast_fp16")]; tensor var_4724_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_447_cast_fp16)[name = tensor("op_4724_cast_fp16")]; tensor var_4725_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_449_cast_fp16)[name = tensor("op_4725_cast_fp16")]; tensor var_4726_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_451_cast_fp16)[name = tensor("op_4726_cast_fp16")]; tensor var_4727_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_453_cast_fp16)[name = tensor("op_4727_cast_fp16")]; tensor var_4728_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_455_cast_fp16)[name = tensor("op_4728_cast_fp16")]; tensor var_4729_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_457_cast_fp16)[name = tensor("op_4729_cast_fp16")]; tensor var_4730_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_459_cast_fp16)[name = tensor("op_4730_cast_fp16")]; tensor var_4731_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_461_cast_fp16)[name = tensor("op_4731_cast_fp16")]; tensor var_4732_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_463_cast_fp16)[name = tensor("op_4732_cast_fp16")]; tensor var_4733_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_465_cast_fp16)[name = tensor("op_4733_cast_fp16")]; tensor var_4734_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_467_cast_fp16)[name = tensor("op_4734_cast_fp16")]; tensor var_4735_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_469_cast_fp16)[name = tensor("op_4735_cast_fp16")]; tensor var_4736_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_471_cast_fp16)[name = tensor("op_4736_cast_fp16")]; tensor var_4737_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_473_cast_fp16)[name = tensor("op_4737_cast_fp16")]; tensor var_4738_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_475_cast_fp16)[name = tensor("op_4738_cast_fp16")]; tensor var_4739_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_477_cast_fp16)[name = tensor("op_4739_cast_fp16")]; tensor var_4740_cast_fp16 = softmax(axis = var_3459, x = aw_chunk_479_cast_fp16)[name = tensor("op_4740_cast_fp16")]; tensor var_4742_equation_0 = const()[name = tensor("op_4742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4742_cast_fp16 = einsum(equation = var_4742_equation_0, values = (var_4262_cast_fp16, var_4661_cast_fp16))[name = tensor("op_4742_cast_fp16")]; tensor var_4744_equation_0 = const()[name = tensor("op_4744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4744_cast_fp16 = einsum(equation = var_4744_equation_0, values = (var_4262_cast_fp16, var_4662_cast_fp16))[name = tensor("op_4744_cast_fp16")]; tensor var_4746_equation_0 = const()[name = tensor("op_4746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4746_cast_fp16 = einsum(equation = var_4746_equation_0, values = (var_4262_cast_fp16, var_4663_cast_fp16))[name = tensor("op_4746_cast_fp16")]; tensor var_4748_equation_0 = const()[name = tensor("op_4748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4748_cast_fp16 = einsum(equation = var_4748_equation_0, values = (var_4262_cast_fp16, var_4664_cast_fp16))[name = tensor("op_4748_cast_fp16")]; tensor var_4750_equation_0 = const()[name = tensor("op_4750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4750_cast_fp16 = einsum(equation = var_4750_equation_0, values = (var_4266_cast_fp16, var_4665_cast_fp16))[name = tensor("op_4750_cast_fp16")]; tensor var_4752_equation_0 = const()[name = tensor("op_4752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4752_cast_fp16 = einsum(equation = var_4752_equation_0, values = (var_4266_cast_fp16, var_4666_cast_fp16))[name = tensor("op_4752_cast_fp16")]; tensor var_4754_equation_0 = const()[name = tensor("op_4754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4754_cast_fp16 = einsum(equation = var_4754_equation_0, values = (var_4266_cast_fp16, var_4667_cast_fp16))[name = tensor("op_4754_cast_fp16")]; tensor var_4756_equation_0 = const()[name = tensor("op_4756_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4756_cast_fp16 = einsum(equation = var_4756_equation_0, values = (var_4266_cast_fp16, var_4668_cast_fp16))[name = tensor("op_4756_cast_fp16")]; tensor var_4758_equation_0 = const()[name = tensor("op_4758_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4758_cast_fp16 = einsum(equation = var_4758_equation_0, values = (var_4270_cast_fp16, var_4669_cast_fp16))[name = tensor("op_4758_cast_fp16")]; tensor var_4760_equation_0 = const()[name = tensor("op_4760_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4760_cast_fp16 = einsum(equation = var_4760_equation_0, values = (var_4270_cast_fp16, var_4670_cast_fp16))[name = tensor("op_4760_cast_fp16")]; tensor var_4762_equation_0 = const()[name = tensor("op_4762_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4762_cast_fp16 = einsum(equation = var_4762_equation_0, values = (var_4270_cast_fp16, var_4671_cast_fp16))[name = tensor("op_4762_cast_fp16")]; tensor var_4764_equation_0 = const()[name = tensor("op_4764_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4764_cast_fp16 = einsum(equation = var_4764_equation_0, values = (var_4270_cast_fp16, var_4672_cast_fp16))[name = tensor("op_4764_cast_fp16")]; tensor var_4766_equation_0 = const()[name = tensor("op_4766_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4766_cast_fp16 = einsum(equation = var_4766_equation_0, values = (var_4274_cast_fp16, var_4673_cast_fp16))[name = tensor("op_4766_cast_fp16")]; tensor var_4768_equation_0 = const()[name = tensor("op_4768_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4768_cast_fp16 = einsum(equation = var_4768_equation_0, values = (var_4274_cast_fp16, var_4674_cast_fp16))[name = tensor("op_4768_cast_fp16")]; tensor var_4770_equation_0 = const()[name = tensor("op_4770_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4770_cast_fp16 = einsum(equation = var_4770_equation_0, values = (var_4274_cast_fp16, var_4675_cast_fp16))[name = tensor("op_4770_cast_fp16")]; tensor var_4772_equation_0 = const()[name = tensor("op_4772_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4772_cast_fp16 = einsum(equation = var_4772_equation_0, values = (var_4274_cast_fp16, var_4676_cast_fp16))[name = tensor("op_4772_cast_fp16")]; tensor var_4774_equation_0 = const()[name = tensor("op_4774_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4774_cast_fp16 = einsum(equation = var_4774_equation_0, values = (var_4278_cast_fp16, var_4677_cast_fp16))[name = tensor("op_4774_cast_fp16")]; tensor var_4776_equation_0 = const()[name = tensor("op_4776_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4776_cast_fp16 = einsum(equation = var_4776_equation_0, values = (var_4278_cast_fp16, var_4678_cast_fp16))[name = tensor("op_4776_cast_fp16")]; tensor var_4778_equation_0 = const()[name = tensor("op_4778_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4778_cast_fp16 = einsum(equation = var_4778_equation_0, values = (var_4278_cast_fp16, var_4679_cast_fp16))[name = tensor("op_4778_cast_fp16")]; tensor var_4780_equation_0 = const()[name = tensor("op_4780_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4780_cast_fp16 = einsum(equation = var_4780_equation_0, values = (var_4278_cast_fp16, var_4680_cast_fp16))[name = tensor("op_4780_cast_fp16")]; tensor var_4782_equation_0 = const()[name = tensor("op_4782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4782_cast_fp16 = einsum(equation = var_4782_equation_0, values = (var_4282_cast_fp16, var_4681_cast_fp16))[name = tensor("op_4782_cast_fp16")]; tensor var_4784_equation_0 = const()[name = tensor("op_4784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4784_cast_fp16 = einsum(equation = var_4784_equation_0, values = (var_4282_cast_fp16, var_4682_cast_fp16))[name = tensor("op_4784_cast_fp16")]; tensor var_4786_equation_0 = const()[name = tensor("op_4786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4786_cast_fp16 = einsum(equation = var_4786_equation_0, values = (var_4282_cast_fp16, var_4683_cast_fp16))[name = tensor("op_4786_cast_fp16")]; tensor var_4788_equation_0 = const()[name = tensor("op_4788_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4788_cast_fp16 = einsum(equation = var_4788_equation_0, values = (var_4282_cast_fp16, var_4684_cast_fp16))[name = tensor("op_4788_cast_fp16")]; tensor var_4790_equation_0 = const()[name = tensor("op_4790_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4790_cast_fp16 = einsum(equation = var_4790_equation_0, values = (var_4286_cast_fp16, var_4685_cast_fp16))[name = tensor("op_4790_cast_fp16")]; tensor var_4792_equation_0 = const()[name = tensor("op_4792_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4792_cast_fp16 = einsum(equation = var_4792_equation_0, values = (var_4286_cast_fp16, var_4686_cast_fp16))[name = tensor("op_4792_cast_fp16")]; tensor var_4794_equation_0 = const()[name = tensor("op_4794_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4794_cast_fp16 = einsum(equation = var_4794_equation_0, values = (var_4286_cast_fp16, var_4687_cast_fp16))[name = tensor("op_4794_cast_fp16")]; tensor var_4796_equation_0 = const()[name = tensor("op_4796_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4796_cast_fp16 = einsum(equation = var_4796_equation_0, values = (var_4286_cast_fp16, var_4688_cast_fp16))[name = tensor("op_4796_cast_fp16")]; tensor var_4798_equation_0 = const()[name = tensor("op_4798_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4798_cast_fp16 = einsum(equation = var_4798_equation_0, values = (var_4290_cast_fp16, var_4689_cast_fp16))[name = tensor("op_4798_cast_fp16")]; tensor var_4800_equation_0 = const()[name = tensor("op_4800_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4800_cast_fp16 = einsum(equation = var_4800_equation_0, values = (var_4290_cast_fp16, var_4690_cast_fp16))[name = tensor("op_4800_cast_fp16")]; tensor var_4802_equation_0 = const()[name = tensor("op_4802_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4802_cast_fp16 = einsum(equation = var_4802_equation_0, values = (var_4290_cast_fp16, var_4691_cast_fp16))[name = tensor("op_4802_cast_fp16")]; tensor var_4804_equation_0 = const()[name = tensor("op_4804_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4804_cast_fp16 = einsum(equation = var_4804_equation_0, values = (var_4290_cast_fp16, var_4692_cast_fp16))[name = tensor("op_4804_cast_fp16")]; tensor var_4806_equation_0 = const()[name = tensor("op_4806_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4806_cast_fp16 = einsum(equation = var_4806_equation_0, values = (var_4294_cast_fp16, var_4693_cast_fp16))[name = tensor("op_4806_cast_fp16")]; tensor var_4808_equation_0 = const()[name = tensor("op_4808_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4808_cast_fp16 = einsum(equation = var_4808_equation_0, values = (var_4294_cast_fp16, var_4694_cast_fp16))[name = tensor("op_4808_cast_fp16")]; tensor var_4810_equation_0 = const()[name = tensor("op_4810_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4810_cast_fp16 = einsum(equation = var_4810_equation_0, values = (var_4294_cast_fp16, var_4695_cast_fp16))[name = tensor("op_4810_cast_fp16")]; tensor var_4812_equation_0 = const()[name = tensor("op_4812_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4812_cast_fp16 = einsum(equation = var_4812_equation_0, values = (var_4294_cast_fp16, var_4696_cast_fp16))[name = tensor("op_4812_cast_fp16")]; tensor var_4814_equation_0 = const()[name = tensor("op_4814_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4814_cast_fp16 = einsum(equation = var_4814_equation_0, values = (var_4298_cast_fp16, var_4697_cast_fp16))[name = tensor("op_4814_cast_fp16")]; tensor var_4816_equation_0 = const()[name = tensor("op_4816_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4816_cast_fp16 = einsum(equation = var_4816_equation_0, values = (var_4298_cast_fp16, var_4698_cast_fp16))[name = tensor("op_4816_cast_fp16")]; tensor var_4818_equation_0 = const()[name = tensor("op_4818_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4818_cast_fp16 = einsum(equation = var_4818_equation_0, values = (var_4298_cast_fp16, var_4699_cast_fp16))[name = tensor("op_4818_cast_fp16")]; tensor var_4820_equation_0 = const()[name = tensor("op_4820_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4820_cast_fp16 = einsum(equation = var_4820_equation_0, values = (var_4298_cast_fp16, var_4700_cast_fp16))[name = tensor("op_4820_cast_fp16")]; tensor var_4822_equation_0 = const()[name = tensor("op_4822_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4822_cast_fp16 = einsum(equation = var_4822_equation_0, values = (var_4302_cast_fp16, var_4701_cast_fp16))[name = tensor("op_4822_cast_fp16")]; tensor var_4824_equation_0 = const()[name = tensor("op_4824_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4824_cast_fp16 = einsum(equation = var_4824_equation_0, values = (var_4302_cast_fp16, var_4702_cast_fp16))[name = tensor("op_4824_cast_fp16")]; tensor var_4826_equation_0 = const()[name = tensor("op_4826_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4826_cast_fp16 = einsum(equation = var_4826_equation_0, values = (var_4302_cast_fp16, var_4703_cast_fp16))[name = tensor("op_4826_cast_fp16")]; tensor var_4828_equation_0 = const()[name = tensor("op_4828_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4828_cast_fp16 = einsum(equation = var_4828_equation_0, values = (var_4302_cast_fp16, var_4704_cast_fp16))[name = tensor("op_4828_cast_fp16")]; tensor var_4830_equation_0 = const()[name = tensor("op_4830_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4830_cast_fp16 = einsum(equation = var_4830_equation_0, values = (var_4306_cast_fp16, var_4705_cast_fp16))[name = tensor("op_4830_cast_fp16")]; tensor var_4832_equation_0 = const()[name = tensor("op_4832_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4832_cast_fp16 = einsum(equation = var_4832_equation_0, values = (var_4306_cast_fp16, var_4706_cast_fp16))[name = tensor("op_4832_cast_fp16")]; tensor var_4834_equation_0 = const()[name = tensor("op_4834_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4834_cast_fp16 = einsum(equation = var_4834_equation_0, values = (var_4306_cast_fp16, var_4707_cast_fp16))[name = tensor("op_4834_cast_fp16")]; tensor var_4836_equation_0 = const()[name = tensor("op_4836_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4836_cast_fp16 = einsum(equation = var_4836_equation_0, values = (var_4306_cast_fp16, var_4708_cast_fp16))[name = tensor("op_4836_cast_fp16")]; tensor var_4838_equation_0 = const()[name = tensor("op_4838_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4838_cast_fp16 = einsum(equation = var_4838_equation_0, values = (var_4310_cast_fp16, var_4709_cast_fp16))[name = tensor("op_4838_cast_fp16")]; tensor var_4840_equation_0 = const()[name = tensor("op_4840_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4840_cast_fp16 = einsum(equation = var_4840_equation_0, values = (var_4310_cast_fp16, var_4710_cast_fp16))[name = tensor("op_4840_cast_fp16")]; tensor var_4842_equation_0 = const()[name = tensor("op_4842_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4842_cast_fp16 = einsum(equation = var_4842_equation_0, values = (var_4310_cast_fp16, var_4711_cast_fp16))[name = tensor("op_4842_cast_fp16")]; tensor var_4844_equation_0 = const()[name = tensor("op_4844_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4844_cast_fp16 = einsum(equation = var_4844_equation_0, values = (var_4310_cast_fp16, var_4712_cast_fp16))[name = tensor("op_4844_cast_fp16")]; tensor var_4846_equation_0 = const()[name = tensor("op_4846_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4846_cast_fp16 = einsum(equation = var_4846_equation_0, values = (var_4314_cast_fp16, var_4713_cast_fp16))[name = tensor("op_4846_cast_fp16")]; tensor var_4848_equation_0 = const()[name = tensor("op_4848_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4848_cast_fp16 = einsum(equation = var_4848_equation_0, values = (var_4314_cast_fp16, var_4714_cast_fp16))[name = tensor("op_4848_cast_fp16")]; tensor var_4850_equation_0 = const()[name = tensor("op_4850_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4850_cast_fp16 = einsum(equation = var_4850_equation_0, values = (var_4314_cast_fp16, var_4715_cast_fp16))[name = tensor("op_4850_cast_fp16")]; tensor var_4852_equation_0 = const()[name = tensor("op_4852_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4852_cast_fp16 = einsum(equation = var_4852_equation_0, values = (var_4314_cast_fp16, var_4716_cast_fp16))[name = tensor("op_4852_cast_fp16")]; tensor var_4854_equation_0 = const()[name = tensor("op_4854_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4854_cast_fp16 = einsum(equation = var_4854_equation_0, values = (var_4318_cast_fp16, var_4717_cast_fp16))[name = tensor("op_4854_cast_fp16")]; tensor var_4856_equation_0 = const()[name = tensor("op_4856_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4856_cast_fp16 = einsum(equation = var_4856_equation_0, values = (var_4318_cast_fp16, var_4718_cast_fp16))[name = tensor("op_4856_cast_fp16")]; tensor var_4858_equation_0 = const()[name = tensor("op_4858_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4858_cast_fp16 = einsum(equation = var_4858_equation_0, values = (var_4318_cast_fp16, var_4719_cast_fp16))[name = tensor("op_4858_cast_fp16")]; tensor var_4860_equation_0 = const()[name = tensor("op_4860_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4860_cast_fp16 = einsum(equation = var_4860_equation_0, values = (var_4318_cast_fp16, var_4720_cast_fp16))[name = tensor("op_4860_cast_fp16")]; tensor var_4862_equation_0 = const()[name = tensor("op_4862_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4862_cast_fp16 = einsum(equation = var_4862_equation_0, values = (var_4322_cast_fp16, var_4721_cast_fp16))[name = tensor("op_4862_cast_fp16")]; tensor var_4864_equation_0 = const()[name = tensor("op_4864_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4864_cast_fp16 = einsum(equation = var_4864_equation_0, values = (var_4322_cast_fp16, var_4722_cast_fp16))[name = tensor("op_4864_cast_fp16")]; tensor var_4866_equation_0 = const()[name = tensor("op_4866_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4866_cast_fp16 = einsum(equation = var_4866_equation_0, values = (var_4322_cast_fp16, var_4723_cast_fp16))[name = tensor("op_4866_cast_fp16")]; tensor var_4868_equation_0 = const()[name = tensor("op_4868_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4868_cast_fp16 = einsum(equation = var_4868_equation_0, values = (var_4322_cast_fp16, var_4724_cast_fp16))[name = tensor("op_4868_cast_fp16")]; tensor var_4870_equation_0 = const()[name = tensor("op_4870_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4870_cast_fp16 = einsum(equation = var_4870_equation_0, values = (var_4326_cast_fp16, var_4725_cast_fp16))[name = tensor("op_4870_cast_fp16")]; tensor var_4872_equation_0 = const()[name = tensor("op_4872_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4872_cast_fp16 = einsum(equation = var_4872_equation_0, values = (var_4326_cast_fp16, var_4726_cast_fp16))[name = tensor("op_4872_cast_fp16")]; tensor var_4874_equation_0 = const()[name = tensor("op_4874_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4874_cast_fp16 = einsum(equation = var_4874_equation_0, values = (var_4326_cast_fp16, var_4727_cast_fp16))[name = tensor("op_4874_cast_fp16")]; tensor var_4876_equation_0 = const()[name = tensor("op_4876_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4876_cast_fp16 = einsum(equation = var_4876_equation_0, values = (var_4326_cast_fp16, var_4728_cast_fp16))[name = tensor("op_4876_cast_fp16")]; tensor var_4878_equation_0 = const()[name = tensor("op_4878_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4878_cast_fp16 = einsum(equation = var_4878_equation_0, values = (var_4330_cast_fp16, var_4729_cast_fp16))[name = tensor("op_4878_cast_fp16")]; tensor var_4880_equation_0 = const()[name = tensor("op_4880_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4880_cast_fp16 = einsum(equation = var_4880_equation_0, values = (var_4330_cast_fp16, var_4730_cast_fp16))[name = tensor("op_4880_cast_fp16")]; tensor var_4882_equation_0 = const()[name = tensor("op_4882_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4882_cast_fp16 = einsum(equation = var_4882_equation_0, values = (var_4330_cast_fp16, var_4731_cast_fp16))[name = tensor("op_4882_cast_fp16")]; tensor var_4884_equation_0 = const()[name = tensor("op_4884_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4884_cast_fp16 = einsum(equation = var_4884_equation_0, values = (var_4330_cast_fp16, var_4732_cast_fp16))[name = tensor("op_4884_cast_fp16")]; tensor var_4886_equation_0 = const()[name = tensor("op_4886_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4886_cast_fp16 = einsum(equation = var_4886_equation_0, values = (var_4334_cast_fp16, var_4733_cast_fp16))[name = tensor("op_4886_cast_fp16")]; tensor var_4888_equation_0 = const()[name = tensor("op_4888_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4888_cast_fp16 = einsum(equation = var_4888_equation_0, values = (var_4334_cast_fp16, var_4734_cast_fp16))[name = tensor("op_4888_cast_fp16")]; tensor var_4890_equation_0 = const()[name = tensor("op_4890_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4890_cast_fp16 = einsum(equation = var_4890_equation_0, values = (var_4334_cast_fp16, var_4735_cast_fp16))[name = tensor("op_4890_cast_fp16")]; tensor var_4892_equation_0 = const()[name = tensor("op_4892_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4892_cast_fp16 = einsum(equation = var_4892_equation_0, values = (var_4334_cast_fp16, var_4736_cast_fp16))[name = tensor("op_4892_cast_fp16")]; tensor var_4894_equation_0 = const()[name = tensor("op_4894_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4894_cast_fp16 = einsum(equation = var_4894_equation_0, values = (var_4338_cast_fp16, var_4737_cast_fp16))[name = tensor("op_4894_cast_fp16")]; tensor var_4896_equation_0 = const()[name = tensor("op_4896_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4896_cast_fp16 = einsum(equation = var_4896_equation_0, values = (var_4338_cast_fp16, var_4738_cast_fp16))[name = tensor("op_4896_cast_fp16")]; tensor var_4898_equation_0 = const()[name = tensor("op_4898_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4898_cast_fp16 = einsum(equation = var_4898_equation_0, values = (var_4338_cast_fp16, var_4739_cast_fp16))[name = tensor("op_4898_cast_fp16")]; tensor var_4900_equation_0 = const()[name = tensor("op_4900_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_4900_cast_fp16 = einsum(equation = var_4900_equation_0, values = (var_4338_cast_fp16, var_4740_cast_fp16))[name = tensor("op_4900_cast_fp16")]; tensor var_4902_interleave_0 = const()[name = tensor("op_4902_interleave_0"), val = tensor(false)]; tensor var_4902_cast_fp16 = concat(axis = var_3434, interleave = var_4902_interleave_0, values = (var_4742_cast_fp16, var_4744_cast_fp16, var_4746_cast_fp16, var_4748_cast_fp16))[name = tensor("op_4902_cast_fp16")]; tensor var_4904_interleave_0 = const()[name = tensor("op_4904_interleave_0"), val = tensor(false)]; tensor var_4904_cast_fp16 = concat(axis = var_3434, interleave = var_4904_interleave_0, values = (var_4750_cast_fp16, var_4752_cast_fp16, var_4754_cast_fp16, var_4756_cast_fp16))[name = tensor("op_4904_cast_fp16")]; tensor var_4906_interleave_0 = const()[name = tensor("op_4906_interleave_0"), val = tensor(false)]; tensor var_4906_cast_fp16 = concat(axis = var_3434, interleave = var_4906_interleave_0, values = (var_4758_cast_fp16, var_4760_cast_fp16, var_4762_cast_fp16, var_4764_cast_fp16))[name = tensor("op_4906_cast_fp16")]; tensor var_4908_interleave_0 = const()[name = tensor("op_4908_interleave_0"), val = tensor(false)]; tensor var_4908_cast_fp16 = concat(axis = var_3434, interleave = var_4908_interleave_0, values = (var_4766_cast_fp16, var_4768_cast_fp16, var_4770_cast_fp16, var_4772_cast_fp16))[name = tensor("op_4908_cast_fp16")]; tensor var_4910_interleave_0 = const()[name = tensor("op_4910_interleave_0"), val = tensor(false)]; tensor var_4910_cast_fp16 = concat(axis = var_3434, interleave = var_4910_interleave_0, values = (var_4774_cast_fp16, var_4776_cast_fp16, var_4778_cast_fp16, var_4780_cast_fp16))[name = tensor("op_4910_cast_fp16")]; tensor var_4912_interleave_0 = const()[name = tensor("op_4912_interleave_0"), val = tensor(false)]; tensor var_4912_cast_fp16 = concat(axis = var_3434, interleave = var_4912_interleave_0, values = (var_4782_cast_fp16, var_4784_cast_fp16, var_4786_cast_fp16, var_4788_cast_fp16))[name = tensor("op_4912_cast_fp16")]; tensor var_4914_interleave_0 = const()[name = tensor("op_4914_interleave_0"), val = tensor(false)]; tensor var_4914_cast_fp16 = concat(axis = var_3434, interleave = var_4914_interleave_0, values = (var_4790_cast_fp16, var_4792_cast_fp16, var_4794_cast_fp16, var_4796_cast_fp16))[name = tensor("op_4914_cast_fp16")]; tensor var_4916_interleave_0 = const()[name = tensor("op_4916_interleave_0"), val = tensor(false)]; tensor var_4916_cast_fp16 = concat(axis = var_3434, interleave = var_4916_interleave_0, values = (var_4798_cast_fp16, var_4800_cast_fp16, var_4802_cast_fp16, var_4804_cast_fp16))[name = tensor("op_4916_cast_fp16")]; tensor var_4918_interleave_0 = const()[name = tensor("op_4918_interleave_0"), val = tensor(false)]; tensor var_4918_cast_fp16 = concat(axis = var_3434, interleave = var_4918_interleave_0, values = (var_4806_cast_fp16, var_4808_cast_fp16, var_4810_cast_fp16, var_4812_cast_fp16))[name = tensor("op_4918_cast_fp16")]; tensor var_4920_interleave_0 = const()[name = tensor("op_4920_interleave_0"), val = tensor(false)]; tensor var_4920_cast_fp16 = concat(axis = var_3434, interleave = var_4920_interleave_0, values = (var_4814_cast_fp16, var_4816_cast_fp16, var_4818_cast_fp16, var_4820_cast_fp16))[name = tensor("op_4920_cast_fp16")]; tensor var_4922_interleave_0 = const()[name = tensor("op_4922_interleave_0"), val = tensor(false)]; tensor var_4922_cast_fp16 = concat(axis = var_3434, interleave = var_4922_interleave_0, values = (var_4822_cast_fp16, var_4824_cast_fp16, var_4826_cast_fp16, var_4828_cast_fp16))[name = tensor("op_4922_cast_fp16")]; tensor var_4924_interleave_0 = const()[name = tensor("op_4924_interleave_0"), val = tensor(false)]; tensor var_4924_cast_fp16 = concat(axis = var_3434, interleave = var_4924_interleave_0, values = (var_4830_cast_fp16, var_4832_cast_fp16, var_4834_cast_fp16, var_4836_cast_fp16))[name = tensor("op_4924_cast_fp16")]; tensor var_4926_interleave_0 = const()[name = tensor("op_4926_interleave_0"), val = tensor(false)]; tensor var_4926_cast_fp16 = concat(axis = var_3434, interleave = var_4926_interleave_0, values = (var_4838_cast_fp16, var_4840_cast_fp16, var_4842_cast_fp16, var_4844_cast_fp16))[name = tensor("op_4926_cast_fp16")]; tensor var_4928_interleave_0 = const()[name = tensor("op_4928_interleave_0"), val = tensor(false)]; tensor var_4928_cast_fp16 = concat(axis = var_3434, interleave = var_4928_interleave_0, values = (var_4846_cast_fp16, var_4848_cast_fp16, var_4850_cast_fp16, var_4852_cast_fp16))[name = tensor("op_4928_cast_fp16")]; tensor var_4930_interleave_0 = const()[name = tensor("op_4930_interleave_0"), val = tensor(false)]; tensor var_4930_cast_fp16 = concat(axis = var_3434, interleave = var_4930_interleave_0, values = (var_4854_cast_fp16, var_4856_cast_fp16, var_4858_cast_fp16, var_4860_cast_fp16))[name = tensor("op_4930_cast_fp16")]; tensor var_4932_interleave_0 = const()[name = tensor("op_4932_interleave_0"), val = tensor(false)]; tensor var_4932_cast_fp16 = concat(axis = var_3434, interleave = var_4932_interleave_0, values = (var_4862_cast_fp16, var_4864_cast_fp16, var_4866_cast_fp16, var_4868_cast_fp16))[name = tensor("op_4932_cast_fp16")]; tensor var_4934_interleave_0 = const()[name = tensor("op_4934_interleave_0"), val = tensor(false)]; tensor var_4934_cast_fp16 = concat(axis = var_3434, interleave = var_4934_interleave_0, values = (var_4870_cast_fp16, var_4872_cast_fp16, var_4874_cast_fp16, var_4876_cast_fp16))[name = tensor("op_4934_cast_fp16")]; tensor var_4936_interleave_0 = const()[name = tensor("op_4936_interleave_0"), val = tensor(false)]; tensor var_4936_cast_fp16 = concat(axis = var_3434, interleave = var_4936_interleave_0, values = (var_4878_cast_fp16, var_4880_cast_fp16, var_4882_cast_fp16, var_4884_cast_fp16))[name = tensor("op_4936_cast_fp16")]; tensor var_4938_interleave_0 = const()[name = tensor("op_4938_interleave_0"), val = tensor(false)]; tensor var_4938_cast_fp16 = concat(axis = var_3434, interleave = var_4938_interleave_0, values = (var_4886_cast_fp16, var_4888_cast_fp16, var_4890_cast_fp16, var_4892_cast_fp16))[name = tensor("op_4938_cast_fp16")]; tensor var_4940_interleave_0 = const()[name = tensor("op_4940_interleave_0"), val = tensor(false)]; tensor var_4940_cast_fp16 = concat(axis = var_3434, interleave = var_4940_interleave_0, values = (var_4894_cast_fp16, var_4896_cast_fp16, var_4898_cast_fp16, var_4900_cast_fp16))[name = tensor("op_4940_cast_fp16")]; tensor input_17_interleave_0 = const()[name = tensor("input_17_interleave_0"), val = tensor(false)]; tensor input_17_cast_fp16 = concat(axis = var_3459, interleave = input_17_interleave_0, values = (var_4902_cast_fp16, var_4904_cast_fp16, var_4906_cast_fp16, var_4908_cast_fp16, var_4910_cast_fp16, var_4912_cast_fp16, var_4914_cast_fp16, var_4916_cast_fp16, var_4918_cast_fp16, var_4920_cast_fp16, var_4922_cast_fp16, var_4924_cast_fp16, var_4926_cast_fp16, var_4928_cast_fp16, var_4930_cast_fp16, var_4932_cast_fp16, var_4934_cast_fp16, var_4936_cast_fp16, var_4938_cast_fp16, var_4940_cast_fp16))[name = tensor("input_17_cast_fp16")]; tensor var_4951_pad_type_0 = const()[name = tensor("op_4951_pad_type_0"), val = tensor("valid")]; tensor var_4951_strides_0 = const()[name = tensor("op_4951_strides_0"), val = tensor([1, 1])]; tensor var_4951_pad_0 = const()[name = tensor("op_4951_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4951_dilations_0 = const()[name = tensor("op_4951_dilations_0"), val = tensor([1, 1])]; tensor var_4951_groups_0 = const()[name = tensor("op_4951_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(46615104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47434368))), name = tensor("layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_2_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47434496)))]; tensor var_4951_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_4951_dilations_0, groups = var_4951_groups_0, pad = var_4951_pad_0, pad_type = var_4951_pad_type_0, strides = var_4951_strides_0, weight = layers_2_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_17_cast_fp16)[name = tensor("op_4951_cast_fp16")]; tensor var_4957_pad_type_0 = const()[name = tensor("op_4957_pad_type_0"), val = tensor("valid")]; tensor var_4957_strides_0 = const()[name = tensor("op_4957_strides_0"), val = tensor([1, 1])]; tensor var_4957_pad_0 = const()[name = tensor("op_4957_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4957_dilations_0 = const()[name = tensor("op_4957_dilations_0"), val = tensor([1, 1])]; tensor var_4957_groups_0 = const()[name = tensor("op_4957_groups_0"), val = tensor(1)]; tensor layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47467328))), name = tensor("layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47437120))), shape = tensor([1280, 1280, 1, 1])]; tensor var_4957_cast_fp16 = conv(dilations = var_4957_dilations_0, groups = var_4957_groups_0, pad = var_4957_pad_0, pad_type = var_4957_pad_type_0, strides = var_4957_strides_0, weight = layers_2_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_17_cast_fp16)[name = tensor("op_4957_cast_fp16")]; tensor obj_11_cast_fp16 = add(x = var_4951_cast_fp16, y = var_4957_cast_fp16)[name = tensor("obj_11_cast_fp16")]; tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; tensor out_11_axes_0 = const()[name = tensor("out_11_axes_0"), val = tensor([1])]; tensor var_4968_to_fp16 = const()[name = tensor("op_4968_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_11_cast_fp16 = layer_norm(axes = out_11_axes_0, epsilon = var_4968_to_fp16, x = inputs_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47672192)))]; tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47674816)))]; tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; tensor var_4986_pad_type_0 = const()[name = tensor("op_4986_pad_type_0"), val = tensor("valid")]; tensor var_4986_strides_0 = const()[name = tensor("op_4986_strides_0"), val = tensor([1, 1])]; tensor var_4986_pad_0 = const()[name = tensor("op_4986_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4986_dilations_0 = const()[name = tensor("op_4986_dilations_0"), val = tensor([1, 1])]; tensor var_4986_groups_0 = const()[name = tensor("op_4986_groups_0"), val = tensor(1)]; tensor layers_2_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(47677440))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50954304))), name = tensor("layers_2_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_2_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50954432)))]; tensor var_4986_cast_fp16 = conv(bias = layers_2_fc1_inlier_module_bias_to_fp16, dilations = var_4986_dilations_0, groups = var_4986_groups_0, pad = var_4986_pad_0, pad_type = var_4986_pad_type_0, strides = var_4986_strides_0, weight = layers_2_fc1_inlier_module_weight_to_fp16_palettized, x = input_19_cast_fp16)[name = tensor("op_4986_cast_fp16")]; tensor var_4992_pad_type_0 = const()[name = tensor("op_4992_pad_type_0"), val = tensor("valid")]; tensor var_4992_strides_0 = const()[name = tensor("op_4992_strides_0"), val = tensor([1, 1])]; tensor var_4992_pad_0 = const()[name = tensor("op_4992_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_4992_dilations_0 = const()[name = tensor("op_4992_dilations_0"), val = tensor([1, 1])]; tensor var_4992_groups_0 = const()[name = tensor("op_4992_groups_0"), val = tensor(1)]; tensor layers_2_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51003008))), name = tensor("layers_2_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50964736))), shape = tensor([5120, 1280, 1, 1])]; tensor var_4992_cast_fp16 = conv(dilations = var_4992_dilations_0, groups = var_4992_groups_0, pad = var_4992_pad_0, pad_type = var_4992_pad_type_0, strides = var_4992_strides_0, weight = layers_2_fc1_outlier_module_weight_to_fp16_sparsified, x = input_19_cast_fp16)[name = tensor("op_4992_cast_fp16")]; tensor input_21_cast_fp16 = add(x = var_4986_cast_fp16, y = var_4992_cast_fp16)[name = tensor("input_21_cast_fp16")]; tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; tensor var_5003_pad_type_0 = const()[name = tensor("op_5003_pad_type_0"), val = tensor("valid")]; tensor var_5003_strides_0 = const()[name = tensor("op_5003_strides_0"), val = tensor([1, 1])]; tensor var_5003_pad_0 = const()[name = tensor("op_5003_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5003_dilations_0 = const()[name = tensor("op_5003_dilations_0"), val = tensor([1, 1])]; tensor var_5003_groups_0 = const()[name = tensor("op_5003_groups_0"), val = tensor(1)]; tensor layers_2_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51822272))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55099136))), name = tensor("layers_2_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_2_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_2_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55099264)))]; tensor var_5003_cast_fp16 = conv(bias = layers_2_fc2_inlier_module_bias_to_fp16, dilations = var_5003_dilations_0, groups = var_5003_groups_0, pad = var_5003_pad_0, pad_type = var_5003_pad_type_0, strides = var_5003_strides_0, weight = layers_2_fc2_inlier_module_weight_to_fp16_palettized, x = input_23_cast_fp16)[name = tensor("op_5003_cast_fp16")]; tensor var_5009_pad_type_0 = const()[name = tensor("op_5009_pad_type_0"), val = tensor("valid")]; tensor var_5009_strides_0 = const()[name = tensor("op_5009_strides_0"), val = tensor([1, 1])]; tensor var_5009_pad_0 = const()[name = tensor("op_5009_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5009_dilations_0 = const()[name = tensor("op_5009_dilations_0"), val = tensor([1, 1])]; tensor var_5009_groups_0 = const()[name = tensor("op_5009_groups_0"), val = tensor(1)]; tensor layers_2_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55315392))), name = tensor("layers_2_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(55101888))), shape = tensor([1280, 5120, 1, 1])]; tensor var_5009_cast_fp16 = conv(dilations = var_5009_dilations_0, groups = var_5009_groups_0, pad = var_5009_pad_0, pad_type = var_5009_pad_type_0, strides = var_5009_strides_0, weight = layers_2_fc2_outlier_module_weight_to_fp16_sparsified, x = input_23_cast_fp16)[name = tensor("op_5009_cast_fp16")]; tensor hidden_states_9_cast_fp16 = add(x = var_5003_cast_fp16, y = var_5009_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; tensor var_5015 = const()[name = tensor("op_5015"), val = tensor(3)]; tensor var_5040 = const()[name = tensor("op_5040"), val = tensor(1)]; tensor out_13_axes_0 = const()[name = tensor("out_13_axes_0"), val = tensor([1])]; tensor var_5057_to_fp16 = const()[name = tensor("op_5057_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_13_cast_fp16 = layer_norm(axes = out_13_axes_0, epsilon = var_5057_to_fp16, x = inputs_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56134656)))]; tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56137280)))]; tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; tensor var_5079_pad_type_0 = const()[name = tensor("op_5079_pad_type_0"), val = tensor("valid")]; tensor var_5079_strides_0 = const()[name = tensor("op_5079_strides_0"), val = tensor([1, 1])]; tensor var_5079_pad_0 = const()[name = tensor("op_5079_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5079_dilations_0 = const()[name = tensor("op_5079_dilations_0"), val = tensor([1, 1])]; tensor var_5079_groups_0 = const()[name = tensor("op_5079_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56139904))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56959168))), name = tensor("layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_3_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56959296)))]; tensor var_5079_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_5079_dilations_0, groups = var_5079_groups_0, pad = var_5079_pad_0, pad_type = var_5079_pad_type_0, strides = var_5079_strides_0, weight = layers_3_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_5079_cast_fp16")]; tensor var_5085_pad_type_0 = const()[name = tensor("op_5085_pad_type_0"), val = tensor("valid")]; tensor var_5085_strides_0 = const()[name = tensor("op_5085_strides_0"), val = tensor([1, 1])]; tensor var_5085_pad_0 = const()[name = tensor("op_5085_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5085_dilations_0 = const()[name = tensor("op_5085_dilations_0"), val = tensor([1, 1])]; tensor var_5085_groups_0 = const()[name = tensor("op_5085_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57013184))), name = tensor("layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56961920))), shape = tensor([1280, 1280, 1, 1])]; tensor var_5085_cast_fp16 = conv(dilations = var_5085_dilations_0, groups = var_5085_groups_0, pad = var_5085_pad_0, pad_type = var_5085_pad_type_0, strides = var_5085_strides_0, weight = layers_3_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_5085_cast_fp16")]; tensor query_7_cast_fp16 = add(x = var_5079_cast_fp16, y = var_5085_cast_fp16)[name = tensor("query_7_cast_fp16")]; tensor var_5094_pad_type_0 = const()[name = tensor("op_5094_pad_type_0"), val = tensor("valid")]; tensor var_5094_strides_0 = const()[name = tensor("op_5094_strides_0"), val = tensor([1, 1])]; tensor var_5094_pad_0 = const()[name = tensor("op_5094_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5094_dilations_0 = const()[name = tensor("op_5094_dilations_0"), val = tensor([1, 1])]; tensor var_5094_groups_0 = const()[name = tensor("op_5094_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(57218048))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58037312))), name = tensor("layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_5094_cast_fp16 = conv(dilations = var_5094_dilations_0, groups = var_5094_groups_0, pad = var_5094_pad_0, pad_type = var_5094_pad_type_0, strides = var_5094_strides_0, weight = layers_3_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_5094_cast_fp16")]; tensor var_5100_pad_type_0 = const()[name = tensor("op_5100_pad_type_0"), val = tensor("valid")]; tensor var_5100_strides_0 = const()[name = tensor("op_5100_strides_0"), val = tensor([1, 1])]; tensor var_5100_pad_0 = const()[name = tensor("op_5100_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5100_dilations_0 = const()[name = tensor("op_5100_dilations_0"), val = tensor([1, 1])]; tensor var_5100_groups_0 = const()[name = tensor("op_5100_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58077888))), name = tensor("layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58037440))), shape = tensor([1280, 1280, 1, 1])]; tensor var_5100_cast_fp16 = conv(dilations = var_5100_dilations_0, groups = var_5100_groups_0, pad = var_5100_pad_0, pad_type = var_5100_pad_type_0, strides = var_5100_strides_0, weight = layers_3_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_5100_cast_fp16")]; tensor key_7_cast_fp16 = add(x = var_5094_cast_fp16, y = var_5100_cast_fp16)[name = tensor("key_7_cast_fp16")]; tensor var_5110_pad_type_0 = const()[name = tensor("op_5110_pad_type_0"), val = tensor("valid")]; tensor var_5110_strides_0 = const()[name = tensor("op_5110_strides_0"), val = tensor([1, 1])]; tensor var_5110_pad_0 = const()[name = tensor("op_5110_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5110_dilations_0 = const()[name = tensor("op_5110_dilations_0"), val = tensor([1, 1])]; tensor var_5110_groups_0 = const()[name = tensor("op_5110_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58282752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59102016))), name = tensor("layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_3_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59102144)))]; tensor var_5110_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_5110_dilations_0, groups = var_5110_groups_0, pad = var_5110_pad_0, pad_type = var_5110_pad_type_0, strides = var_5110_strides_0, weight = layers_3_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_13_cast_fp16)[name = tensor("op_5110_cast_fp16")]; tensor var_5116_pad_type_0 = const()[name = tensor("op_5116_pad_type_0"), val = tensor("valid")]; tensor var_5116_strides_0 = const()[name = tensor("op_5116_strides_0"), val = tensor([1, 1])]; tensor var_5116_pad_0 = const()[name = tensor("op_5116_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_5116_dilations_0 = const()[name = tensor("op_5116_dilations_0"), val = tensor([1, 1])]; tensor var_5116_groups_0 = const()[name = tensor("op_5116_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59138432))), name = tensor("layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59104768))), shape = tensor([1280, 1280, 1, 1])]; tensor var_5116_cast_fp16 = conv(dilations = var_5116_dilations_0, groups = var_5116_groups_0, pad = var_5116_pad_0, pad_type = var_5116_pad_type_0, strides = var_5116_strides_0, weight = layers_3_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_13_cast_fp16)[name = tensor("op_5116_cast_fp16")]; tensor value_7_cast_fp16 = add(x = var_5110_cast_fp16, y = var_5116_cast_fp16)[name = tensor("value_7_cast_fp16")]; tensor var_5122_begin_0 = const()[name = tensor("op_5122_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5122_end_0 = const()[name = tensor("op_5122_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5122_end_mask_0 = const()[name = tensor("op_5122_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5122_cast_fp16 = slice_by_index(begin = var_5122_begin_0, end = var_5122_end_0, end_mask = var_5122_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5122_cast_fp16")]; tensor var_5126_begin_0 = const()[name = tensor("op_5126_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_5126_end_0 = const()[name = tensor("op_5126_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_5126_end_mask_0 = const()[name = tensor("op_5126_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5126_cast_fp16 = slice_by_index(begin = var_5126_begin_0, end = var_5126_end_0, end_mask = var_5126_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5126_cast_fp16")]; tensor var_5130_begin_0 = const()[name = tensor("op_5130_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_5130_end_0 = const()[name = tensor("op_5130_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_5130_end_mask_0 = const()[name = tensor("op_5130_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5130_cast_fp16 = slice_by_index(begin = var_5130_begin_0, end = var_5130_end_0, end_mask = var_5130_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5130_cast_fp16")]; tensor var_5134_begin_0 = const()[name = tensor("op_5134_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_5134_end_0 = const()[name = tensor("op_5134_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_5134_end_mask_0 = const()[name = tensor("op_5134_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5134_cast_fp16 = slice_by_index(begin = var_5134_begin_0, end = var_5134_end_0, end_mask = var_5134_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5134_cast_fp16")]; tensor var_5138_begin_0 = const()[name = tensor("op_5138_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_5138_end_0 = const()[name = tensor("op_5138_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_5138_end_mask_0 = const()[name = tensor("op_5138_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5138_cast_fp16 = slice_by_index(begin = var_5138_begin_0, end = var_5138_end_0, end_mask = var_5138_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5138_cast_fp16")]; tensor var_5142_begin_0 = const()[name = tensor("op_5142_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5142_end_0 = const()[name = tensor("op_5142_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_5142_end_mask_0 = const()[name = tensor("op_5142_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5142_cast_fp16 = slice_by_index(begin = var_5142_begin_0, end = var_5142_end_0, end_mask = var_5142_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5142_cast_fp16")]; tensor var_5146_begin_0 = const()[name = tensor("op_5146_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_5146_end_0 = const()[name = tensor("op_5146_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_5146_end_mask_0 = const()[name = tensor("op_5146_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5146_cast_fp16 = slice_by_index(begin = var_5146_begin_0, end = var_5146_end_0, end_mask = var_5146_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5146_cast_fp16")]; tensor var_5150_begin_0 = const()[name = tensor("op_5150_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_5150_end_0 = const()[name = tensor("op_5150_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_5150_end_mask_0 = const()[name = tensor("op_5150_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5150_cast_fp16 = slice_by_index(begin = var_5150_begin_0, end = var_5150_end_0, end_mask = var_5150_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5150_cast_fp16")]; tensor var_5154_begin_0 = const()[name = tensor("op_5154_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_5154_end_0 = const()[name = tensor("op_5154_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_5154_end_mask_0 = const()[name = tensor("op_5154_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5154_cast_fp16 = slice_by_index(begin = var_5154_begin_0, end = var_5154_end_0, end_mask = var_5154_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5154_cast_fp16")]; tensor var_5158_begin_0 = const()[name = tensor("op_5158_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_5158_end_0 = const()[name = tensor("op_5158_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_5158_end_mask_0 = const()[name = tensor("op_5158_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5158_cast_fp16 = slice_by_index(begin = var_5158_begin_0, end = var_5158_end_0, end_mask = var_5158_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5158_cast_fp16")]; tensor var_5162_begin_0 = const()[name = tensor("op_5162_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5162_end_0 = const()[name = tensor("op_5162_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_5162_end_mask_0 = const()[name = tensor("op_5162_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5162_cast_fp16 = slice_by_index(begin = var_5162_begin_0, end = var_5162_end_0, end_mask = var_5162_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5162_cast_fp16")]; tensor var_5166_begin_0 = const()[name = tensor("op_5166_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_5166_end_0 = const()[name = tensor("op_5166_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_5166_end_mask_0 = const()[name = tensor("op_5166_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5166_cast_fp16 = slice_by_index(begin = var_5166_begin_0, end = var_5166_end_0, end_mask = var_5166_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5166_cast_fp16")]; tensor var_5170_begin_0 = const()[name = tensor("op_5170_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_5170_end_0 = const()[name = tensor("op_5170_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_5170_end_mask_0 = const()[name = tensor("op_5170_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5170_cast_fp16 = slice_by_index(begin = var_5170_begin_0, end = var_5170_end_0, end_mask = var_5170_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5170_cast_fp16")]; tensor var_5174_begin_0 = const()[name = tensor("op_5174_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_5174_end_0 = const()[name = tensor("op_5174_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_5174_end_mask_0 = const()[name = tensor("op_5174_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5174_cast_fp16 = slice_by_index(begin = var_5174_begin_0, end = var_5174_end_0, end_mask = var_5174_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5174_cast_fp16")]; tensor var_5178_begin_0 = const()[name = tensor("op_5178_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_5178_end_0 = const()[name = tensor("op_5178_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_5178_end_mask_0 = const()[name = tensor("op_5178_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5178_cast_fp16 = slice_by_index(begin = var_5178_begin_0, end = var_5178_end_0, end_mask = var_5178_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5178_cast_fp16")]; tensor var_5182_begin_0 = const()[name = tensor("op_5182_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5182_end_0 = const()[name = tensor("op_5182_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_5182_end_mask_0 = const()[name = tensor("op_5182_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5182_cast_fp16 = slice_by_index(begin = var_5182_begin_0, end = var_5182_end_0, end_mask = var_5182_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5182_cast_fp16")]; tensor var_5186_begin_0 = const()[name = tensor("op_5186_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_5186_end_0 = const()[name = tensor("op_5186_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_5186_end_mask_0 = const()[name = tensor("op_5186_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5186_cast_fp16 = slice_by_index(begin = var_5186_begin_0, end = var_5186_end_0, end_mask = var_5186_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5186_cast_fp16")]; tensor var_5190_begin_0 = const()[name = tensor("op_5190_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_5190_end_0 = const()[name = tensor("op_5190_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_5190_end_mask_0 = const()[name = tensor("op_5190_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5190_cast_fp16 = slice_by_index(begin = var_5190_begin_0, end = var_5190_end_0, end_mask = var_5190_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5190_cast_fp16")]; tensor var_5194_begin_0 = const()[name = tensor("op_5194_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_5194_end_0 = const()[name = tensor("op_5194_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_5194_end_mask_0 = const()[name = tensor("op_5194_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5194_cast_fp16 = slice_by_index(begin = var_5194_begin_0, end = var_5194_end_0, end_mask = var_5194_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5194_cast_fp16")]; tensor var_5198_begin_0 = const()[name = tensor("op_5198_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_5198_end_0 = const()[name = tensor("op_5198_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_5198_end_mask_0 = const()[name = tensor("op_5198_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5198_cast_fp16 = slice_by_index(begin = var_5198_begin_0, end = var_5198_end_0, end_mask = var_5198_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_5198_cast_fp16")]; tensor var_5207_begin_0 = const()[name = tensor("op_5207_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5207_end_0 = const()[name = tensor("op_5207_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5207_end_mask_0 = const()[name = tensor("op_5207_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5207_cast_fp16 = slice_by_index(begin = var_5207_begin_0, end = var_5207_end_0, end_mask = var_5207_end_mask_0, x = var_5122_cast_fp16)[name = tensor("op_5207_cast_fp16")]; tensor var_5214_begin_0 = const()[name = tensor("op_5214_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5214_end_0 = const()[name = tensor("op_5214_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5214_end_mask_0 = const()[name = tensor("op_5214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5214_cast_fp16 = slice_by_index(begin = var_5214_begin_0, end = var_5214_end_0, end_mask = var_5214_end_mask_0, x = var_5122_cast_fp16)[name = tensor("op_5214_cast_fp16")]; tensor var_5221_begin_0 = const()[name = tensor("op_5221_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5221_end_0 = const()[name = tensor("op_5221_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5221_end_mask_0 = const()[name = tensor("op_5221_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5221_cast_fp16 = slice_by_index(begin = var_5221_begin_0, end = var_5221_end_0, end_mask = var_5221_end_mask_0, x = var_5122_cast_fp16)[name = tensor("op_5221_cast_fp16")]; tensor var_5228_begin_0 = const()[name = tensor("op_5228_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5228_end_0 = const()[name = tensor("op_5228_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5228_end_mask_0 = const()[name = tensor("op_5228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5228_cast_fp16 = slice_by_index(begin = var_5228_begin_0, end = var_5228_end_0, end_mask = var_5228_end_mask_0, x = var_5122_cast_fp16)[name = tensor("op_5228_cast_fp16")]; tensor var_5235_begin_0 = const()[name = tensor("op_5235_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5235_end_0 = const()[name = tensor("op_5235_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5235_end_mask_0 = const()[name = tensor("op_5235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5235_cast_fp16 = slice_by_index(begin = var_5235_begin_0, end = var_5235_end_0, end_mask = var_5235_end_mask_0, x = var_5126_cast_fp16)[name = tensor("op_5235_cast_fp16")]; tensor var_5242_begin_0 = const()[name = tensor("op_5242_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5242_end_0 = const()[name = tensor("op_5242_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5242_end_mask_0 = const()[name = tensor("op_5242_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5242_cast_fp16 = slice_by_index(begin = var_5242_begin_0, end = var_5242_end_0, end_mask = var_5242_end_mask_0, x = var_5126_cast_fp16)[name = tensor("op_5242_cast_fp16")]; tensor var_5249_begin_0 = const()[name = tensor("op_5249_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5249_end_0 = const()[name = tensor("op_5249_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5249_end_mask_0 = const()[name = tensor("op_5249_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5249_cast_fp16 = slice_by_index(begin = var_5249_begin_0, end = var_5249_end_0, end_mask = var_5249_end_mask_0, x = var_5126_cast_fp16)[name = tensor("op_5249_cast_fp16")]; tensor var_5256_begin_0 = const()[name = tensor("op_5256_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5256_end_0 = const()[name = tensor("op_5256_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5256_end_mask_0 = const()[name = tensor("op_5256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5256_cast_fp16 = slice_by_index(begin = var_5256_begin_0, end = var_5256_end_0, end_mask = var_5256_end_mask_0, x = var_5126_cast_fp16)[name = tensor("op_5256_cast_fp16")]; tensor var_5263_begin_0 = const()[name = tensor("op_5263_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5263_end_0 = const()[name = tensor("op_5263_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5263_end_mask_0 = const()[name = tensor("op_5263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5263_cast_fp16 = slice_by_index(begin = var_5263_begin_0, end = var_5263_end_0, end_mask = var_5263_end_mask_0, x = var_5130_cast_fp16)[name = tensor("op_5263_cast_fp16")]; tensor var_5270_begin_0 = const()[name = tensor("op_5270_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5270_end_0 = const()[name = tensor("op_5270_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5270_end_mask_0 = const()[name = tensor("op_5270_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5270_cast_fp16 = slice_by_index(begin = var_5270_begin_0, end = var_5270_end_0, end_mask = var_5270_end_mask_0, x = var_5130_cast_fp16)[name = tensor("op_5270_cast_fp16")]; tensor var_5277_begin_0 = const()[name = tensor("op_5277_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5277_end_0 = const()[name = tensor("op_5277_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5277_end_mask_0 = const()[name = tensor("op_5277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5277_cast_fp16 = slice_by_index(begin = var_5277_begin_0, end = var_5277_end_0, end_mask = var_5277_end_mask_0, x = var_5130_cast_fp16)[name = tensor("op_5277_cast_fp16")]; tensor var_5284_begin_0 = const()[name = tensor("op_5284_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5284_end_0 = const()[name = tensor("op_5284_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5284_end_mask_0 = const()[name = tensor("op_5284_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5284_cast_fp16 = slice_by_index(begin = var_5284_begin_0, end = var_5284_end_0, end_mask = var_5284_end_mask_0, x = var_5130_cast_fp16)[name = tensor("op_5284_cast_fp16")]; tensor var_5291_begin_0 = const()[name = tensor("op_5291_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5291_end_0 = const()[name = tensor("op_5291_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5291_end_mask_0 = const()[name = tensor("op_5291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5291_cast_fp16 = slice_by_index(begin = var_5291_begin_0, end = var_5291_end_0, end_mask = var_5291_end_mask_0, x = var_5134_cast_fp16)[name = tensor("op_5291_cast_fp16")]; tensor var_5298_begin_0 = const()[name = tensor("op_5298_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5298_end_0 = const()[name = tensor("op_5298_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5298_end_mask_0 = const()[name = tensor("op_5298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5298_cast_fp16 = slice_by_index(begin = var_5298_begin_0, end = var_5298_end_0, end_mask = var_5298_end_mask_0, x = var_5134_cast_fp16)[name = tensor("op_5298_cast_fp16")]; tensor var_5305_begin_0 = const()[name = tensor("op_5305_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5305_end_0 = const()[name = tensor("op_5305_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5305_end_mask_0 = const()[name = tensor("op_5305_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5305_cast_fp16 = slice_by_index(begin = var_5305_begin_0, end = var_5305_end_0, end_mask = var_5305_end_mask_0, x = var_5134_cast_fp16)[name = tensor("op_5305_cast_fp16")]; tensor var_5312_begin_0 = const()[name = tensor("op_5312_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5312_end_0 = const()[name = tensor("op_5312_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5312_end_mask_0 = const()[name = tensor("op_5312_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5312_cast_fp16 = slice_by_index(begin = var_5312_begin_0, end = var_5312_end_0, end_mask = var_5312_end_mask_0, x = var_5134_cast_fp16)[name = tensor("op_5312_cast_fp16")]; tensor var_5319_begin_0 = const()[name = tensor("op_5319_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5319_end_0 = const()[name = tensor("op_5319_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5319_end_mask_0 = const()[name = tensor("op_5319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5319_cast_fp16 = slice_by_index(begin = var_5319_begin_0, end = var_5319_end_0, end_mask = var_5319_end_mask_0, x = var_5138_cast_fp16)[name = tensor("op_5319_cast_fp16")]; tensor var_5326_begin_0 = const()[name = tensor("op_5326_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5326_end_0 = const()[name = tensor("op_5326_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5326_end_mask_0 = const()[name = tensor("op_5326_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5326_cast_fp16 = slice_by_index(begin = var_5326_begin_0, end = var_5326_end_0, end_mask = var_5326_end_mask_0, x = var_5138_cast_fp16)[name = tensor("op_5326_cast_fp16")]; tensor var_5333_begin_0 = const()[name = tensor("op_5333_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5333_end_0 = const()[name = tensor("op_5333_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5333_end_mask_0 = const()[name = tensor("op_5333_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5333_cast_fp16 = slice_by_index(begin = var_5333_begin_0, end = var_5333_end_0, end_mask = var_5333_end_mask_0, x = var_5138_cast_fp16)[name = tensor("op_5333_cast_fp16")]; tensor var_5340_begin_0 = const()[name = tensor("op_5340_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5340_end_0 = const()[name = tensor("op_5340_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5340_end_mask_0 = const()[name = tensor("op_5340_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5340_cast_fp16 = slice_by_index(begin = var_5340_begin_0, end = var_5340_end_0, end_mask = var_5340_end_mask_0, x = var_5138_cast_fp16)[name = tensor("op_5340_cast_fp16")]; tensor var_5347_begin_0 = const()[name = tensor("op_5347_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5347_end_0 = const()[name = tensor("op_5347_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5347_end_mask_0 = const()[name = tensor("op_5347_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5347_cast_fp16 = slice_by_index(begin = var_5347_begin_0, end = var_5347_end_0, end_mask = var_5347_end_mask_0, x = var_5142_cast_fp16)[name = tensor("op_5347_cast_fp16")]; tensor var_5354_begin_0 = const()[name = tensor("op_5354_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5354_end_0 = const()[name = tensor("op_5354_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5354_end_mask_0 = const()[name = tensor("op_5354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5354_cast_fp16 = slice_by_index(begin = var_5354_begin_0, end = var_5354_end_0, end_mask = var_5354_end_mask_0, x = var_5142_cast_fp16)[name = tensor("op_5354_cast_fp16")]; tensor var_5361_begin_0 = const()[name = tensor("op_5361_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5361_end_0 = const()[name = tensor("op_5361_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5361_end_mask_0 = const()[name = tensor("op_5361_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5361_cast_fp16 = slice_by_index(begin = var_5361_begin_0, end = var_5361_end_0, end_mask = var_5361_end_mask_0, x = var_5142_cast_fp16)[name = tensor("op_5361_cast_fp16")]; tensor var_5368_begin_0 = const()[name = tensor("op_5368_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5368_end_0 = const()[name = tensor("op_5368_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5368_end_mask_0 = const()[name = tensor("op_5368_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5368_cast_fp16 = slice_by_index(begin = var_5368_begin_0, end = var_5368_end_0, end_mask = var_5368_end_mask_0, x = var_5142_cast_fp16)[name = tensor("op_5368_cast_fp16")]; tensor var_5375_begin_0 = const()[name = tensor("op_5375_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5375_end_0 = const()[name = tensor("op_5375_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5375_end_mask_0 = const()[name = tensor("op_5375_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5375_cast_fp16 = slice_by_index(begin = var_5375_begin_0, end = var_5375_end_0, end_mask = var_5375_end_mask_0, x = var_5146_cast_fp16)[name = tensor("op_5375_cast_fp16")]; tensor var_5382_begin_0 = const()[name = tensor("op_5382_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5382_end_0 = const()[name = tensor("op_5382_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5382_end_mask_0 = const()[name = tensor("op_5382_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5382_cast_fp16 = slice_by_index(begin = var_5382_begin_0, end = var_5382_end_0, end_mask = var_5382_end_mask_0, x = var_5146_cast_fp16)[name = tensor("op_5382_cast_fp16")]; tensor var_5389_begin_0 = const()[name = tensor("op_5389_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5389_end_0 = const()[name = tensor("op_5389_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5389_end_mask_0 = const()[name = tensor("op_5389_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5389_cast_fp16 = slice_by_index(begin = var_5389_begin_0, end = var_5389_end_0, end_mask = var_5389_end_mask_0, x = var_5146_cast_fp16)[name = tensor("op_5389_cast_fp16")]; tensor var_5396_begin_0 = const()[name = tensor("op_5396_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5396_end_0 = const()[name = tensor("op_5396_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5396_end_mask_0 = const()[name = tensor("op_5396_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5396_cast_fp16 = slice_by_index(begin = var_5396_begin_0, end = var_5396_end_0, end_mask = var_5396_end_mask_0, x = var_5146_cast_fp16)[name = tensor("op_5396_cast_fp16")]; tensor var_5403_begin_0 = const()[name = tensor("op_5403_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5403_end_0 = const()[name = tensor("op_5403_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5403_end_mask_0 = const()[name = tensor("op_5403_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5403_cast_fp16 = slice_by_index(begin = var_5403_begin_0, end = var_5403_end_0, end_mask = var_5403_end_mask_0, x = var_5150_cast_fp16)[name = tensor("op_5403_cast_fp16")]; tensor var_5410_begin_0 = const()[name = tensor("op_5410_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5410_end_0 = const()[name = tensor("op_5410_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5410_end_mask_0 = const()[name = tensor("op_5410_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5410_cast_fp16 = slice_by_index(begin = var_5410_begin_0, end = var_5410_end_0, end_mask = var_5410_end_mask_0, x = var_5150_cast_fp16)[name = tensor("op_5410_cast_fp16")]; tensor var_5417_begin_0 = const()[name = tensor("op_5417_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5417_end_0 = const()[name = tensor("op_5417_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5417_end_mask_0 = const()[name = tensor("op_5417_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5417_cast_fp16 = slice_by_index(begin = var_5417_begin_0, end = var_5417_end_0, end_mask = var_5417_end_mask_0, x = var_5150_cast_fp16)[name = tensor("op_5417_cast_fp16")]; tensor var_5424_begin_0 = const()[name = tensor("op_5424_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5424_end_0 = const()[name = tensor("op_5424_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5424_end_mask_0 = const()[name = tensor("op_5424_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5424_cast_fp16 = slice_by_index(begin = var_5424_begin_0, end = var_5424_end_0, end_mask = var_5424_end_mask_0, x = var_5150_cast_fp16)[name = tensor("op_5424_cast_fp16")]; tensor var_5431_begin_0 = const()[name = tensor("op_5431_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5431_end_0 = const()[name = tensor("op_5431_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5431_end_mask_0 = const()[name = tensor("op_5431_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5431_cast_fp16 = slice_by_index(begin = var_5431_begin_0, end = var_5431_end_0, end_mask = var_5431_end_mask_0, x = var_5154_cast_fp16)[name = tensor("op_5431_cast_fp16")]; tensor var_5438_begin_0 = const()[name = tensor("op_5438_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5438_end_0 = const()[name = tensor("op_5438_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5438_end_mask_0 = const()[name = tensor("op_5438_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5438_cast_fp16 = slice_by_index(begin = var_5438_begin_0, end = var_5438_end_0, end_mask = var_5438_end_mask_0, x = var_5154_cast_fp16)[name = tensor("op_5438_cast_fp16")]; tensor var_5445_begin_0 = const()[name = tensor("op_5445_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5445_end_0 = const()[name = tensor("op_5445_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5445_end_mask_0 = const()[name = tensor("op_5445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5445_cast_fp16 = slice_by_index(begin = var_5445_begin_0, end = var_5445_end_0, end_mask = var_5445_end_mask_0, x = var_5154_cast_fp16)[name = tensor("op_5445_cast_fp16")]; tensor var_5452_begin_0 = const()[name = tensor("op_5452_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5452_end_0 = const()[name = tensor("op_5452_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5452_end_mask_0 = const()[name = tensor("op_5452_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5452_cast_fp16 = slice_by_index(begin = var_5452_begin_0, end = var_5452_end_0, end_mask = var_5452_end_mask_0, x = var_5154_cast_fp16)[name = tensor("op_5452_cast_fp16")]; tensor var_5459_begin_0 = const()[name = tensor("op_5459_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5459_end_0 = const()[name = tensor("op_5459_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5459_end_mask_0 = const()[name = tensor("op_5459_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5459_cast_fp16 = slice_by_index(begin = var_5459_begin_0, end = var_5459_end_0, end_mask = var_5459_end_mask_0, x = var_5158_cast_fp16)[name = tensor("op_5459_cast_fp16")]; tensor var_5466_begin_0 = const()[name = tensor("op_5466_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5466_end_0 = const()[name = tensor("op_5466_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5466_end_mask_0 = const()[name = tensor("op_5466_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5466_cast_fp16 = slice_by_index(begin = var_5466_begin_0, end = var_5466_end_0, end_mask = var_5466_end_mask_0, x = var_5158_cast_fp16)[name = tensor("op_5466_cast_fp16")]; tensor var_5473_begin_0 = const()[name = tensor("op_5473_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5473_end_0 = const()[name = tensor("op_5473_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5473_end_mask_0 = const()[name = tensor("op_5473_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5473_cast_fp16 = slice_by_index(begin = var_5473_begin_0, end = var_5473_end_0, end_mask = var_5473_end_mask_0, x = var_5158_cast_fp16)[name = tensor("op_5473_cast_fp16")]; tensor var_5480_begin_0 = const()[name = tensor("op_5480_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5480_end_0 = const()[name = tensor("op_5480_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5480_end_mask_0 = const()[name = tensor("op_5480_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5480_cast_fp16 = slice_by_index(begin = var_5480_begin_0, end = var_5480_end_0, end_mask = var_5480_end_mask_0, x = var_5158_cast_fp16)[name = tensor("op_5480_cast_fp16")]; tensor var_5487_begin_0 = const()[name = tensor("op_5487_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5487_end_0 = const()[name = tensor("op_5487_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5487_end_mask_0 = const()[name = tensor("op_5487_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5487_cast_fp16 = slice_by_index(begin = var_5487_begin_0, end = var_5487_end_0, end_mask = var_5487_end_mask_0, x = var_5162_cast_fp16)[name = tensor("op_5487_cast_fp16")]; tensor var_5494_begin_0 = const()[name = tensor("op_5494_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5494_end_0 = const()[name = tensor("op_5494_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5494_end_mask_0 = const()[name = tensor("op_5494_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5494_cast_fp16 = slice_by_index(begin = var_5494_begin_0, end = var_5494_end_0, end_mask = var_5494_end_mask_0, x = var_5162_cast_fp16)[name = tensor("op_5494_cast_fp16")]; tensor var_5501_begin_0 = const()[name = tensor("op_5501_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5501_end_0 = const()[name = tensor("op_5501_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5501_end_mask_0 = const()[name = tensor("op_5501_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5501_cast_fp16 = slice_by_index(begin = var_5501_begin_0, end = var_5501_end_0, end_mask = var_5501_end_mask_0, x = var_5162_cast_fp16)[name = tensor("op_5501_cast_fp16")]; tensor var_5508_begin_0 = const()[name = tensor("op_5508_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5508_end_0 = const()[name = tensor("op_5508_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5508_end_mask_0 = const()[name = tensor("op_5508_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5508_cast_fp16 = slice_by_index(begin = var_5508_begin_0, end = var_5508_end_0, end_mask = var_5508_end_mask_0, x = var_5162_cast_fp16)[name = tensor("op_5508_cast_fp16")]; tensor var_5515_begin_0 = const()[name = tensor("op_5515_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5515_end_0 = const()[name = tensor("op_5515_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5515_end_mask_0 = const()[name = tensor("op_5515_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5515_cast_fp16 = slice_by_index(begin = var_5515_begin_0, end = var_5515_end_0, end_mask = var_5515_end_mask_0, x = var_5166_cast_fp16)[name = tensor("op_5515_cast_fp16")]; tensor var_5522_begin_0 = const()[name = tensor("op_5522_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5522_end_0 = const()[name = tensor("op_5522_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5522_end_mask_0 = const()[name = tensor("op_5522_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5522_cast_fp16 = slice_by_index(begin = var_5522_begin_0, end = var_5522_end_0, end_mask = var_5522_end_mask_0, x = var_5166_cast_fp16)[name = tensor("op_5522_cast_fp16")]; tensor var_5529_begin_0 = const()[name = tensor("op_5529_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5529_end_0 = const()[name = tensor("op_5529_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5529_end_mask_0 = const()[name = tensor("op_5529_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5529_cast_fp16 = slice_by_index(begin = var_5529_begin_0, end = var_5529_end_0, end_mask = var_5529_end_mask_0, x = var_5166_cast_fp16)[name = tensor("op_5529_cast_fp16")]; tensor var_5536_begin_0 = const()[name = tensor("op_5536_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5536_end_0 = const()[name = tensor("op_5536_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5536_end_mask_0 = const()[name = tensor("op_5536_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5536_cast_fp16 = slice_by_index(begin = var_5536_begin_0, end = var_5536_end_0, end_mask = var_5536_end_mask_0, x = var_5166_cast_fp16)[name = tensor("op_5536_cast_fp16")]; tensor var_5543_begin_0 = const()[name = tensor("op_5543_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5543_end_0 = const()[name = tensor("op_5543_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5543_end_mask_0 = const()[name = tensor("op_5543_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5543_cast_fp16 = slice_by_index(begin = var_5543_begin_0, end = var_5543_end_0, end_mask = var_5543_end_mask_0, x = var_5170_cast_fp16)[name = tensor("op_5543_cast_fp16")]; tensor var_5550_begin_0 = const()[name = tensor("op_5550_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5550_end_0 = const()[name = tensor("op_5550_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5550_end_mask_0 = const()[name = tensor("op_5550_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5550_cast_fp16 = slice_by_index(begin = var_5550_begin_0, end = var_5550_end_0, end_mask = var_5550_end_mask_0, x = var_5170_cast_fp16)[name = tensor("op_5550_cast_fp16")]; tensor var_5557_begin_0 = const()[name = tensor("op_5557_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5557_end_0 = const()[name = tensor("op_5557_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5557_end_mask_0 = const()[name = tensor("op_5557_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5557_cast_fp16 = slice_by_index(begin = var_5557_begin_0, end = var_5557_end_0, end_mask = var_5557_end_mask_0, x = var_5170_cast_fp16)[name = tensor("op_5557_cast_fp16")]; tensor var_5564_begin_0 = const()[name = tensor("op_5564_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5564_end_0 = const()[name = tensor("op_5564_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5564_end_mask_0 = const()[name = tensor("op_5564_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5564_cast_fp16 = slice_by_index(begin = var_5564_begin_0, end = var_5564_end_0, end_mask = var_5564_end_mask_0, x = var_5170_cast_fp16)[name = tensor("op_5564_cast_fp16")]; tensor var_5571_begin_0 = const()[name = tensor("op_5571_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5571_end_0 = const()[name = tensor("op_5571_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5571_end_mask_0 = const()[name = tensor("op_5571_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5571_cast_fp16 = slice_by_index(begin = var_5571_begin_0, end = var_5571_end_0, end_mask = var_5571_end_mask_0, x = var_5174_cast_fp16)[name = tensor("op_5571_cast_fp16")]; tensor var_5578_begin_0 = const()[name = tensor("op_5578_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5578_end_0 = const()[name = tensor("op_5578_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5578_end_mask_0 = const()[name = tensor("op_5578_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5578_cast_fp16 = slice_by_index(begin = var_5578_begin_0, end = var_5578_end_0, end_mask = var_5578_end_mask_0, x = var_5174_cast_fp16)[name = tensor("op_5578_cast_fp16")]; tensor var_5585_begin_0 = const()[name = tensor("op_5585_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5585_end_0 = const()[name = tensor("op_5585_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5585_end_mask_0 = const()[name = tensor("op_5585_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5585_cast_fp16 = slice_by_index(begin = var_5585_begin_0, end = var_5585_end_0, end_mask = var_5585_end_mask_0, x = var_5174_cast_fp16)[name = tensor("op_5585_cast_fp16")]; tensor var_5592_begin_0 = const()[name = tensor("op_5592_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5592_end_0 = const()[name = tensor("op_5592_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5592_end_mask_0 = const()[name = tensor("op_5592_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5592_cast_fp16 = slice_by_index(begin = var_5592_begin_0, end = var_5592_end_0, end_mask = var_5592_end_mask_0, x = var_5174_cast_fp16)[name = tensor("op_5592_cast_fp16")]; tensor var_5599_begin_0 = const()[name = tensor("op_5599_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5599_end_0 = const()[name = tensor("op_5599_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5599_end_mask_0 = const()[name = tensor("op_5599_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5599_cast_fp16 = slice_by_index(begin = var_5599_begin_0, end = var_5599_end_0, end_mask = var_5599_end_mask_0, x = var_5178_cast_fp16)[name = tensor("op_5599_cast_fp16")]; tensor var_5606_begin_0 = const()[name = tensor("op_5606_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5606_end_0 = const()[name = tensor("op_5606_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5606_end_mask_0 = const()[name = tensor("op_5606_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5606_cast_fp16 = slice_by_index(begin = var_5606_begin_0, end = var_5606_end_0, end_mask = var_5606_end_mask_0, x = var_5178_cast_fp16)[name = tensor("op_5606_cast_fp16")]; tensor var_5613_begin_0 = const()[name = tensor("op_5613_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5613_end_0 = const()[name = tensor("op_5613_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5613_end_mask_0 = const()[name = tensor("op_5613_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5613_cast_fp16 = slice_by_index(begin = var_5613_begin_0, end = var_5613_end_0, end_mask = var_5613_end_mask_0, x = var_5178_cast_fp16)[name = tensor("op_5613_cast_fp16")]; tensor var_5620_begin_0 = const()[name = tensor("op_5620_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5620_end_0 = const()[name = tensor("op_5620_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5620_end_mask_0 = const()[name = tensor("op_5620_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5620_cast_fp16 = slice_by_index(begin = var_5620_begin_0, end = var_5620_end_0, end_mask = var_5620_end_mask_0, x = var_5178_cast_fp16)[name = tensor("op_5620_cast_fp16")]; tensor var_5627_begin_0 = const()[name = tensor("op_5627_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5627_end_0 = const()[name = tensor("op_5627_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5627_end_mask_0 = const()[name = tensor("op_5627_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5627_cast_fp16 = slice_by_index(begin = var_5627_begin_0, end = var_5627_end_0, end_mask = var_5627_end_mask_0, x = var_5182_cast_fp16)[name = tensor("op_5627_cast_fp16")]; tensor var_5634_begin_0 = const()[name = tensor("op_5634_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5634_end_0 = const()[name = tensor("op_5634_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5634_end_mask_0 = const()[name = tensor("op_5634_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5634_cast_fp16 = slice_by_index(begin = var_5634_begin_0, end = var_5634_end_0, end_mask = var_5634_end_mask_0, x = var_5182_cast_fp16)[name = tensor("op_5634_cast_fp16")]; tensor var_5641_begin_0 = const()[name = tensor("op_5641_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5641_end_0 = const()[name = tensor("op_5641_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5641_end_mask_0 = const()[name = tensor("op_5641_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5641_cast_fp16 = slice_by_index(begin = var_5641_begin_0, end = var_5641_end_0, end_mask = var_5641_end_mask_0, x = var_5182_cast_fp16)[name = tensor("op_5641_cast_fp16")]; tensor var_5648_begin_0 = const()[name = tensor("op_5648_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5648_end_0 = const()[name = tensor("op_5648_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5648_end_mask_0 = const()[name = tensor("op_5648_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5648_cast_fp16 = slice_by_index(begin = var_5648_begin_0, end = var_5648_end_0, end_mask = var_5648_end_mask_0, x = var_5182_cast_fp16)[name = tensor("op_5648_cast_fp16")]; tensor var_5655_begin_0 = const()[name = tensor("op_5655_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5655_end_0 = const()[name = tensor("op_5655_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5655_end_mask_0 = const()[name = tensor("op_5655_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5655_cast_fp16 = slice_by_index(begin = var_5655_begin_0, end = var_5655_end_0, end_mask = var_5655_end_mask_0, x = var_5186_cast_fp16)[name = tensor("op_5655_cast_fp16")]; tensor var_5662_begin_0 = const()[name = tensor("op_5662_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5662_end_0 = const()[name = tensor("op_5662_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5662_end_mask_0 = const()[name = tensor("op_5662_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5662_cast_fp16 = slice_by_index(begin = var_5662_begin_0, end = var_5662_end_0, end_mask = var_5662_end_mask_0, x = var_5186_cast_fp16)[name = tensor("op_5662_cast_fp16")]; tensor var_5669_begin_0 = const()[name = tensor("op_5669_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5669_end_0 = const()[name = tensor("op_5669_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5669_end_mask_0 = const()[name = tensor("op_5669_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5669_cast_fp16 = slice_by_index(begin = var_5669_begin_0, end = var_5669_end_0, end_mask = var_5669_end_mask_0, x = var_5186_cast_fp16)[name = tensor("op_5669_cast_fp16")]; tensor var_5676_begin_0 = const()[name = tensor("op_5676_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5676_end_0 = const()[name = tensor("op_5676_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5676_end_mask_0 = const()[name = tensor("op_5676_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5676_cast_fp16 = slice_by_index(begin = var_5676_begin_0, end = var_5676_end_0, end_mask = var_5676_end_mask_0, x = var_5186_cast_fp16)[name = tensor("op_5676_cast_fp16")]; tensor var_5683_begin_0 = const()[name = tensor("op_5683_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5683_end_0 = const()[name = tensor("op_5683_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5683_end_mask_0 = const()[name = tensor("op_5683_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5683_cast_fp16 = slice_by_index(begin = var_5683_begin_0, end = var_5683_end_0, end_mask = var_5683_end_mask_0, x = var_5190_cast_fp16)[name = tensor("op_5683_cast_fp16")]; tensor var_5690_begin_0 = const()[name = tensor("op_5690_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5690_end_0 = const()[name = tensor("op_5690_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5690_end_mask_0 = const()[name = tensor("op_5690_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5690_cast_fp16 = slice_by_index(begin = var_5690_begin_0, end = var_5690_end_0, end_mask = var_5690_end_mask_0, x = var_5190_cast_fp16)[name = tensor("op_5690_cast_fp16")]; tensor var_5697_begin_0 = const()[name = tensor("op_5697_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5697_end_0 = const()[name = tensor("op_5697_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5697_end_mask_0 = const()[name = tensor("op_5697_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5697_cast_fp16 = slice_by_index(begin = var_5697_begin_0, end = var_5697_end_0, end_mask = var_5697_end_mask_0, x = var_5190_cast_fp16)[name = tensor("op_5697_cast_fp16")]; tensor var_5704_begin_0 = const()[name = tensor("op_5704_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5704_end_0 = const()[name = tensor("op_5704_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5704_end_mask_0 = const()[name = tensor("op_5704_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5704_cast_fp16 = slice_by_index(begin = var_5704_begin_0, end = var_5704_end_0, end_mask = var_5704_end_mask_0, x = var_5190_cast_fp16)[name = tensor("op_5704_cast_fp16")]; tensor var_5711_begin_0 = const()[name = tensor("op_5711_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5711_end_0 = const()[name = tensor("op_5711_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5711_end_mask_0 = const()[name = tensor("op_5711_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5711_cast_fp16 = slice_by_index(begin = var_5711_begin_0, end = var_5711_end_0, end_mask = var_5711_end_mask_0, x = var_5194_cast_fp16)[name = tensor("op_5711_cast_fp16")]; tensor var_5718_begin_0 = const()[name = tensor("op_5718_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5718_end_0 = const()[name = tensor("op_5718_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5718_end_mask_0 = const()[name = tensor("op_5718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5718_cast_fp16 = slice_by_index(begin = var_5718_begin_0, end = var_5718_end_0, end_mask = var_5718_end_mask_0, x = var_5194_cast_fp16)[name = tensor("op_5718_cast_fp16")]; tensor var_5725_begin_0 = const()[name = tensor("op_5725_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5725_end_0 = const()[name = tensor("op_5725_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5725_end_mask_0 = const()[name = tensor("op_5725_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5725_cast_fp16 = slice_by_index(begin = var_5725_begin_0, end = var_5725_end_0, end_mask = var_5725_end_mask_0, x = var_5194_cast_fp16)[name = tensor("op_5725_cast_fp16")]; tensor var_5732_begin_0 = const()[name = tensor("op_5732_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5732_end_0 = const()[name = tensor("op_5732_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5732_end_mask_0 = const()[name = tensor("op_5732_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5732_cast_fp16 = slice_by_index(begin = var_5732_begin_0, end = var_5732_end_0, end_mask = var_5732_end_mask_0, x = var_5194_cast_fp16)[name = tensor("op_5732_cast_fp16")]; tensor var_5739_begin_0 = const()[name = tensor("op_5739_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5739_end_0 = const()[name = tensor("op_5739_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_5739_end_mask_0 = const()[name = tensor("op_5739_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5739_cast_fp16 = slice_by_index(begin = var_5739_begin_0, end = var_5739_end_0, end_mask = var_5739_end_mask_0, x = var_5198_cast_fp16)[name = tensor("op_5739_cast_fp16")]; tensor var_5746_begin_0 = const()[name = tensor("op_5746_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_5746_end_0 = const()[name = tensor("op_5746_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_5746_end_mask_0 = const()[name = tensor("op_5746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5746_cast_fp16 = slice_by_index(begin = var_5746_begin_0, end = var_5746_end_0, end_mask = var_5746_end_mask_0, x = var_5198_cast_fp16)[name = tensor("op_5746_cast_fp16")]; tensor var_5753_begin_0 = const()[name = tensor("op_5753_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_5753_end_0 = const()[name = tensor("op_5753_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_5753_end_mask_0 = const()[name = tensor("op_5753_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5753_cast_fp16 = slice_by_index(begin = var_5753_begin_0, end = var_5753_end_0, end_mask = var_5753_end_mask_0, x = var_5198_cast_fp16)[name = tensor("op_5753_cast_fp16")]; tensor var_5760_begin_0 = const()[name = tensor("op_5760_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_5760_end_0 = const()[name = tensor("op_5760_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5760_end_mask_0 = const()[name = tensor("op_5760_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5760_cast_fp16 = slice_by_index(begin = var_5760_begin_0, end = var_5760_end_0, end_mask = var_5760_end_mask_0, x = var_5198_cast_fp16)[name = tensor("op_5760_cast_fp16")]; tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_5765_begin_0 = const()[name = tensor("op_5765_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5765_end_0 = const()[name = tensor("op_5765_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_5765_end_mask_0 = const()[name = tensor("op_5765_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = key_7_cast_fp16)[name = tensor("transpose_28")]; tensor var_5765_cast_fp16 = slice_by_index(begin = var_5765_begin_0, end = var_5765_end_0, end_mask = var_5765_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5765_cast_fp16")]; tensor var_5769_begin_0 = const()[name = tensor("op_5769_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_5769_end_0 = const()[name = tensor("op_5769_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_5769_end_mask_0 = const()[name = tensor("op_5769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5769_cast_fp16 = slice_by_index(begin = var_5769_begin_0, end = var_5769_end_0, end_mask = var_5769_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5769_cast_fp16")]; tensor var_5773_begin_0 = const()[name = tensor("op_5773_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_5773_end_0 = const()[name = tensor("op_5773_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_5773_end_mask_0 = const()[name = tensor("op_5773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5773_cast_fp16 = slice_by_index(begin = var_5773_begin_0, end = var_5773_end_0, end_mask = var_5773_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5773_cast_fp16")]; tensor var_5777_begin_0 = const()[name = tensor("op_5777_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_5777_end_0 = const()[name = tensor("op_5777_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_5777_end_mask_0 = const()[name = tensor("op_5777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5777_cast_fp16 = slice_by_index(begin = var_5777_begin_0, end = var_5777_end_0, end_mask = var_5777_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5777_cast_fp16")]; tensor var_5781_begin_0 = const()[name = tensor("op_5781_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_5781_end_0 = const()[name = tensor("op_5781_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_5781_end_mask_0 = const()[name = tensor("op_5781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5781_cast_fp16 = slice_by_index(begin = var_5781_begin_0, end = var_5781_end_0, end_mask = var_5781_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5781_cast_fp16")]; tensor var_5785_begin_0 = const()[name = tensor("op_5785_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_5785_end_0 = const()[name = tensor("op_5785_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_5785_end_mask_0 = const()[name = tensor("op_5785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5785_cast_fp16 = slice_by_index(begin = var_5785_begin_0, end = var_5785_end_0, end_mask = var_5785_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5785_cast_fp16")]; tensor var_5789_begin_0 = const()[name = tensor("op_5789_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_5789_end_0 = const()[name = tensor("op_5789_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_5789_end_mask_0 = const()[name = tensor("op_5789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5789_cast_fp16 = slice_by_index(begin = var_5789_begin_0, end = var_5789_end_0, end_mask = var_5789_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5789_cast_fp16")]; tensor var_5793_begin_0 = const()[name = tensor("op_5793_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_5793_end_0 = const()[name = tensor("op_5793_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_5793_end_mask_0 = const()[name = tensor("op_5793_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5793_cast_fp16 = slice_by_index(begin = var_5793_begin_0, end = var_5793_end_0, end_mask = var_5793_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5793_cast_fp16")]; tensor var_5797_begin_0 = const()[name = tensor("op_5797_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_5797_end_0 = const()[name = tensor("op_5797_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_5797_end_mask_0 = const()[name = tensor("op_5797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5797_cast_fp16 = slice_by_index(begin = var_5797_begin_0, end = var_5797_end_0, end_mask = var_5797_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5797_cast_fp16")]; tensor var_5801_begin_0 = const()[name = tensor("op_5801_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_5801_end_0 = const()[name = tensor("op_5801_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_5801_end_mask_0 = const()[name = tensor("op_5801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5801_cast_fp16 = slice_by_index(begin = var_5801_begin_0, end = var_5801_end_0, end_mask = var_5801_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5801_cast_fp16")]; tensor var_5805_begin_0 = const()[name = tensor("op_5805_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_5805_end_0 = const()[name = tensor("op_5805_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_5805_end_mask_0 = const()[name = tensor("op_5805_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5805_cast_fp16 = slice_by_index(begin = var_5805_begin_0, end = var_5805_end_0, end_mask = var_5805_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5805_cast_fp16")]; tensor var_5809_begin_0 = const()[name = tensor("op_5809_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_5809_end_0 = const()[name = tensor("op_5809_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_5809_end_mask_0 = const()[name = tensor("op_5809_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5809_cast_fp16 = slice_by_index(begin = var_5809_begin_0, end = var_5809_end_0, end_mask = var_5809_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5809_cast_fp16")]; tensor var_5813_begin_0 = const()[name = tensor("op_5813_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_5813_end_0 = const()[name = tensor("op_5813_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_5813_end_mask_0 = const()[name = tensor("op_5813_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5813_cast_fp16 = slice_by_index(begin = var_5813_begin_0, end = var_5813_end_0, end_mask = var_5813_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5813_cast_fp16")]; tensor var_5817_begin_0 = const()[name = tensor("op_5817_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_5817_end_0 = const()[name = tensor("op_5817_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_5817_end_mask_0 = const()[name = tensor("op_5817_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5817_cast_fp16 = slice_by_index(begin = var_5817_begin_0, end = var_5817_end_0, end_mask = var_5817_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5817_cast_fp16")]; tensor var_5821_begin_0 = const()[name = tensor("op_5821_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_5821_end_0 = const()[name = tensor("op_5821_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_5821_end_mask_0 = const()[name = tensor("op_5821_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5821_cast_fp16 = slice_by_index(begin = var_5821_begin_0, end = var_5821_end_0, end_mask = var_5821_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5821_cast_fp16")]; tensor var_5825_begin_0 = const()[name = tensor("op_5825_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_5825_end_0 = const()[name = tensor("op_5825_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_5825_end_mask_0 = const()[name = tensor("op_5825_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5825_cast_fp16 = slice_by_index(begin = var_5825_begin_0, end = var_5825_end_0, end_mask = var_5825_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5825_cast_fp16")]; tensor var_5829_begin_0 = const()[name = tensor("op_5829_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_5829_end_0 = const()[name = tensor("op_5829_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_5829_end_mask_0 = const()[name = tensor("op_5829_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5829_cast_fp16 = slice_by_index(begin = var_5829_begin_0, end = var_5829_end_0, end_mask = var_5829_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5829_cast_fp16")]; tensor var_5833_begin_0 = const()[name = tensor("op_5833_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_5833_end_0 = const()[name = tensor("op_5833_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_5833_end_mask_0 = const()[name = tensor("op_5833_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5833_cast_fp16 = slice_by_index(begin = var_5833_begin_0, end = var_5833_end_0, end_mask = var_5833_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5833_cast_fp16")]; tensor var_5837_begin_0 = const()[name = tensor("op_5837_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_5837_end_0 = const()[name = tensor("op_5837_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_5837_end_mask_0 = const()[name = tensor("op_5837_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5837_cast_fp16 = slice_by_index(begin = var_5837_begin_0, end = var_5837_end_0, end_mask = var_5837_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5837_cast_fp16")]; tensor var_5841_begin_0 = const()[name = tensor("op_5841_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_5841_end_0 = const()[name = tensor("op_5841_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_5841_end_mask_0 = const()[name = tensor("op_5841_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_5841_cast_fp16 = slice_by_index(begin = var_5841_begin_0, end = var_5841_end_0, end_mask = var_5841_end_mask_0, x = k_7_cast_fp16)[name = tensor("op_5841_cast_fp16")]; tensor var_5843_begin_0 = const()[name = tensor("op_5843_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5843_end_0 = const()[name = tensor("op_5843_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_5843_end_mask_0 = const()[name = tensor("op_5843_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5843_cast_fp16 = slice_by_index(begin = var_5843_begin_0, end = var_5843_end_0, end_mask = var_5843_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5843_cast_fp16")]; tensor var_5847_begin_0 = const()[name = tensor("op_5847_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_5847_end_0 = const()[name = tensor("op_5847_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_5847_end_mask_0 = const()[name = tensor("op_5847_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5847_cast_fp16 = slice_by_index(begin = var_5847_begin_0, end = var_5847_end_0, end_mask = var_5847_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5847_cast_fp16")]; tensor var_5851_begin_0 = const()[name = tensor("op_5851_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_5851_end_0 = const()[name = tensor("op_5851_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_5851_end_mask_0 = const()[name = tensor("op_5851_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5851_cast_fp16 = slice_by_index(begin = var_5851_begin_0, end = var_5851_end_0, end_mask = var_5851_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5851_cast_fp16")]; tensor var_5855_begin_0 = const()[name = tensor("op_5855_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_5855_end_0 = const()[name = tensor("op_5855_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_5855_end_mask_0 = const()[name = tensor("op_5855_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5855_cast_fp16 = slice_by_index(begin = var_5855_begin_0, end = var_5855_end_0, end_mask = var_5855_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5855_cast_fp16")]; tensor var_5859_begin_0 = const()[name = tensor("op_5859_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_5859_end_0 = const()[name = tensor("op_5859_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_5859_end_mask_0 = const()[name = tensor("op_5859_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5859_cast_fp16 = slice_by_index(begin = var_5859_begin_0, end = var_5859_end_0, end_mask = var_5859_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5859_cast_fp16")]; tensor var_5863_begin_0 = const()[name = tensor("op_5863_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_5863_end_0 = const()[name = tensor("op_5863_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_5863_end_mask_0 = const()[name = tensor("op_5863_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5863_cast_fp16 = slice_by_index(begin = var_5863_begin_0, end = var_5863_end_0, end_mask = var_5863_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5863_cast_fp16")]; tensor var_5867_begin_0 = const()[name = tensor("op_5867_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_5867_end_0 = const()[name = tensor("op_5867_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_5867_end_mask_0 = const()[name = tensor("op_5867_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5867_cast_fp16 = slice_by_index(begin = var_5867_begin_0, end = var_5867_end_0, end_mask = var_5867_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5867_cast_fp16")]; tensor var_5871_begin_0 = const()[name = tensor("op_5871_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_5871_end_0 = const()[name = tensor("op_5871_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_5871_end_mask_0 = const()[name = tensor("op_5871_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5871_cast_fp16 = slice_by_index(begin = var_5871_begin_0, end = var_5871_end_0, end_mask = var_5871_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5871_cast_fp16")]; tensor var_5875_begin_0 = const()[name = tensor("op_5875_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_5875_end_0 = const()[name = tensor("op_5875_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_5875_end_mask_0 = const()[name = tensor("op_5875_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5875_cast_fp16 = slice_by_index(begin = var_5875_begin_0, end = var_5875_end_0, end_mask = var_5875_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5875_cast_fp16")]; tensor var_5879_begin_0 = const()[name = tensor("op_5879_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_5879_end_0 = const()[name = tensor("op_5879_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_5879_end_mask_0 = const()[name = tensor("op_5879_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5879_cast_fp16 = slice_by_index(begin = var_5879_begin_0, end = var_5879_end_0, end_mask = var_5879_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5879_cast_fp16")]; tensor var_5883_begin_0 = const()[name = tensor("op_5883_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_5883_end_0 = const()[name = tensor("op_5883_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_5883_end_mask_0 = const()[name = tensor("op_5883_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5883_cast_fp16 = slice_by_index(begin = var_5883_begin_0, end = var_5883_end_0, end_mask = var_5883_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5883_cast_fp16")]; tensor var_5887_begin_0 = const()[name = tensor("op_5887_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_5887_end_0 = const()[name = tensor("op_5887_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_5887_end_mask_0 = const()[name = tensor("op_5887_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5887_cast_fp16 = slice_by_index(begin = var_5887_begin_0, end = var_5887_end_0, end_mask = var_5887_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5887_cast_fp16")]; tensor var_5891_begin_0 = const()[name = tensor("op_5891_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_5891_end_0 = const()[name = tensor("op_5891_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_5891_end_mask_0 = const()[name = tensor("op_5891_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5891_cast_fp16 = slice_by_index(begin = var_5891_begin_0, end = var_5891_end_0, end_mask = var_5891_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5891_cast_fp16")]; tensor var_5895_begin_0 = const()[name = tensor("op_5895_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_5895_end_0 = const()[name = tensor("op_5895_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_5895_end_mask_0 = const()[name = tensor("op_5895_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5895_cast_fp16 = slice_by_index(begin = var_5895_begin_0, end = var_5895_end_0, end_mask = var_5895_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5895_cast_fp16")]; tensor var_5899_begin_0 = const()[name = tensor("op_5899_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_5899_end_0 = const()[name = tensor("op_5899_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_5899_end_mask_0 = const()[name = tensor("op_5899_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5899_cast_fp16 = slice_by_index(begin = var_5899_begin_0, end = var_5899_end_0, end_mask = var_5899_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5899_cast_fp16")]; tensor var_5903_begin_0 = const()[name = tensor("op_5903_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_5903_end_0 = const()[name = tensor("op_5903_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_5903_end_mask_0 = const()[name = tensor("op_5903_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5903_cast_fp16 = slice_by_index(begin = var_5903_begin_0, end = var_5903_end_0, end_mask = var_5903_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5903_cast_fp16")]; tensor var_5907_begin_0 = const()[name = tensor("op_5907_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_5907_end_0 = const()[name = tensor("op_5907_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_5907_end_mask_0 = const()[name = tensor("op_5907_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5907_cast_fp16 = slice_by_index(begin = var_5907_begin_0, end = var_5907_end_0, end_mask = var_5907_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5907_cast_fp16")]; tensor var_5911_begin_0 = const()[name = tensor("op_5911_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_5911_end_0 = const()[name = tensor("op_5911_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_5911_end_mask_0 = const()[name = tensor("op_5911_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5911_cast_fp16 = slice_by_index(begin = var_5911_begin_0, end = var_5911_end_0, end_mask = var_5911_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5911_cast_fp16")]; tensor var_5915_begin_0 = const()[name = tensor("op_5915_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_5915_end_0 = const()[name = tensor("op_5915_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_5915_end_mask_0 = const()[name = tensor("op_5915_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5915_cast_fp16 = slice_by_index(begin = var_5915_begin_0, end = var_5915_end_0, end_mask = var_5915_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5915_cast_fp16")]; tensor var_5919_begin_0 = const()[name = tensor("op_5919_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_5919_end_0 = const()[name = tensor("op_5919_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_5919_end_mask_0 = const()[name = tensor("op_5919_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_5919_cast_fp16 = slice_by_index(begin = var_5919_begin_0, end = var_5919_end_0, end_mask = var_5919_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5919_cast_fp16")]; tensor _SplitHeadsQ__mh_w_481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_481_equation_0, values = (var_5765_cast_fp16, var_5207_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_483_equation_0, values = (var_5765_cast_fp16, var_5214_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_485_equation_0, values = (var_5765_cast_fp16, var_5221_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_487_equation_0, values = (var_5765_cast_fp16, var_5228_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_489_equation_0, values = (var_5769_cast_fp16, var_5235_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_491_equation_0, values = (var_5769_cast_fp16, var_5242_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_493_equation_0, values = (var_5769_cast_fp16, var_5249_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_495_equation_0, values = (var_5769_cast_fp16, var_5256_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_497_equation_0, values = (var_5773_cast_fp16, var_5263_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_499_equation_0, values = (var_5773_cast_fp16, var_5270_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_501_equation_0, values = (var_5773_cast_fp16, var_5277_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_503_equation_0, values = (var_5773_cast_fp16, var_5284_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_505_equation_0, values = (var_5777_cast_fp16, var_5291_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_507_equation_0, values = (var_5777_cast_fp16, var_5298_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_509_equation_0, values = (var_5777_cast_fp16, var_5305_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_511_equation_0, values = (var_5777_cast_fp16, var_5312_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_513_equation_0, values = (var_5781_cast_fp16, var_5319_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_515_equation_0, values = (var_5781_cast_fp16, var_5326_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_517_equation_0, values = (var_5781_cast_fp16, var_5333_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_519_equation_0, values = (var_5781_cast_fp16, var_5340_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_519_cast_fp16")]; tensor _SplitHeadsQ__mh_w_521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_521_equation_0, values = (var_5785_cast_fp16, var_5347_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_523_equation_0, values = (var_5785_cast_fp16, var_5354_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_525_equation_0, values = (var_5785_cast_fp16, var_5361_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_527_equation_0, values = (var_5785_cast_fp16, var_5368_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_529_equation_0, values = (var_5789_cast_fp16, var_5375_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_531_equation_0, values = (var_5789_cast_fp16, var_5382_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_533_equation_0, values = (var_5789_cast_fp16, var_5389_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_535_equation_0, values = (var_5789_cast_fp16, var_5396_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_537_equation_0, values = (var_5793_cast_fp16, var_5403_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_539_equation_0, values = (var_5793_cast_fp16, var_5410_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_541_equation_0, values = (var_5793_cast_fp16, var_5417_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_543_equation_0, values = (var_5793_cast_fp16, var_5424_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_545_equation_0, values = (var_5797_cast_fp16, var_5431_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_547_equation_0, values = (var_5797_cast_fp16, var_5438_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_549_equation_0, values = (var_5797_cast_fp16, var_5445_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_551_equation_0, values = (var_5797_cast_fp16, var_5452_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_553_equation_0, values = (var_5801_cast_fp16, var_5459_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_555_equation_0, values = (var_5801_cast_fp16, var_5466_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_557_equation_0, values = (var_5801_cast_fp16, var_5473_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_559_equation_0, values = (var_5801_cast_fp16, var_5480_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_559_cast_fp16")]; tensor _SplitHeadsQ__mh_w_561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_561_equation_0, values = (var_5805_cast_fp16, var_5487_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_563_equation_0, values = (var_5805_cast_fp16, var_5494_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_565_equation_0, values = (var_5805_cast_fp16, var_5501_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_567_equation_0, values = (var_5805_cast_fp16, var_5508_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_569_equation_0, values = (var_5809_cast_fp16, var_5515_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_571_equation_0, values = (var_5809_cast_fp16, var_5522_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_573_equation_0, values = (var_5809_cast_fp16, var_5529_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_575_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_575_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_575_equation_0, values = (var_5809_cast_fp16, var_5536_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_575_cast_fp16")]; tensor _SplitHeadsQ__mh_w_577_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_577_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_577_equation_0, values = (var_5813_cast_fp16, var_5543_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_577_cast_fp16")]; tensor _SplitHeadsQ__mh_w_579_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_579_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_579_equation_0, values = (var_5813_cast_fp16, var_5550_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_579_cast_fp16")]; tensor _SplitHeadsQ__mh_w_581_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_581_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_581_equation_0, values = (var_5813_cast_fp16, var_5557_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_581_cast_fp16")]; tensor _SplitHeadsQ__mh_w_583_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_583_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_583_equation_0, values = (var_5813_cast_fp16, var_5564_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_583_cast_fp16")]; tensor _SplitHeadsQ__mh_w_585_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_585_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_585_equation_0, values = (var_5817_cast_fp16, var_5571_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_585_cast_fp16")]; tensor _SplitHeadsQ__mh_w_587_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_587_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_587_equation_0, values = (var_5817_cast_fp16, var_5578_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_587_cast_fp16")]; tensor _SplitHeadsQ__mh_w_589_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_589_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_589_equation_0, values = (var_5817_cast_fp16, var_5585_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_589_cast_fp16")]; tensor _SplitHeadsQ__mh_w_591_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_591_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_591_equation_0, values = (var_5817_cast_fp16, var_5592_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_591_cast_fp16")]; tensor _SplitHeadsQ__mh_w_593_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_593_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_593_equation_0, values = (var_5821_cast_fp16, var_5599_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_593_cast_fp16")]; tensor _SplitHeadsQ__mh_w_595_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_595_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_595_equation_0, values = (var_5821_cast_fp16, var_5606_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_595_cast_fp16")]; tensor _SplitHeadsQ__mh_w_597_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_597_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_597_equation_0, values = (var_5821_cast_fp16, var_5613_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_597_cast_fp16")]; tensor _SplitHeadsQ__mh_w_599_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_599_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_599_equation_0, values = (var_5821_cast_fp16, var_5620_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_599_cast_fp16")]; tensor _SplitHeadsQ__mh_w_601_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_601_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_601_equation_0, values = (var_5825_cast_fp16, var_5627_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_601_cast_fp16")]; tensor _SplitHeadsQ__mh_w_603_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_603_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_603_equation_0, values = (var_5825_cast_fp16, var_5634_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_603_cast_fp16")]; tensor _SplitHeadsQ__mh_w_605_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_605_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_605_equation_0, values = (var_5825_cast_fp16, var_5641_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_605_cast_fp16")]; tensor _SplitHeadsQ__mh_w_607_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_607_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_607_equation_0, values = (var_5825_cast_fp16, var_5648_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_607_cast_fp16")]; tensor _SplitHeadsQ__mh_w_609_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_609_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_609_equation_0, values = (var_5829_cast_fp16, var_5655_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_609_cast_fp16")]; tensor _SplitHeadsQ__mh_w_611_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_611_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_611_equation_0, values = (var_5829_cast_fp16, var_5662_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_611_cast_fp16")]; tensor _SplitHeadsQ__mh_w_613_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_613_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_613_equation_0, values = (var_5829_cast_fp16, var_5669_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_613_cast_fp16")]; tensor _SplitHeadsQ__mh_w_615_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_615_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_615_equation_0, values = (var_5829_cast_fp16, var_5676_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_615_cast_fp16")]; tensor _SplitHeadsQ__mh_w_617_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_617_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_617_equation_0, values = (var_5833_cast_fp16, var_5683_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_617_cast_fp16")]; tensor _SplitHeadsQ__mh_w_619_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_619_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_619_equation_0, values = (var_5833_cast_fp16, var_5690_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_619_cast_fp16")]; tensor _SplitHeadsQ__mh_w_621_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_621_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_621_equation_0, values = (var_5833_cast_fp16, var_5697_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_621_cast_fp16")]; tensor _SplitHeadsQ__mh_w_623_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_623_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_623_equation_0, values = (var_5833_cast_fp16, var_5704_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_623_cast_fp16")]; tensor _SplitHeadsQ__mh_w_625_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_625_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_625_equation_0, values = (var_5837_cast_fp16, var_5711_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_625_cast_fp16")]; tensor _SplitHeadsQ__mh_w_627_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_627_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_627_equation_0, values = (var_5837_cast_fp16, var_5718_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_627_cast_fp16")]; tensor _SplitHeadsQ__mh_w_629_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_629_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_629_equation_0, values = (var_5837_cast_fp16, var_5725_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_629_cast_fp16")]; tensor _SplitHeadsQ__mh_w_631_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_631_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_631_equation_0, values = (var_5837_cast_fp16, var_5732_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_631_cast_fp16")]; tensor _SplitHeadsQ__mh_w_633_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_633_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_633_equation_0, values = (var_5841_cast_fp16, var_5739_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_633_cast_fp16")]; tensor _SplitHeadsQ__mh_w_635_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_635_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_635_equation_0, values = (var_5841_cast_fp16, var_5746_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_635_cast_fp16")]; tensor _SplitHeadsQ__mh_w_637_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_637_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_637_equation_0, values = (var_5841_cast_fp16, var_5753_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_637_cast_fp16")]; tensor _SplitHeadsQ__mh_w_639_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_639_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_639_equation_0, values = (var_5841_cast_fp16, var_5760_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_639_cast_fp16")]; tensor var_6082_to_fp16 = const()[name = tensor("op_6082_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_481_cast_fp16, y = var_6082_to_fp16)[name = tensor("aw_chunk_481_cast_fp16")]; tensor var_6084_to_fp16 = const()[name = tensor("op_6084_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_483_cast_fp16, y = var_6084_to_fp16)[name = tensor("aw_chunk_483_cast_fp16")]; tensor var_6086_to_fp16 = const()[name = tensor("op_6086_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_485_cast_fp16, y = var_6086_to_fp16)[name = tensor("aw_chunk_485_cast_fp16")]; tensor var_6088_to_fp16 = const()[name = tensor("op_6088_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_487_cast_fp16, y = var_6088_to_fp16)[name = tensor("aw_chunk_487_cast_fp16")]; tensor var_6090_to_fp16 = const()[name = tensor("op_6090_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_489_cast_fp16, y = var_6090_to_fp16)[name = tensor("aw_chunk_489_cast_fp16")]; tensor var_6092_to_fp16 = const()[name = tensor("op_6092_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_491_cast_fp16, y = var_6092_to_fp16)[name = tensor("aw_chunk_491_cast_fp16")]; tensor var_6094_to_fp16 = const()[name = tensor("op_6094_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_493_cast_fp16, y = var_6094_to_fp16)[name = tensor("aw_chunk_493_cast_fp16")]; tensor var_6096_to_fp16 = const()[name = tensor("op_6096_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_495_cast_fp16, y = var_6096_to_fp16)[name = tensor("aw_chunk_495_cast_fp16")]; tensor var_6098_to_fp16 = const()[name = tensor("op_6098_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_497_cast_fp16, y = var_6098_to_fp16)[name = tensor("aw_chunk_497_cast_fp16")]; tensor var_6100_to_fp16 = const()[name = tensor("op_6100_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_499_cast_fp16, y = var_6100_to_fp16)[name = tensor("aw_chunk_499_cast_fp16")]; tensor var_6102_to_fp16 = const()[name = tensor("op_6102_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_501_cast_fp16, y = var_6102_to_fp16)[name = tensor("aw_chunk_501_cast_fp16")]; tensor var_6104_to_fp16 = const()[name = tensor("op_6104_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_503_cast_fp16, y = var_6104_to_fp16)[name = tensor("aw_chunk_503_cast_fp16")]; tensor var_6106_to_fp16 = const()[name = tensor("op_6106_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_505_cast_fp16, y = var_6106_to_fp16)[name = tensor("aw_chunk_505_cast_fp16")]; tensor var_6108_to_fp16 = const()[name = tensor("op_6108_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_507_cast_fp16, y = var_6108_to_fp16)[name = tensor("aw_chunk_507_cast_fp16")]; tensor var_6110_to_fp16 = const()[name = tensor("op_6110_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_509_cast_fp16, y = var_6110_to_fp16)[name = tensor("aw_chunk_509_cast_fp16")]; tensor var_6112_to_fp16 = const()[name = tensor("op_6112_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_511_cast_fp16, y = var_6112_to_fp16)[name = tensor("aw_chunk_511_cast_fp16")]; tensor var_6114_to_fp16 = const()[name = tensor("op_6114_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_513_cast_fp16, y = var_6114_to_fp16)[name = tensor("aw_chunk_513_cast_fp16")]; tensor var_6116_to_fp16 = const()[name = tensor("op_6116_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_515_cast_fp16, y = var_6116_to_fp16)[name = tensor("aw_chunk_515_cast_fp16")]; tensor var_6118_to_fp16 = const()[name = tensor("op_6118_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_517_cast_fp16, y = var_6118_to_fp16)[name = tensor("aw_chunk_517_cast_fp16")]; tensor var_6120_to_fp16 = const()[name = tensor("op_6120_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_519_cast_fp16, y = var_6120_to_fp16)[name = tensor("aw_chunk_519_cast_fp16")]; tensor var_6122_to_fp16 = const()[name = tensor("op_6122_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_521_cast_fp16, y = var_6122_to_fp16)[name = tensor("aw_chunk_521_cast_fp16")]; tensor var_6124_to_fp16 = const()[name = tensor("op_6124_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_523_cast_fp16, y = var_6124_to_fp16)[name = tensor("aw_chunk_523_cast_fp16")]; tensor var_6126_to_fp16 = const()[name = tensor("op_6126_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_525_cast_fp16, y = var_6126_to_fp16)[name = tensor("aw_chunk_525_cast_fp16")]; tensor var_6128_to_fp16 = const()[name = tensor("op_6128_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_527_cast_fp16, y = var_6128_to_fp16)[name = tensor("aw_chunk_527_cast_fp16")]; tensor var_6130_to_fp16 = const()[name = tensor("op_6130_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_529_cast_fp16, y = var_6130_to_fp16)[name = tensor("aw_chunk_529_cast_fp16")]; tensor var_6132_to_fp16 = const()[name = tensor("op_6132_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_531_cast_fp16, y = var_6132_to_fp16)[name = tensor("aw_chunk_531_cast_fp16")]; tensor var_6134_to_fp16 = const()[name = tensor("op_6134_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_533_cast_fp16, y = var_6134_to_fp16)[name = tensor("aw_chunk_533_cast_fp16")]; tensor var_6136_to_fp16 = const()[name = tensor("op_6136_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_535_cast_fp16, y = var_6136_to_fp16)[name = tensor("aw_chunk_535_cast_fp16")]; tensor var_6138_to_fp16 = const()[name = tensor("op_6138_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_537_cast_fp16, y = var_6138_to_fp16)[name = tensor("aw_chunk_537_cast_fp16")]; tensor var_6140_to_fp16 = const()[name = tensor("op_6140_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_539_cast_fp16, y = var_6140_to_fp16)[name = tensor("aw_chunk_539_cast_fp16")]; tensor var_6142_to_fp16 = const()[name = tensor("op_6142_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_541_cast_fp16, y = var_6142_to_fp16)[name = tensor("aw_chunk_541_cast_fp16")]; tensor var_6144_to_fp16 = const()[name = tensor("op_6144_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_543_cast_fp16, y = var_6144_to_fp16)[name = tensor("aw_chunk_543_cast_fp16")]; tensor var_6146_to_fp16 = const()[name = tensor("op_6146_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_545_cast_fp16, y = var_6146_to_fp16)[name = tensor("aw_chunk_545_cast_fp16")]; tensor var_6148_to_fp16 = const()[name = tensor("op_6148_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_547_cast_fp16, y = var_6148_to_fp16)[name = tensor("aw_chunk_547_cast_fp16")]; tensor var_6150_to_fp16 = const()[name = tensor("op_6150_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_549_cast_fp16, y = var_6150_to_fp16)[name = tensor("aw_chunk_549_cast_fp16")]; tensor var_6152_to_fp16 = const()[name = tensor("op_6152_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_551_cast_fp16, y = var_6152_to_fp16)[name = tensor("aw_chunk_551_cast_fp16")]; tensor var_6154_to_fp16 = const()[name = tensor("op_6154_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_553_cast_fp16, y = var_6154_to_fp16)[name = tensor("aw_chunk_553_cast_fp16")]; tensor var_6156_to_fp16 = const()[name = tensor("op_6156_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_555_cast_fp16, y = var_6156_to_fp16)[name = tensor("aw_chunk_555_cast_fp16")]; tensor var_6158_to_fp16 = const()[name = tensor("op_6158_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_557_cast_fp16, y = var_6158_to_fp16)[name = tensor("aw_chunk_557_cast_fp16")]; tensor var_6160_to_fp16 = const()[name = tensor("op_6160_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_559_cast_fp16, y = var_6160_to_fp16)[name = tensor("aw_chunk_559_cast_fp16")]; tensor var_6162_to_fp16 = const()[name = tensor("op_6162_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_561_cast_fp16, y = var_6162_to_fp16)[name = tensor("aw_chunk_561_cast_fp16")]; tensor var_6164_to_fp16 = const()[name = tensor("op_6164_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_563_cast_fp16, y = var_6164_to_fp16)[name = tensor("aw_chunk_563_cast_fp16")]; tensor var_6166_to_fp16 = const()[name = tensor("op_6166_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_565_cast_fp16, y = var_6166_to_fp16)[name = tensor("aw_chunk_565_cast_fp16")]; tensor var_6168_to_fp16 = const()[name = tensor("op_6168_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_567_cast_fp16, y = var_6168_to_fp16)[name = tensor("aw_chunk_567_cast_fp16")]; tensor var_6170_to_fp16 = const()[name = tensor("op_6170_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_569_cast_fp16, y = var_6170_to_fp16)[name = tensor("aw_chunk_569_cast_fp16")]; tensor var_6172_to_fp16 = const()[name = tensor("op_6172_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_571_cast_fp16, y = var_6172_to_fp16)[name = tensor("aw_chunk_571_cast_fp16")]; tensor var_6174_to_fp16 = const()[name = tensor("op_6174_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_573_cast_fp16, y = var_6174_to_fp16)[name = tensor("aw_chunk_573_cast_fp16")]; tensor var_6176_to_fp16 = const()[name = tensor("op_6176_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_575_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_575_cast_fp16, y = var_6176_to_fp16)[name = tensor("aw_chunk_575_cast_fp16")]; tensor var_6178_to_fp16 = const()[name = tensor("op_6178_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_577_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_577_cast_fp16, y = var_6178_to_fp16)[name = tensor("aw_chunk_577_cast_fp16")]; tensor var_6180_to_fp16 = const()[name = tensor("op_6180_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_579_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_579_cast_fp16, y = var_6180_to_fp16)[name = tensor("aw_chunk_579_cast_fp16")]; tensor var_6182_to_fp16 = const()[name = tensor("op_6182_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_581_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_581_cast_fp16, y = var_6182_to_fp16)[name = tensor("aw_chunk_581_cast_fp16")]; tensor var_6184_to_fp16 = const()[name = tensor("op_6184_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_583_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_583_cast_fp16, y = var_6184_to_fp16)[name = tensor("aw_chunk_583_cast_fp16")]; tensor var_6186_to_fp16 = const()[name = tensor("op_6186_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_585_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_585_cast_fp16, y = var_6186_to_fp16)[name = tensor("aw_chunk_585_cast_fp16")]; tensor var_6188_to_fp16 = const()[name = tensor("op_6188_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_587_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_587_cast_fp16, y = var_6188_to_fp16)[name = tensor("aw_chunk_587_cast_fp16")]; tensor var_6190_to_fp16 = const()[name = tensor("op_6190_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_589_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_589_cast_fp16, y = var_6190_to_fp16)[name = tensor("aw_chunk_589_cast_fp16")]; tensor var_6192_to_fp16 = const()[name = tensor("op_6192_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_591_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_591_cast_fp16, y = var_6192_to_fp16)[name = tensor("aw_chunk_591_cast_fp16")]; tensor var_6194_to_fp16 = const()[name = tensor("op_6194_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_593_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_593_cast_fp16, y = var_6194_to_fp16)[name = tensor("aw_chunk_593_cast_fp16")]; tensor var_6196_to_fp16 = const()[name = tensor("op_6196_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_595_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_595_cast_fp16, y = var_6196_to_fp16)[name = tensor("aw_chunk_595_cast_fp16")]; tensor var_6198_to_fp16 = const()[name = tensor("op_6198_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_597_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_597_cast_fp16, y = var_6198_to_fp16)[name = tensor("aw_chunk_597_cast_fp16")]; tensor var_6200_to_fp16 = const()[name = tensor("op_6200_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_599_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_599_cast_fp16, y = var_6200_to_fp16)[name = tensor("aw_chunk_599_cast_fp16")]; tensor var_6202_to_fp16 = const()[name = tensor("op_6202_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_601_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_601_cast_fp16, y = var_6202_to_fp16)[name = tensor("aw_chunk_601_cast_fp16")]; tensor var_6204_to_fp16 = const()[name = tensor("op_6204_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_603_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_603_cast_fp16, y = var_6204_to_fp16)[name = tensor("aw_chunk_603_cast_fp16")]; tensor var_6206_to_fp16 = const()[name = tensor("op_6206_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_605_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_605_cast_fp16, y = var_6206_to_fp16)[name = tensor("aw_chunk_605_cast_fp16")]; tensor var_6208_to_fp16 = const()[name = tensor("op_6208_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_607_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_607_cast_fp16, y = var_6208_to_fp16)[name = tensor("aw_chunk_607_cast_fp16")]; tensor var_6210_to_fp16 = const()[name = tensor("op_6210_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_609_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_609_cast_fp16, y = var_6210_to_fp16)[name = tensor("aw_chunk_609_cast_fp16")]; tensor var_6212_to_fp16 = const()[name = tensor("op_6212_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_611_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_611_cast_fp16, y = var_6212_to_fp16)[name = tensor("aw_chunk_611_cast_fp16")]; tensor var_6214_to_fp16 = const()[name = tensor("op_6214_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_613_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_613_cast_fp16, y = var_6214_to_fp16)[name = tensor("aw_chunk_613_cast_fp16")]; tensor var_6216_to_fp16 = const()[name = tensor("op_6216_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_615_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_615_cast_fp16, y = var_6216_to_fp16)[name = tensor("aw_chunk_615_cast_fp16")]; tensor var_6218_to_fp16 = const()[name = tensor("op_6218_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_617_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_617_cast_fp16, y = var_6218_to_fp16)[name = tensor("aw_chunk_617_cast_fp16")]; tensor var_6220_to_fp16 = const()[name = tensor("op_6220_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_619_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_619_cast_fp16, y = var_6220_to_fp16)[name = tensor("aw_chunk_619_cast_fp16")]; tensor var_6222_to_fp16 = const()[name = tensor("op_6222_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_621_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_621_cast_fp16, y = var_6222_to_fp16)[name = tensor("aw_chunk_621_cast_fp16")]; tensor var_6224_to_fp16 = const()[name = tensor("op_6224_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_623_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_623_cast_fp16, y = var_6224_to_fp16)[name = tensor("aw_chunk_623_cast_fp16")]; tensor var_6226_to_fp16 = const()[name = tensor("op_6226_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_625_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_625_cast_fp16, y = var_6226_to_fp16)[name = tensor("aw_chunk_625_cast_fp16")]; tensor var_6228_to_fp16 = const()[name = tensor("op_6228_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_627_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_627_cast_fp16, y = var_6228_to_fp16)[name = tensor("aw_chunk_627_cast_fp16")]; tensor var_6230_to_fp16 = const()[name = tensor("op_6230_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_629_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_629_cast_fp16, y = var_6230_to_fp16)[name = tensor("aw_chunk_629_cast_fp16")]; tensor var_6232_to_fp16 = const()[name = tensor("op_6232_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_631_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_631_cast_fp16, y = var_6232_to_fp16)[name = tensor("aw_chunk_631_cast_fp16")]; tensor var_6234_to_fp16 = const()[name = tensor("op_6234_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_633_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_633_cast_fp16, y = var_6234_to_fp16)[name = tensor("aw_chunk_633_cast_fp16")]; tensor var_6236_to_fp16 = const()[name = tensor("op_6236_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_635_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_635_cast_fp16, y = var_6236_to_fp16)[name = tensor("aw_chunk_635_cast_fp16")]; tensor var_6238_to_fp16 = const()[name = tensor("op_6238_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_637_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_637_cast_fp16, y = var_6238_to_fp16)[name = tensor("aw_chunk_637_cast_fp16")]; tensor var_6240_to_fp16 = const()[name = tensor("op_6240_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_639_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_639_cast_fp16, y = var_6240_to_fp16)[name = tensor("aw_chunk_639_cast_fp16")]; tensor var_6242_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_481_cast_fp16)[name = tensor("op_6242_cast_fp16")]; tensor var_6243_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_483_cast_fp16)[name = tensor("op_6243_cast_fp16")]; tensor var_6244_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_485_cast_fp16)[name = tensor("op_6244_cast_fp16")]; tensor var_6245_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_487_cast_fp16)[name = tensor("op_6245_cast_fp16")]; tensor var_6246_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_489_cast_fp16)[name = tensor("op_6246_cast_fp16")]; tensor var_6247_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_491_cast_fp16)[name = tensor("op_6247_cast_fp16")]; tensor var_6248_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_493_cast_fp16)[name = tensor("op_6248_cast_fp16")]; tensor var_6249_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_495_cast_fp16)[name = tensor("op_6249_cast_fp16")]; tensor var_6250_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_497_cast_fp16)[name = tensor("op_6250_cast_fp16")]; tensor var_6251_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_499_cast_fp16)[name = tensor("op_6251_cast_fp16")]; tensor var_6252_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_501_cast_fp16)[name = tensor("op_6252_cast_fp16")]; tensor var_6253_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_503_cast_fp16)[name = tensor("op_6253_cast_fp16")]; tensor var_6254_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_505_cast_fp16)[name = tensor("op_6254_cast_fp16")]; tensor var_6255_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_507_cast_fp16)[name = tensor("op_6255_cast_fp16")]; tensor var_6256_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_509_cast_fp16)[name = tensor("op_6256_cast_fp16")]; tensor var_6257_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_511_cast_fp16)[name = tensor("op_6257_cast_fp16")]; tensor var_6258_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_513_cast_fp16)[name = tensor("op_6258_cast_fp16")]; tensor var_6259_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_515_cast_fp16)[name = tensor("op_6259_cast_fp16")]; tensor var_6260_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_517_cast_fp16)[name = tensor("op_6260_cast_fp16")]; tensor var_6261_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_519_cast_fp16)[name = tensor("op_6261_cast_fp16")]; tensor var_6262_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_521_cast_fp16)[name = tensor("op_6262_cast_fp16")]; tensor var_6263_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_523_cast_fp16)[name = tensor("op_6263_cast_fp16")]; tensor var_6264_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_525_cast_fp16)[name = tensor("op_6264_cast_fp16")]; tensor var_6265_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_527_cast_fp16)[name = tensor("op_6265_cast_fp16")]; tensor var_6266_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_529_cast_fp16)[name = tensor("op_6266_cast_fp16")]; tensor var_6267_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_531_cast_fp16)[name = tensor("op_6267_cast_fp16")]; tensor var_6268_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_533_cast_fp16)[name = tensor("op_6268_cast_fp16")]; tensor var_6269_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_535_cast_fp16)[name = tensor("op_6269_cast_fp16")]; tensor var_6270_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_537_cast_fp16)[name = tensor("op_6270_cast_fp16")]; tensor var_6271_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_539_cast_fp16)[name = tensor("op_6271_cast_fp16")]; tensor var_6272_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_541_cast_fp16)[name = tensor("op_6272_cast_fp16")]; tensor var_6273_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_543_cast_fp16)[name = tensor("op_6273_cast_fp16")]; tensor var_6274_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_545_cast_fp16)[name = tensor("op_6274_cast_fp16")]; tensor var_6275_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_547_cast_fp16)[name = tensor("op_6275_cast_fp16")]; tensor var_6276_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_549_cast_fp16)[name = tensor("op_6276_cast_fp16")]; tensor var_6277_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_551_cast_fp16)[name = tensor("op_6277_cast_fp16")]; tensor var_6278_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_553_cast_fp16)[name = tensor("op_6278_cast_fp16")]; tensor var_6279_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_555_cast_fp16)[name = tensor("op_6279_cast_fp16")]; tensor var_6280_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_557_cast_fp16)[name = tensor("op_6280_cast_fp16")]; tensor var_6281_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_559_cast_fp16)[name = tensor("op_6281_cast_fp16")]; tensor var_6282_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_561_cast_fp16)[name = tensor("op_6282_cast_fp16")]; tensor var_6283_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_563_cast_fp16)[name = tensor("op_6283_cast_fp16")]; tensor var_6284_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_565_cast_fp16)[name = tensor("op_6284_cast_fp16")]; tensor var_6285_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_567_cast_fp16)[name = tensor("op_6285_cast_fp16")]; tensor var_6286_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_569_cast_fp16)[name = tensor("op_6286_cast_fp16")]; tensor var_6287_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_571_cast_fp16)[name = tensor("op_6287_cast_fp16")]; tensor var_6288_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_573_cast_fp16)[name = tensor("op_6288_cast_fp16")]; tensor var_6289_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_575_cast_fp16)[name = tensor("op_6289_cast_fp16")]; tensor var_6290_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_577_cast_fp16)[name = tensor("op_6290_cast_fp16")]; tensor var_6291_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_579_cast_fp16)[name = tensor("op_6291_cast_fp16")]; tensor var_6292_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_581_cast_fp16)[name = tensor("op_6292_cast_fp16")]; tensor var_6293_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_583_cast_fp16)[name = tensor("op_6293_cast_fp16")]; tensor var_6294_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_585_cast_fp16)[name = tensor("op_6294_cast_fp16")]; tensor var_6295_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_587_cast_fp16)[name = tensor("op_6295_cast_fp16")]; tensor var_6296_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_589_cast_fp16)[name = tensor("op_6296_cast_fp16")]; tensor var_6297_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_591_cast_fp16)[name = tensor("op_6297_cast_fp16")]; tensor var_6298_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_593_cast_fp16)[name = tensor("op_6298_cast_fp16")]; tensor var_6299_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_595_cast_fp16)[name = tensor("op_6299_cast_fp16")]; tensor var_6300_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_597_cast_fp16)[name = tensor("op_6300_cast_fp16")]; tensor var_6301_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_599_cast_fp16)[name = tensor("op_6301_cast_fp16")]; tensor var_6302_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_601_cast_fp16)[name = tensor("op_6302_cast_fp16")]; tensor var_6303_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_603_cast_fp16)[name = tensor("op_6303_cast_fp16")]; tensor var_6304_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_605_cast_fp16)[name = tensor("op_6304_cast_fp16")]; tensor var_6305_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_607_cast_fp16)[name = tensor("op_6305_cast_fp16")]; tensor var_6306_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_609_cast_fp16)[name = tensor("op_6306_cast_fp16")]; tensor var_6307_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_611_cast_fp16)[name = tensor("op_6307_cast_fp16")]; tensor var_6308_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_613_cast_fp16)[name = tensor("op_6308_cast_fp16")]; tensor var_6309_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_615_cast_fp16)[name = tensor("op_6309_cast_fp16")]; tensor var_6310_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_617_cast_fp16)[name = tensor("op_6310_cast_fp16")]; tensor var_6311_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_619_cast_fp16)[name = tensor("op_6311_cast_fp16")]; tensor var_6312_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_621_cast_fp16)[name = tensor("op_6312_cast_fp16")]; tensor var_6313_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_623_cast_fp16)[name = tensor("op_6313_cast_fp16")]; tensor var_6314_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_625_cast_fp16)[name = tensor("op_6314_cast_fp16")]; tensor var_6315_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_627_cast_fp16)[name = tensor("op_6315_cast_fp16")]; tensor var_6316_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_629_cast_fp16)[name = tensor("op_6316_cast_fp16")]; tensor var_6317_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_631_cast_fp16)[name = tensor("op_6317_cast_fp16")]; tensor var_6318_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_633_cast_fp16)[name = tensor("op_6318_cast_fp16")]; tensor var_6319_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_635_cast_fp16)[name = tensor("op_6319_cast_fp16")]; tensor var_6320_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_637_cast_fp16)[name = tensor("op_6320_cast_fp16")]; tensor var_6321_cast_fp16 = softmax(axis = var_5040, x = aw_chunk_639_cast_fp16)[name = tensor("op_6321_cast_fp16")]; tensor var_6323_equation_0 = const()[name = tensor("op_6323_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6323_cast_fp16 = einsum(equation = var_6323_equation_0, values = (var_5843_cast_fp16, var_6242_cast_fp16))[name = tensor("op_6323_cast_fp16")]; tensor var_6325_equation_0 = const()[name = tensor("op_6325_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6325_cast_fp16 = einsum(equation = var_6325_equation_0, values = (var_5843_cast_fp16, var_6243_cast_fp16))[name = tensor("op_6325_cast_fp16")]; tensor var_6327_equation_0 = const()[name = tensor("op_6327_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6327_cast_fp16 = einsum(equation = var_6327_equation_0, values = (var_5843_cast_fp16, var_6244_cast_fp16))[name = tensor("op_6327_cast_fp16")]; tensor var_6329_equation_0 = const()[name = tensor("op_6329_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6329_cast_fp16 = einsum(equation = var_6329_equation_0, values = (var_5843_cast_fp16, var_6245_cast_fp16))[name = tensor("op_6329_cast_fp16")]; tensor var_6331_equation_0 = const()[name = tensor("op_6331_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6331_cast_fp16 = einsum(equation = var_6331_equation_0, values = (var_5847_cast_fp16, var_6246_cast_fp16))[name = tensor("op_6331_cast_fp16")]; tensor var_6333_equation_0 = const()[name = tensor("op_6333_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6333_cast_fp16 = einsum(equation = var_6333_equation_0, values = (var_5847_cast_fp16, var_6247_cast_fp16))[name = tensor("op_6333_cast_fp16")]; tensor var_6335_equation_0 = const()[name = tensor("op_6335_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6335_cast_fp16 = einsum(equation = var_6335_equation_0, values = (var_5847_cast_fp16, var_6248_cast_fp16))[name = tensor("op_6335_cast_fp16")]; tensor var_6337_equation_0 = const()[name = tensor("op_6337_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6337_cast_fp16 = einsum(equation = var_6337_equation_0, values = (var_5847_cast_fp16, var_6249_cast_fp16))[name = tensor("op_6337_cast_fp16")]; tensor var_6339_equation_0 = const()[name = tensor("op_6339_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6339_cast_fp16 = einsum(equation = var_6339_equation_0, values = (var_5851_cast_fp16, var_6250_cast_fp16))[name = tensor("op_6339_cast_fp16")]; tensor var_6341_equation_0 = const()[name = tensor("op_6341_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6341_cast_fp16 = einsum(equation = var_6341_equation_0, values = (var_5851_cast_fp16, var_6251_cast_fp16))[name = tensor("op_6341_cast_fp16")]; tensor var_6343_equation_0 = const()[name = tensor("op_6343_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6343_cast_fp16 = einsum(equation = var_6343_equation_0, values = (var_5851_cast_fp16, var_6252_cast_fp16))[name = tensor("op_6343_cast_fp16")]; tensor var_6345_equation_0 = const()[name = tensor("op_6345_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6345_cast_fp16 = einsum(equation = var_6345_equation_0, values = (var_5851_cast_fp16, var_6253_cast_fp16))[name = tensor("op_6345_cast_fp16")]; tensor var_6347_equation_0 = const()[name = tensor("op_6347_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6347_cast_fp16 = einsum(equation = var_6347_equation_0, values = (var_5855_cast_fp16, var_6254_cast_fp16))[name = tensor("op_6347_cast_fp16")]; tensor var_6349_equation_0 = const()[name = tensor("op_6349_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6349_cast_fp16 = einsum(equation = var_6349_equation_0, values = (var_5855_cast_fp16, var_6255_cast_fp16))[name = tensor("op_6349_cast_fp16")]; tensor var_6351_equation_0 = const()[name = tensor("op_6351_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6351_cast_fp16 = einsum(equation = var_6351_equation_0, values = (var_5855_cast_fp16, var_6256_cast_fp16))[name = tensor("op_6351_cast_fp16")]; tensor var_6353_equation_0 = const()[name = tensor("op_6353_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6353_cast_fp16 = einsum(equation = var_6353_equation_0, values = (var_5855_cast_fp16, var_6257_cast_fp16))[name = tensor("op_6353_cast_fp16")]; tensor var_6355_equation_0 = const()[name = tensor("op_6355_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6355_cast_fp16 = einsum(equation = var_6355_equation_0, values = (var_5859_cast_fp16, var_6258_cast_fp16))[name = tensor("op_6355_cast_fp16")]; tensor var_6357_equation_0 = const()[name = tensor("op_6357_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6357_cast_fp16 = einsum(equation = var_6357_equation_0, values = (var_5859_cast_fp16, var_6259_cast_fp16))[name = tensor("op_6357_cast_fp16")]; tensor var_6359_equation_0 = const()[name = tensor("op_6359_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6359_cast_fp16 = einsum(equation = var_6359_equation_0, values = (var_5859_cast_fp16, var_6260_cast_fp16))[name = tensor("op_6359_cast_fp16")]; tensor var_6361_equation_0 = const()[name = tensor("op_6361_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6361_cast_fp16 = einsum(equation = var_6361_equation_0, values = (var_5859_cast_fp16, var_6261_cast_fp16))[name = tensor("op_6361_cast_fp16")]; tensor var_6363_equation_0 = const()[name = tensor("op_6363_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6363_cast_fp16 = einsum(equation = var_6363_equation_0, values = (var_5863_cast_fp16, var_6262_cast_fp16))[name = tensor("op_6363_cast_fp16")]; tensor var_6365_equation_0 = const()[name = tensor("op_6365_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6365_cast_fp16 = einsum(equation = var_6365_equation_0, values = (var_5863_cast_fp16, var_6263_cast_fp16))[name = tensor("op_6365_cast_fp16")]; tensor var_6367_equation_0 = const()[name = tensor("op_6367_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6367_cast_fp16 = einsum(equation = var_6367_equation_0, values = (var_5863_cast_fp16, var_6264_cast_fp16))[name = tensor("op_6367_cast_fp16")]; tensor var_6369_equation_0 = const()[name = tensor("op_6369_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6369_cast_fp16 = einsum(equation = var_6369_equation_0, values = (var_5863_cast_fp16, var_6265_cast_fp16))[name = tensor("op_6369_cast_fp16")]; tensor var_6371_equation_0 = const()[name = tensor("op_6371_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6371_cast_fp16 = einsum(equation = var_6371_equation_0, values = (var_5867_cast_fp16, var_6266_cast_fp16))[name = tensor("op_6371_cast_fp16")]; tensor var_6373_equation_0 = const()[name = tensor("op_6373_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6373_cast_fp16 = einsum(equation = var_6373_equation_0, values = (var_5867_cast_fp16, var_6267_cast_fp16))[name = tensor("op_6373_cast_fp16")]; tensor var_6375_equation_0 = const()[name = tensor("op_6375_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6375_cast_fp16 = einsum(equation = var_6375_equation_0, values = (var_5867_cast_fp16, var_6268_cast_fp16))[name = tensor("op_6375_cast_fp16")]; tensor var_6377_equation_0 = const()[name = tensor("op_6377_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6377_cast_fp16 = einsum(equation = var_6377_equation_0, values = (var_5867_cast_fp16, var_6269_cast_fp16))[name = tensor("op_6377_cast_fp16")]; tensor var_6379_equation_0 = const()[name = tensor("op_6379_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6379_cast_fp16 = einsum(equation = var_6379_equation_0, values = (var_5871_cast_fp16, var_6270_cast_fp16))[name = tensor("op_6379_cast_fp16")]; tensor var_6381_equation_0 = const()[name = tensor("op_6381_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6381_cast_fp16 = einsum(equation = var_6381_equation_0, values = (var_5871_cast_fp16, var_6271_cast_fp16))[name = tensor("op_6381_cast_fp16")]; tensor var_6383_equation_0 = const()[name = tensor("op_6383_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6383_cast_fp16 = einsum(equation = var_6383_equation_0, values = (var_5871_cast_fp16, var_6272_cast_fp16))[name = tensor("op_6383_cast_fp16")]; tensor var_6385_equation_0 = const()[name = tensor("op_6385_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6385_cast_fp16 = einsum(equation = var_6385_equation_0, values = (var_5871_cast_fp16, var_6273_cast_fp16))[name = tensor("op_6385_cast_fp16")]; tensor var_6387_equation_0 = const()[name = tensor("op_6387_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6387_cast_fp16 = einsum(equation = var_6387_equation_0, values = (var_5875_cast_fp16, var_6274_cast_fp16))[name = tensor("op_6387_cast_fp16")]; tensor var_6389_equation_0 = const()[name = tensor("op_6389_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6389_cast_fp16 = einsum(equation = var_6389_equation_0, values = (var_5875_cast_fp16, var_6275_cast_fp16))[name = tensor("op_6389_cast_fp16")]; tensor var_6391_equation_0 = const()[name = tensor("op_6391_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6391_cast_fp16 = einsum(equation = var_6391_equation_0, values = (var_5875_cast_fp16, var_6276_cast_fp16))[name = tensor("op_6391_cast_fp16")]; tensor var_6393_equation_0 = const()[name = tensor("op_6393_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6393_cast_fp16 = einsum(equation = var_6393_equation_0, values = (var_5875_cast_fp16, var_6277_cast_fp16))[name = tensor("op_6393_cast_fp16")]; tensor var_6395_equation_0 = const()[name = tensor("op_6395_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6395_cast_fp16 = einsum(equation = var_6395_equation_0, values = (var_5879_cast_fp16, var_6278_cast_fp16))[name = tensor("op_6395_cast_fp16")]; tensor var_6397_equation_0 = const()[name = tensor("op_6397_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6397_cast_fp16 = einsum(equation = var_6397_equation_0, values = (var_5879_cast_fp16, var_6279_cast_fp16))[name = tensor("op_6397_cast_fp16")]; tensor var_6399_equation_0 = const()[name = tensor("op_6399_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6399_cast_fp16 = einsum(equation = var_6399_equation_0, values = (var_5879_cast_fp16, var_6280_cast_fp16))[name = tensor("op_6399_cast_fp16")]; tensor var_6401_equation_0 = const()[name = tensor("op_6401_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6401_cast_fp16 = einsum(equation = var_6401_equation_0, values = (var_5879_cast_fp16, var_6281_cast_fp16))[name = tensor("op_6401_cast_fp16")]; tensor var_6403_equation_0 = const()[name = tensor("op_6403_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6403_cast_fp16 = einsum(equation = var_6403_equation_0, values = (var_5883_cast_fp16, var_6282_cast_fp16))[name = tensor("op_6403_cast_fp16")]; tensor var_6405_equation_0 = const()[name = tensor("op_6405_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6405_cast_fp16 = einsum(equation = var_6405_equation_0, values = (var_5883_cast_fp16, var_6283_cast_fp16))[name = tensor("op_6405_cast_fp16")]; tensor var_6407_equation_0 = const()[name = tensor("op_6407_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6407_cast_fp16 = einsum(equation = var_6407_equation_0, values = (var_5883_cast_fp16, var_6284_cast_fp16))[name = tensor("op_6407_cast_fp16")]; tensor var_6409_equation_0 = const()[name = tensor("op_6409_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6409_cast_fp16 = einsum(equation = var_6409_equation_0, values = (var_5883_cast_fp16, var_6285_cast_fp16))[name = tensor("op_6409_cast_fp16")]; tensor var_6411_equation_0 = const()[name = tensor("op_6411_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6411_cast_fp16 = einsum(equation = var_6411_equation_0, values = (var_5887_cast_fp16, var_6286_cast_fp16))[name = tensor("op_6411_cast_fp16")]; tensor var_6413_equation_0 = const()[name = tensor("op_6413_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6413_cast_fp16 = einsum(equation = var_6413_equation_0, values = (var_5887_cast_fp16, var_6287_cast_fp16))[name = tensor("op_6413_cast_fp16")]; tensor var_6415_equation_0 = const()[name = tensor("op_6415_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6415_cast_fp16 = einsum(equation = var_6415_equation_0, values = (var_5887_cast_fp16, var_6288_cast_fp16))[name = tensor("op_6415_cast_fp16")]; tensor var_6417_equation_0 = const()[name = tensor("op_6417_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6417_cast_fp16 = einsum(equation = var_6417_equation_0, values = (var_5887_cast_fp16, var_6289_cast_fp16))[name = tensor("op_6417_cast_fp16")]; tensor var_6419_equation_0 = const()[name = tensor("op_6419_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6419_cast_fp16 = einsum(equation = var_6419_equation_0, values = (var_5891_cast_fp16, var_6290_cast_fp16))[name = tensor("op_6419_cast_fp16")]; tensor var_6421_equation_0 = const()[name = tensor("op_6421_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6421_cast_fp16 = einsum(equation = var_6421_equation_0, values = (var_5891_cast_fp16, var_6291_cast_fp16))[name = tensor("op_6421_cast_fp16")]; tensor var_6423_equation_0 = const()[name = tensor("op_6423_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6423_cast_fp16 = einsum(equation = var_6423_equation_0, values = (var_5891_cast_fp16, var_6292_cast_fp16))[name = tensor("op_6423_cast_fp16")]; tensor var_6425_equation_0 = const()[name = tensor("op_6425_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6425_cast_fp16 = einsum(equation = var_6425_equation_0, values = (var_5891_cast_fp16, var_6293_cast_fp16))[name = tensor("op_6425_cast_fp16")]; tensor var_6427_equation_0 = const()[name = tensor("op_6427_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6427_cast_fp16 = einsum(equation = var_6427_equation_0, values = (var_5895_cast_fp16, var_6294_cast_fp16))[name = tensor("op_6427_cast_fp16")]; tensor var_6429_equation_0 = const()[name = tensor("op_6429_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6429_cast_fp16 = einsum(equation = var_6429_equation_0, values = (var_5895_cast_fp16, var_6295_cast_fp16))[name = tensor("op_6429_cast_fp16")]; tensor var_6431_equation_0 = const()[name = tensor("op_6431_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6431_cast_fp16 = einsum(equation = var_6431_equation_0, values = (var_5895_cast_fp16, var_6296_cast_fp16))[name = tensor("op_6431_cast_fp16")]; tensor var_6433_equation_0 = const()[name = tensor("op_6433_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6433_cast_fp16 = einsum(equation = var_6433_equation_0, values = (var_5895_cast_fp16, var_6297_cast_fp16))[name = tensor("op_6433_cast_fp16")]; tensor var_6435_equation_0 = const()[name = tensor("op_6435_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6435_cast_fp16 = einsum(equation = var_6435_equation_0, values = (var_5899_cast_fp16, var_6298_cast_fp16))[name = tensor("op_6435_cast_fp16")]; tensor var_6437_equation_0 = const()[name = tensor("op_6437_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6437_cast_fp16 = einsum(equation = var_6437_equation_0, values = (var_5899_cast_fp16, var_6299_cast_fp16))[name = tensor("op_6437_cast_fp16")]; tensor var_6439_equation_0 = const()[name = tensor("op_6439_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6439_cast_fp16 = einsum(equation = var_6439_equation_0, values = (var_5899_cast_fp16, var_6300_cast_fp16))[name = tensor("op_6439_cast_fp16")]; tensor var_6441_equation_0 = const()[name = tensor("op_6441_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6441_cast_fp16 = einsum(equation = var_6441_equation_0, values = (var_5899_cast_fp16, var_6301_cast_fp16))[name = tensor("op_6441_cast_fp16")]; tensor var_6443_equation_0 = const()[name = tensor("op_6443_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6443_cast_fp16 = einsum(equation = var_6443_equation_0, values = (var_5903_cast_fp16, var_6302_cast_fp16))[name = tensor("op_6443_cast_fp16")]; tensor var_6445_equation_0 = const()[name = tensor("op_6445_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6445_cast_fp16 = einsum(equation = var_6445_equation_0, values = (var_5903_cast_fp16, var_6303_cast_fp16))[name = tensor("op_6445_cast_fp16")]; tensor var_6447_equation_0 = const()[name = tensor("op_6447_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6447_cast_fp16 = einsum(equation = var_6447_equation_0, values = (var_5903_cast_fp16, var_6304_cast_fp16))[name = tensor("op_6447_cast_fp16")]; tensor var_6449_equation_0 = const()[name = tensor("op_6449_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6449_cast_fp16 = einsum(equation = var_6449_equation_0, values = (var_5903_cast_fp16, var_6305_cast_fp16))[name = tensor("op_6449_cast_fp16")]; tensor var_6451_equation_0 = const()[name = tensor("op_6451_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6451_cast_fp16 = einsum(equation = var_6451_equation_0, values = (var_5907_cast_fp16, var_6306_cast_fp16))[name = tensor("op_6451_cast_fp16")]; tensor var_6453_equation_0 = const()[name = tensor("op_6453_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6453_cast_fp16 = einsum(equation = var_6453_equation_0, values = (var_5907_cast_fp16, var_6307_cast_fp16))[name = tensor("op_6453_cast_fp16")]; tensor var_6455_equation_0 = const()[name = tensor("op_6455_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6455_cast_fp16 = einsum(equation = var_6455_equation_0, values = (var_5907_cast_fp16, var_6308_cast_fp16))[name = tensor("op_6455_cast_fp16")]; tensor var_6457_equation_0 = const()[name = tensor("op_6457_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6457_cast_fp16 = einsum(equation = var_6457_equation_0, values = (var_5907_cast_fp16, var_6309_cast_fp16))[name = tensor("op_6457_cast_fp16")]; tensor var_6459_equation_0 = const()[name = tensor("op_6459_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6459_cast_fp16 = einsum(equation = var_6459_equation_0, values = (var_5911_cast_fp16, var_6310_cast_fp16))[name = tensor("op_6459_cast_fp16")]; tensor var_6461_equation_0 = const()[name = tensor("op_6461_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6461_cast_fp16 = einsum(equation = var_6461_equation_0, values = (var_5911_cast_fp16, var_6311_cast_fp16))[name = tensor("op_6461_cast_fp16")]; tensor var_6463_equation_0 = const()[name = tensor("op_6463_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6463_cast_fp16 = einsum(equation = var_6463_equation_0, values = (var_5911_cast_fp16, var_6312_cast_fp16))[name = tensor("op_6463_cast_fp16")]; tensor var_6465_equation_0 = const()[name = tensor("op_6465_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6465_cast_fp16 = einsum(equation = var_6465_equation_0, values = (var_5911_cast_fp16, var_6313_cast_fp16))[name = tensor("op_6465_cast_fp16")]; tensor var_6467_equation_0 = const()[name = tensor("op_6467_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6467_cast_fp16 = einsum(equation = var_6467_equation_0, values = (var_5915_cast_fp16, var_6314_cast_fp16))[name = tensor("op_6467_cast_fp16")]; tensor var_6469_equation_0 = const()[name = tensor("op_6469_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6469_cast_fp16 = einsum(equation = var_6469_equation_0, values = (var_5915_cast_fp16, var_6315_cast_fp16))[name = tensor("op_6469_cast_fp16")]; tensor var_6471_equation_0 = const()[name = tensor("op_6471_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6471_cast_fp16 = einsum(equation = var_6471_equation_0, values = (var_5915_cast_fp16, var_6316_cast_fp16))[name = tensor("op_6471_cast_fp16")]; tensor var_6473_equation_0 = const()[name = tensor("op_6473_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6473_cast_fp16 = einsum(equation = var_6473_equation_0, values = (var_5915_cast_fp16, var_6317_cast_fp16))[name = tensor("op_6473_cast_fp16")]; tensor var_6475_equation_0 = const()[name = tensor("op_6475_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6475_cast_fp16 = einsum(equation = var_6475_equation_0, values = (var_5919_cast_fp16, var_6318_cast_fp16))[name = tensor("op_6475_cast_fp16")]; tensor var_6477_equation_0 = const()[name = tensor("op_6477_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6477_cast_fp16 = einsum(equation = var_6477_equation_0, values = (var_5919_cast_fp16, var_6319_cast_fp16))[name = tensor("op_6477_cast_fp16")]; tensor var_6479_equation_0 = const()[name = tensor("op_6479_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6479_cast_fp16 = einsum(equation = var_6479_equation_0, values = (var_5919_cast_fp16, var_6320_cast_fp16))[name = tensor("op_6479_cast_fp16")]; tensor var_6481_equation_0 = const()[name = tensor("op_6481_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_6481_cast_fp16 = einsum(equation = var_6481_equation_0, values = (var_5919_cast_fp16, var_6321_cast_fp16))[name = tensor("op_6481_cast_fp16")]; tensor var_6483_interleave_0 = const()[name = tensor("op_6483_interleave_0"), val = tensor(false)]; tensor var_6483_cast_fp16 = concat(axis = var_5015, interleave = var_6483_interleave_0, values = (var_6323_cast_fp16, var_6325_cast_fp16, var_6327_cast_fp16, var_6329_cast_fp16))[name = tensor("op_6483_cast_fp16")]; tensor var_6485_interleave_0 = const()[name = tensor("op_6485_interleave_0"), val = tensor(false)]; tensor var_6485_cast_fp16 = concat(axis = var_5015, interleave = var_6485_interleave_0, values = (var_6331_cast_fp16, var_6333_cast_fp16, var_6335_cast_fp16, var_6337_cast_fp16))[name = tensor("op_6485_cast_fp16")]; tensor var_6487_interleave_0 = const()[name = tensor("op_6487_interleave_0"), val = tensor(false)]; tensor var_6487_cast_fp16 = concat(axis = var_5015, interleave = var_6487_interleave_0, values = (var_6339_cast_fp16, var_6341_cast_fp16, var_6343_cast_fp16, var_6345_cast_fp16))[name = tensor("op_6487_cast_fp16")]; tensor var_6489_interleave_0 = const()[name = tensor("op_6489_interleave_0"), val = tensor(false)]; tensor var_6489_cast_fp16 = concat(axis = var_5015, interleave = var_6489_interleave_0, values = (var_6347_cast_fp16, var_6349_cast_fp16, var_6351_cast_fp16, var_6353_cast_fp16))[name = tensor("op_6489_cast_fp16")]; tensor var_6491_interleave_0 = const()[name = tensor("op_6491_interleave_0"), val = tensor(false)]; tensor var_6491_cast_fp16 = concat(axis = var_5015, interleave = var_6491_interleave_0, values = (var_6355_cast_fp16, var_6357_cast_fp16, var_6359_cast_fp16, var_6361_cast_fp16))[name = tensor("op_6491_cast_fp16")]; tensor var_6493_interleave_0 = const()[name = tensor("op_6493_interleave_0"), val = tensor(false)]; tensor var_6493_cast_fp16 = concat(axis = var_5015, interleave = var_6493_interleave_0, values = (var_6363_cast_fp16, var_6365_cast_fp16, var_6367_cast_fp16, var_6369_cast_fp16))[name = tensor("op_6493_cast_fp16")]; tensor var_6495_interleave_0 = const()[name = tensor("op_6495_interleave_0"), val = tensor(false)]; tensor var_6495_cast_fp16 = concat(axis = var_5015, interleave = var_6495_interleave_0, values = (var_6371_cast_fp16, var_6373_cast_fp16, var_6375_cast_fp16, var_6377_cast_fp16))[name = tensor("op_6495_cast_fp16")]; tensor var_6497_interleave_0 = const()[name = tensor("op_6497_interleave_0"), val = tensor(false)]; tensor var_6497_cast_fp16 = concat(axis = var_5015, interleave = var_6497_interleave_0, values = (var_6379_cast_fp16, var_6381_cast_fp16, var_6383_cast_fp16, var_6385_cast_fp16))[name = tensor("op_6497_cast_fp16")]; tensor var_6499_interleave_0 = const()[name = tensor("op_6499_interleave_0"), val = tensor(false)]; tensor var_6499_cast_fp16 = concat(axis = var_5015, interleave = var_6499_interleave_0, values = (var_6387_cast_fp16, var_6389_cast_fp16, var_6391_cast_fp16, var_6393_cast_fp16))[name = tensor("op_6499_cast_fp16")]; tensor var_6501_interleave_0 = const()[name = tensor("op_6501_interleave_0"), val = tensor(false)]; tensor var_6501_cast_fp16 = concat(axis = var_5015, interleave = var_6501_interleave_0, values = (var_6395_cast_fp16, var_6397_cast_fp16, var_6399_cast_fp16, var_6401_cast_fp16))[name = tensor("op_6501_cast_fp16")]; tensor var_6503_interleave_0 = const()[name = tensor("op_6503_interleave_0"), val = tensor(false)]; tensor var_6503_cast_fp16 = concat(axis = var_5015, interleave = var_6503_interleave_0, values = (var_6403_cast_fp16, var_6405_cast_fp16, var_6407_cast_fp16, var_6409_cast_fp16))[name = tensor("op_6503_cast_fp16")]; tensor var_6505_interleave_0 = const()[name = tensor("op_6505_interleave_0"), val = tensor(false)]; tensor var_6505_cast_fp16 = concat(axis = var_5015, interleave = var_6505_interleave_0, values = (var_6411_cast_fp16, var_6413_cast_fp16, var_6415_cast_fp16, var_6417_cast_fp16))[name = tensor("op_6505_cast_fp16")]; tensor var_6507_interleave_0 = const()[name = tensor("op_6507_interleave_0"), val = tensor(false)]; tensor var_6507_cast_fp16 = concat(axis = var_5015, interleave = var_6507_interleave_0, values = (var_6419_cast_fp16, var_6421_cast_fp16, var_6423_cast_fp16, var_6425_cast_fp16))[name = tensor("op_6507_cast_fp16")]; tensor var_6509_interleave_0 = const()[name = tensor("op_6509_interleave_0"), val = tensor(false)]; tensor var_6509_cast_fp16 = concat(axis = var_5015, interleave = var_6509_interleave_0, values = (var_6427_cast_fp16, var_6429_cast_fp16, var_6431_cast_fp16, var_6433_cast_fp16))[name = tensor("op_6509_cast_fp16")]; tensor var_6511_interleave_0 = const()[name = tensor("op_6511_interleave_0"), val = tensor(false)]; tensor var_6511_cast_fp16 = concat(axis = var_5015, interleave = var_6511_interleave_0, values = (var_6435_cast_fp16, var_6437_cast_fp16, var_6439_cast_fp16, var_6441_cast_fp16))[name = tensor("op_6511_cast_fp16")]; tensor var_6513_interleave_0 = const()[name = tensor("op_6513_interleave_0"), val = tensor(false)]; tensor var_6513_cast_fp16 = concat(axis = var_5015, interleave = var_6513_interleave_0, values = (var_6443_cast_fp16, var_6445_cast_fp16, var_6447_cast_fp16, var_6449_cast_fp16))[name = tensor("op_6513_cast_fp16")]; tensor var_6515_interleave_0 = const()[name = tensor("op_6515_interleave_0"), val = tensor(false)]; tensor var_6515_cast_fp16 = concat(axis = var_5015, interleave = var_6515_interleave_0, values = (var_6451_cast_fp16, var_6453_cast_fp16, var_6455_cast_fp16, var_6457_cast_fp16))[name = tensor("op_6515_cast_fp16")]; tensor var_6517_interleave_0 = const()[name = tensor("op_6517_interleave_0"), val = tensor(false)]; tensor var_6517_cast_fp16 = concat(axis = var_5015, interleave = var_6517_interleave_0, values = (var_6459_cast_fp16, var_6461_cast_fp16, var_6463_cast_fp16, var_6465_cast_fp16))[name = tensor("op_6517_cast_fp16")]; tensor var_6519_interleave_0 = const()[name = tensor("op_6519_interleave_0"), val = tensor(false)]; tensor var_6519_cast_fp16 = concat(axis = var_5015, interleave = var_6519_interleave_0, values = (var_6467_cast_fp16, var_6469_cast_fp16, var_6471_cast_fp16, var_6473_cast_fp16))[name = tensor("op_6519_cast_fp16")]; tensor var_6521_interleave_0 = const()[name = tensor("op_6521_interleave_0"), val = tensor(false)]; tensor var_6521_cast_fp16 = concat(axis = var_5015, interleave = var_6521_interleave_0, values = (var_6475_cast_fp16, var_6477_cast_fp16, var_6479_cast_fp16, var_6481_cast_fp16))[name = tensor("op_6521_cast_fp16")]; tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), val = tensor(false)]; tensor input_25_cast_fp16 = concat(axis = var_5040, interleave = input_25_interleave_0, values = (var_6483_cast_fp16, var_6485_cast_fp16, var_6487_cast_fp16, var_6489_cast_fp16, var_6491_cast_fp16, var_6493_cast_fp16, var_6495_cast_fp16, var_6497_cast_fp16, var_6499_cast_fp16, var_6501_cast_fp16, var_6503_cast_fp16, var_6505_cast_fp16, var_6507_cast_fp16, var_6509_cast_fp16, var_6511_cast_fp16, var_6513_cast_fp16, var_6515_cast_fp16, var_6517_cast_fp16, var_6519_cast_fp16, var_6521_cast_fp16))[name = tensor("input_25_cast_fp16")]; tensor var_6532_pad_type_0 = const()[name = tensor("op_6532_pad_type_0"), val = tensor("valid")]; tensor var_6532_strides_0 = const()[name = tensor("op_6532_strides_0"), val = tensor([1, 1])]; tensor var_6532_pad_0 = const()[name = tensor("op_6532_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6532_dilations_0 = const()[name = tensor("op_6532_dilations_0"), val = tensor([1, 1])]; tensor var_6532_groups_0 = const()[name = tensor("op_6532_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(59343296))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60162560))), name = tensor("layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_3_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60162688)))]; tensor var_6532_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_6532_dilations_0, groups = var_6532_groups_0, pad = var_6532_pad_0, pad_type = var_6532_pad_type_0, strides = var_6532_strides_0, weight = layers_3_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_25_cast_fp16)[name = tensor("op_6532_cast_fp16")]; tensor var_6538_pad_type_0 = const()[name = tensor("op_6538_pad_type_0"), val = tensor("valid")]; tensor var_6538_strides_0 = const()[name = tensor("op_6538_strides_0"), val = tensor([1, 1])]; tensor var_6538_pad_0 = const()[name = tensor("op_6538_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6538_dilations_0 = const()[name = tensor("op_6538_dilations_0"), val = tensor([1, 1])]; tensor var_6538_groups_0 = const()[name = tensor("op_6538_groups_0"), val = tensor(1)]; tensor layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60188096))), name = tensor("layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60165312))), shape = tensor([1280, 1280, 1, 1])]; tensor var_6538_cast_fp16 = conv(dilations = var_6538_dilations_0, groups = var_6538_groups_0, pad = var_6538_pad_0, pad_type = var_6538_pad_type_0, strides = var_6538_strides_0, weight = layers_3_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_25_cast_fp16)[name = tensor("op_6538_cast_fp16")]; tensor obj_15_cast_fp16 = add(x = var_6532_cast_fp16, y = var_6538_cast_fp16)[name = tensor("obj_15_cast_fp16")]; tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; tensor out_15_axes_0 = const()[name = tensor("out_15_axes_0"), val = tensor([1])]; tensor var_6549_to_fp16 = const()[name = tensor("op_6549_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_15_cast_fp16 = layer_norm(axes = out_15_axes_0, epsilon = var_6549_to_fp16, x = inputs_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60392960)))]; tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60395584)))]; tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; tensor var_6567_pad_type_0 = const()[name = tensor("op_6567_pad_type_0"), val = tensor("valid")]; tensor var_6567_strides_0 = const()[name = tensor("op_6567_strides_0"), val = tensor([1, 1])]; tensor var_6567_pad_0 = const()[name = tensor("op_6567_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6567_dilations_0 = const()[name = tensor("op_6567_dilations_0"), val = tensor([1, 1])]; tensor var_6567_groups_0 = const()[name = tensor("op_6567_groups_0"), val = tensor(1)]; tensor layers_3_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60398208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63675072))), name = tensor("layers_3_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_3_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63675200)))]; tensor var_6567_cast_fp16 = conv(bias = layers_3_fc1_inlier_module_bias_to_fp16, dilations = var_6567_dilations_0, groups = var_6567_groups_0, pad = var_6567_pad_0, pad_type = var_6567_pad_type_0, strides = var_6567_strides_0, weight = layers_3_fc1_inlier_module_weight_to_fp16_palettized, x = input_27_cast_fp16)[name = tensor("op_6567_cast_fp16")]; tensor var_6573_pad_type_0 = const()[name = tensor("op_6573_pad_type_0"), val = tensor("valid")]; tensor var_6573_strides_0 = const()[name = tensor("op_6573_strides_0"), val = tensor([1, 1])]; tensor var_6573_pad_0 = const()[name = tensor("op_6573_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6573_dilations_0 = const()[name = tensor("op_6573_dilations_0"), val = tensor([1, 1])]; tensor var_6573_groups_0 = const()[name = tensor("op_6573_groups_0"), val = tensor(1)]; tensor layers_3_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63740864))), name = tensor("layers_3_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63685504))), shape = tensor([5120, 1280, 1, 1])]; tensor var_6573_cast_fp16 = conv(dilations = var_6573_dilations_0, groups = var_6573_groups_0, pad = var_6573_pad_0, pad_type = var_6573_pad_type_0, strides = var_6573_strides_0, weight = layers_3_fc1_outlier_module_weight_to_fp16_sparsified, x = input_27_cast_fp16)[name = tensor("op_6573_cast_fp16")]; tensor input_29_cast_fp16 = add(x = var_6567_cast_fp16, y = var_6573_cast_fp16)[name = tensor("input_29_cast_fp16")]; tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; tensor var_6584_pad_type_0 = const()[name = tensor("op_6584_pad_type_0"), val = tensor("valid")]; tensor var_6584_strides_0 = const()[name = tensor("op_6584_strides_0"), val = tensor([1, 1])]; tensor var_6584_pad_0 = const()[name = tensor("op_6584_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6584_dilations_0 = const()[name = tensor("op_6584_dilations_0"), val = tensor([1, 1])]; tensor var_6584_groups_0 = const()[name = tensor("op_6584_groups_0"), val = tensor(1)]; tensor layers_3_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64560128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67836992))), name = tensor("layers_3_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_3_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_3_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67837120)))]; tensor var_6584_cast_fp16 = conv(bias = layers_3_fc2_inlier_module_bias_to_fp16, dilations = var_6584_dilations_0, groups = var_6584_groups_0, pad = var_6584_pad_0, pad_type = var_6584_pad_type_0, strides = var_6584_strides_0, weight = layers_3_fc2_inlier_module_weight_to_fp16_palettized, x = input_31_cast_fp16)[name = tensor("op_6584_cast_fp16")]; tensor var_6590_pad_type_0 = const()[name = tensor("op_6590_pad_type_0"), val = tensor("valid")]; tensor var_6590_strides_0 = const()[name = tensor("op_6590_strides_0"), val = tensor([1, 1])]; tensor var_6590_pad_0 = const()[name = tensor("op_6590_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6590_dilations_0 = const()[name = tensor("op_6590_dilations_0"), val = tensor([1, 1])]; tensor var_6590_groups_0 = const()[name = tensor("op_6590_groups_0"), val = tensor(1)]; tensor layers_3_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68064960))), name = tensor("layers_3_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67839744))), shape = tensor([1280, 5120, 1, 1])]; tensor var_6590_cast_fp16 = conv(dilations = var_6590_dilations_0, groups = var_6590_groups_0, pad = var_6590_pad_0, pad_type = var_6590_pad_type_0, strides = var_6590_strides_0, weight = layers_3_fc2_outlier_module_weight_to_fp16_sparsified, x = input_31_cast_fp16)[name = tensor("op_6590_cast_fp16")]; tensor hidden_states_11_cast_fp16 = add(x = var_6584_cast_fp16, y = var_6590_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; tensor var_6596 = const()[name = tensor("op_6596"), val = tensor(3)]; tensor var_6621 = const()[name = tensor("op_6621"), val = tensor(1)]; tensor out_17_axes_0 = const()[name = tensor("out_17_axes_0"), val = tensor([1])]; tensor var_6638_to_fp16 = const()[name = tensor("op_6638_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_17_cast_fp16 = layer_norm(axes = out_17_axes_0, epsilon = var_6638_to_fp16, x = inputs_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68884224)))]; tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68886848)))]; tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; tensor var_6660_pad_type_0 = const()[name = tensor("op_6660_pad_type_0"), val = tensor("valid")]; tensor var_6660_strides_0 = const()[name = tensor("op_6660_strides_0"), val = tensor([1, 1])]; tensor var_6660_pad_0 = const()[name = tensor("op_6660_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6660_dilations_0 = const()[name = tensor("op_6660_dilations_0"), val = tensor([1, 1])]; tensor var_6660_groups_0 = const()[name = tensor("op_6660_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(68889472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69708736))), name = tensor("layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_4_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69708864)))]; tensor var_6660_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_6660_dilations_0, groups = var_6660_groups_0, pad = var_6660_pad_0, pad_type = var_6660_pad_type_0, strides = var_6660_strides_0, weight = layers_4_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_6660_cast_fp16")]; tensor var_6666_pad_type_0 = const()[name = tensor("op_6666_pad_type_0"), val = tensor("valid")]; tensor var_6666_strides_0 = const()[name = tensor("op_6666_strides_0"), val = tensor([1, 1])]; tensor var_6666_pad_0 = const()[name = tensor("op_6666_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6666_dilations_0 = const()[name = tensor("op_6666_dilations_0"), val = tensor([1, 1])]; tensor var_6666_groups_0 = const()[name = tensor("op_6666_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69759808))), name = tensor("layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69711488))), shape = tensor([1280, 1280, 1, 1])]; tensor var_6666_cast_fp16 = conv(dilations = var_6666_dilations_0, groups = var_6666_groups_0, pad = var_6666_pad_0, pad_type = var_6666_pad_type_0, strides = var_6666_strides_0, weight = layers_4_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_6666_cast_fp16")]; tensor query_9_cast_fp16 = add(x = var_6660_cast_fp16, y = var_6666_cast_fp16)[name = tensor("query_9_cast_fp16")]; tensor var_6675_pad_type_0 = const()[name = tensor("op_6675_pad_type_0"), val = tensor("valid")]; tensor var_6675_strides_0 = const()[name = tensor("op_6675_strides_0"), val = tensor([1, 1])]; tensor var_6675_pad_0 = const()[name = tensor("op_6675_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6675_dilations_0 = const()[name = tensor("op_6675_dilations_0"), val = tensor([1, 1])]; tensor var_6675_groups_0 = const()[name = tensor("op_6675_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(69964672))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70783936))), name = tensor("layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_6675_cast_fp16 = conv(dilations = var_6675_dilations_0, groups = var_6675_groups_0, pad = var_6675_pad_0, pad_type = var_6675_pad_type_0, strides = var_6675_strides_0, weight = layers_4_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_6675_cast_fp16")]; tensor var_6681_pad_type_0 = const()[name = tensor("op_6681_pad_type_0"), val = tensor("valid")]; tensor var_6681_strides_0 = const()[name = tensor("op_6681_strides_0"), val = tensor([1, 1])]; tensor var_6681_pad_0 = const()[name = tensor("op_6681_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6681_dilations_0 = const()[name = tensor("op_6681_dilations_0"), val = tensor([1, 1])]; tensor var_6681_groups_0 = const()[name = tensor("op_6681_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70823680))), name = tensor("layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(70784064))), shape = tensor([1280, 1280, 1, 1])]; tensor var_6681_cast_fp16 = conv(dilations = var_6681_dilations_0, groups = var_6681_groups_0, pad = var_6681_pad_0, pad_type = var_6681_pad_type_0, strides = var_6681_strides_0, weight = layers_4_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_6681_cast_fp16")]; tensor key_9_cast_fp16 = add(x = var_6675_cast_fp16, y = var_6681_cast_fp16)[name = tensor("key_9_cast_fp16")]; tensor var_6691_pad_type_0 = const()[name = tensor("op_6691_pad_type_0"), val = tensor("valid")]; tensor var_6691_strides_0 = const()[name = tensor("op_6691_strides_0"), val = tensor([1, 1])]; tensor var_6691_pad_0 = const()[name = tensor("op_6691_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6691_dilations_0 = const()[name = tensor("op_6691_dilations_0"), val = tensor([1, 1])]; tensor var_6691_groups_0 = const()[name = tensor("op_6691_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71028544))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71847808))), name = tensor("layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_4_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71847936)))]; tensor var_6691_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_6691_dilations_0, groups = var_6691_groups_0, pad = var_6691_pad_0, pad_type = var_6691_pad_type_0, strides = var_6691_strides_0, weight = layers_4_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_17_cast_fp16)[name = tensor("op_6691_cast_fp16")]; tensor var_6697_pad_type_0 = const()[name = tensor("op_6697_pad_type_0"), val = tensor("valid")]; tensor var_6697_strides_0 = const()[name = tensor("op_6697_strides_0"), val = tensor([1, 1])]; tensor var_6697_pad_0 = const()[name = tensor("op_6697_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_6697_dilations_0 = const()[name = tensor("op_6697_dilations_0"), val = tensor([1, 1])]; tensor var_6697_groups_0 = const()[name = tensor("op_6697_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71883776))), name = tensor("layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(71850560))), shape = tensor([1280, 1280, 1, 1])]; tensor var_6697_cast_fp16 = conv(dilations = var_6697_dilations_0, groups = var_6697_groups_0, pad = var_6697_pad_0, pad_type = var_6697_pad_type_0, strides = var_6697_strides_0, weight = layers_4_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_17_cast_fp16)[name = tensor("op_6697_cast_fp16")]; tensor value_9_cast_fp16 = add(x = var_6691_cast_fp16, y = var_6697_cast_fp16)[name = tensor("value_9_cast_fp16")]; tensor var_6703_begin_0 = const()[name = tensor("op_6703_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6703_end_0 = const()[name = tensor("op_6703_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6703_end_mask_0 = const()[name = tensor("op_6703_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6703_cast_fp16 = slice_by_index(begin = var_6703_begin_0, end = var_6703_end_0, end_mask = var_6703_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6703_cast_fp16")]; tensor var_6707_begin_0 = const()[name = tensor("op_6707_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_6707_end_0 = const()[name = tensor("op_6707_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_6707_end_mask_0 = const()[name = tensor("op_6707_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6707_cast_fp16 = slice_by_index(begin = var_6707_begin_0, end = var_6707_end_0, end_mask = var_6707_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6707_cast_fp16")]; tensor var_6711_begin_0 = const()[name = tensor("op_6711_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_6711_end_0 = const()[name = tensor("op_6711_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_6711_end_mask_0 = const()[name = tensor("op_6711_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6711_cast_fp16 = slice_by_index(begin = var_6711_begin_0, end = var_6711_end_0, end_mask = var_6711_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6711_cast_fp16")]; tensor var_6715_begin_0 = const()[name = tensor("op_6715_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_6715_end_0 = const()[name = tensor("op_6715_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_6715_end_mask_0 = const()[name = tensor("op_6715_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6715_cast_fp16 = slice_by_index(begin = var_6715_begin_0, end = var_6715_end_0, end_mask = var_6715_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6715_cast_fp16")]; tensor var_6719_begin_0 = const()[name = tensor("op_6719_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_6719_end_0 = const()[name = tensor("op_6719_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_6719_end_mask_0 = const()[name = tensor("op_6719_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6719_cast_fp16 = slice_by_index(begin = var_6719_begin_0, end = var_6719_end_0, end_mask = var_6719_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6719_cast_fp16")]; tensor var_6723_begin_0 = const()[name = tensor("op_6723_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_6723_end_0 = const()[name = tensor("op_6723_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_6723_end_mask_0 = const()[name = tensor("op_6723_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6723_cast_fp16 = slice_by_index(begin = var_6723_begin_0, end = var_6723_end_0, end_mask = var_6723_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6723_cast_fp16")]; tensor var_6727_begin_0 = const()[name = tensor("op_6727_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_6727_end_0 = const()[name = tensor("op_6727_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_6727_end_mask_0 = const()[name = tensor("op_6727_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6727_cast_fp16 = slice_by_index(begin = var_6727_begin_0, end = var_6727_end_0, end_mask = var_6727_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6727_cast_fp16")]; tensor var_6731_begin_0 = const()[name = tensor("op_6731_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_6731_end_0 = const()[name = tensor("op_6731_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_6731_end_mask_0 = const()[name = tensor("op_6731_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6731_cast_fp16 = slice_by_index(begin = var_6731_begin_0, end = var_6731_end_0, end_mask = var_6731_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6731_cast_fp16")]; tensor var_6735_begin_0 = const()[name = tensor("op_6735_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_6735_end_0 = const()[name = tensor("op_6735_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_6735_end_mask_0 = const()[name = tensor("op_6735_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6735_cast_fp16 = slice_by_index(begin = var_6735_begin_0, end = var_6735_end_0, end_mask = var_6735_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6735_cast_fp16")]; tensor var_6739_begin_0 = const()[name = tensor("op_6739_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_6739_end_0 = const()[name = tensor("op_6739_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_6739_end_mask_0 = const()[name = tensor("op_6739_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6739_cast_fp16 = slice_by_index(begin = var_6739_begin_0, end = var_6739_end_0, end_mask = var_6739_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6739_cast_fp16")]; tensor var_6743_begin_0 = const()[name = tensor("op_6743_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_6743_end_0 = const()[name = tensor("op_6743_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_6743_end_mask_0 = const()[name = tensor("op_6743_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6743_cast_fp16 = slice_by_index(begin = var_6743_begin_0, end = var_6743_end_0, end_mask = var_6743_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6743_cast_fp16")]; tensor var_6747_begin_0 = const()[name = tensor("op_6747_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_6747_end_0 = const()[name = tensor("op_6747_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_6747_end_mask_0 = const()[name = tensor("op_6747_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6747_cast_fp16 = slice_by_index(begin = var_6747_begin_0, end = var_6747_end_0, end_mask = var_6747_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6747_cast_fp16")]; tensor var_6751_begin_0 = const()[name = tensor("op_6751_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_6751_end_0 = const()[name = tensor("op_6751_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_6751_end_mask_0 = const()[name = tensor("op_6751_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6751_cast_fp16 = slice_by_index(begin = var_6751_begin_0, end = var_6751_end_0, end_mask = var_6751_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6751_cast_fp16")]; tensor var_6755_begin_0 = const()[name = tensor("op_6755_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_6755_end_0 = const()[name = tensor("op_6755_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_6755_end_mask_0 = const()[name = tensor("op_6755_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6755_cast_fp16 = slice_by_index(begin = var_6755_begin_0, end = var_6755_end_0, end_mask = var_6755_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6755_cast_fp16")]; tensor var_6759_begin_0 = const()[name = tensor("op_6759_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_6759_end_0 = const()[name = tensor("op_6759_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_6759_end_mask_0 = const()[name = tensor("op_6759_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6759_cast_fp16 = slice_by_index(begin = var_6759_begin_0, end = var_6759_end_0, end_mask = var_6759_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6759_cast_fp16")]; tensor var_6763_begin_0 = const()[name = tensor("op_6763_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_6763_end_0 = const()[name = tensor("op_6763_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_6763_end_mask_0 = const()[name = tensor("op_6763_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6763_cast_fp16 = slice_by_index(begin = var_6763_begin_0, end = var_6763_end_0, end_mask = var_6763_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6763_cast_fp16")]; tensor var_6767_begin_0 = const()[name = tensor("op_6767_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_6767_end_0 = const()[name = tensor("op_6767_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_6767_end_mask_0 = const()[name = tensor("op_6767_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6767_cast_fp16 = slice_by_index(begin = var_6767_begin_0, end = var_6767_end_0, end_mask = var_6767_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6767_cast_fp16")]; tensor var_6771_begin_0 = const()[name = tensor("op_6771_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_6771_end_0 = const()[name = tensor("op_6771_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_6771_end_mask_0 = const()[name = tensor("op_6771_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6771_cast_fp16 = slice_by_index(begin = var_6771_begin_0, end = var_6771_end_0, end_mask = var_6771_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6771_cast_fp16")]; tensor var_6775_begin_0 = const()[name = tensor("op_6775_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_6775_end_0 = const()[name = tensor("op_6775_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_6775_end_mask_0 = const()[name = tensor("op_6775_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6775_cast_fp16 = slice_by_index(begin = var_6775_begin_0, end = var_6775_end_0, end_mask = var_6775_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6775_cast_fp16")]; tensor var_6779_begin_0 = const()[name = tensor("op_6779_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_6779_end_0 = const()[name = tensor("op_6779_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_6779_end_mask_0 = const()[name = tensor("op_6779_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_6779_cast_fp16 = slice_by_index(begin = var_6779_begin_0, end = var_6779_end_0, end_mask = var_6779_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6779_cast_fp16")]; tensor var_6788_begin_0 = const()[name = tensor("op_6788_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6788_end_0 = const()[name = tensor("op_6788_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6788_end_mask_0 = const()[name = tensor("op_6788_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6788_cast_fp16 = slice_by_index(begin = var_6788_begin_0, end = var_6788_end_0, end_mask = var_6788_end_mask_0, x = var_6703_cast_fp16)[name = tensor("op_6788_cast_fp16")]; tensor var_6795_begin_0 = const()[name = tensor("op_6795_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6795_end_0 = const()[name = tensor("op_6795_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6795_end_mask_0 = const()[name = tensor("op_6795_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6795_cast_fp16 = slice_by_index(begin = var_6795_begin_0, end = var_6795_end_0, end_mask = var_6795_end_mask_0, x = var_6703_cast_fp16)[name = tensor("op_6795_cast_fp16")]; tensor var_6802_begin_0 = const()[name = tensor("op_6802_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6802_end_0 = const()[name = tensor("op_6802_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6802_end_mask_0 = const()[name = tensor("op_6802_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6802_cast_fp16 = slice_by_index(begin = var_6802_begin_0, end = var_6802_end_0, end_mask = var_6802_end_mask_0, x = var_6703_cast_fp16)[name = tensor("op_6802_cast_fp16")]; tensor var_6809_begin_0 = const()[name = tensor("op_6809_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_6809_end_0 = const()[name = tensor("op_6809_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6809_end_mask_0 = const()[name = tensor("op_6809_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6809_cast_fp16 = slice_by_index(begin = var_6809_begin_0, end = var_6809_end_0, end_mask = var_6809_end_mask_0, x = var_6703_cast_fp16)[name = tensor("op_6809_cast_fp16")]; tensor var_6816_begin_0 = const()[name = tensor("op_6816_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6816_end_0 = const()[name = tensor("op_6816_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6816_end_mask_0 = const()[name = tensor("op_6816_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6816_cast_fp16 = slice_by_index(begin = var_6816_begin_0, end = var_6816_end_0, end_mask = var_6816_end_mask_0, x = var_6707_cast_fp16)[name = tensor("op_6816_cast_fp16")]; tensor var_6823_begin_0 = const()[name = tensor("op_6823_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6823_end_0 = const()[name = tensor("op_6823_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6823_end_mask_0 = const()[name = tensor("op_6823_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6823_cast_fp16 = slice_by_index(begin = var_6823_begin_0, end = var_6823_end_0, end_mask = var_6823_end_mask_0, x = var_6707_cast_fp16)[name = tensor("op_6823_cast_fp16")]; tensor var_6830_begin_0 = const()[name = tensor("op_6830_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6830_end_0 = const()[name = tensor("op_6830_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6830_end_mask_0 = const()[name = tensor("op_6830_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6830_cast_fp16 = slice_by_index(begin = var_6830_begin_0, end = var_6830_end_0, end_mask = var_6830_end_mask_0, x = var_6707_cast_fp16)[name = tensor("op_6830_cast_fp16")]; tensor var_6837_begin_0 = const()[name = tensor("op_6837_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_6837_end_0 = const()[name = tensor("op_6837_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6837_end_mask_0 = const()[name = tensor("op_6837_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6837_cast_fp16 = slice_by_index(begin = var_6837_begin_0, end = var_6837_end_0, end_mask = var_6837_end_mask_0, x = var_6707_cast_fp16)[name = tensor("op_6837_cast_fp16")]; tensor var_6844_begin_0 = const()[name = tensor("op_6844_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6844_end_0 = const()[name = tensor("op_6844_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6844_end_mask_0 = const()[name = tensor("op_6844_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6844_cast_fp16 = slice_by_index(begin = var_6844_begin_0, end = var_6844_end_0, end_mask = var_6844_end_mask_0, x = var_6711_cast_fp16)[name = tensor("op_6844_cast_fp16")]; tensor var_6851_begin_0 = const()[name = tensor("op_6851_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6851_end_0 = const()[name = tensor("op_6851_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6851_end_mask_0 = const()[name = tensor("op_6851_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6851_cast_fp16 = slice_by_index(begin = var_6851_begin_0, end = var_6851_end_0, end_mask = var_6851_end_mask_0, x = var_6711_cast_fp16)[name = tensor("op_6851_cast_fp16")]; tensor var_6858_begin_0 = const()[name = tensor("op_6858_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6858_end_0 = const()[name = tensor("op_6858_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6858_end_mask_0 = const()[name = tensor("op_6858_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6858_cast_fp16 = slice_by_index(begin = var_6858_begin_0, end = var_6858_end_0, end_mask = var_6858_end_mask_0, x = var_6711_cast_fp16)[name = tensor("op_6858_cast_fp16")]; tensor var_6865_begin_0 = const()[name = tensor("op_6865_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_6865_end_0 = const()[name = tensor("op_6865_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6865_end_mask_0 = const()[name = tensor("op_6865_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6865_cast_fp16 = slice_by_index(begin = var_6865_begin_0, end = var_6865_end_0, end_mask = var_6865_end_mask_0, x = var_6711_cast_fp16)[name = tensor("op_6865_cast_fp16")]; tensor var_6872_begin_0 = const()[name = tensor("op_6872_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6872_end_0 = const()[name = tensor("op_6872_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6872_end_mask_0 = const()[name = tensor("op_6872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6872_cast_fp16 = slice_by_index(begin = var_6872_begin_0, end = var_6872_end_0, end_mask = var_6872_end_mask_0, x = var_6715_cast_fp16)[name = tensor("op_6872_cast_fp16")]; tensor var_6879_begin_0 = const()[name = tensor("op_6879_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6879_end_0 = const()[name = tensor("op_6879_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6879_end_mask_0 = const()[name = tensor("op_6879_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6879_cast_fp16 = slice_by_index(begin = var_6879_begin_0, end = var_6879_end_0, end_mask = var_6879_end_mask_0, x = var_6715_cast_fp16)[name = tensor("op_6879_cast_fp16")]; tensor var_6886_begin_0 = const()[name = tensor("op_6886_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6886_end_0 = const()[name = tensor("op_6886_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6886_end_mask_0 = const()[name = tensor("op_6886_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6886_cast_fp16 = slice_by_index(begin = var_6886_begin_0, end = var_6886_end_0, end_mask = var_6886_end_mask_0, x = var_6715_cast_fp16)[name = tensor("op_6886_cast_fp16")]; tensor var_6893_begin_0 = const()[name = tensor("op_6893_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_6893_end_0 = const()[name = tensor("op_6893_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6893_end_mask_0 = const()[name = tensor("op_6893_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6893_cast_fp16 = slice_by_index(begin = var_6893_begin_0, end = var_6893_end_0, end_mask = var_6893_end_mask_0, x = var_6715_cast_fp16)[name = tensor("op_6893_cast_fp16")]; tensor var_6900_begin_0 = const()[name = tensor("op_6900_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6900_end_0 = const()[name = tensor("op_6900_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6900_end_mask_0 = const()[name = tensor("op_6900_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6900_cast_fp16 = slice_by_index(begin = var_6900_begin_0, end = var_6900_end_0, end_mask = var_6900_end_mask_0, x = var_6719_cast_fp16)[name = tensor("op_6900_cast_fp16")]; tensor var_6907_begin_0 = const()[name = tensor("op_6907_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6907_end_0 = const()[name = tensor("op_6907_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6907_end_mask_0 = const()[name = tensor("op_6907_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6907_cast_fp16 = slice_by_index(begin = var_6907_begin_0, end = var_6907_end_0, end_mask = var_6907_end_mask_0, x = var_6719_cast_fp16)[name = tensor("op_6907_cast_fp16")]; tensor var_6914_begin_0 = const()[name = tensor("op_6914_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6914_end_0 = const()[name = tensor("op_6914_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6914_end_mask_0 = const()[name = tensor("op_6914_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6914_cast_fp16 = slice_by_index(begin = var_6914_begin_0, end = var_6914_end_0, end_mask = var_6914_end_mask_0, x = var_6719_cast_fp16)[name = tensor("op_6914_cast_fp16")]; tensor var_6921_begin_0 = const()[name = tensor("op_6921_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_6921_end_0 = const()[name = tensor("op_6921_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6921_end_mask_0 = const()[name = tensor("op_6921_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6921_cast_fp16 = slice_by_index(begin = var_6921_begin_0, end = var_6921_end_0, end_mask = var_6921_end_mask_0, x = var_6719_cast_fp16)[name = tensor("op_6921_cast_fp16")]; tensor var_6928_begin_0 = const()[name = tensor("op_6928_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6928_end_0 = const()[name = tensor("op_6928_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6928_end_mask_0 = const()[name = tensor("op_6928_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6928_cast_fp16 = slice_by_index(begin = var_6928_begin_0, end = var_6928_end_0, end_mask = var_6928_end_mask_0, x = var_6723_cast_fp16)[name = tensor("op_6928_cast_fp16")]; tensor var_6935_begin_0 = const()[name = tensor("op_6935_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6935_end_0 = const()[name = tensor("op_6935_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6935_end_mask_0 = const()[name = tensor("op_6935_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6935_cast_fp16 = slice_by_index(begin = var_6935_begin_0, end = var_6935_end_0, end_mask = var_6935_end_mask_0, x = var_6723_cast_fp16)[name = tensor("op_6935_cast_fp16")]; tensor var_6942_begin_0 = const()[name = tensor("op_6942_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6942_end_0 = const()[name = tensor("op_6942_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6942_end_mask_0 = const()[name = tensor("op_6942_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6942_cast_fp16 = slice_by_index(begin = var_6942_begin_0, end = var_6942_end_0, end_mask = var_6942_end_mask_0, x = var_6723_cast_fp16)[name = tensor("op_6942_cast_fp16")]; tensor var_6949_begin_0 = const()[name = tensor("op_6949_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_6949_end_0 = const()[name = tensor("op_6949_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6949_end_mask_0 = const()[name = tensor("op_6949_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6949_cast_fp16 = slice_by_index(begin = var_6949_begin_0, end = var_6949_end_0, end_mask = var_6949_end_mask_0, x = var_6723_cast_fp16)[name = tensor("op_6949_cast_fp16")]; tensor var_6956_begin_0 = const()[name = tensor("op_6956_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6956_end_0 = const()[name = tensor("op_6956_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6956_end_mask_0 = const()[name = tensor("op_6956_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6956_cast_fp16 = slice_by_index(begin = var_6956_begin_0, end = var_6956_end_0, end_mask = var_6956_end_mask_0, x = var_6727_cast_fp16)[name = tensor("op_6956_cast_fp16")]; tensor var_6963_begin_0 = const()[name = tensor("op_6963_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6963_end_0 = const()[name = tensor("op_6963_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6963_end_mask_0 = const()[name = tensor("op_6963_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6963_cast_fp16 = slice_by_index(begin = var_6963_begin_0, end = var_6963_end_0, end_mask = var_6963_end_mask_0, x = var_6727_cast_fp16)[name = tensor("op_6963_cast_fp16")]; tensor var_6970_begin_0 = const()[name = tensor("op_6970_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6970_end_0 = const()[name = tensor("op_6970_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6970_end_mask_0 = const()[name = tensor("op_6970_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6970_cast_fp16 = slice_by_index(begin = var_6970_begin_0, end = var_6970_end_0, end_mask = var_6970_end_mask_0, x = var_6727_cast_fp16)[name = tensor("op_6970_cast_fp16")]; tensor var_6977_begin_0 = const()[name = tensor("op_6977_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_6977_end_0 = const()[name = tensor("op_6977_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_6977_end_mask_0 = const()[name = tensor("op_6977_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6977_cast_fp16 = slice_by_index(begin = var_6977_begin_0, end = var_6977_end_0, end_mask = var_6977_end_mask_0, x = var_6727_cast_fp16)[name = tensor("op_6977_cast_fp16")]; tensor var_6984_begin_0 = const()[name = tensor("op_6984_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_6984_end_0 = const()[name = tensor("op_6984_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_6984_end_mask_0 = const()[name = tensor("op_6984_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6984_cast_fp16 = slice_by_index(begin = var_6984_begin_0, end = var_6984_end_0, end_mask = var_6984_end_mask_0, x = var_6731_cast_fp16)[name = tensor("op_6984_cast_fp16")]; tensor var_6991_begin_0 = const()[name = tensor("op_6991_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_6991_end_0 = const()[name = tensor("op_6991_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_6991_end_mask_0 = const()[name = tensor("op_6991_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6991_cast_fp16 = slice_by_index(begin = var_6991_begin_0, end = var_6991_end_0, end_mask = var_6991_end_mask_0, x = var_6731_cast_fp16)[name = tensor("op_6991_cast_fp16")]; tensor var_6998_begin_0 = const()[name = tensor("op_6998_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_6998_end_0 = const()[name = tensor("op_6998_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_6998_end_mask_0 = const()[name = tensor("op_6998_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_6998_cast_fp16 = slice_by_index(begin = var_6998_begin_0, end = var_6998_end_0, end_mask = var_6998_end_mask_0, x = var_6731_cast_fp16)[name = tensor("op_6998_cast_fp16")]; tensor var_7005_begin_0 = const()[name = tensor("op_7005_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7005_end_0 = const()[name = tensor("op_7005_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7005_end_mask_0 = const()[name = tensor("op_7005_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7005_cast_fp16 = slice_by_index(begin = var_7005_begin_0, end = var_7005_end_0, end_mask = var_7005_end_mask_0, x = var_6731_cast_fp16)[name = tensor("op_7005_cast_fp16")]; tensor var_7012_begin_0 = const()[name = tensor("op_7012_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7012_end_0 = const()[name = tensor("op_7012_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7012_end_mask_0 = const()[name = tensor("op_7012_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7012_cast_fp16 = slice_by_index(begin = var_7012_begin_0, end = var_7012_end_0, end_mask = var_7012_end_mask_0, x = var_6735_cast_fp16)[name = tensor("op_7012_cast_fp16")]; tensor var_7019_begin_0 = const()[name = tensor("op_7019_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7019_end_0 = const()[name = tensor("op_7019_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7019_end_mask_0 = const()[name = tensor("op_7019_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7019_cast_fp16 = slice_by_index(begin = var_7019_begin_0, end = var_7019_end_0, end_mask = var_7019_end_mask_0, x = var_6735_cast_fp16)[name = tensor("op_7019_cast_fp16")]; tensor var_7026_begin_0 = const()[name = tensor("op_7026_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7026_end_0 = const()[name = tensor("op_7026_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7026_end_mask_0 = const()[name = tensor("op_7026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7026_cast_fp16 = slice_by_index(begin = var_7026_begin_0, end = var_7026_end_0, end_mask = var_7026_end_mask_0, x = var_6735_cast_fp16)[name = tensor("op_7026_cast_fp16")]; tensor var_7033_begin_0 = const()[name = tensor("op_7033_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7033_end_0 = const()[name = tensor("op_7033_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7033_end_mask_0 = const()[name = tensor("op_7033_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7033_cast_fp16 = slice_by_index(begin = var_7033_begin_0, end = var_7033_end_0, end_mask = var_7033_end_mask_0, x = var_6735_cast_fp16)[name = tensor("op_7033_cast_fp16")]; tensor var_7040_begin_0 = const()[name = tensor("op_7040_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7040_end_0 = const()[name = tensor("op_7040_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7040_end_mask_0 = const()[name = tensor("op_7040_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7040_cast_fp16 = slice_by_index(begin = var_7040_begin_0, end = var_7040_end_0, end_mask = var_7040_end_mask_0, x = var_6739_cast_fp16)[name = tensor("op_7040_cast_fp16")]; tensor var_7047_begin_0 = const()[name = tensor("op_7047_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7047_end_0 = const()[name = tensor("op_7047_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7047_end_mask_0 = const()[name = tensor("op_7047_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7047_cast_fp16 = slice_by_index(begin = var_7047_begin_0, end = var_7047_end_0, end_mask = var_7047_end_mask_0, x = var_6739_cast_fp16)[name = tensor("op_7047_cast_fp16")]; tensor var_7054_begin_0 = const()[name = tensor("op_7054_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7054_end_0 = const()[name = tensor("op_7054_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7054_end_mask_0 = const()[name = tensor("op_7054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7054_cast_fp16 = slice_by_index(begin = var_7054_begin_0, end = var_7054_end_0, end_mask = var_7054_end_mask_0, x = var_6739_cast_fp16)[name = tensor("op_7054_cast_fp16")]; tensor var_7061_begin_0 = const()[name = tensor("op_7061_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7061_end_0 = const()[name = tensor("op_7061_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7061_end_mask_0 = const()[name = tensor("op_7061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7061_cast_fp16 = slice_by_index(begin = var_7061_begin_0, end = var_7061_end_0, end_mask = var_7061_end_mask_0, x = var_6739_cast_fp16)[name = tensor("op_7061_cast_fp16")]; tensor var_7068_begin_0 = const()[name = tensor("op_7068_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7068_end_0 = const()[name = tensor("op_7068_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7068_end_mask_0 = const()[name = tensor("op_7068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7068_cast_fp16 = slice_by_index(begin = var_7068_begin_0, end = var_7068_end_0, end_mask = var_7068_end_mask_0, x = var_6743_cast_fp16)[name = tensor("op_7068_cast_fp16")]; tensor var_7075_begin_0 = const()[name = tensor("op_7075_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7075_end_0 = const()[name = tensor("op_7075_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7075_end_mask_0 = const()[name = tensor("op_7075_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7075_cast_fp16 = slice_by_index(begin = var_7075_begin_0, end = var_7075_end_0, end_mask = var_7075_end_mask_0, x = var_6743_cast_fp16)[name = tensor("op_7075_cast_fp16")]; tensor var_7082_begin_0 = const()[name = tensor("op_7082_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7082_end_0 = const()[name = tensor("op_7082_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7082_end_mask_0 = const()[name = tensor("op_7082_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7082_cast_fp16 = slice_by_index(begin = var_7082_begin_0, end = var_7082_end_0, end_mask = var_7082_end_mask_0, x = var_6743_cast_fp16)[name = tensor("op_7082_cast_fp16")]; tensor var_7089_begin_0 = const()[name = tensor("op_7089_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7089_end_0 = const()[name = tensor("op_7089_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7089_end_mask_0 = const()[name = tensor("op_7089_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7089_cast_fp16 = slice_by_index(begin = var_7089_begin_0, end = var_7089_end_0, end_mask = var_7089_end_mask_0, x = var_6743_cast_fp16)[name = tensor("op_7089_cast_fp16")]; tensor var_7096_begin_0 = const()[name = tensor("op_7096_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7096_end_0 = const()[name = tensor("op_7096_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7096_end_mask_0 = const()[name = tensor("op_7096_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7096_cast_fp16 = slice_by_index(begin = var_7096_begin_0, end = var_7096_end_0, end_mask = var_7096_end_mask_0, x = var_6747_cast_fp16)[name = tensor("op_7096_cast_fp16")]; tensor var_7103_begin_0 = const()[name = tensor("op_7103_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7103_end_0 = const()[name = tensor("op_7103_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7103_end_mask_0 = const()[name = tensor("op_7103_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7103_cast_fp16 = slice_by_index(begin = var_7103_begin_0, end = var_7103_end_0, end_mask = var_7103_end_mask_0, x = var_6747_cast_fp16)[name = tensor("op_7103_cast_fp16")]; tensor var_7110_begin_0 = const()[name = tensor("op_7110_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7110_end_0 = const()[name = tensor("op_7110_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7110_end_mask_0 = const()[name = tensor("op_7110_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7110_cast_fp16 = slice_by_index(begin = var_7110_begin_0, end = var_7110_end_0, end_mask = var_7110_end_mask_0, x = var_6747_cast_fp16)[name = tensor("op_7110_cast_fp16")]; tensor var_7117_begin_0 = const()[name = tensor("op_7117_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7117_end_0 = const()[name = tensor("op_7117_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7117_end_mask_0 = const()[name = tensor("op_7117_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7117_cast_fp16 = slice_by_index(begin = var_7117_begin_0, end = var_7117_end_0, end_mask = var_7117_end_mask_0, x = var_6747_cast_fp16)[name = tensor("op_7117_cast_fp16")]; tensor var_7124_begin_0 = const()[name = tensor("op_7124_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7124_end_0 = const()[name = tensor("op_7124_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7124_end_mask_0 = const()[name = tensor("op_7124_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7124_cast_fp16 = slice_by_index(begin = var_7124_begin_0, end = var_7124_end_0, end_mask = var_7124_end_mask_0, x = var_6751_cast_fp16)[name = tensor("op_7124_cast_fp16")]; tensor var_7131_begin_0 = const()[name = tensor("op_7131_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7131_end_0 = const()[name = tensor("op_7131_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7131_end_mask_0 = const()[name = tensor("op_7131_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7131_cast_fp16 = slice_by_index(begin = var_7131_begin_0, end = var_7131_end_0, end_mask = var_7131_end_mask_0, x = var_6751_cast_fp16)[name = tensor("op_7131_cast_fp16")]; tensor var_7138_begin_0 = const()[name = tensor("op_7138_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7138_end_0 = const()[name = tensor("op_7138_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7138_end_mask_0 = const()[name = tensor("op_7138_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7138_cast_fp16 = slice_by_index(begin = var_7138_begin_0, end = var_7138_end_0, end_mask = var_7138_end_mask_0, x = var_6751_cast_fp16)[name = tensor("op_7138_cast_fp16")]; tensor var_7145_begin_0 = const()[name = tensor("op_7145_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7145_end_0 = const()[name = tensor("op_7145_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7145_end_mask_0 = const()[name = tensor("op_7145_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7145_cast_fp16 = slice_by_index(begin = var_7145_begin_0, end = var_7145_end_0, end_mask = var_7145_end_mask_0, x = var_6751_cast_fp16)[name = tensor("op_7145_cast_fp16")]; tensor var_7152_begin_0 = const()[name = tensor("op_7152_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7152_end_0 = const()[name = tensor("op_7152_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7152_end_mask_0 = const()[name = tensor("op_7152_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7152_cast_fp16 = slice_by_index(begin = var_7152_begin_0, end = var_7152_end_0, end_mask = var_7152_end_mask_0, x = var_6755_cast_fp16)[name = tensor("op_7152_cast_fp16")]; tensor var_7159_begin_0 = const()[name = tensor("op_7159_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7159_end_0 = const()[name = tensor("op_7159_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7159_end_mask_0 = const()[name = tensor("op_7159_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7159_cast_fp16 = slice_by_index(begin = var_7159_begin_0, end = var_7159_end_0, end_mask = var_7159_end_mask_0, x = var_6755_cast_fp16)[name = tensor("op_7159_cast_fp16")]; tensor var_7166_begin_0 = const()[name = tensor("op_7166_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7166_end_0 = const()[name = tensor("op_7166_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7166_end_mask_0 = const()[name = tensor("op_7166_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7166_cast_fp16 = slice_by_index(begin = var_7166_begin_0, end = var_7166_end_0, end_mask = var_7166_end_mask_0, x = var_6755_cast_fp16)[name = tensor("op_7166_cast_fp16")]; tensor var_7173_begin_0 = const()[name = tensor("op_7173_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7173_end_0 = const()[name = tensor("op_7173_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7173_end_mask_0 = const()[name = tensor("op_7173_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7173_cast_fp16 = slice_by_index(begin = var_7173_begin_0, end = var_7173_end_0, end_mask = var_7173_end_mask_0, x = var_6755_cast_fp16)[name = tensor("op_7173_cast_fp16")]; tensor var_7180_begin_0 = const()[name = tensor("op_7180_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7180_end_0 = const()[name = tensor("op_7180_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7180_end_mask_0 = const()[name = tensor("op_7180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7180_cast_fp16 = slice_by_index(begin = var_7180_begin_0, end = var_7180_end_0, end_mask = var_7180_end_mask_0, x = var_6759_cast_fp16)[name = tensor("op_7180_cast_fp16")]; tensor var_7187_begin_0 = const()[name = tensor("op_7187_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7187_end_0 = const()[name = tensor("op_7187_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7187_end_mask_0 = const()[name = tensor("op_7187_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7187_cast_fp16 = slice_by_index(begin = var_7187_begin_0, end = var_7187_end_0, end_mask = var_7187_end_mask_0, x = var_6759_cast_fp16)[name = tensor("op_7187_cast_fp16")]; tensor var_7194_begin_0 = const()[name = tensor("op_7194_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7194_end_0 = const()[name = tensor("op_7194_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7194_end_mask_0 = const()[name = tensor("op_7194_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7194_cast_fp16 = slice_by_index(begin = var_7194_begin_0, end = var_7194_end_0, end_mask = var_7194_end_mask_0, x = var_6759_cast_fp16)[name = tensor("op_7194_cast_fp16")]; tensor var_7201_begin_0 = const()[name = tensor("op_7201_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7201_end_0 = const()[name = tensor("op_7201_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7201_end_mask_0 = const()[name = tensor("op_7201_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7201_cast_fp16 = slice_by_index(begin = var_7201_begin_0, end = var_7201_end_0, end_mask = var_7201_end_mask_0, x = var_6759_cast_fp16)[name = tensor("op_7201_cast_fp16")]; tensor var_7208_begin_0 = const()[name = tensor("op_7208_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7208_end_0 = const()[name = tensor("op_7208_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7208_end_mask_0 = const()[name = tensor("op_7208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7208_cast_fp16 = slice_by_index(begin = var_7208_begin_0, end = var_7208_end_0, end_mask = var_7208_end_mask_0, x = var_6763_cast_fp16)[name = tensor("op_7208_cast_fp16")]; tensor var_7215_begin_0 = const()[name = tensor("op_7215_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7215_end_0 = const()[name = tensor("op_7215_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7215_end_mask_0 = const()[name = tensor("op_7215_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7215_cast_fp16 = slice_by_index(begin = var_7215_begin_0, end = var_7215_end_0, end_mask = var_7215_end_mask_0, x = var_6763_cast_fp16)[name = tensor("op_7215_cast_fp16")]; tensor var_7222_begin_0 = const()[name = tensor("op_7222_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7222_end_0 = const()[name = tensor("op_7222_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7222_end_mask_0 = const()[name = tensor("op_7222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7222_cast_fp16 = slice_by_index(begin = var_7222_begin_0, end = var_7222_end_0, end_mask = var_7222_end_mask_0, x = var_6763_cast_fp16)[name = tensor("op_7222_cast_fp16")]; tensor var_7229_begin_0 = const()[name = tensor("op_7229_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7229_end_0 = const()[name = tensor("op_7229_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7229_end_mask_0 = const()[name = tensor("op_7229_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7229_cast_fp16 = slice_by_index(begin = var_7229_begin_0, end = var_7229_end_0, end_mask = var_7229_end_mask_0, x = var_6763_cast_fp16)[name = tensor("op_7229_cast_fp16")]; tensor var_7236_begin_0 = const()[name = tensor("op_7236_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7236_end_0 = const()[name = tensor("op_7236_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7236_end_mask_0 = const()[name = tensor("op_7236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7236_cast_fp16 = slice_by_index(begin = var_7236_begin_0, end = var_7236_end_0, end_mask = var_7236_end_mask_0, x = var_6767_cast_fp16)[name = tensor("op_7236_cast_fp16")]; tensor var_7243_begin_0 = const()[name = tensor("op_7243_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7243_end_0 = const()[name = tensor("op_7243_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7243_end_mask_0 = const()[name = tensor("op_7243_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7243_cast_fp16 = slice_by_index(begin = var_7243_begin_0, end = var_7243_end_0, end_mask = var_7243_end_mask_0, x = var_6767_cast_fp16)[name = tensor("op_7243_cast_fp16")]; tensor var_7250_begin_0 = const()[name = tensor("op_7250_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7250_end_0 = const()[name = tensor("op_7250_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7250_end_mask_0 = const()[name = tensor("op_7250_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7250_cast_fp16 = slice_by_index(begin = var_7250_begin_0, end = var_7250_end_0, end_mask = var_7250_end_mask_0, x = var_6767_cast_fp16)[name = tensor("op_7250_cast_fp16")]; tensor var_7257_begin_0 = const()[name = tensor("op_7257_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7257_end_0 = const()[name = tensor("op_7257_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7257_end_mask_0 = const()[name = tensor("op_7257_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7257_cast_fp16 = slice_by_index(begin = var_7257_begin_0, end = var_7257_end_0, end_mask = var_7257_end_mask_0, x = var_6767_cast_fp16)[name = tensor("op_7257_cast_fp16")]; tensor var_7264_begin_0 = const()[name = tensor("op_7264_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7264_end_0 = const()[name = tensor("op_7264_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7264_end_mask_0 = const()[name = tensor("op_7264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7264_cast_fp16 = slice_by_index(begin = var_7264_begin_0, end = var_7264_end_0, end_mask = var_7264_end_mask_0, x = var_6771_cast_fp16)[name = tensor("op_7264_cast_fp16")]; tensor var_7271_begin_0 = const()[name = tensor("op_7271_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7271_end_0 = const()[name = tensor("op_7271_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7271_end_mask_0 = const()[name = tensor("op_7271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7271_cast_fp16 = slice_by_index(begin = var_7271_begin_0, end = var_7271_end_0, end_mask = var_7271_end_mask_0, x = var_6771_cast_fp16)[name = tensor("op_7271_cast_fp16")]; tensor var_7278_begin_0 = const()[name = tensor("op_7278_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7278_end_0 = const()[name = tensor("op_7278_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7278_end_mask_0 = const()[name = tensor("op_7278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7278_cast_fp16 = slice_by_index(begin = var_7278_begin_0, end = var_7278_end_0, end_mask = var_7278_end_mask_0, x = var_6771_cast_fp16)[name = tensor("op_7278_cast_fp16")]; tensor var_7285_begin_0 = const()[name = tensor("op_7285_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7285_end_0 = const()[name = tensor("op_7285_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7285_end_mask_0 = const()[name = tensor("op_7285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7285_cast_fp16 = slice_by_index(begin = var_7285_begin_0, end = var_7285_end_0, end_mask = var_7285_end_mask_0, x = var_6771_cast_fp16)[name = tensor("op_7285_cast_fp16")]; tensor var_7292_begin_0 = const()[name = tensor("op_7292_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7292_end_0 = const()[name = tensor("op_7292_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7292_end_mask_0 = const()[name = tensor("op_7292_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7292_cast_fp16 = slice_by_index(begin = var_7292_begin_0, end = var_7292_end_0, end_mask = var_7292_end_mask_0, x = var_6775_cast_fp16)[name = tensor("op_7292_cast_fp16")]; tensor var_7299_begin_0 = const()[name = tensor("op_7299_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7299_end_0 = const()[name = tensor("op_7299_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7299_end_mask_0 = const()[name = tensor("op_7299_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7299_cast_fp16 = slice_by_index(begin = var_7299_begin_0, end = var_7299_end_0, end_mask = var_7299_end_mask_0, x = var_6775_cast_fp16)[name = tensor("op_7299_cast_fp16")]; tensor var_7306_begin_0 = const()[name = tensor("op_7306_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7306_end_0 = const()[name = tensor("op_7306_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7306_end_mask_0 = const()[name = tensor("op_7306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7306_cast_fp16 = slice_by_index(begin = var_7306_begin_0, end = var_7306_end_0, end_mask = var_7306_end_mask_0, x = var_6775_cast_fp16)[name = tensor("op_7306_cast_fp16")]; tensor var_7313_begin_0 = const()[name = tensor("op_7313_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7313_end_0 = const()[name = tensor("op_7313_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7313_end_mask_0 = const()[name = tensor("op_7313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7313_cast_fp16 = slice_by_index(begin = var_7313_begin_0, end = var_7313_end_0, end_mask = var_7313_end_mask_0, x = var_6775_cast_fp16)[name = tensor("op_7313_cast_fp16")]; tensor var_7320_begin_0 = const()[name = tensor("op_7320_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7320_end_0 = const()[name = tensor("op_7320_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_7320_end_mask_0 = const()[name = tensor("op_7320_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7320_cast_fp16 = slice_by_index(begin = var_7320_begin_0, end = var_7320_end_0, end_mask = var_7320_end_mask_0, x = var_6779_cast_fp16)[name = tensor("op_7320_cast_fp16")]; tensor var_7327_begin_0 = const()[name = tensor("op_7327_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_7327_end_0 = const()[name = tensor("op_7327_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_7327_end_mask_0 = const()[name = tensor("op_7327_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7327_cast_fp16 = slice_by_index(begin = var_7327_begin_0, end = var_7327_end_0, end_mask = var_7327_end_mask_0, x = var_6779_cast_fp16)[name = tensor("op_7327_cast_fp16")]; tensor var_7334_begin_0 = const()[name = tensor("op_7334_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_7334_end_0 = const()[name = tensor("op_7334_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_7334_end_mask_0 = const()[name = tensor("op_7334_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7334_cast_fp16 = slice_by_index(begin = var_7334_begin_0, end = var_7334_end_0, end_mask = var_7334_end_mask_0, x = var_6779_cast_fp16)[name = tensor("op_7334_cast_fp16")]; tensor var_7341_begin_0 = const()[name = tensor("op_7341_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_7341_end_0 = const()[name = tensor("op_7341_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7341_end_mask_0 = const()[name = tensor("op_7341_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7341_cast_fp16 = slice_by_index(begin = var_7341_begin_0, end = var_7341_end_0, end_mask = var_7341_end_mask_0, x = var_6779_cast_fp16)[name = tensor("op_7341_cast_fp16")]; tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_7346_begin_0 = const()[name = tensor("op_7346_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7346_end_0 = const()[name = tensor("op_7346_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_7346_end_mask_0 = const()[name = tensor("op_7346_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = key_9_cast_fp16)[name = tensor("transpose_27")]; tensor var_7346_cast_fp16 = slice_by_index(begin = var_7346_begin_0, end = var_7346_end_0, end_mask = var_7346_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7346_cast_fp16")]; tensor var_7350_begin_0 = const()[name = tensor("op_7350_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_7350_end_0 = const()[name = tensor("op_7350_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_7350_end_mask_0 = const()[name = tensor("op_7350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7350_cast_fp16 = slice_by_index(begin = var_7350_begin_0, end = var_7350_end_0, end_mask = var_7350_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7350_cast_fp16")]; tensor var_7354_begin_0 = const()[name = tensor("op_7354_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_7354_end_0 = const()[name = tensor("op_7354_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_7354_end_mask_0 = const()[name = tensor("op_7354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7354_cast_fp16 = slice_by_index(begin = var_7354_begin_0, end = var_7354_end_0, end_mask = var_7354_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7354_cast_fp16")]; tensor var_7358_begin_0 = const()[name = tensor("op_7358_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_7358_end_0 = const()[name = tensor("op_7358_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_7358_end_mask_0 = const()[name = tensor("op_7358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7358_cast_fp16 = slice_by_index(begin = var_7358_begin_0, end = var_7358_end_0, end_mask = var_7358_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7358_cast_fp16")]; tensor var_7362_begin_0 = const()[name = tensor("op_7362_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_7362_end_0 = const()[name = tensor("op_7362_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_7362_end_mask_0 = const()[name = tensor("op_7362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7362_cast_fp16 = slice_by_index(begin = var_7362_begin_0, end = var_7362_end_0, end_mask = var_7362_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7362_cast_fp16")]; tensor var_7366_begin_0 = const()[name = tensor("op_7366_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_7366_end_0 = const()[name = tensor("op_7366_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_7366_end_mask_0 = const()[name = tensor("op_7366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7366_cast_fp16 = slice_by_index(begin = var_7366_begin_0, end = var_7366_end_0, end_mask = var_7366_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7366_cast_fp16")]; tensor var_7370_begin_0 = const()[name = tensor("op_7370_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_7370_end_0 = const()[name = tensor("op_7370_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_7370_end_mask_0 = const()[name = tensor("op_7370_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7370_cast_fp16 = slice_by_index(begin = var_7370_begin_0, end = var_7370_end_0, end_mask = var_7370_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7370_cast_fp16")]; tensor var_7374_begin_0 = const()[name = tensor("op_7374_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_7374_end_0 = const()[name = tensor("op_7374_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_7374_end_mask_0 = const()[name = tensor("op_7374_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7374_cast_fp16 = slice_by_index(begin = var_7374_begin_0, end = var_7374_end_0, end_mask = var_7374_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7374_cast_fp16")]; tensor var_7378_begin_0 = const()[name = tensor("op_7378_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_7378_end_0 = const()[name = tensor("op_7378_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_7378_end_mask_0 = const()[name = tensor("op_7378_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7378_cast_fp16 = slice_by_index(begin = var_7378_begin_0, end = var_7378_end_0, end_mask = var_7378_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7378_cast_fp16")]; tensor var_7382_begin_0 = const()[name = tensor("op_7382_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_7382_end_0 = const()[name = tensor("op_7382_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_7382_end_mask_0 = const()[name = tensor("op_7382_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7382_cast_fp16 = slice_by_index(begin = var_7382_begin_0, end = var_7382_end_0, end_mask = var_7382_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7382_cast_fp16")]; tensor var_7386_begin_0 = const()[name = tensor("op_7386_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_7386_end_0 = const()[name = tensor("op_7386_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_7386_end_mask_0 = const()[name = tensor("op_7386_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7386_cast_fp16 = slice_by_index(begin = var_7386_begin_0, end = var_7386_end_0, end_mask = var_7386_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7386_cast_fp16")]; tensor var_7390_begin_0 = const()[name = tensor("op_7390_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_7390_end_0 = const()[name = tensor("op_7390_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_7390_end_mask_0 = const()[name = tensor("op_7390_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7390_cast_fp16 = slice_by_index(begin = var_7390_begin_0, end = var_7390_end_0, end_mask = var_7390_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7390_cast_fp16")]; tensor var_7394_begin_0 = const()[name = tensor("op_7394_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_7394_end_0 = const()[name = tensor("op_7394_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_7394_end_mask_0 = const()[name = tensor("op_7394_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7394_cast_fp16 = slice_by_index(begin = var_7394_begin_0, end = var_7394_end_0, end_mask = var_7394_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7394_cast_fp16")]; tensor var_7398_begin_0 = const()[name = tensor("op_7398_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_7398_end_0 = const()[name = tensor("op_7398_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_7398_end_mask_0 = const()[name = tensor("op_7398_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7398_cast_fp16 = slice_by_index(begin = var_7398_begin_0, end = var_7398_end_0, end_mask = var_7398_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7398_cast_fp16")]; tensor var_7402_begin_0 = const()[name = tensor("op_7402_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_7402_end_0 = const()[name = tensor("op_7402_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_7402_end_mask_0 = const()[name = tensor("op_7402_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7402_cast_fp16 = slice_by_index(begin = var_7402_begin_0, end = var_7402_end_0, end_mask = var_7402_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7402_cast_fp16")]; tensor var_7406_begin_0 = const()[name = tensor("op_7406_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_7406_end_0 = const()[name = tensor("op_7406_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_7406_end_mask_0 = const()[name = tensor("op_7406_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7406_cast_fp16 = slice_by_index(begin = var_7406_begin_0, end = var_7406_end_0, end_mask = var_7406_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7406_cast_fp16")]; tensor var_7410_begin_0 = const()[name = tensor("op_7410_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_7410_end_0 = const()[name = tensor("op_7410_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_7410_end_mask_0 = const()[name = tensor("op_7410_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7410_cast_fp16 = slice_by_index(begin = var_7410_begin_0, end = var_7410_end_0, end_mask = var_7410_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7410_cast_fp16")]; tensor var_7414_begin_0 = const()[name = tensor("op_7414_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_7414_end_0 = const()[name = tensor("op_7414_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_7414_end_mask_0 = const()[name = tensor("op_7414_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7414_cast_fp16 = slice_by_index(begin = var_7414_begin_0, end = var_7414_end_0, end_mask = var_7414_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7414_cast_fp16")]; tensor var_7418_begin_0 = const()[name = tensor("op_7418_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_7418_end_0 = const()[name = tensor("op_7418_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_7418_end_mask_0 = const()[name = tensor("op_7418_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7418_cast_fp16 = slice_by_index(begin = var_7418_begin_0, end = var_7418_end_0, end_mask = var_7418_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7418_cast_fp16")]; tensor var_7422_begin_0 = const()[name = tensor("op_7422_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_7422_end_0 = const()[name = tensor("op_7422_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_7422_end_mask_0 = const()[name = tensor("op_7422_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_7422_cast_fp16 = slice_by_index(begin = var_7422_begin_0, end = var_7422_end_0, end_mask = var_7422_end_mask_0, x = k_9_cast_fp16)[name = tensor("op_7422_cast_fp16")]; tensor var_7424_begin_0 = const()[name = tensor("op_7424_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_7424_end_0 = const()[name = tensor("op_7424_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_7424_end_mask_0 = const()[name = tensor("op_7424_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7424_cast_fp16 = slice_by_index(begin = var_7424_begin_0, end = var_7424_end_0, end_mask = var_7424_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7424_cast_fp16")]; tensor var_7428_begin_0 = const()[name = tensor("op_7428_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_7428_end_0 = const()[name = tensor("op_7428_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_7428_end_mask_0 = const()[name = tensor("op_7428_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7428_cast_fp16 = slice_by_index(begin = var_7428_begin_0, end = var_7428_end_0, end_mask = var_7428_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7428_cast_fp16")]; tensor var_7432_begin_0 = const()[name = tensor("op_7432_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_7432_end_0 = const()[name = tensor("op_7432_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_7432_end_mask_0 = const()[name = tensor("op_7432_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7432_cast_fp16 = slice_by_index(begin = var_7432_begin_0, end = var_7432_end_0, end_mask = var_7432_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7432_cast_fp16")]; tensor var_7436_begin_0 = const()[name = tensor("op_7436_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_7436_end_0 = const()[name = tensor("op_7436_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_7436_end_mask_0 = const()[name = tensor("op_7436_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7436_cast_fp16 = slice_by_index(begin = var_7436_begin_0, end = var_7436_end_0, end_mask = var_7436_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7436_cast_fp16")]; tensor var_7440_begin_0 = const()[name = tensor("op_7440_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_7440_end_0 = const()[name = tensor("op_7440_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_7440_end_mask_0 = const()[name = tensor("op_7440_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7440_cast_fp16 = slice_by_index(begin = var_7440_begin_0, end = var_7440_end_0, end_mask = var_7440_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7440_cast_fp16")]; tensor var_7444_begin_0 = const()[name = tensor("op_7444_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_7444_end_0 = const()[name = tensor("op_7444_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_7444_end_mask_0 = const()[name = tensor("op_7444_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7444_cast_fp16 = slice_by_index(begin = var_7444_begin_0, end = var_7444_end_0, end_mask = var_7444_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7444_cast_fp16")]; tensor var_7448_begin_0 = const()[name = tensor("op_7448_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_7448_end_0 = const()[name = tensor("op_7448_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_7448_end_mask_0 = const()[name = tensor("op_7448_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7448_cast_fp16 = slice_by_index(begin = var_7448_begin_0, end = var_7448_end_0, end_mask = var_7448_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7448_cast_fp16")]; tensor var_7452_begin_0 = const()[name = tensor("op_7452_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_7452_end_0 = const()[name = tensor("op_7452_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_7452_end_mask_0 = const()[name = tensor("op_7452_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7452_cast_fp16 = slice_by_index(begin = var_7452_begin_0, end = var_7452_end_0, end_mask = var_7452_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7452_cast_fp16")]; tensor var_7456_begin_0 = const()[name = tensor("op_7456_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_7456_end_0 = const()[name = tensor("op_7456_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_7456_end_mask_0 = const()[name = tensor("op_7456_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7456_cast_fp16 = slice_by_index(begin = var_7456_begin_0, end = var_7456_end_0, end_mask = var_7456_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7456_cast_fp16")]; tensor var_7460_begin_0 = const()[name = tensor("op_7460_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_7460_end_0 = const()[name = tensor("op_7460_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_7460_end_mask_0 = const()[name = tensor("op_7460_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7460_cast_fp16 = slice_by_index(begin = var_7460_begin_0, end = var_7460_end_0, end_mask = var_7460_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7460_cast_fp16")]; tensor var_7464_begin_0 = const()[name = tensor("op_7464_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_7464_end_0 = const()[name = tensor("op_7464_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_7464_end_mask_0 = const()[name = tensor("op_7464_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7464_cast_fp16 = slice_by_index(begin = var_7464_begin_0, end = var_7464_end_0, end_mask = var_7464_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7464_cast_fp16")]; tensor var_7468_begin_0 = const()[name = tensor("op_7468_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_7468_end_0 = const()[name = tensor("op_7468_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_7468_end_mask_0 = const()[name = tensor("op_7468_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7468_cast_fp16 = slice_by_index(begin = var_7468_begin_0, end = var_7468_end_0, end_mask = var_7468_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7468_cast_fp16")]; tensor var_7472_begin_0 = const()[name = tensor("op_7472_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_7472_end_0 = const()[name = tensor("op_7472_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_7472_end_mask_0 = const()[name = tensor("op_7472_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7472_cast_fp16 = slice_by_index(begin = var_7472_begin_0, end = var_7472_end_0, end_mask = var_7472_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7472_cast_fp16")]; tensor var_7476_begin_0 = const()[name = tensor("op_7476_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_7476_end_0 = const()[name = tensor("op_7476_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_7476_end_mask_0 = const()[name = tensor("op_7476_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7476_cast_fp16 = slice_by_index(begin = var_7476_begin_0, end = var_7476_end_0, end_mask = var_7476_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7476_cast_fp16")]; tensor var_7480_begin_0 = const()[name = tensor("op_7480_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_7480_end_0 = const()[name = tensor("op_7480_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_7480_end_mask_0 = const()[name = tensor("op_7480_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7480_cast_fp16 = slice_by_index(begin = var_7480_begin_0, end = var_7480_end_0, end_mask = var_7480_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7480_cast_fp16")]; tensor var_7484_begin_0 = const()[name = tensor("op_7484_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_7484_end_0 = const()[name = tensor("op_7484_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_7484_end_mask_0 = const()[name = tensor("op_7484_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7484_cast_fp16 = slice_by_index(begin = var_7484_begin_0, end = var_7484_end_0, end_mask = var_7484_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7484_cast_fp16")]; tensor var_7488_begin_0 = const()[name = tensor("op_7488_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_7488_end_0 = const()[name = tensor("op_7488_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_7488_end_mask_0 = const()[name = tensor("op_7488_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7488_cast_fp16 = slice_by_index(begin = var_7488_begin_0, end = var_7488_end_0, end_mask = var_7488_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7488_cast_fp16")]; tensor var_7492_begin_0 = const()[name = tensor("op_7492_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_7492_end_0 = const()[name = tensor("op_7492_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_7492_end_mask_0 = const()[name = tensor("op_7492_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7492_cast_fp16 = slice_by_index(begin = var_7492_begin_0, end = var_7492_end_0, end_mask = var_7492_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7492_cast_fp16")]; tensor var_7496_begin_0 = const()[name = tensor("op_7496_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_7496_end_0 = const()[name = tensor("op_7496_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_7496_end_mask_0 = const()[name = tensor("op_7496_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7496_cast_fp16 = slice_by_index(begin = var_7496_begin_0, end = var_7496_end_0, end_mask = var_7496_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7496_cast_fp16")]; tensor var_7500_begin_0 = const()[name = tensor("op_7500_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_7500_end_0 = const()[name = tensor("op_7500_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_7500_end_mask_0 = const()[name = tensor("op_7500_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_7500_cast_fp16 = slice_by_index(begin = var_7500_begin_0, end = var_7500_end_0, end_mask = var_7500_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7500_cast_fp16")]; tensor _SplitHeadsQ__mh_w_641_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_641_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_641_equation_0, values = (var_7346_cast_fp16, var_6788_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_641_cast_fp16")]; tensor _SplitHeadsQ__mh_w_643_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_643_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_643_equation_0, values = (var_7346_cast_fp16, var_6795_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_643_cast_fp16")]; tensor _SplitHeadsQ__mh_w_645_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_645_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_645_equation_0, values = (var_7346_cast_fp16, var_6802_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_645_cast_fp16")]; tensor _SplitHeadsQ__mh_w_647_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_647_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_647_equation_0, values = (var_7346_cast_fp16, var_6809_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_647_cast_fp16")]; tensor _SplitHeadsQ__mh_w_649_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_649_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_649_equation_0, values = (var_7350_cast_fp16, var_6816_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_649_cast_fp16")]; tensor _SplitHeadsQ__mh_w_651_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_651_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_651_equation_0, values = (var_7350_cast_fp16, var_6823_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_651_cast_fp16")]; tensor _SplitHeadsQ__mh_w_653_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_653_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_653_equation_0, values = (var_7350_cast_fp16, var_6830_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_653_cast_fp16")]; tensor _SplitHeadsQ__mh_w_655_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_655_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_655_equation_0, values = (var_7350_cast_fp16, var_6837_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_655_cast_fp16")]; tensor _SplitHeadsQ__mh_w_657_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_657_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_657_equation_0, values = (var_7354_cast_fp16, var_6844_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_657_cast_fp16")]; tensor _SplitHeadsQ__mh_w_659_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_659_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_659_equation_0, values = (var_7354_cast_fp16, var_6851_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_659_cast_fp16")]; tensor _SplitHeadsQ__mh_w_661_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_661_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_661_equation_0, values = (var_7354_cast_fp16, var_6858_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_661_cast_fp16")]; tensor _SplitHeadsQ__mh_w_663_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_663_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_663_equation_0, values = (var_7354_cast_fp16, var_6865_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_663_cast_fp16")]; tensor _SplitHeadsQ__mh_w_665_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_665_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_665_equation_0, values = (var_7358_cast_fp16, var_6872_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_665_cast_fp16")]; tensor _SplitHeadsQ__mh_w_667_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_667_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_667_equation_0, values = (var_7358_cast_fp16, var_6879_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_667_cast_fp16")]; tensor _SplitHeadsQ__mh_w_669_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_669_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_669_equation_0, values = (var_7358_cast_fp16, var_6886_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_669_cast_fp16")]; tensor _SplitHeadsQ__mh_w_671_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_671_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_671_equation_0, values = (var_7358_cast_fp16, var_6893_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_671_cast_fp16")]; tensor _SplitHeadsQ__mh_w_673_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_673_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_673_equation_0, values = (var_7362_cast_fp16, var_6900_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_673_cast_fp16")]; tensor _SplitHeadsQ__mh_w_675_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_675_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_675_equation_0, values = (var_7362_cast_fp16, var_6907_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_675_cast_fp16")]; tensor _SplitHeadsQ__mh_w_677_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_677_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_677_equation_0, values = (var_7362_cast_fp16, var_6914_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_677_cast_fp16")]; tensor _SplitHeadsQ__mh_w_679_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_679_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_679_equation_0, values = (var_7362_cast_fp16, var_6921_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_679_cast_fp16")]; tensor _SplitHeadsQ__mh_w_681_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_681_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_681_equation_0, values = (var_7366_cast_fp16, var_6928_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_681_cast_fp16")]; tensor _SplitHeadsQ__mh_w_683_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_683_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_683_equation_0, values = (var_7366_cast_fp16, var_6935_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_683_cast_fp16")]; tensor _SplitHeadsQ__mh_w_685_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_685_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_685_equation_0, values = (var_7366_cast_fp16, var_6942_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_685_cast_fp16")]; tensor _SplitHeadsQ__mh_w_687_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_687_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_687_equation_0, values = (var_7366_cast_fp16, var_6949_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_687_cast_fp16")]; tensor _SplitHeadsQ__mh_w_689_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_689_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_689_equation_0, values = (var_7370_cast_fp16, var_6956_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_689_cast_fp16")]; tensor _SplitHeadsQ__mh_w_691_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_691_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_691_equation_0, values = (var_7370_cast_fp16, var_6963_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_691_cast_fp16")]; tensor _SplitHeadsQ__mh_w_693_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_693_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_693_equation_0, values = (var_7370_cast_fp16, var_6970_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_693_cast_fp16")]; tensor _SplitHeadsQ__mh_w_695_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_695_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_695_equation_0, values = (var_7370_cast_fp16, var_6977_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_695_cast_fp16")]; tensor _SplitHeadsQ__mh_w_697_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_697_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_697_equation_0, values = (var_7374_cast_fp16, var_6984_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_697_cast_fp16")]; tensor _SplitHeadsQ__mh_w_699_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_699_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_699_equation_0, values = (var_7374_cast_fp16, var_6991_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_699_cast_fp16")]; tensor _SplitHeadsQ__mh_w_701_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_701_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_701_equation_0, values = (var_7374_cast_fp16, var_6998_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_703_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_703_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_703_equation_0, values = (var_7374_cast_fp16, var_7005_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_703_cast_fp16")]; tensor _SplitHeadsQ__mh_w_705_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_705_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_705_equation_0, values = (var_7378_cast_fp16, var_7012_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_705_cast_fp16")]; tensor _SplitHeadsQ__mh_w_707_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_707_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_707_equation_0, values = (var_7378_cast_fp16, var_7019_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_707_cast_fp16")]; tensor _SplitHeadsQ__mh_w_709_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_709_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_709_equation_0, values = (var_7378_cast_fp16, var_7026_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_709_cast_fp16")]; tensor _SplitHeadsQ__mh_w_711_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_711_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_711_equation_0, values = (var_7378_cast_fp16, var_7033_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_711_cast_fp16")]; tensor _SplitHeadsQ__mh_w_713_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_713_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_713_equation_0, values = (var_7382_cast_fp16, var_7040_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_713_cast_fp16")]; tensor _SplitHeadsQ__mh_w_715_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_715_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_715_equation_0, values = (var_7382_cast_fp16, var_7047_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_715_cast_fp16")]; tensor _SplitHeadsQ__mh_w_717_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_717_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_717_equation_0, values = (var_7382_cast_fp16, var_7054_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_717_cast_fp16")]; tensor _SplitHeadsQ__mh_w_719_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_719_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_719_equation_0, values = (var_7382_cast_fp16, var_7061_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_719_cast_fp16")]; tensor _SplitHeadsQ__mh_w_721_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_721_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_721_equation_0, values = (var_7386_cast_fp16, var_7068_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_721_cast_fp16")]; tensor _SplitHeadsQ__mh_w_723_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_723_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_723_equation_0, values = (var_7386_cast_fp16, var_7075_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_723_cast_fp16")]; tensor _SplitHeadsQ__mh_w_725_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_725_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_725_equation_0, values = (var_7386_cast_fp16, var_7082_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_725_cast_fp16")]; tensor _SplitHeadsQ__mh_w_727_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_727_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_727_equation_0, values = (var_7386_cast_fp16, var_7089_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_727_cast_fp16")]; tensor _SplitHeadsQ__mh_w_729_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_729_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_729_equation_0, values = (var_7390_cast_fp16, var_7096_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_729_cast_fp16")]; tensor _SplitHeadsQ__mh_w_731_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_731_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_731_equation_0, values = (var_7390_cast_fp16, var_7103_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_731_cast_fp16")]; tensor _SplitHeadsQ__mh_w_733_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_733_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_733_equation_0, values = (var_7390_cast_fp16, var_7110_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_733_cast_fp16")]; tensor _SplitHeadsQ__mh_w_735_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_735_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_735_equation_0, values = (var_7390_cast_fp16, var_7117_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_735_cast_fp16")]; tensor _SplitHeadsQ__mh_w_737_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_737_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_737_equation_0, values = (var_7394_cast_fp16, var_7124_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_737_cast_fp16")]; tensor _SplitHeadsQ__mh_w_739_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_739_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_739_equation_0, values = (var_7394_cast_fp16, var_7131_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_739_cast_fp16")]; tensor _SplitHeadsQ__mh_w_741_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_741_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_741_equation_0, values = (var_7394_cast_fp16, var_7138_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_741_cast_fp16")]; tensor _SplitHeadsQ__mh_w_743_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_743_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_743_equation_0, values = (var_7394_cast_fp16, var_7145_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_743_cast_fp16")]; tensor _SplitHeadsQ__mh_w_745_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_745_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_745_equation_0, values = (var_7398_cast_fp16, var_7152_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_745_cast_fp16")]; tensor _SplitHeadsQ__mh_w_747_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_747_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_747_equation_0, values = (var_7398_cast_fp16, var_7159_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_747_cast_fp16")]; tensor _SplitHeadsQ__mh_w_749_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_749_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_749_equation_0, values = (var_7398_cast_fp16, var_7166_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_749_cast_fp16")]; tensor _SplitHeadsQ__mh_w_751_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_751_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_751_equation_0, values = (var_7398_cast_fp16, var_7173_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_751_cast_fp16")]; tensor _SplitHeadsQ__mh_w_753_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_753_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_753_equation_0, values = (var_7402_cast_fp16, var_7180_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_753_cast_fp16")]; tensor _SplitHeadsQ__mh_w_755_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_755_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_755_equation_0, values = (var_7402_cast_fp16, var_7187_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_755_cast_fp16")]; tensor _SplitHeadsQ__mh_w_757_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_757_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_757_equation_0, values = (var_7402_cast_fp16, var_7194_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_757_cast_fp16")]; tensor _SplitHeadsQ__mh_w_759_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_759_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_759_equation_0, values = (var_7402_cast_fp16, var_7201_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_759_cast_fp16")]; tensor _SplitHeadsQ__mh_w_761_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_761_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_761_equation_0, values = (var_7406_cast_fp16, var_7208_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_761_cast_fp16")]; tensor _SplitHeadsQ__mh_w_763_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_763_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_763_equation_0, values = (var_7406_cast_fp16, var_7215_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_763_cast_fp16")]; tensor _SplitHeadsQ__mh_w_765_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_765_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_765_equation_0, values = (var_7406_cast_fp16, var_7222_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_765_cast_fp16")]; tensor _SplitHeadsQ__mh_w_767_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_767_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_767_equation_0, values = (var_7406_cast_fp16, var_7229_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_767_cast_fp16")]; tensor _SplitHeadsQ__mh_w_769_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_769_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_769_equation_0, values = (var_7410_cast_fp16, var_7236_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_769_cast_fp16")]; tensor _SplitHeadsQ__mh_w_771_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_771_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_771_equation_0, values = (var_7410_cast_fp16, var_7243_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_771_cast_fp16")]; tensor _SplitHeadsQ__mh_w_773_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_773_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_773_equation_0, values = (var_7410_cast_fp16, var_7250_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_773_cast_fp16")]; tensor _SplitHeadsQ__mh_w_775_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_775_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_775_equation_0, values = (var_7410_cast_fp16, var_7257_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_775_cast_fp16")]; tensor _SplitHeadsQ__mh_w_777_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_777_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_777_equation_0, values = (var_7414_cast_fp16, var_7264_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_777_cast_fp16")]; tensor _SplitHeadsQ__mh_w_779_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_779_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_779_equation_0, values = (var_7414_cast_fp16, var_7271_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_779_cast_fp16")]; tensor _SplitHeadsQ__mh_w_781_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_781_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_781_equation_0, values = (var_7414_cast_fp16, var_7278_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_781_cast_fp16")]; tensor _SplitHeadsQ__mh_w_783_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_783_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_783_equation_0, values = (var_7414_cast_fp16, var_7285_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_783_cast_fp16")]; tensor _SplitHeadsQ__mh_w_785_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_785_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_785_equation_0, values = (var_7418_cast_fp16, var_7292_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_785_cast_fp16")]; tensor _SplitHeadsQ__mh_w_787_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_787_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_787_equation_0, values = (var_7418_cast_fp16, var_7299_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_787_cast_fp16")]; tensor _SplitHeadsQ__mh_w_789_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_789_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_789_equation_0, values = (var_7418_cast_fp16, var_7306_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_789_cast_fp16")]; tensor _SplitHeadsQ__mh_w_791_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_791_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_791_equation_0, values = (var_7418_cast_fp16, var_7313_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_791_cast_fp16")]; tensor _SplitHeadsQ__mh_w_793_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_793_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_793_equation_0, values = (var_7422_cast_fp16, var_7320_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_793_cast_fp16")]; tensor _SplitHeadsQ__mh_w_795_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_795_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_795_equation_0, values = (var_7422_cast_fp16, var_7327_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_795_cast_fp16")]; tensor _SplitHeadsQ__mh_w_797_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_797_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_797_equation_0, values = (var_7422_cast_fp16, var_7334_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_797_cast_fp16")]; tensor _SplitHeadsQ__mh_w_799_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_799_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_799_equation_0, values = (var_7422_cast_fp16, var_7341_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_799_cast_fp16")]; tensor var_7663_to_fp16 = const()[name = tensor("op_7663_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_641_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_641_cast_fp16, y = var_7663_to_fp16)[name = tensor("aw_chunk_641_cast_fp16")]; tensor var_7665_to_fp16 = const()[name = tensor("op_7665_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_643_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_643_cast_fp16, y = var_7665_to_fp16)[name = tensor("aw_chunk_643_cast_fp16")]; tensor var_7667_to_fp16 = const()[name = tensor("op_7667_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_645_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_645_cast_fp16, y = var_7667_to_fp16)[name = tensor("aw_chunk_645_cast_fp16")]; tensor var_7669_to_fp16 = const()[name = tensor("op_7669_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_647_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_647_cast_fp16, y = var_7669_to_fp16)[name = tensor("aw_chunk_647_cast_fp16")]; tensor var_7671_to_fp16 = const()[name = tensor("op_7671_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_649_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_649_cast_fp16, y = var_7671_to_fp16)[name = tensor("aw_chunk_649_cast_fp16")]; tensor var_7673_to_fp16 = const()[name = tensor("op_7673_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_651_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_651_cast_fp16, y = var_7673_to_fp16)[name = tensor("aw_chunk_651_cast_fp16")]; tensor var_7675_to_fp16 = const()[name = tensor("op_7675_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_653_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_653_cast_fp16, y = var_7675_to_fp16)[name = tensor("aw_chunk_653_cast_fp16")]; tensor var_7677_to_fp16 = const()[name = tensor("op_7677_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_655_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_655_cast_fp16, y = var_7677_to_fp16)[name = tensor("aw_chunk_655_cast_fp16")]; tensor var_7679_to_fp16 = const()[name = tensor("op_7679_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_657_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_657_cast_fp16, y = var_7679_to_fp16)[name = tensor("aw_chunk_657_cast_fp16")]; tensor var_7681_to_fp16 = const()[name = tensor("op_7681_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_659_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_659_cast_fp16, y = var_7681_to_fp16)[name = tensor("aw_chunk_659_cast_fp16")]; tensor var_7683_to_fp16 = const()[name = tensor("op_7683_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_661_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_661_cast_fp16, y = var_7683_to_fp16)[name = tensor("aw_chunk_661_cast_fp16")]; tensor var_7685_to_fp16 = const()[name = tensor("op_7685_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_663_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_663_cast_fp16, y = var_7685_to_fp16)[name = tensor("aw_chunk_663_cast_fp16")]; tensor var_7687_to_fp16 = const()[name = tensor("op_7687_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_665_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_665_cast_fp16, y = var_7687_to_fp16)[name = tensor("aw_chunk_665_cast_fp16")]; tensor var_7689_to_fp16 = const()[name = tensor("op_7689_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_667_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_667_cast_fp16, y = var_7689_to_fp16)[name = tensor("aw_chunk_667_cast_fp16")]; tensor var_7691_to_fp16 = const()[name = tensor("op_7691_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_669_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_669_cast_fp16, y = var_7691_to_fp16)[name = tensor("aw_chunk_669_cast_fp16")]; tensor var_7693_to_fp16 = const()[name = tensor("op_7693_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_671_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_671_cast_fp16, y = var_7693_to_fp16)[name = tensor("aw_chunk_671_cast_fp16")]; tensor var_7695_to_fp16 = const()[name = tensor("op_7695_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_673_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_673_cast_fp16, y = var_7695_to_fp16)[name = tensor("aw_chunk_673_cast_fp16")]; tensor var_7697_to_fp16 = const()[name = tensor("op_7697_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_675_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_675_cast_fp16, y = var_7697_to_fp16)[name = tensor("aw_chunk_675_cast_fp16")]; tensor var_7699_to_fp16 = const()[name = tensor("op_7699_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_677_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_677_cast_fp16, y = var_7699_to_fp16)[name = tensor("aw_chunk_677_cast_fp16")]; tensor var_7701_to_fp16 = const()[name = tensor("op_7701_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_679_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_679_cast_fp16, y = var_7701_to_fp16)[name = tensor("aw_chunk_679_cast_fp16")]; tensor var_7703_to_fp16 = const()[name = tensor("op_7703_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_681_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_681_cast_fp16, y = var_7703_to_fp16)[name = tensor("aw_chunk_681_cast_fp16")]; tensor var_7705_to_fp16 = const()[name = tensor("op_7705_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_683_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_683_cast_fp16, y = var_7705_to_fp16)[name = tensor("aw_chunk_683_cast_fp16")]; tensor var_7707_to_fp16 = const()[name = tensor("op_7707_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_685_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_685_cast_fp16, y = var_7707_to_fp16)[name = tensor("aw_chunk_685_cast_fp16")]; tensor var_7709_to_fp16 = const()[name = tensor("op_7709_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_687_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_687_cast_fp16, y = var_7709_to_fp16)[name = tensor("aw_chunk_687_cast_fp16")]; tensor var_7711_to_fp16 = const()[name = tensor("op_7711_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_689_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_689_cast_fp16, y = var_7711_to_fp16)[name = tensor("aw_chunk_689_cast_fp16")]; tensor var_7713_to_fp16 = const()[name = tensor("op_7713_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_691_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_691_cast_fp16, y = var_7713_to_fp16)[name = tensor("aw_chunk_691_cast_fp16")]; tensor var_7715_to_fp16 = const()[name = tensor("op_7715_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_693_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_693_cast_fp16, y = var_7715_to_fp16)[name = tensor("aw_chunk_693_cast_fp16")]; tensor var_7717_to_fp16 = const()[name = tensor("op_7717_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_695_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_695_cast_fp16, y = var_7717_to_fp16)[name = tensor("aw_chunk_695_cast_fp16")]; tensor var_7719_to_fp16 = const()[name = tensor("op_7719_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_697_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_697_cast_fp16, y = var_7719_to_fp16)[name = tensor("aw_chunk_697_cast_fp16")]; tensor var_7721_to_fp16 = const()[name = tensor("op_7721_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_699_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_699_cast_fp16, y = var_7721_to_fp16)[name = tensor("aw_chunk_699_cast_fp16")]; tensor var_7723_to_fp16 = const()[name = tensor("op_7723_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_701_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_701_cast_fp16, y = var_7723_to_fp16)[name = tensor("aw_chunk_701_cast_fp16")]; tensor var_7725_to_fp16 = const()[name = tensor("op_7725_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_703_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_703_cast_fp16, y = var_7725_to_fp16)[name = tensor("aw_chunk_703_cast_fp16")]; tensor var_7727_to_fp16 = const()[name = tensor("op_7727_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_705_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_705_cast_fp16, y = var_7727_to_fp16)[name = tensor("aw_chunk_705_cast_fp16")]; tensor var_7729_to_fp16 = const()[name = tensor("op_7729_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_707_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_707_cast_fp16, y = var_7729_to_fp16)[name = tensor("aw_chunk_707_cast_fp16")]; tensor var_7731_to_fp16 = const()[name = tensor("op_7731_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_709_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_709_cast_fp16, y = var_7731_to_fp16)[name = tensor("aw_chunk_709_cast_fp16")]; tensor var_7733_to_fp16 = const()[name = tensor("op_7733_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_711_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_711_cast_fp16, y = var_7733_to_fp16)[name = tensor("aw_chunk_711_cast_fp16")]; tensor var_7735_to_fp16 = const()[name = tensor("op_7735_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_713_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_713_cast_fp16, y = var_7735_to_fp16)[name = tensor("aw_chunk_713_cast_fp16")]; tensor var_7737_to_fp16 = const()[name = tensor("op_7737_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_715_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_715_cast_fp16, y = var_7737_to_fp16)[name = tensor("aw_chunk_715_cast_fp16")]; tensor var_7739_to_fp16 = const()[name = tensor("op_7739_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_717_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_717_cast_fp16, y = var_7739_to_fp16)[name = tensor("aw_chunk_717_cast_fp16")]; tensor var_7741_to_fp16 = const()[name = tensor("op_7741_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_719_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_719_cast_fp16, y = var_7741_to_fp16)[name = tensor("aw_chunk_719_cast_fp16")]; tensor var_7743_to_fp16 = const()[name = tensor("op_7743_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_721_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_721_cast_fp16, y = var_7743_to_fp16)[name = tensor("aw_chunk_721_cast_fp16")]; tensor var_7745_to_fp16 = const()[name = tensor("op_7745_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_723_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_723_cast_fp16, y = var_7745_to_fp16)[name = tensor("aw_chunk_723_cast_fp16")]; tensor var_7747_to_fp16 = const()[name = tensor("op_7747_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_725_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_725_cast_fp16, y = var_7747_to_fp16)[name = tensor("aw_chunk_725_cast_fp16")]; tensor var_7749_to_fp16 = const()[name = tensor("op_7749_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_727_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_727_cast_fp16, y = var_7749_to_fp16)[name = tensor("aw_chunk_727_cast_fp16")]; tensor var_7751_to_fp16 = const()[name = tensor("op_7751_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_729_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_729_cast_fp16, y = var_7751_to_fp16)[name = tensor("aw_chunk_729_cast_fp16")]; tensor var_7753_to_fp16 = const()[name = tensor("op_7753_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_731_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_731_cast_fp16, y = var_7753_to_fp16)[name = tensor("aw_chunk_731_cast_fp16")]; tensor var_7755_to_fp16 = const()[name = tensor("op_7755_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_733_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_733_cast_fp16, y = var_7755_to_fp16)[name = tensor("aw_chunk_733_cast_fp16")]; tensor var_7757_to_fp16 = const()[name = tensor("op_7757_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_735_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_735_cast_fp16, y = var_7757_to_fp16)[name = tensor("aw_chunk_735_cast_fp16")]; tensor var_7759_to_fp16 = const()[name = tensor("op_7759_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_737_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_737_cast_fp16, y = var_7759_to_fp16)[name = tensor("aw_chunk_737_cast_fp16")]; tensor var_7761_to_fp16 = const()[name = tensor("op_7761_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_739_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_739_cast_fp16, y = var_7761_to_fp16)[name = tensor("aw_chunk_739_cast_fp16")]; tensor var_7763_to_fp16 = const()[name = tensor("op_7763_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_741_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_741_cast_fp16, y = var_7763_to_fp16)[name = tensor("aw_chunk_741_cast_fp16")]; tensor var_7765_to_fp16 = const()[name = tensor("op_7765_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_743_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_743_cast_fp16, y = var_7765_to_fp16)[name = tensor("aw_chunk_743_cast_fp16")]; tensor var_7767_to_fp16 = const()[name = tensor("op_7767_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_745_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_745_cast_fp16, y = var_7767_to_fp16)[name = tensor("aw_chunk_745_cast_fp16")]; tensor var_7769_to_fp16 = const()[name = tensor("op_7769_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_747_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_747_cast_fp16, y = var_7769_to_fp16)[name = tensor("aw_chunk_747_cast_fp16")]; tensor var_7771_to_fp16 = const()[name = tensor("op_7771_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_749_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_749_cast_fp16, y = var_7771_to_fp16)[name = tensor("aw_chunk_749_cast_fp16")]; tensor var_7773_to_fp16 = const()[name = tensor("op_7773_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_751_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_751_cast_fp16, y = var_7773_to_fp16)[name = tensor("aw_chunk_751_cast_fp16")]; tensor var_7775_to_fp16 = const()[name = tensor("op_7775_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_753_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_753_cast_fp16, y = var_7775_to_fp16)[name = tensor("aw_chunk_753_cast_fp16")]; tensor var_7777_to_fp16 = const()[name = tensor("op_7777_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_755_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_755_cast_fp16, y = var_7777_to_fp16)[name = tensor("aw_chunk_755_cast_fp16")]; tensor var_7779_to_fp16 = const()[name = tensor("op_7779_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_757_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_757_cast_fp16, y = var_7779_to_fp16)[name = tensor("aw_chunk_757_cast_fp16")]; tensor var_7781_to_fp16 = const()[name = tensor("op_7781_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_759_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_759_cast_fp16, y = var_7781_to_fp16)[name = tensor("aw_chunk_759_cast_fp16")]; tensor var_7783_to_fp16 = const()[name = tensor("op_7783_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_761_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_761_cast_fp16, y = var_7783_to_fp16)[name = tensor("aw_chunk_761_cast_fp16")]; tensor var_7785_to_fp16 = const()[name = tensor("op_7785_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_763_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_763_cast_fp16, y = var_7785_to_fp16)[name = tensor("aw_chunk_763_cast_fp16")]; tensor var_7787_to_fp16 = const()[name = tensor("op_7787_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_765_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_765_cast_fp16, y = var_7787_to_fp16)[name = tensor("aw_chunk_765_cast_fp16")]; tensor var_7789_to_fp16 = const()[name = tensor("op_7789_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_767_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_767_cast_fp16, y = var_7789_to_fp16)[name = tensor("aw_chunk_767_cast_fp16")]; tensor var_7791_to_fp16 = const()[name = tensor("op_7791_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_769_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_769_cast_fp16, y = var_7791_to_fp16)[name = tensor("aw_chunk_769_cast_fp16")]; tensor var_7793_to_fp16 = const()[name = tensor("op_7793_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_771_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_771_cast_fp16, y = var_7793_to_fp16)[name = tensor("aw_chunk_771_cast_fp16")]; tensor var_7795_to_fp16 = const()[name = tensor("op_7795_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_773_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_773_cast_fp16, y = var_7795_to_fp16)[name = tensor("aw_chunk_773_cast_fp16")]; tensor var_7797_to_fp16 = const()[name = tensor("op_7797_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_775_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_775_cast_fp16, y = var_7797_to_fp16)[name = tensor("aw_chunk_775_cast_fp16")]; tensor var_7799_to_fp16 = const()[name = tensor("op_7799_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_777_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_777_cast_fp16, y = var_7799_to_fp16)[name = tensor("aw_chunk_777_cast_fp16")]; tensor var_7801_to_fp16 = const()[name = tensor("op_7801_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_779_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_779_cast_fp16, y = var_7801_to_fp16)[name = tensor("aw_chunk_779_cast_fp16")]; tensor var_7803_to_fp16 = const()[name = tensor("op_7803_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_781_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_781_cast_fp16, y = var_7803_to_fp16)[name = tensor("aw_chunk_781_cast_fp16")]; tensor var_7805_to_fp16 = const()[name = tensor("op_7805_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_783_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_783_cast_fp16, y = var_7805_to_fp16)[name = tensor("aw_chunk_783_cast_fp16")]; tensor var_7807_to_fp16 = const()[name = tensor("op_7807_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_785_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_785_cast_fp16, y = var_7807_to_fp16)[name = tensor("aw_chunk_785_cast_fp16")]; tensor var_7809_to_fp16 = const()[name = tensor("op_7809_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_787_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_787_cast_fp16, y = var_7809_to_fp16)[name = tensor("aw_chunk_787_cast_fp16")]; tensor var_7811_to_fp16 = const()[name = tensor("op_7811_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_789_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_789_cast_fp16, y = var_7811_to_fp16)[name = tensor("aw_chunk_789_cast_fp16")]; tensor var_7813_to_fp16 = const()[name = tensor("op_7813_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_791_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_791_cast_fp16, y = var_7813_to_fp16)[name = tensor("aw_chunk_791_cast_fp16")]; tensor var_7815_to_fp16 = const()[name = tensor("op_7815_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_793_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_793_cast_fp16, y = var_7815_to_fp16)[name = tensor("aw_chunk_793_cast_fp16")]; tensor var_7817_to_fp16 = const()[name = tensor("op_7817_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_795_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_795_cast_fp16, y = var_7817_to_fp16)[name = tensor("aw_chunk_795_cast_fp16")]; tensor var_7819_to_fp16 = const()[name = tensor("op_7819_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_797_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_797_cast_fp16, y = var_7819_to_fp16)[name = tensor("aw_chunk_797_cast_fp16")]; tensor var_7821_to_fp16 = const()[name = tensor("op_7821_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_799_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_799_cast_fp16, y = var_7821_to_fp16)[name = tensor("aw_chunk_799_cast_fp16")]; tensor var_7823_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_641_cast_fp16)[name = tensor("op_7823_cast_fp16")]; tensor var_7824_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_643_cast_fp16)[name = tensor("op_7824_cast_fp16")]; tensor var_7825_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_645_cast_fp16)[name = tensor("op_7825_cast_fp16")]; tensor var_7826_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_647_cast_fp16)[name = tensor("op_7826_cast_fp16")]; tensor var_7827_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_649_cast_fp16)[name = tensor("op_7827_cast_fp16")]; tensor var_7828_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_651_cast_fp16)[name = tensor("op_7828_cast_fp16")]; tensor var_7829_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_653_cast_fp16)[name = tensor("op_7829_cast_fp16")]; tensor var_7830_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_655_cast_fp16)[name = tensor("op_7830_cast_fp16")]; tensor var_7831_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_657_cast_fp16)[name = tensor("op_7831_cast_fp16")]; tensor var_7832_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_659_cast_fp16)[name = tensor("op_7832_cast_fp16")]; tensor var_7833_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_661_cast_fp16)[name = tensor("op_7833_cast_fp16")]; tensor var_7834_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_663_cast_fp16)[name = tensor("op_7834_cast_fp16")]; tensor var_7835_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_665_cast_fp16)[name = tensor("op_7835_cast_fp16")]; tensor var_7836_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_667_cast_fp16)[name = tensor("op_7836_cast_fp16")]; tensor var_7837_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_669_cast_fp16)[name = tensor("op_7837_cast_fp16")]; tensor var_7838_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_671_cast_fp16)[name = tensor("op_7838_cast_fp16")]; tensor var_7839_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_673_cast_fp16)[name = tensor("op_7839_cast_fp16")]; tensor var_7840_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_675_cast_fp16)[name = tensor("op_7840_cast_fp16")]; tensor var_7841_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_677_cast_fp16)[name = tensor("op_7841_cast_fp16")]; tensor var_7842_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_679_cast_fp16)[name = tensor("op_7842_cast_fp16")]; tensor var_7843_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_681_cast_fp16)[name = tensor("op_7843_cast_fp16")]; tensor var_7844_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_683_cast_fp16)[name = tensor("op_7844_cast_fp16")]; tensor var_7845_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_685_cast_fp16)[name = tensor("op_7845_cast_fp16")]; tensor var_7846_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_687_cast_fp16)[name = tensor("op_7846_cast_fp16")]; tensor var_7847_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_689_cast_fp16)[name = tensor("op_7847_cast_fp16")]; tensor var_7848_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_691_cast_fp16)[name = tensor("op_7848_cast_fp16")]; tensor var_7849_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_693_cast_fp16)[name = tensor("op_7849_cast_fp16")]; tensor var_7850_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_695_cast_fp16)[name = tensor("op_7850_cast_fp16")]; tensor var_7851_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_697_cast_fp16)[name = tensor("op_7851_cast_fp16")]; tensor var_7852_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_699_cast_fp16)[name = tensor("op_7852_cast_fp16")]; tensor var_7853_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_701_cast_fp16)[name = tensor("op_7853_cast_fp16")]; tensor var_7854_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_703_cast_fp16)[name = tensor("op_7854_cast_fp16")]; tensor var_7855_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_705_cast_fp16)[name = tensor("op_7855_cast_fp16")]; tensor var_7856_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_707_cast_fp16)[name = tensor("op_7856_cast_fp16")]; tensor var_7857_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_709_cast_fp16)[name = tensor("op_7857_cast_fp16")]; tensor var_7858_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_711_cast_fp16)[name = tensor("op_7858_cast_fp16")]; tensor var_7859_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_713_cast_fp16)[name = tensor("op_7859_cast_fp16")]; tensor var_7860_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_715_cast_fp16)[name = tensor("op_7860_cast_fp16")]; tensor var_7861_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_717_cast_fp16)[name = tensor("op_7861_cast_fp16")]; tensor var_7862_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_719_cast_fp16)[name = tensor("op_7862_cast_fp16")]; tensor var_7863_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_721_cast_fp16)[name = tensor("op_7863_cast_fp16")]; tensor var_7864_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_723_cast_fp16)[name = tensor("op_7864_cast_fp16")]; tensor var_7865_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_725_cast_fp16)[name = tensor("op_7865_cast_fp16")]; tensor var_7866_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_727_cast_fp16)[name = tensor("op_7866_cast_fp16")]; tensor var_7867_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_729_cast_fp16)[name = tensor("op_7867_cast_fp16")]; tensor var_7868_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_731_cast_fp16)[name = tensor("op_7868_cast_fp16")]; tensor var_7869_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_733_cast_fp16)[name = tensor("op_7869_cast_fp16")]; tensor var_7870_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_735_cast_fp16)[name = tensor("op_7870_cast_fp16")]; tensor var_7871_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_737_cast_fp16)[name = tensor("op_7871_cast_fp16")]; tensor var_7872_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_739_cast_fp16)[name = tensor("op_7872_cast_fp16")]; tensor var_7873_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_741_cast_fp16)[name = tensor("op_7873_cast_fp16")]; tensor var_7874_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_743_cast_fp16)[name = tensor("op_7874_cast_fp16")]; tensor var_7875_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_745_cast_fp16)[name = tensor("op_7875_cast_fp16")]; tensor var_7876_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_747_cast_fp16)[name = tensor("op_7876_cast_fp16")]; tensor var_7877_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_749_cast_fp16)[name = tensor("op_7877_cast_fp16")]; tensor var_7878_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_751_cast_fp16)[name = tensor("op_7878_cast_fp16")]; tensor var_7879_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_753_cast_fp16)[name = tensor("op_7879_cast_fp16")]; tensor var_7880_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_755_cast_fp16)[name = tensor("op_7880_cast_fp16")]; tensor var_7881_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_757_cast_fp16)[name = tensor("op_7881_cast_fp16")]; tensor var_7882_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_759_cast_fp16)[name = tensor("op_7882_cast_fp16")]; tensor var_7883_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_761_cast_fp16)[name = tensor("op_7883_cast_fp16")]; tensor var_7884_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_763_cast_fp16)[name = tensor("op_7884_cast_fp16")]; tensor var_7885_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_765_cast_fp16)[name = tensor("op_7885_cast_fp16")]; tensor var_7886_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_767_cast_fp16)[name = tensor("op_7886_cast_fp16")]; tensor var_7887_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_769_cast_fp16)[name = tensor("op_7887_cast_fp16")]; tensor var_7888_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_771_cast_fp16)[name = tensor("op_7888_cast_fp16")]; tensor var_7889_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_773_cast_fp16)[name = tensor("op_7889_cast_fp16")]; tensor var_7890_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_775_cast_fp16)[name = tensor("op_7890_cast_fp16")]; tensor var_7891_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_777_cast_fp16)[name = tensor("op_7891_cast_fp16")]; tensor var_7892_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_779_cast_fp16)[name = tensor("op_7892_cast_fp16")]; tensor var_7893_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_781_cast_fp16)[name = tensor("op_7893_cast_fp16")]; tensor var_7894_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_783_cast_fp16)[name = tensor("op_7894_cast_fp16")]; tensor var_7895_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_785_cast_fp16)[name = tensor("op_7895_cast_fp16")]; tensor var_7896_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_787_cast_fp16)[name = tensor("op_7896_cast_fp16")]; tensor var_7897_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_789_cast_fp16)[name = tensor("op_7897_cast_fp16")]; tensor var_7898_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_791_cast_fp16)[name = tensor("op_7898_cast_fp16")]; tensor var_7899_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_793_cast_fp16)[name = tensor("op_7899_cast_fp16")]; tensor var_7900_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_795_cast_fp16)[name = tensor("op_7900_cast_fp16")]; tensor var_7901_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_797_cast_fp16)[name = tensor("op_7901_cast_fp16")]; tensor var_7902_cast_fp16 = softmax(axis = var_6621, x = aw_chunk_799_cast_fp16)[name = tensor("op_7902_cast_fp16")]; tensor var_7904_equation_0 = const()[name = tensor("op_7904_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7904_cast_fp16 = einsum(equation = var_7904_equation_0, values = (var_7424_cast_fp16, var_7823_cast_fp16))[name = tensor("op_7904_cast_fp16")]; tensor var_7906_equation_0 = const()[name = tensor("op_7906_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7906_cast_fp16 = einsum(equation = var_7906_equation_0, values = (var_7424_cast_fp16, var_7824_cast_fp16))[name = tensor("op_7906_cast_fp16")]; tensor var_7908_equation_0 = const()[name = tensor("op_7908_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7908_cast_fp16 = einsum(equation = var_7908_equation_0, values = (var_7424_cast_fp16, var_7825_cast_fp16))[name = tensor("op_7908_cast_fp16")]; tensor var_7910_equation_0 = const()[name = tensor("op_7910_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7910_cast_fp16 = einsum(equation = var_7910_equation_0, values = (var_7424_cast_fp16, var_7826_cast_fp16))[name = tensor("op_7910_cast_fp16")]; tensor var_7912_equation_0 = const()[name = tensor("op_7912_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7912_cast_fp16 = einsum(equation = var_7912_equation_0, values = (var_7428_cast_fp16, var_7827_cast_fp16))[name = tensor("op_7912_cast_fp16")]; tensor var_7914_equation_0 = const()[name = tensor("op_7914_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7914_cast_fp16 = einsum(equation = var_7914_equation_0, values = (var_7428_cast_fp16, var_7828_cast_fp16))[name = tensor("op_7914_cast_fp16")]; tensor var_7916_equation_0 = const()[name = tensor("op_7916_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7916_cast_fp16 = einsum(equation = var_7916_equation_0, values = (var_7428_cast_fp16, var_7829_cast_fp16))[name = tensor("op_7916_cast_fp16")]; tensor var_7918_equation_0 = const()[name = tensor("op_7918_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7918_cast_fp16 = einsum(equation = var_7918_equation_0, values = (var_7428_cast_fp16, var_7830_cast_fp16))[name = tensor("op_7918_cast_fp16")]; tensor var_7920_equation_0 = const()[name = tensor("op_7920_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7920_cast_fp16 = einsum(equation = var_7920_equation_0, values = (var_7432_cast_fp16, var_7831_cast_fp16))[name = tensor("op_7920_cast_fp16")]; tensor var_7922_equation_0 = const()[name = tensor("op_7922_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7922_cast_fp16 = einsum(equation = var_7922_equation_0, values = (var_7432_cast_fp16, var_7832_cast_fp16))[name = tensor("op_7922_cast_fp16")]; tensor var_7924_equation_0 = const()[name = tensor("op_7924_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7924_cast_fp16 = einsum(equation = var_7924_equation_0, values = (var_7432_cast_fp16, var_7833_cast_fp16))[name = tensor("op_7924_cast_fp16")]; tensor var_7926_equation_0 = const()[name = tensor("op_7926_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7926_cast_fp16 = einsum(equation = var_7926_equation_0, values = (var_7432_cast_fp16, var_7834_cast_fp16))[name = tensor("op_7926_cast_fp16")]; tensor var_7928_equation_0 = const()[name = tensor("op_7928_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7928_cast_fp16 = einsum(equation = var_7928_equation_0, values = (var_7436_cast_fp16, var_7835_cast_fp16))[name = tensor("op_7928_cast_fp16")]; tensor var_7930_equation_0 = const()[name = tensor("op_7930_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7930_cast_fp16 = einsum(equation = var_7930_equation_0, values = (var_7436_cast_fp16, var_7836_cast_fp16))[name = tensor("op_7930_cast_fp16")]; tensor var_7932_equation_0 = const()[name = tensor("op_7932_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7932_cast_fp16 = einsum(equation = var_7932_equation_0, values = (var_7436_cast_fp16, var_7837_cast_fp16))[name = tensor("op_7932_cast_fp16")]; tensor var_7934_equation_0 = const()[name = tensor("op_7934_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7934_cast_fp16 = einsum(equation = var_7934_equation_0, values = (var_7436_cast_fp16, var_7838_cast_fp16))[name = tensor("op_7934_cast_fp16")]; tensor var_7936_equation_0 = const()[name = tensor("op_7936_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7936_cast_fp16 = einsum(equation = var_7936_equation_0, values = (var_7440_cast_fp16, var_7839_cast_fp16))[name = tensor("op_7936_cast_fp16")]; tensor var_7938_equation_0 = const()[name = tensor("op_7938_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7938_cast_fp16 = einsum(equation = var_7938_equation_0, values = (var_7440_cast_fp16, var_7840_cast_fp16))[name = tensor("op_7938_cast_fp16")]; tensor var_7940_equation_0 = const()[name = tensor("op_7940_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7940_cast_fp16 = einsum(equation = var_7940_equation_0, values = (var_7440_cast_fp16, var_7841_cast_fp16))[name = tensor("op_7940_cast_fp16")]; tensor var_7942_equation_0 = const()[name = tensor("op_7942_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7942_cast_fp16 = einsum(equation = var_7942_equation_0, values = (var_7440_cast_fp16, var_7842_cast_fp16))[name = tensor("op_7942_cast_fp16")]; tensor var_7944_equation_0 = const()[name = tensor("op_7944_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7944_cast_fp16 = einsum(equation = var_7944_equation_0, values = (var_7444_cast_fp16, var_7843_cast_fp16))[name = tensor("op_7944_cast_fp16")]; tensor var_7946_equation_0 = const()[name = tensor("op_7946_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7946_cast_fp16 = einsum(equation = var_7946_equation_0, values = (var_7444_cast_fp16, var_7844_cast_fp16))[name = tensor("op_7946_cast_fp16")]; tensor var_7948_equation_0 = const()[name = tensor("op_7948_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7948_cast_fp16 = einsum(equation = var_7948_equation_0, values = (var_7444_cast_fp16, var_7845_cast_fp16))[name = tensor("op_7948_cast_fp16")]; tensor var_7950_equation_0 = const()[name = tensor("op_7950_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7950_cast_fp16 = einsum(equation = var_7950_equation_0, values = (var_7444_cast_fp16, var_7846_cast_fp16))[name = tensor("op_7950_cast_fp16")]; tensor var_7952_equation_0 = const()[name = tensor("op_7952_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7952_cast_fp16 = einsum(equation = var_7952_equation_0, values = (var_7448_cast_fp16, var_7847_cast_fp16))[name = tensor("op_7952_cast_fp16")]; tensor var_7954_equation_0 = const()[name = tensor("op_7954_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7954_cast_fp16 = einsum(equation = var_7954_equation_0, values = (var_7448_cast_fp16, var_7848_cast_fp16))[name = tensor("op_7954_cast_fp16")]; tensor var_7956_equation_0 = const()[name = tensor("op_7956_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7956_cast_fp16 = einsum(equation = var_7956_equation_0, values = (var_7448_cast_fp16, var_7849_cast_fp16))[name = tensor("op_7956_cast_fp16")]; tensor var_7958_equation_0 = const()[name = tensor("op_7958_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7958_cast_fp16 = einsum(equation = var_7958_equation_0, values = (var_7448_cast_fp16, var_7850_cast_fp16))[name = tensor("op_7958_cast_fp16")]; tensor var_7960_equation_0 = const()[name = tensor("op_7960_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7960_cast_fp16 = einsum(equation = var_7960_equation_0, values = (var_7452_cast_fp16, var_7851_cast_fp16))[name = tensor("op_7960_cast_fp16")]; tensor var_7962_equation_0 = const()[name = tensor("op_7962_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7962_cast_fp16 = einsum(equation = var_7962_equation_0, values = (var_7452_cast_fp16, var_7852_cast_fp16))[name = tensor("op_7962_cast_fp16")]; tensor var_7964_equation_0 = const()[name = tensor("op_7964_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7964_cast_fp16 = einsum(equation = var_7964_equation_0, values = (var_7452_cast_fp16, var_7853_cast_fp16))[name = tensor("op_7964_cast_fp16")]; tensor var_7966_equation_0 = const()[name = tensor("op_7966_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7966_cast_fp16 = einsum(equation = var_7966_equation_0, values = (var_7452_cast_fp16, var_7854_cast_fp16))[name = tensor("op_7966_cast_fp16")]; tensor var_7968_equation_0 = const()[name = tensor("op_7968_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7968_cast_fp16 = einsum(equation = var_7968_equation_0, values = (var_7456_cast_fp16, var_7855_cast_fp16))[name = tensor("op_7968_cast_fp16")]; tensor var_7970_equation_0 = const()[name = tensor("op_7970_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7970_cast_fp16 = einsum(equation = var_7970_equation_0, values = (var_7456_cast_fp16, var_7856_cast_fp16))[name = tensor("op_7970_cast_fp16")]; tensor var_7972_equation_0 = const()[name = tensor("op_7972_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7972_cast_fp16 = einsum(equation = var_7972_equation_0, values = (var_7456_cast_fp16, var_7857_cast_fp16))[name = tensor("op_7972_cast_fp16")]; tensor var_7974_equation_0 = const()[name = tensor("op_7974_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7974_cast_fp16 = einsum(equation = var_7974_equation_0, values = (var_7456_cast_fp16, var_7858_cast_fp16))[name = tensor("op_7974_cast_fp16")]; tensor var_7976_equation_0 = const()[name = tensor("op_7976_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7976_cast_fp16 = einsum(equation = var_7976_equation_0, values = (var_7460_cast_fp16, var_7859_cast_fp16))[name = tensor("op_7976_cast_fp16")]; tensor var_7978_equation_0 = const()[name = tensor("op_7978_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7978_cast_fp16 = einsum(equation = var_7978_equation_0, values = (var_7460_cast_fp16, var_7860_cast_fp16))[name = tensor("op_7978_cast_fp16")]; tensor var_7980_equation_0 = const()[name = tensor("op_7980_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7980_cast_fp16 = einsum(equation = var_7980_equation_0, values = (var_7460_cast_fp16, var_7861_cast_fp16))[name = tensor("op_7980_cast_fp16")]; tensor var_7982_equation_0 = const()[name = tensor("op_7982_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7982_cast_fp16 = einsum(equation = var_7982_equation_0, values = (var_7460_cast_fp16, var_7862_cast_fp16))[name = tensor("op_7982_cast_fp16")]; tensor var_7984_equation_0 = const()[name = tensor("op_7984_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7984_cast_fp16 = einsum(equation = var_7984_equation_0, values = (var_7464_cast_fp16, var_7863_cast_fp16))[name = tensor("op_7984_cast_fp16")]; tensor var_7986_equation_0 = const()[name = tensor("op_7986_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7986_cast_fp16 = einsum(equation = var_7986_equation_0, values = (var_7464_cast_fp16, var_7864_cast_fp16))[name = tensor("op_7986_cast_fp16")]; tensor var_7988_equation_0 = const()[name = tensor("op_7988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7988_cast_fp16 = einsum(equation = var_7988_equation_0, values = (var_7464_cast_fp16, var_7865_cast_fp16))[name = tensor("op_7988_cast_fp16")]; tensor var_7990_equation_0 = const()[name = tensor("op_7990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7990_cast_fp16 = einsum(equation = var_7990_equation_0, values = (var_7464_cast_fp16, var_7866_cast_fp16))[name = tensor("op_7990_cast_fp16")]; tensor var_7992_equation_0 = const()[name = tensor("op_7992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7992_cast_fp16 = einsum(equation = var_7992_equation_0, values = (var_7468_cast_fp16, var_7867_cast_fp16))[name = tensor("op_7992_cast_fp16")]; tensor var_7994_equation_0 = const()[name = tensor("op_7994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7994_cast_fp16 = einsum(equation = var_7994_equation_0, values = (var_7468_cast_fp16, var_7868_cast_fp16))[name = tensor("op_7994_cast_fp16")]; tensor var_7996_equation_0 = const()[name = tensor("op_7996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7996_cast_fp16 = einsum(equation = var_7996_equation_0, values = (var_7468_cast_fp16, var_7869_cast_fp16))[name = tensor("op_7996_cast_fp16")]; tensor var_7998_equation_0 = const()[name = tensor("op_7998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_7998_cast_fp16 = einsum(equation = var_7998_equation_0, values = (var_7468_cast_fp16, var_7870_cast_fp16))[name = tensor("op_7998_cast_fp16")]; tensor var_8000_equation_0 = const()[name = tensor("op_8000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8000_cast_fp16 = einsum(equation = var_8000_equation_0, values = (var_7472_cast_fp16, var_7871_cast_fp16))[name = tensor("op_8000_cast_fp16")]; tensor var_8002_equation_0 = const()[name = tensor("op_8002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8002_cast_fp16 = einsum(equation = var_8002_equation_0, values = (var_7472_cast_fp16, var_7872_cast_fp16))[name = tensor("op_8002_cast_fp16")]; tensor var_8004_equation_0 = const()[name = tensor("op_8004_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8004_cast_fp16 = einsum(equation = var_8004_equation_0, values = (var_7472_cast_fp16, var_7873_cast_fp16))[name = tensor("op_8004_cast_fp16")]; tensor var_8006_equation_0 = const()[name = tensor("op_8006_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8006_cast_fp16 = einsum(equation = var_8006_equation_0, values = (var_7472_cast_fp16, var_7874_cast_fp16))[name = tensor("op_8006_cast_fp16")]; tensor var_8008_equation_0 = const()[name = tensor("op_8008_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8008_cast_fp16 = einsum(equation = var_8008_equation_0, values = (var_7476_cast_fp16, var_7875_cast_fp16))[name = tensor("op_8008_cast_fp16")]; tensor var_8010_equation_0 = const()[name = tensor("op_8010_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8010_cast_fp16 = einsum(equation = var_8010_equation_0, values = (var_7476_cast_fp16, var_7876_cast_fp16))[name = tensor("op_8010_cast_fp16")]; tensor var_8012_equation_0 = const()[name = tensor("op_8012_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8012_cast_fp16 = einsum(equation = var_8012_equation_0, values = (var_7476_cast_fp16, var_7877_cast_fp16))[name = tensor("op_8012_cast_fp16")]; tensor var_8014_equation_0 = const()[name = tensor("op_8014_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8014_cast_fp16 = einsum(equation = var_8014_equation_0, values = (var_7476_cast_fp16, var_7878_cast_fp16))[name = tensor("op_8014_cast_fp16")]; tensor var_8016_equation_0 = const()[name = tensor("op_8016_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8016_cast_fp16 = einsum(equation = var_8016_equation_0, values = (var_7480_cast_fp16, var_7879_cast_fp16))[name = tensor("op_8016_cast_fp16")]; tensor var_8018_equation_0 = const()[name = tensor("op_8018_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8018_cast_fp16 = einsum(equation = var_8018_equation_0, values = (var_7480_cast_fp16, var_7880_cast_fp16))[name = tensor("op_8018_cast_fp16")]; tensor var_8020_equation_0 = const()[name = tensor("op_8020_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8020_cast_fp16 = einsum(equation = var_8020_equation_0, values = (var_7480_cast_fp16, var_7881_cast_fp16))[name = tensor("op_8020_cast_fp16")]; tensor var_8022_equation_0 = const()[name = tensor("op_8022_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8022_cast_fp16 = einsum(equation = var_8022_equation_0, values = (var_7480_cast_fp16, var_7882_cast_fp16))[name = tensor("op_8022_cast_fp16")]; tensor var_8024_equation_0 = const()[name = tensor("op_8024_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8024_cast_fp16 = einsum(equation = var_8024_equation_0, values = (var_7484_cast_fp16, var_7883_cast_fp16))[name = tensor("op_8024_cast_fp16")]; tensor var_8026_equation_0 = const()[name = tensor("op_8026_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8026_cast_fp16 = einsum(equation = var_8026_equation_0, values = (var_7484_cast_fp16, var_7884_cast_fp16))[name = tensor("op_8026_cast_fp16")]; tensor var_8028_equation_0 = const()[name = tensor("op_8028_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8028_cast_fp16 = einsum(equation = var_8028_equation_0, values = (var_7484_cast_fp16, var_7885_cast_fp16))[name = tensor("op_8028_cast_fp16")]; tensor var_8030_equation_0 = const()[name = tensor("op_8030_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8030_cast_fp16 = einsum(equation = var_8030_equation_0, values = (var_7484_cast_fp16, var_7886_cast_fp16))[name = tensor("op_8030_cast_fp16")]; tensor var_8032_equation_0 = const()[name = tensor("op_8032_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8032_cast_fp16 = einsum(equation = var_8032_equation_0, values = (var_7488_cast_fp16, var_7887_cast_fp16))[name = tensor("op_8032_cast_fp16")]; tensor var_8034_equation_0 = const()[name = tensor("op_8034_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8034_cast_fp16 = einsum(equation = var_8034_equation_0, values = (var_7488_cast_fp16, var_7888_cast_fp16))[name = tensor("op_8034_cast_fp16")]; tensor var_8036_equation_0 = const()[name = tensor("op_8036_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8036_cast_fp16 = einsum(equation = var_8036_equation_0, values = (var_7488_cast_fp16, var_7889_cast_fp16))[name = tensor("op_8036_cast_fp16")]; tensor var_8038_equation_0 = const()[name = tensor("op_8038_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8038_cast_fp16 = einsum(equation = var_8038_equation_0, values = (var_7488_cast_fp16, var_7890_cast_fp16))[name = tensor("op_8038_cast_fp16")]; tensor var_8040_equation_0 = const()[name = tensor("op_8040_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8040_cast_fp16 = einsum(equation = var_8040_equation_0, values = (var_7492_cast_fp16, var_7891_cast_fp16))[name = tensor("op_8040_cast_fp16")]; tensor var_8042_equation_0 = const()[name = tensor("op_8042_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8042_cast_fp16 = einsum(equation = var_8042_equation_0, values = (var_7492_cast_fp16, var_7892_cast_fp16))[name = tensor("op_8042_cast_fp16")]; tensor var_8044_equation_0 = const()[name = tensor("op_8044_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8044_cast_fp16 = einsum(equation = var_8044_equation_0, values = (var_7492_cast_fp16, var_7893_cast_fp16))[name = tensor("op_8044_cast_fp16")]; tensor var_8046_equation_0 = const()[name = tensor("op_8046_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8046_cast_fp16 = einsum(equation = var_8046_equation_0, values = (var_7492_cast_fp16, var_7894_cast_fp16))[name = tensor("op_8046_cast_fp16")]; tensor var_8048_equation_0 = const()[name = tensor("op_8048_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8048_cast_fp16 = einsum(equation = var_8048_equation_0, values = (var_7496_cast_fp16, var_7895_cast_fp16))[name = tensor("op_8048_cast_fp16")]; tensor var_8050_equation_0 = const()[name = tensor("op_8050_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8050_cast_fp16 = einsum(equation = var_8050_equation_0, values = (var_7496_cast_fp16, var_7896_cast_fp16))[name = tensor("op_8050_cast_fp16")]; tensor var_8052_equation_0 = const()[name = tensor("op_8052_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8052_cast_fp16 = einsum(equation = var_8052_equation_0, values = (var_7496_cast_fp16, var_7897_cast_fp16))[name = tensor("op_8052_cast_fp16")]; tensor var_8054_equation_0 = const()[name = tensor("op_8054_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8054_cast_fp16 = einsum(equation = var_8054_equation_0, values = (var_7496_cast_fp16, var_7898_cast_fp16))[name = tensor("op_8054_cast_fp16")]; tensor var_8056_equation_0 = const()[name = tensor("op_8056_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8056_cast_fp16 = einsum(equation = var_8056_equation_0, values = (var_7500_cast_fp16, var_7899_cast_fp16))[name = tensor("op_8056_cast_fp16")]; tensor var_8058_equation_0 = const()[name = tensor("op_8058_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8058_cast_fp16 = einsum(equation = var_8058_equation_0, values = (var_7500_cast_fp16, var_7900_cast_fp16))[name = tensor("op_8058_cast_fp16")]; tensor var_8060_equation_0 = const()[name = tensor("op_8060_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8060_cast_fp16 = einsum(equation = var_8060_equation_0, values = (var_7500_cast_fp16, var_7901_cast_fp16))[name = tensor("op_8060_cast_fp16")]; tensor var_8062_equation_0 = const()[name = tensor("op_8062_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_8062_cast_fp16 = einsum(equation = var_8062_equation_0, values = (var_7500_cast_fp16, var_7902_cast_fp16))[name = tensor("op_8062_cast_fp16")]; tensor var_8064_interleave_0 = const()[name = tensor("op_8064_interleave_0"), val = tensor(false)]; tensor var_8064_cast_fp16 = concat(axis = var_6596, interleave = var_8064_interleave_0, values = (var_7904_cast_fp16, var_7906_cast_fp16, var_7908_cast_fp16, var_7910_cast_fp16))[name = tensor("op_8064_cast_fp16")]; tensor var_8066_interleave_0 = const()[name = tensor("op_8066_interleave_0"), val = tensor(false)]; tensor var_8066_cast_fp16 = concat(axis = var_6596, interleave = var_8066_interleave_0, values = (var_7912_cast_fp16, var_7914_cast_fp16, var_7916_cast_fp16, var_7918_cast_fp16))[name = tensor("op_8066_cast_fp16")]; tensor var_8068_interleave_0 = const()[name = tensor("op_8068_interleave_0"), val = tensor(false)]; tensor var_8068_cast_fp16 = concat(axis = var_6596, interleave = var_8068_interleave_0, values = (var_7920_cast_fp16, var_7922_cast_fp16, var_7924_cast_fp16, var_7926_cast_fp16))[name = tensor("op_8068_cast_fp16")]; tensor var_8070_interleave_0 = const()[name = tensor("op_8070_interleave_0"), val = tensor(false)]; tensor var_8070_cast_fp16 = concat(axis = var_6596, interleave = var_8070_interleave_0, values = (var_7928_cast_fp16, var_7930_cast_fp16, var_7932_cast_fp16, var_7934_cast_fp16))[name = tensor("op_8070_cast_fp16")]; tensor var_8072_interleave_0 = const()[name = tensor("op_8072_interleave_0"), val = tensor(false)]; tensor var_8072_cast_fp16 = concat(axis = var_6596, interleave = var_8072_interleave_0, values = (var_7936_cast_fp16, var_7938_cast_fp16, var_7940_cast_fp16, var_7942_cast_fp16))[name = tensor("op_8072_cast_fp16")]; tensor var_8074_interleave_0 = const()[name = tensor("op_8074_interleave_0"), val = tensor(false)]; tensor var_8074_cast_fp16 = concat(axis = var_6596, interleave = var_8074_interleave_0, values = (var_7944_cast_fp16, var_7946_cast_fp16, var_7948_cast_fp16, var_7950_cast_fp16))[name = tensor("op_8074_cast_fp16")]; tensor var_8076_interleave_0 = const()[name = tensor("op_8076_interleave_0"), val = tensor(false)]; tensor var_8076_cast_fp16 = concat(axis = var_6596, interleave = var_8076_interleave_0, values = (var_7952_cast_fp16, var_7954_cast_fp16, var_7956_cast_fp16, var_7958_cast_fp16))[name = tensor("op_8076_cast_fp16")]; tensor var_8078_interleave_0 = const()[name = tensor("op_8078_interleave_0"), val = tensor(false)]; tensor var_8078_cast_fp16 = concat(axis = var_6596, interleave = var_8078_interleave_0, values = (var_7960_cast_fp16, var_7962_cast_fp16, var_7964_cast_fp16, var_7966_cast_fp16))[name = tensor("op_8078_cast_fp16")]; tensor var_8080_interleave_0 = const()[name = tensor("op_8080_interleave_0"), val = tensor(false)]; tensor var_8080_cast_fp16 = concat(axis = var_6596, interleave = var_8080_interleave_0, values = (var_7968_cast_fp16, var_7970_cast_fp16, var_7972_cast_fp16, var_7974_cast_fp16))[name = tensor("op_8080_cast_fp16")]; tensor var_8082_interleave_0 = const()[name = tensor("op_8082_interleave_0"), val = tensor(false)]; tensor var_8082_cast_fp16 = concat(axis = var_6596, interleave = var_8082_interleave_0, values = (var_7976_cast_fp16, var_7978_cast_fp16, var_7980_cast_fp16, var_7982_cast_fp16))[name = tensor("op_8082_cast_fp16")]; tensor var_8084_interleave_0 = const()[name = tensor("op_8084_interleave_0"), val = tensor(false)]; tensor var_8084_cast_fp16 = concat(axis = var_6596, interleave = var_8084_interleave_0, values = (var_7984_cast_fp16, var_7986_cast_fp16, var_7988_cast_fp16, var_7990_cast_fp16))[name = tensor("op_8084_cast_fp16")]; tensor var_8086_interleave_0 = const()[name = tensor("op_8086_interleave_0"), val = tensor(false)]; tensor var_8086_cast_fp16 = concat(axis = var_6596, interleave = var_8086_interleave_0, values = (var_7992_cast_fp16, var_7994_cast_fp16, var_7996_cast_fp16, var_7998_cast_fp16))[name = tensor("op_8086_cast_fp16")]; tensor var_8088_interleave_0 = const()[name = tensor("op_8088_interleave_0"), val = tensor(false)]; tensor var_8088_cast_fp16 = concat(axis = var_6596, interleave = var_8088_interleave_0, values = (var_8000_cast_fp16, var_8002_cast_fp16, var_8004_cast_fp16, var_8006_cast_fp16))[name = tensor("op_8088_cast_fp16")]; tensor var_8090_interleave_0 = const()[name = tensor("op_8090_interleave_0"), val = tensor(false)]; tensor var_8090_cast_fp16 = concat(axis = var_6596, interleave = var_8090_interleave_0, values = (var_8008_cast_fp16, var_8010_cast_fp16, var_8012_cast_fp16, var_8014_cast_fp16))[name = tensor("op_8090_cast_fp16")]; tensor var_8092_interleave_0 = const()[name = tensor("op_8092_interleave_0"), val = tensor(false)]; tensor var_8092_cast_fp16 = concat(axis = var_6596, interleave = var_8092_interleave_0, values = (var_8016_cast_fp16, var_8018_cast_fp16, var_8020_cast_fp16, var_8022_cast_fp16))[name = tensor("op_8092_cast_fp16")]; tensor var_8094_interleave_0 = const()[name = tensor("op_8094_interleave_0"), val = tensor(false)]; tensor var_8094_cast_fp16 = concat(axis = var_6596, interleave = var_8094_interleave_0, values = (var_8024_cast_fp16, var_8026_cast_fp16, var_8028_cast_fp16, var_8030_cast_fp16))[name = tensor("op_8094_cast_fp16")]; tensor var_8096_interleave_0 = const()[name = tensor("op_8096_interleave_0"), val = tensor(false)]; tensor var_8096_cast_fp16 = concat(axis = var_6596, interleave = var_8096_interleave_0, values = (var_8032_cast_fp16, var_8034_cast_fp16, var_8036_cast_fp16, var_8038_cast_fp16))[name = tensor("op_8096_cast_fp16")]; tensor var_8098_interleave_0 = const()[name = tensor("op_8098_interleave_0"), val = tensor(false)]; tensor var_8098_cast_fp16 = concat(axis = var_6596, interleave = var_8098_interleave_0, values = (var_8040_cast_fp16, var_8042_cast_fp16, var_8044_cast_fp16, var_8046_cast_fp16))[name = tensor("op_8098_cast_fp16")]; tensor var_8100_interleave_0 = const()[name = tensor("op_8100_interleave_0"), val = tensor(false)]; tensor var_8100_cast_fp16 = concat(axis = var_6596, interleave = var_8100_interleave_0, values = (var_8048_cast_fp16, var_8050_cast_fp16, var_8052_cast_fp16, var_8054_cast_fp16))[name = tensor("op_8100_cast_fp16")]; tensor var_8102_interleave_0 = const()[name = tensor("op_8102_interleave_0"), val = tensor(false)]; tensor var_8102_cast_fp16 = concat(axis = var_6596, interleave = var_8102_interleave_0, values = (var_8056_cast_fp16, var_8058_cast_fp16, var_8060_cast_fp16, var_8062_cast_fp16))[name = tensor("op_8102_cast_fp16")]; tensor input_33_interleave_0 = const()[name = tensor("input_33_interleave_0"), val = tensor(false)]; tensor input_33_cast_fp16 = concat(axis = var_6621, interleave = input_33_interleave_0, values = (var_8064_cast_fp16, var_8066_cast_fp16, var_8068_cast_fp16, var_8070_cast_fp16, var_8072_cast_fp16, var_8074_cast_fp16, var_8076_cast_fp16, var_8078_cast_fp16, var_8080_cast_fp16, var_8082_cast_fp16, var_8084_cast_fp16, var_8086_cast_fp16, var_8088_cast_fp16, var_8090_cast_fp16, var_8092_cast_fp16, var_8094_cast_fp16, var_8096_cast_fp16, var_8098_cast_fp16, var_8100_cast_fp16, var_8102_cast_fp16))[name = tensor("input_33_cast_fp16")]; tensor var_8113_pad_type_0 = const()[name = tensor("op_8113_pad_type_0"), val = tensor("valid")]; tensor var_8113_strides_0 = const()[name = tensor("op_8113_strides_0"), val = tensor([1, 1])]; tensor var_8113_pad_0 = const()[name = tensor("op_8113_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8113_dilations_0 = const()[name = tensor("op_8113_dilations_0"), val = tensor([1, 1])]; tensor var_8113_groups_0 = const()[name = tensor("op_8113_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72088640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72907904))), name = tensor("layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_4_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72908032)))]; tensor var_8113_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_8113_dilations_0, groups = var_8113_groups_0, pad = var_8113_pad_0, pad_type = var_8113_pad_type_0, strides = var_8113_strides_0, weight = layers_4_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_33_cast_fp16)[name = tensor("op_8113_cast_fp16")]; tensor var_8119_pad_type_0 = const()[name = tensor("op_8119_pad_type_0"), val = tensor("valid")]; tensor var_8119_strides_0 = const()[name = tensor("op_8119_strides_0"), val = tensor([1, 1])]; tensor var_8119_pad_0 = const()[name = tensor("op_8119_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8119_dilations_0 = const()[name = tensor("op_8119_dilations_0"), val = tensor([1, 1])]; tensor var_8119_groups_0 = const()[name = tensor("op_8119_groups_0"), val = tensor(1)]; tensor layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72933952))), name = tensor("layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72910656))), shape = tensor([1280, 1280, 1, 1])]; tensor var_8119_cast_fp16 = conv(dilations = var_8119_dilations_0, groups = var_8119_groups_0, pad = var_8119_pad_0, pad_type = var_8119_pad_type_0, strides = var_8119_strides_0, weight = layers_4_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_33_cast_fp16)[name = tensor("op_8119_cast_fp16")]; tensor obj_19_cast_fp16 = add(x = var_8113_cast_fp16, y = var_8119_cast_fp16)[name = tensor("obj_19_cast_fp16")]; tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; tensor out_19_axes_0 = const()[name = tensor("out_19_axes_0"), val = tensor([1])]; tensor var_8130_to_fp16 = const()[name = tensor("op_8130_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_19_cast_fp16 = layer_norm(axes = out_19_axes_0, epsilon = var_8130_to_fp16, x = inputs_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73138816)))]; tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73141440)))]; tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; tensor var_8148_pad_type_0 = const()[name = tensor("op_8148_pad_type_0"), val = tensor("valid")]; tensor var_8148_strides_0 = const()[name = tensor("op_8148_strides_0"), val = tensor([1, 1])]; tensor var_8148_pad_0 = const()[name = tensor("op_8148_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8148_dilations_0 = const()[name = tensor("op_8148_dilations_0"), val = tensor([1, 1])]; tensor var_8148_groups_0 = const()[name = tensor("op_8148_groups_0"), val = tensor(1)]; tensor layers_4_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(73144064))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76420928))), name = tensor("layers_4_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_4_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76421056)))]; tensor var_8148_cast_fp16 = conv(bias = layers_4_fc1_inlier_module_bias_to_fp16, dilations = var_8148_dilations_0, groups = var_8148_groups_0, pad = var_8148_pad_0, pad_type = var_8148_pad_type_0, strides = var_8148_strides_0, weight = layers_4_fc1_inlier_module_weight_to_fp16_palettized, x = input_35_cast_fp16)[name = tensor("op_8148_cast_fp16")]; tensor var_8154_pad_type_0 = const()[name = tensor("op_8154_pad_type_0"), val = tensor("valid")]; tensor var_8154_strides_0 = const()[name = tensor("op_8154_strides_0"), val = tensor([1, 1])]; tensor var_8154_pad_0 = const()[name = tensor("op_8154_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8154_dilations_0 = const()[name = tensor("op_8154_dilations_0"), val = tensor([1, 1])]; tensor var_8154_groups_0 = const()[name = tensor("op_8154_groups_0"), val = tensor(1)]; tensor layers_4_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76440192))), name = tensor("layers_4_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(76431360))), shape = tensor([5120, 1280, 1, 1])]; tensor var_8154_cast_fp16 = conv(dilations = var_8154_dilations_0, groups = var_8154_groups_0, pad = var_8154_pad_0, pad_type = var_8154_pad_type_0, strides = var_8154_strides_0, weight = layers_4_fc1_outlier_module_weight_to_fp16_sparsified, x = input_35_cast_fp16)[name = tensor("op_8154_cast_fp16")]; tensor input_37_cast_fp16 = add(x = var_8148_cast_fp16, y = var_8154_cast_fp16)[name = tensor("input_37_cast_fp16")]; tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; tensor var_8165_pad_type_0 = const()[name = tensor("op_8165_pad_type_0"), val = tensor("valid")]; tensor var_8165_strides_0 = const()[name = tensor("op_8165_strides_0"), val = tensor([1, 1])]; tensor var_8165_pad_0 = const()[name = tensor("op_8165_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8165_dilations_0 = const()[name = tensor("op_8165_dilations_0"), val = tensor([1, 1])]; tensor var_8165_groups_0 = const()[name = tensor("op_8165_groups_0"), val = tensor(1)]; tensor layers_4_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77259456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80536320))), name = tensor("layers_4_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_4_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_4_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80536448)))]; tensor var_8165_cast_fp16 = conv(bias = layers_4_fc2_inlier_module_bias_to_fp16, dilations = var_8165_dilations_0, groups = var_8165_groups_0, pad = var_8165_pad_0, pad_type = var_8165_pad_type_0, strides = var_8165_strides_0, weight = layers_4_fc2_inlier_module_weight_to_fp16_palettized, x = input_39_cast_fp16)[name = tensor("op_8165_cast_fp16")]; tensor var_8171_pad_type_0 = const()[name = tensor("op_8171_pad_type_0"), val = tensor("valid")]; tensor var_8171_strides_0 = const()[name = tensor("op_8171_strides_0"), val = tensor([1, 1])]; tensor var_8171_pad_0 = const()[name = tensor("op_8171_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8171_dilations_0 = const()[name = tensor("op_8171_dilations_0"), val = tensor([1, 1])]; tensor var_8171_groups_0 = const()[name = tensor("op_8171_groups_0"), val = tensor(1)]; tensor layers_4_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80778304))), name = tensor("layers_4_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80539072))), shape = tensor([1280, 5120, 1, 1])]; tensor var_8171_cast_fp16 = conv(dilations = var_8171_dilations_0, groups = var_8171_groups_0, pad = var_8171_pad_0, pad_type = var_8171_pad_type_0, strides = var_8171_strides_0, weight = layers_4_fc2_outlier_module_weight_to_fp16_sparsified, x = input_39_cast_fp16)[name = tensor("op_8171_cast_fp16")]; tensor hidden_states_13_cast_fp16 = add(x = var_8165_cast_fp16, y = var_8171_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; tensor var_8177 = const()[name = tensor("op_8177"), val = tensor(3)]; tensor var_8202 = const()[name = tensor("op_8202"), val = tensor(1)]; tensor out_21_axes_0 = const()[name = tensor("out_21_axes_0"), val = tensor([1])]; tensor var_8219_to_fp16 = const()[name = tensor("op_8219_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_21_cast_fp16 = layer_norm(axes = out_21_axes_0, epsilon = var_8219_to_fp16, x = inputs_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81597568)))]; tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81600192)))]; tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; tensor var_8241_pad_type_0 = const()[name = tensor("op_8241_pad_type_0"), val = tensor("valid")]; tensor var_8241_strides_0 = const()[name = tensor("op_8241_strides_0"), val = tensor([1, 1])]; tensor var_8241_pad_0 = const()[name = tensor("op_8241_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8241_dilations_0 = const()[name = tensor("op_8241_dilations_0"), val = tensor([1, 1])]; tensor var_8241_groups_0 = const()[name = tensor("op_8241_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81602816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82422080))), name = tensor("layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_5_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82422208)))]; tensor var_8241_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_8241_dilations_0, groups = var_8241_groups_0, pad = var_8241_pad_0, pad_type = var_8241_pad_type_0, strides = var_8241_strides_0, weight = layers_5_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_8241_cast_fp16")]; tensor var_8247_pad_type_0 = const()[name = tensor("op_8247_pad_type_0"), val = tensor("valid")]; tensor var_8247_strides_0 = const()[name = tensor("op_8247_strides_0"), val = tensor([1, 1])]; tensor var_8247_pad_0 = const()[name = tensor("op_8247_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8247_dilations_0 = const()[name = tensor("op_8247_dilations_0"), val = tensor([1, 1])]; tensor var_8247_groups_0 = const()[name = tensor("op_8247_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82473856))), name = tensor("layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82424832))), shape = tensor([1280, 1280, 1, 1])]; tensor var_8247_cast_fp16 = conv(dilations = var_8247_dilations_0, groups = var_8247_groups_0, pad = var_8247_pad_0, pad_type = var_8247_pad_type_0, strides = var_8247_strides_0, weight = layers_5_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_8247_cast_fp16")]; tensor query_11_cast_fp16 = add(x = var_8241_cast_fp16, y = var_8247_cast_fp16)[name = tensor("query_11_cast_fp16")]; tensor var_8256_pad_type_0 = const()[name = tensor("op_8256_pad_type_0"), val = tensor("valid")]; tensor var_8256_strides_0 = const()[name = tensor("op_8256_strides_0"), val = tensor([1, 1])]; tensor var_8256_pad_0 = const()[name = tensor("op_8256_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8256_dilations_0 = const()[name = tensor("op_8256_dilations_0"), val = tensor([1, 1])]; tensor var_8256_groups_0 = const()[name = tensor("op_8256_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(82678720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83497984))), name = tensor("layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_8256_cast_fp16 = conv(dilations = var_8256_dilations_0, groups = var_8256_groups_0, pad = var_8256_pad_0, pad_type = var_8256_pad_type_0, strides = var_8256_strides_0, weight = layers_5_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_8256_cast_fp16")]; tensor var_8262_pad_type_0 = const()[name = tensor("op_8262_pad_type_0"), val = tensor("valid")]; tensor var_8262_strides_0 = const()[name = tensor("op_8262_strides_0"), val = tensor([1, 1])]; tensor var_8262_pad_0 = const()[name = tensor("op_8262_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8262_dilations_0 = const()[name = tensor("op_8262_dilations_0"), val = tensor([1, 1])]; tensor var_8262_groups_0 = const()[name = tensor("op_8262_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83533248))), name = tensor("layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83498112))), shape = tensor([1280, 1280, 1, 1])]; tensor var_8262_cast_fp16 = conv(dilations = var_8262_dilations_0, groups = var_8262_groups_0, pad = var_8262_pad_0, pad_type = var_8262_pad_type_0, strides = var_8262_strides_0, weight = layers_5_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_8262_cast_fp16")]; tensor key_11_cast_fp16 = add(x = var_8256_cast_fp16, y = var_8262_cast_fp16)[name = tensor("key_11_cast_fp16")]; tensor var_8272_pad_type_0 = const()[name = tensor("op_8272_pad_type_0"), val = tensor("valid")]; tensor var_8272_strides_0 = const()[name = tensor("op_8272_strides_0"), val = tensor([1, 1])]; tensor var_8272_pad_0 = const()[name = tensor("op_8272_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8272_dilations_0 = const()[name = tensor("op_8272_dilations_0"), val = tensor([1, 1])]; tensor var_8272_groups_0 = const()[name = tensor("op_8272_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(83738112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84557376))), name = tensor("layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_5_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84557504)))]; tensor var_8272_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_8272_dilations_0, groups = var_8272_groups_0, pad = var_8272_pad_0, pad_type = var_8272_pad_type_0, strides = var_8272_strides_0, weight = layers_5_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_21_cast_fp16)[name = tensor("op_8272_cast_fp16")]; tensor var_8278_pad_type_0 = const()[name = tensor("op_8278_pad_type_0"), val = tensor("valid")]; tensor var_8278_strides_0 = const()[name = tensor("op_8278_strides_0"), val = tensor([1, 1])]; tensor var_8278_pad_0 = const()[name = tensor("op_8278_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_8278_dilations_0 = const()[name = tensor("op_8278_dilations_0"), val = tensor([1, 1])]; tensor var_8278_groups_0 = const()[name = tensor("op_8278_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84586624))), name = tensor("layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84560128))), shape = tensor([1280, 1280, 1, 1])]; tensor var_8278_cast_fp16 = conv(dilations = var_8278_dilations_0, groups = var_8278_groups_0, pad = var_8278_pad_0, pad_type = var_8278_pad_type_0, strides = var_8278_strides_0, weight = layers_5_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_21_cast_fp16)[name = tensor("op_8278_cast_fp16")]; tensor value_11_cast_fp16 = add(x = var_8272_cast_fp16, y = var_8278_cast_fp16)[name = tensor("value_11_cast_fp16")]; tensor var_8284_begin_0 = const()[name = tensor("op_8284_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8284_end_0 = const()[name = tensor("op_8284_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8284_end_mask_0 = const()[name = tensor("op_8284_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8284_cast_fp16 = slice_by_index(begin = var_8284_begin_0, end = var_8284_end_0, end_mask = var_8284_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8284_cast_fp16")]; tensor var_8288_begin_0 = const()[name = tensor("op_8288_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_8288_end_0 = const()[name = tensor("op_8288_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_8288_end_mask_0 = const()[name = tensor("op_8288_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8288_cast_fp16 = slice_by_index(begin = var_8288_begin_0, end = var_8288_end_0, end_mask = var_8288_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8288_cast_fp16")]; tensor var_8292_begin_0 = const()[name = tensor("op_8292_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_8292_end_0 = const()[name = tensor("op_8292_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_8292_end_mask_0 = const()[name = tensor("op_8292_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8292_cast_fp16 = slice_by_index(begin = var_8292_begin_0, end = var_8292_end_0, end_mask = var_8292_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8292_cast_fp16")]; tensor var_8296_begin_0 = const()[name = tensor("op_8296_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_8296_end_0 = const()[name = tensor("op_8296_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_8296_end_mask_0 = const()[name = tensor("op_8296_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8296_cast_fp16 = slice_by_index(begin = var_8296_begin_0, end = var_8296_end_0, end_mask = var_8296_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8296_cast_fp16")]; tensor var_8300_begin_0 = const()[name = tensor("op_8300_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_8300_end_0 = const()[name = tensor("op_8300_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_8300_end_mask_0 = const()[name = tensor("op_8300_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8300_cast_fp16 = slice_by_index(begin = var_8300_begin_0, end = var_8300_end_0, end_mask = var_8300_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8300_cast_fp16")]; tensor var_8304_begin_0 = const()[name = tensor("op_8304_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_8304_end_0 = const()[name = tensor("op_8304_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_8304_end_mask_0 = const()[name = tensor("op_8304_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8304_cast_fp16 = slice_by_index(begin = var_8304_begin_0, end = var_8304_end_0, end_mask = var_8304_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8304_cast_fp16")]; tensor var_8308_begin_0 = const()[name = tensor("op_8308_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_8308_end_0 = const()[name = tensor("op_8308_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_8308_end_mask_0 = const()[name = tensor("op_8308_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8308_cast_fp16 = slice_by_index(begin = var_8308_begin_0, end = var_8308_end_0, end_mask = var_8308_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8308_cast_fp16")]; tensor var_8312_begin_0 = const()[name = tensor("op_8312_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_8312_end_0 = const()[name = tensor("op_8312_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_8312_end_mask_0 = const()[name = tensor("op_8312_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8312_cast_fp16 = slice_by_index(begin = var_8312_begin_0, end = var_8312_end_0, end_mask = var_8312_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8312_cast_fp16")]; tensor var_8316_begin_0 = const()[name = tensor("op_8316_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_8316_end_0 = const()[name = tensor("op_8316_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_8316_end_mask_0 = const()[name = tensor("op_8316_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8316_cast_fp16 = slice_by_index(begin = var_8316_begin_0, end = var_8316_end_0, end_mask = var_8316_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8316_cast_fp16")]; tensor var_8320_begin_0 = const()[name = tensor("op_8320_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_8320_end_0 = const()[name = tensor("op_8320_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_8320_end_mask_0 = const()[name = tensor("op_8320_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8320_cast_fp16 = slice_by_index(begin = var_8320_begin_0, end = var_8320_end_0, end_mask = var_8320_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8320_cast_fp16")]; tensor var_8324_begin_0 = const()[name = tensor("op_8324_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_8324_end_0 = const()[name = tensor("op_8324_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_8324_end_mask_0 = const()[name = tensor("op_8324_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8324_cast_fp16 = slice_by_index(begin = var_8324_begin_0, end = var_8324_end_0, end_mask = var_8324_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8324_cast_fp16")]; tensor var_8328_begin_0 = const()[name = tensor("op_8328_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_8328_end_0 = const()[name = tensor("op_8328_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_8328_end_mask_0 = const()[name = tensor("op_8328_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8328_cast_fp16 = slice_by_index(begin = var_8328_begin_0, end = var_8328_end_0, end_mask = var_8328_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8328_cast_fp16")]; tensor var_8332_begin_0 = const()[name = tensor("op_8332_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_8332_end_0 = const()[name = tensor("op_8332_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_8332_end_mask_0 = const()[name = tensor("op_8332_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8332_cast_fp16 = slice_by_index(begin = var_8332_begin_0, end = var_8332_end_0, end_mask = var_8332_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8332_cast_fp16")]; tensor var_8336_begin_0 = const()[name = tensor("op_8336_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_8336_end_0 = const()[name = tensor("op_8336_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_8336_end_mask_0 = const()[name = tensor("op_8336_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8336_cast_fp16 = slice_by_index(begin = var_8336_begin_0, end = var_8336_end_0, end_mask = var_8336_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8336_cast_fp16")]; tensor var_8340_begin_0 = const()[name = tensor("op_8340_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_8340_end_0 = const()[name = tensor("op_8340_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_8340_end_mask_0 = const()[name = tensor("op_8340_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8340_cast_fp16 = slice_by_index(begin = var_8340_begin_0, end = var_8340_end_0, end_mask = var_8340_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8340_cast_fp16")]; tensor var_8344_begin_0 = const()[name = tensor("op_8344_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_8344_end_0 = const()[name = tensor("op_8344_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_8344_end_mask_0 = const()[name = tensor("op_8344_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8344_cast_fp16 = slice_by_index(begin = var_8344_begin_0, end = var_8344_end_0, end_mask = var_8344_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8344_cast_fp16")]; tensor var_8348_begin_0 = const()[name = tensor("op_8348_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_8348_end_0 = const()[name = tensor("op_8348_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_8348_end_mask_0 = const()[name = tensor("op_8348_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8348_cast_fp16 = slice_by_index(begin = var_8348_begin_0, end = var_8348_end_0, end_mask = var_8348_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8348_cast_fp16")]; tensor var_8352_begin_0 = const()[name = tensor("op_8352_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_8352_end_0 = const()[name = tensor("op_8352_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_8352_end_mask_0 = const()[name = tensor("op_8352_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8352_cast_fp16 = slice_by_index(begin = var_8352_begin_0, end = var_8352_end_0, end_mask = var_8352_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8352_cast_fp16")]; tensor var_8356_begin_0 = const()[name = tensor("op_8356_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_8356_end_0 = const()[name = tensor("op_8356_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_8356_end_mask_0 = const()[name = tensor("op_8356_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8356_cast_fp16 = slice_by_index(begin = var_8356_begin_0, end = var_8356_end_0, end_mask = var_8356_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8356_cast_fp16")]; tensor var_8360_begin_0 = const()[name = tensor("op_8360_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_8360_end_0 = const()[name = tensor("op_8360_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_8360_end_mask_0 = const()[name = tensor("op_8360_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_8360_cast_fp16 = slice_by_index(begin = var_8360_begin_0, end = var_8360_end_0, end_mask = var_8360_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_8360_cast_fp16")]; tensor var_8369_begin_0 = const()[name = tensor("op_8369_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8369_end_0 = const()[name = tensor("op_8369_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8369_end_mask_0 = const()[name = tensor("op_8369_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8369_cast_fp16 = slice_by_index(begin = var_8369_begin_0, end = var_8369_end_0, end_mask = var_8369_end_mask_0, x = var_8284_cast_fp16)[name = tensor("op_8369_cast_fp16")]; tensor var_8376_begin_0 = const()[name = tensor("op_8376_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8376_end_0 = const()[name = tensor("op_8376_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8376_end_mask_0 = const()[name = tensor("op_8376_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8376_cast_fp16 = slice_by_index(begin = var_8376_begin_0, end = var_8376_end_0, end_mask = var_8376_end_mask_0, x = var_8284_cast_fp16)[name = tensor("op_8376_cast_fp16")]; tensor var_8383_begin_0 = const()[name = tensor("op_8383_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8383_end_0 = const()[name = tensor("op_8383_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8383_end_mask_0 = const()[name = tensor("op_8383_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8383_cast_fp16 = slice_by_index(begin = var_8383_begin_0, end = var_8383_end_0, end_mask = var_8383_end_mask_0, x = var_8284_cast_fp16)[name = tensor("op_8383_cast_fp16")]; tensor var_8390_begin_0 = const()[name = tensor("op_8390_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8390_end_0 = const()[name = tensor("op_8390_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8390_end_mask_0 = const()[name = tensor("op_8390_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8390_cast_fp16 = slice_by_index(begin = var_8390_begin_0, end = var_8390_end_0, end_mask = var_8390_end_mask_0, x = var_8284_cast_fp16)[name = tensor("op_8390_cast_fp16")]; tensor var_8397_begin_0 = const()[name = tensor("op_8397_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8397_end_0 = const()[name = tensor("op_8397_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8397_end_mask_0 = const()[name = tensor("op_8397_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8397_cast_fp16 = slice_by_index(begin = var_8397_begin_0, end = var_8397_end_0, end_mask = var_8397_end_mask_0, x = var_8288_cast_fp16)[name = tensor("op_8397_cast_fp16")]; tensor var_8404_begin_0 = const()[name = tensor("op_8404_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8404_end_0 = const()[name = tensor("op_8404_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8404_end_mask_0 = const()[name = tensor("op_8404_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8404_cast_fp16 = slice_by_index(begin = var_8404_begin_0, end = var_8404_end_0, end_mask = var_8404_end_mask_0, x = var_8288_cast_fp16)[name = tensor("op_8404_cast_fp16")]; tensor var_8411_begin_0 = const()[name = tensor("op_8411_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8411_end_0 = const()[name = tensor("op_8411_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8411_end_mask_0 = const()[name = tensor("op_8411_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8411_cast_fp16 = slice_by_index(begin = var_8411_begin_0, end = var_8411_end_0, end_mask = var_8411_end_mask_0, x = var_8288_cast_fp16)[name = tensor("op_8411_cast_fp16")]; tensor var_8418_begin_0 = const()[name = tensor("op_8418_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8418_end_0 = const()[name = tensor("op_8418_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8418_end_mask_0 = const()[name = tensor("op_8418_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8418_cast_fp16 = slice_by_index(begin = var_8418_begin_0, end = var_8418_end_0, end_mask = var_8418_end_mask_0, x = var_8288_cast_fp16)[name = tensor("op_8418_cast_fp16")]; tensor var_8425_begin_0 = const()[name = tensor("op_8425_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8425_end_0 = const()[name = tensor("op_8425_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8425_end_mask_0 = const()[name = tensor("op_8425_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8425_cast_fp16 = slice_by_index(begin = var_8425_begin_0, end = var_8425_end_0, end_mask = var_8425_end_mask_0, x = var_8292_cast_fp16)[name = tensor("op_8425_cast_fp16")]; tensor var_8432_begin_0 = const()[name = tensor("op_8432_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8432_end_0 = const()[name = tensor("op_8432_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8432_end_mask_0 = const()[name = tensor("op_8432_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8432_cast_fp16 = slice_by_index(begin = var_8432_begin_0, end = var_8432_end_0, end_mask = var_8432_end_mask_0, x = var_8292_cast_fp16)[name = tensor("op_8432_cast_fp16")]; tensor var_8439_begin_0 = const()[name = tensor("op_8439_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8439_end_0 = const()[name = tensor("op_8439_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8439_end_mask_0 = const()[name = tensor("op_8439_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8439_cast_fp16 = slice_by_index(begin = var_8439_begin_0, end = var_8439_end_0, end_mask = var_8439_end_mask_0, x = var_8292_cast_fp16)[name = tensor("op_8439_cast_fp16")]; tensor var_8446_begin_0 = const()[name = tensor("op_8446_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8446_end_0 = const()[name = tensor("op_8446_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8446_end_mask_0 = const()[name = tensor("op_8446_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8446_cast_fp16 = slice_by_index(begin = var_8446_begin_0, end = var_8446_end_0, end_mask = var_8446_end_mask_0, x = var_8292_cast_fp16)[name = tensor("op_8446_cast_fp16")]; tensor var_8453_begin_0 = const()[name = tensor("op_8453_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8453_end_0 = const()[name = tensor("op_8453_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8453_end_mask_0 = const()[name = tensor("op_8453_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8453_cast_fp16 = slice_by_index(begin = var_8453_begin_0, end = var_8453_end_0, end_mask = var_8453_end_mask_0, x = var_8296_cast_fp16)[name = tensor("op_8453_cast_fp16")]; tensor var_8460_begin_0 = const()[name = tensor("op_8460_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8460_end_0 = const()[name = tensor("op_8460_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8460_end_mask_0 = const()[name = tensor("op_8460_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8460_cast_fp16 = slice_by_index(begin = var_8460_begin_0, end = var_8460_end_0, end_mask = var_8460_end_mask_0, x = var_8296_cast_fp16)[name = tensor("op_8460_cast_fp16")]; tensor var_8467_begin_0 = const()[name = tensor("op_8467_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8467_end_0 = const()[name = tensor("op_8467_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8467_end_mask_0 = const()[name = tensor("op_8467_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8467_cast_fp16 = slice_by_index(begin = var_8467_begin_0, end = var_8467_end_0, end_mask = var_8467_end_mask_0, x = var_8296_cast_fp16)[name = tensor("op_8467_cast_fp16")]; tensor var_8474_begin_0 = const()[name = tensor("op_8474_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8474_end_0 = const()[name = tensor("op_8474_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8474_end_mask_0 = const()[name = tensor("op_8474_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8474_cast_fp16 = slice_by_index(begin = var_8474_begin_0, end = var_8474_end_0, end_mask = var_8474_end_mask_0, x = var_8296_cast_fp16)[name = tensor("op_8474_cast_fp16")]; tensor var_8481_begin_0 = const()[name = tensor("op_8481_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8481_end_0 = const()[name = tensor("op_8481_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8481_end_mask_0 = const()[name = tensor("op_8481_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8481_cast_fp16 = slice_by_index(begin = var_8481_begin_0, end = var_8481_end_0, end_mask = var_8481_end_mask_0, x = var_8300_cast_fp16)[name = tensor("op_8481_cast_fp16")]; tensor var_8488_begin_0 = const()[name = tensor("op_8488_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8488_end_0 = const()[name = tensor("op_8488_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8488_end_mask_0 = const()[name = tensor("op_8488_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8488_cast_fp16 = slice_by_index(begin = var_8488_begin_0, end = var_8488_end_0, end_mask = var_8488_end_mask_0, x = var_8300_cast_fp16)[name = tensor("op_8488_cast_fp16")]; tensor var_8495_begin_0 = const()[name = tensor("op_8495_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8495_end_0 = const()[name = tensor("op_8495_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8495_end_mask_0 = const()[name = tensor("op_8495_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8495_cast_fp16 = slice_by_index(begin = var_8495_begin_0, end = var_8495_end_0, end_mask = var_8495_end_mask_0, x = var_8300_cast_fp16)[name = tensor("op_8495_cast_fp16")]; tensor var_8502_begin_0 = const()[name = tensor("op_8502_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8502_end_0 = const()[name = tensor("op_8502_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8502_end_mask_0 = const()[name = tensor("op_8502_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8502_cast_fp16 = slice_by_index(begin = var_8502_begin_0, end = var_8502_end_0, end_mask = var_8502_end_mask_0, x = var_8300_cast_fp16)[name = tensor("op_8502_cast_fp16")]; tensor var_8509_begin_0 = const()[name = tensor("op_8509_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8509_end_0 = const()[name = tensor("op_8509_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8509_end_mask_0 = const()[name = tensor("op_8509_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8509_cast_fp16 = slice_by_index(begin = var_8509_begin_0, end = var_8509_end_0, end_mask = var_8509_end_mask_0, x = var_8304_cast_fp16)[name = tensor("op_8509_cast_fp16")]; tensor var_8516_begin_0 = const()[name = tensor("op_8516_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8516_end_0 = const()[name = tensor("op_8516_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8516_end_mask_0 = const()[name = tensor("op_8516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8516_cast_fp16 = slice_by_index(begin = var_8516_begin_0, end = var_8516_end_0, end_mask = var_8516_end_mask_0, x = var_8304_cast_fp16)[name = tensor("op_8516_cast_fp16")]; tensor var_8523_begin_0 = const()[name = tensor("op_8523_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8523_end_0 = const()[name = tensor("op_8523_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8523_end_mask_0 = const()[name = tensor("op_8523_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8523_cast_fp16 = slice_by_index(begin = var_8523_begin_0, end = var_8523_end_0, end_mask = var_8523_end_mask_0, x = var_8304_cast_fp16)[name = tensor("op_8523_cast_fp16")]; tensor var_8530_begin_0 = const()[name = tensor("op_8530_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8530_end_0 = const()[name = tensor("op_8530_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8530_end_mask_0 = const()[name = tensor("op_8530_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8530_cast_fp16 = slice_by_index(begin = var_8530_begin_0, end = var_8530_end_0, end_mask = var_8530_end_mask_0, x = var_8304_cast_fp16)[name = tensor("op_8530_cast_fp16")]; tensor var_8537_begin_0 = const()[name = tensor("op_8537_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8537_end_0 = const()[name = tensor("op_8537_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8537_end_mask_0 = const()[name = tensor("op_8537_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8537_cast_fp16 = slice_by_index(begin = var_8537_begin_0, end = var_8537_end_0, end_mask = var_8537_end_mask_0, x = var_8308_cast_fp16)[name = tensor("op_8537_cast_fp16")]; tensor var_8544_begin_0 = const()[name = tensor("op_8544_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8544_end_0 = const()[name = tensor("op_8544_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8544_end_mask_0 = const()[name = tensor("op_8544_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8544_cast_fp16 = slice_by_index(begin = var_8544_begin_0, end = var_8544_end_0, end_mask = var_8544_end_mask_0, x = var_8308_cast_fp16)[name = tensor("op_8544_cast_fp16")]; tensor var_8551_begin_0 = const()[name = tensor("op_8551_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8551_end_0 = const()[name = tensor("op_8551_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8551_end_mask_0 = const()[name = tensor("op_8551_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8551_cast_fp16 = slice_by_index(begin = var_8551_begin_0, end = var_8551_end_0, end_mask = var_8551_end_mask_0, x = var_8308_cast_fp16)[name = tensor("op_8551_cast_fp16")]; tensor var_8558_begin_0 = const()[name = tensor("op_8558_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8558_end_0 = const()[name = tensor("op_8558_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8558_end_mask_0 = const()[name = tensor("op_8558_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8558_cast_fp16 = slice_by_index(begin = var_8558_begin_0, end = var_8558_end_0, end_mask = var_8558_end_mask_0, x = var_8308_cast_fp16)[name = tensor("op_8558_cast_fp16")]; tensor var_8565_begin_0 = const()[name = tensor("op_8565_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8565_end_0 = const()[name = tensor("op_8565_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8565_end_mask_0 = const()[name = tensor("op_8565_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8565_cast_fp16 = slice_by_index(begin = var_8565_begin_0, end = var_8565_end_0, end_mask = var_8565_end_mask_0, x = var_8312_cast_fp16)[name = tensor("op_8565_cast_fp16")]; tensor var_8572_begin_0 = const()[name = tensor("op_8572_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8572_end_0 = const()[name = tensor("op_8572_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8572_end_mask_0 = const()[name = tensor("op_8572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8572_cast_fp16 = slice_by_index(begin = var_8572_begin_0, end = var_8572_end_0, end_mask = var_8572_end_mask_0, x = var_8312_cast_fp16)[name = tensor("op_8572_cast_fp16")]; tensor var_8579_begin_0 = const()[name = tensor("op_8579_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8579_end_0 = const()[name = tensor("op_8579_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8579_end_mask_0 = const()[name = tensor("op_8579_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8579_cast_fp16 = slice_by_index(begin = var_8579_begin_0, end = var_8579_end_0, end_mask = var_8579_end_mask_0, x = var_8312_cast_fp16)[name = tensor("op_8579_cast_fp16")]; tensor var_8586_begin_0 = const()[name = tensor("op_8586_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8586_end_0 = const()[name = tensor("op_8586_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8586_end_mask_0 = const()[name = tensor("op_8586_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8586_cast_fp16 = slice_by_index(begin = var_8586_begin_0, end = var_8586_end_0, end_mask = var_8586_end_mask_0, x = var_8312_cast_fp16)[name = tensor("op_8586_cast_fp16")]; tensor var_8593_begin_0 = const()[name = tensor("op_8593_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8593_end_0 = const()[name = tensor("op_8593_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8593_end_mask_0 = const()[name = tensor("op_8593_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8593_cast_fp16 = slice_by_index(begin = var_8593_begin_0, end = var_8593_end_0, end_mask = var_8593_end_mask_0, x = var_8316_cast_fp16)[name = tensor("op_8593_cast_fp16")]; tensor var_8600_begin_0 = const()[name = tensor("op_8600_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8600_end_0 = const()[name = tensor("op_8600_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8600_end_mask_0 = const()[name = tensor("op_8600_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8600_cast_fp16 = slice_by_index(begin = var_8600_begin_0, end = var_8600_end_0, end_mask = var_8600_end_mask_0, x = var_8316_cast_fp16)[name = tensor("op_8600_cast_fp16")]; tensor var_8607_begin_0 = const()[name = tensor("op_8607_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8607_end_0 = const()[name = tensor("op_8607_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8607_end_mask_0 = const()[name = tensor("op_8607_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8607_cast_fp16 = slice_by_index(begin = var_8607_begin_0, end = var_8607_end_0, end_mask = var_8607_end_mask_0, x = var_8316_cast_fp16)[name = tensor("op_8607_cast_fp16")]; tensor var_8614_begin_0 = const()[name = tensor("op_8614_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8614_end_0 = const()[name = tensor("op_8614_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8614_end_mask_0 = const()[name = tensor("op_8614_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8614_cast_fp16 = slice_by_index(begin = var_8614_begin_0, end = var_8614_end_0, end_mask = var_8614_end_mask_0, x = var_8316_cast_fp16)[name = tensor("op_8614_cast_fp16")]; tensor var_8621_begin_0 = const()[name = tensor("op_8621_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8621_end_0 = const()[name = tensor("op_8621_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8621_end_mask_0 = const()[name = tensor("op_8621_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8621_cast_fp16 = slice_by_index(begin = var_8621_begin_0, end = var_8621_end_0, end_mask = var_8621_end_mask_0, x = var_8320_cast_fp16)[name = tensor("op_8621_cast_fp16")]; tensor var_8628_begin_0 = const()[name = tensor("op_8628_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8628_end_0 = const()[name = tensor("op_8628_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8628_end_mask_0 = const()[name = tensor("op_8628_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8628_cast_fp16 = slice_by_index(begin = var_8628_begin_0, end = var_8628_end_0, end_mask = var_8628_end_mask_0, x = var_8320_cast_fp16)[name = tensor("op_8628_cast_fp16")]; tensor var_8635_begin_0 = const()[name = tensor("op_8635_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8635_end_0 = const()[name = tensor("op_8635_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8635_end_mask_0 = const()[name = tensor("op_8635_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8635_cast_fp16 = slice_by_index(begin = var_8635_begin_0, end = var_8635_end_0, end_mask = var_8635_end_mask_0, x = var_8320_cast_fp16)[name = tensor("op_8635_cast_fp16")]; tensor var_8642_begin_0 = const()[name = tensor("op_8642_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8642_end_0 = const()[name = tensor("op_8642_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8642_end_mask_0 = const()[name = tensor("op_8642_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8642_cast_fp16 = slice_by_index(begin = var_8642_begin_0, end = var_8642_end_0, end_mask = var_8642_end_mask_0, x = var_8320_cast_fp16)[name = tensor("op_8642_cast_fp16")]; tensor var_8649_begin_0 = const()[name = tensor("op_8649_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8649_end_0 = const()[name = tensor("op_8649_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8649_end_mask_0 = const()[name = tensor("op_8649_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8649_cast_fp16 = slice_by_index(begin = var_8649_begin_0, end = var_8649_end_0, end_mask = var_8649_end_mask_0, x = var_8324_cast_fp16)[name = tensor("op_8649_cast_fp16")]; tensor var_8656_begin_0 = const()[name = tensor("op_8656_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8656_end_0 = const()[name = tensor("op_8656_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8656_end_mask_0 = const()[name = tensor("op_8656_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8656_cast_fp16 = slice_by_index(begin = var_8656_begin_0, end = var_8656_end_0, end_mask = var_8656_end_mask_0, x = var_8324_cast_fp16)[name = tensor("op_8656_cast_fp16")]; tensor var_8663_begin_0 = const()[name = tensor("op_8663_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8663_end_0 = const()[name = tensor("op_8663_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8663_end_mask_0 = const()[name = tensor("op_8663_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8663_cast_fp16 = slice_by_index(begin = var_8663_begin_0, end = var_8663_end_0, end_mask = var_8663_end_mask_0, x = var_8324_cast_fp16)[name = tensor("op_8663_cast_fp16")]; tensor var_8670_begin_0 = const()[name = tensor("op_8670_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8670_end_0 = const()[name = tensor("op_8670_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8670_end_mask_0 = const()[name = tensor("op_8670_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8670_cast_fp16 = slice_by_index(begin = var_8670_begin_0, end = var_8670_end_0, end_mask = var_8670_end_mask_0, x = var_8324_cast_fp16)[name = tensor("op_8670_cast_fp16")]; tensor var_8677_begin_0 = const()[name = tensor("op_8677_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8677_end_0 = const()[name = tensor("op_8677_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8677_end_mask_0 = const()[name = tensor("op_8677_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8677_cast_fp16 = slice_by_index(begin = var_8677_begin_0, end = var_8677_end_0, end_mask = var_8677_end_mask_0, x = var_8328_cast_fp16)[name = tensor("op_8677_cast_fp16")]; tensor var_8684_begin_0 = const()[name = tensor("op_8684_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8684_end_0 = const()[name = tensor("op_8684_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8684_end_mask_0 = const()[name = tensor("op_8684_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8684_cast_fp16 = slice_by_index(begin = var_8684_begin_0, end = var_8684_end_0, end_mask = var_8684_end_mask_0, x = var_8328_cast_fp16)[name = tensor("op_8684_cast_fp16")]; tensor var_8691_begin_0 = const()[name = tensor("op_8691_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8691_end_0 = const()[name = tensor("op_8691_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8691_end_mask_0 = const()[name = tensor("op_8691_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8691_cast_fp16 = slice_by_index(begin = var_8691_begin_0, end = var_8691_end_0, end_mask = var_8691_end_mask_0, x = var_8328_cast_fp16)[name = tensor("op_8691_cast_fp16")]; tensor var_8698_begin_0 = const()[name = tensor("op_8698_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8698_end_0 = const()[name = tensor("op_8698_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8698_end_mask_0 = const()[name = tensor("op_8698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8698_cast_fp16 = slice_by_index(begin = var_8698_begin_0, end = var_8698_end_0, end_mask = var_8698_end_mask_0, x = var_8328_cast_fp16)[name = tensor("op_8698_cast_fp16")]; tensor var_8705_begin_0 = const()[name = tensor("op_8705_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8705_end_0 = const()[name = tensor("op_8705_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8705_end_mask_0 = const()[name = tensor("op_8705_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8705_cast_fp16 = slice_by_index(begin = var_8705_begin_0, end = var_8705_end_0, end_mask = var_8705_end_mask_0, x = var_8332_cast_fp16)[name = tensor("op_8705_cast_fp16")]; tensor var_8712_begin_0 = const()[name = tensor("op_8712_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8712_end_0 = const()[name = tensor("op_8712_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8712_end_mask_0 = const()[name = tensor("op_8712_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8712_cast_fp16 = slice_by_index(begin = var_8712_begin_0, end = var_8712_end_0, end_mask = var_8712_end_mask_0, x = var_8332_cast_fp16)[name = tensor("op_8712_cast_fp16")]; tensor var_8719_begin_0 = const()[name = tensor("op_8719_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8719_end_0 = const()[name = tensor("op_8719_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8719_end_mask_0 = const()[name = tensor("op_8719_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8719_cast_fp16 = slice_by_index(begin = var_8719_begin_0, end = var_8719_end_0, end_mask = var_8719_end_mask_0, x = var_8332_cast_fp16)[name = tensor("op_8719_cast_fp16")]; tensor var_8726_begin_0 = const()[name = tensor("op_8726_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8726_end_0 = const()[name = tensor("op_8726_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8726_end_mask_0 = const()[name = tensor("op_8726_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8726_cast_fp16 = slice_by_index(begin = var_8726_begin_0, end = var_8726_end_0, end_mask = var_8726_end_mask_0, x = var_8332_cast_fp16)[name = tensor("op_8726_cast_fp16")]; tensor var_8733_begin_0 = const()[name = tensor("op_8733_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8733_end_0 = const()[name = tensor("op_8733_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8733_end_mask_0 = const()[name = tensor("op_8733_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8733_cast_fp16 = slice_by_index(begin = var_8733_begin_0, end = var_8733_end_0, end_mask = var_8733_end_mask_0, x = var_8336_cast_fp16)[name = tensor("op_8733_cast_fp16")]; tensor var_8740_begin_0 = const()[name = tensor("op_8740_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8740_end_0 = const()[name = tensor("op_8740_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8740_end_mask_0 = const()[name = tensor("op_8740_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8740_cast_fp16 = slice_by_index(begin = var_8740_begin_0, end = var_8740_end_0, end_mask = var_8740_end_mask_0, x = var_8336_cast_fp16)[name = tensor("op_8740_cast_fp16")]; tensor var_8747_begin_0 = const()[name = tensor("op_8747_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8747_end_0 = const()[name = tensor("op_8747_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8747_end_mask_0 = const()[name = tensor("op_8747_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8747_cast_fp16 = slice_by_index(begin = var_8747_begin_0, end = var_8747_end_0, end_mask = var_8747_end_mask_0, x = var_8336_cast_fp16)[name = tensor("op_8747_cast_fp16")]; tensor var_8754_begin_0 = const()[name = tensor("op_8754_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8754_end_0 = const()[name = tensor("op_8754_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8754_end_mask_0 = const()[name = tensor("op_8754_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8754_cast_fp16 = slice_by_index(begin = var_8754_begin_0, end = var_8754_end_0, end_mask = var_8754_end_mask_0, x = var_8336_cast_fp16)[name = tensor("op_8754_cast_fp16")]; tensor var_8761_begin_0 = const()[name = tensor("op_8761_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8761_end_0 = const()[name = tensor("op_8761_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8761_end_mask_0 = const()[name = tensor("op_8761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8761_cast_fp16 = slice_by_index(begin = var_8761_begin_0, end = var_8761_end_0, end_mask = var_8761_end_mask_0, x = var_8340_cast_fp16)[name = tensor("op_8761_cast_fp16")]; tensor var_8768_begin_0 = const()[name = tensor("op_8768_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8768_end_0 = const()[name = tensor("op_8768_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8768_end_mask_0 = const()[name = tensor("op_8768_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8768_cast_fp16 = slice_by_index(begin = var_8768_begin_0, end = var_8768_end_0, end_mask = var_8768_end_mask_0, x = var_8340_cast_fp16)[name = tensor("op_8768_cast_fp16")]; tensor var_8775_begin_0 = const()[name = tensor("op_8775_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8775_end_0 = const()[name = tensor("op_8775_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8775_end_mask_0 = const()[name = tensor("op_8775_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8775_cast_fp16 = slice_by_index(begin = var_8775_begin_0, end = var_8775_end_0, end_mask = var_8775_end_mask_0, x = var_8340_cast_fp16)[name = tensor("op_8775_cast_fp16")]; tensor var_8782_begin_0 = const()[name = tensor("op_8782_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8782_end_0 = const()[name = tensor("op_8782_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8782_end_mask_0 = const()[name = tensor("op_8782_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8782_cast_fp16 = slice_by_index(begin = var_8782_begin_0, end = var_8782_end_0, end_mask = var_8782_end_mask_0, x = var_8340_cast_fp16)[name = tensor("op_8782_cast_fp16")]; tensor var_8789_begin_0 = const()[name = tensor("op_8789_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8789_end_0 = const()[name = tensor("op_8789_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8789_end_mask_0 = const()[name = tensor("op_8789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8789_cast_fp16 = slice_by_index(begin = var_8789_begin_0, end = var_8789_end_0, end_mask = var_8789_end_mask_0, x = var_8344_cast_fp16)[name = tensor("op_8789_cast_fp16")]; tensor var_8796_begin_0 = const()[name = tensor("op_8796_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8796_end_0 = const()[name = tensor("op_8796_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8796_end_mask_0 = const()[name = tensor("op_8796_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8796_cast_fp16 = slice_by_index(begin = var_8796_begin_0, end = var_8796_end_0, end_mask = var_8796_end_mask_0, x = var_8344_cast_fp16)[name = tensor("op_8796_cast_fp16")]; tensor var_8803_begin_0 = const()[name = tensor("op_8803_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8803_end_0 = const()[name = tensor("op_8803_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8803_end_mask_0 = const()[name = tensor("op_8803_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8803_cast_fp16 = slice_by_index(begin = var_8803_begin_0, end = var_8803_end_0, end_mask = var_8803_end_mask_0, x = var_8344_cast_fp16)[name = tensor("op_8803_cast_fp16")]; tensor var_8810_begin_0 = const()[name = tensor("op_8810_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8810_end_0 = const()[name = tensor("op_8810_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8810_end_mask_0 = const()[name = tensor("op_8810_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8810_cast_fp16 = slice_by_index(begin = var_8810_begin_0, end = var_8810_end_0, end_mask = var_8810_end_mask_0, x = var_8344_cast_fp16)[name = tensor("op_8810_cast_fp16")]; tensor var_8817_begin_0 = const()[name = tensor("op_8817_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8817_end_0 = const()[name = tensor("op_8817_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8817_end_mask_0 = const()[name = tensor("op_8817_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8817_cast_fp16 = slice_by_index(begin = var_8817_begin_0, end = var_8817_end_0, end_mask = var_8817_end_mask_0, x = var_8348_cast_fp16)[name = tensor("op_8817_cast_fp16")]; tensor var_8824_begin_0 = const()[name = tensor("op_8824_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8824_end_0 = const()[name = tensor("op_8824_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8824_end_mask_0 = const()[name = tensor("op_8824_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8824_cast_fp16 = slice_by_index(begin = var_8824_begin_0, end = var_8824_end_0, end_mask = var_8824_end_mask_0, x = var_8348_cast_fp16)[name = tensor("op_8824_cast_fp16")]; tensor var_8831_begin_0 = const()[name = tensor("op_8831_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8831_end_0 = const()[name = tensor("op_8831_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8831_end_mask_0 = const()[name = tensor("op_8831_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8831_cast_fp16 = slice_by_index(begin = var_8831_begin_0, end = var_8831_end_0, end_mask = var_8831_end_mask_0, x = var_8348_cast_fp16)[name = tensor("op_8831_cast_fp16")]; tensor var_8838_begin_0 = const()[name = tensor("op_8838_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8838_end_0 = const()[name = tensor("op_8838_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8838_end_mask_0 = const()[name = tensor("op_8838_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8838_cast_fp16 = slice_by_index(begin = var_8838_begin_0, end = var_8838_end_0, end_mask = var_8838_end_mask_0, x = var_8348_cast_fp16)[name = tensor("op_8838_cast_fp16")]; tensor var_8845_begin_0 = const()[name = tensor("op_8845_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8845_end_0 = const()[name = tensor("op_8845_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8845_end_mask_0 = const()[name = tensor("op_8845_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8845_cast_fp16 = slice_by_index(begin = var_8845_begin_0, end = var_8845_end_0, end_mask = var_8845_end_mask_0, x = var_8352_cast_fp16)[name = tensor("op_8845_cast_fp16")]; tensor var_8852_begin_0 = const()[name = tensor("op_8852_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8852_end_0 = const()[name = tensor("op_8852_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8852_end_mask_0 = const()[name = tensor("op_8852_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8852_cast_fp16 = slice_by_index(begin = var_8852_begin_0, end = var_8852_end_0, end_mask = var_8852_end_mask_0, x = var_8352_cast_fp16)[name = tensor("op_8852_cast_fp16")]; tensor var_8859_begin_0 = const()[name = tensor("op_8859_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8859_end_0 = const()[name = tensor("op_8859_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8859_end_mask_0 = const()[name = tensor("op_8859_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8859_cast_fp16 = slice_by_index(begin = var_8859_begin_0, end = var_8859_end_0, end_mask = var_8859_end_mask_0, x = var_8352_cast_fp16)[name = tensor("op_8859_cast_fp16")]; tensor var_8866_begin_0 = const()[name = tensor("op_8866_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8866_end_0 = const()[name = tensor("op_8866_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8866_end_mask_0 = const()[name = tensor("op_8866_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8866_cast_fp16 = slice_by_index(begin = var_8866_begin_0, end = var_8866_end_0, end_mask = var_8866_end_mask_0, x = var_8352_cast_fp16)[name = tensor("op_8866_cast_fp16")]; tensor var_8873_begin_0 = const()[name = tensor("op_8873_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8873_end_0 = const()[name = tensor("op_8873_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8873_end_mask_0 = const()[name = tensor("op_8873_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8873_cast_fp16 = slice_by_index(begin = var_8873_begin_0, end = var_8873_end_0, end_mask = var_8873_end_mask_0, x = var_8356_cast_fp16)[name = tensor("op_8873_cast_fp16")]; tensor var_8880_begin_0 = const()[name = tensor("op_8880_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8880_end_0 = const()[name = tensor("op_8880_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8880_end_mask_0 = const()[name = tensor("op_8880_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8880_cast_fp16 = slice_by_index(begin = var_8880_begin_0, end = var_8880_end_0, end_mask = var_8880_end_mask_0, x = var_8356_cast_fp16)[name = tensor("op_8880_cast_fp16")]; tensor var_8887_begin_0 = const()[name = tensor("op_8887_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8887_end_0 = const()[name = tensor("op_8887_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8887_end_mask_0 = const()[name = tensor("op_8887_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8887_cast_fp16 = slice_by_index(begin = var_8887_begin_0, end = var_8887_end_0, end_mask = var_8887_end_mask_0, x = var_8356_cast_fp16)[name = tensor("op_8887_cast_fp16")]; tensor var_8894_begin_0 = const()[name = tensor("op_8894_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8894_end_0 = const()[name = tensor("op_8894_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8894_end_mask_0 = const()[name = tensor("op_8894_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8894_cast_fp16 = slice_by_index(begin = var_8894_begin_0, end = var_8894_end_0, end_mask = var_8894_end_mask_0, x = var_8356_cast_fp16)[name = tensor("op_8894_cast_fp16")]; tensor var_8901_begin_0 = const()[name = tensor("op_8901_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8901_end_0 = const()[name = tensor("op_8901_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_8901_end_mask_0 = const()[name = tensor("op_8901_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8901_cast_fp16 = slice_by_index(begin = var_8901_begin_0, end = var_8901_end_0, end_mask = var_8901_end_mask_0, x = var_8360_cast_fp16)[name = tensor("op_8901_cast_fp16")]; tensor var_8908_begin_0 = const()[name = tensor("op_8908_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_8908_end_0 = const()[name = tensor("op_8908_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_8908_end_mask_0 = const()[name = tensor("op_8908_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8908_cast_fp16 = slice_by_index(begin = var_8908_begin_0, end = var_8908_end_0, end_mask = var_8908_end_mask_0, x = var_8360_cast_fp16)[name = tensor("op_8908_cast_fp16")]; tensor var_8915_begin_0 = const()[name = tensor("op_8915_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_8915_end_0 = const()[name = tensor("op_8915_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_8915_end_mask_0 = const()[name = tensor("op_8915_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8915_cast_fp16 = slice_by_index(begin = var_8915_begin_0, end = var_8915_end_0, end_mask = var_8915_end_mask_0, x = var_8360_cast_fp16)[name = tensor("op_8915_cast_fp16")]; tensor var_8922_begin_0 = const()[name = tensor("op_8922_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_8922_end_0 = const()[name = tensor("op_8922_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_8922_end_mask_0 = const()[name = tensor("op_8922_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8922_cast_fp16 = slice_by_index(begin = var_8922_begin_0, end = var_8922_end_0, end_mask = var_8922_end_mask_0, x = var_8360_cast_fp16)[name = tensor("op_8922_cast_fp16")]; tensor k_11_perm_0 = const()[name = tensor("k_11_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_8927_begin_0 = const()[name = tensor("op_8927_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_8927_end_0 = const()[name = tensor("op_8927_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_8927_end_mask_0 = const()[name = tensor("op_8927_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = key_11_cast_fp16)[name = tensor("transpose_26")]; tensor var_8927_cast_fp16 = slice_by_index(begin = var_8927_begin_0, end = var_8927_end_0, end_mask = var_8927_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8927_cast_fp16")]; tensor var_8931_begin_0 = const()[name = tensor("op_8931_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_8931_end_0 = const()[name = tensor("op_8931_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_8931_end_mask_0 = const()[name = tensor("op_8931_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8931_cast_fp16 = slice_by_index(begin = var_8931_begin_0, end = var_8931_end_0, end_mask = var_8931_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8931_cast_fp16")]; tensor var_8935_begin_0 = const()[name = tensor("op_8935_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_8935_end_0 = const()[name = tensor("op_8935_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_8935_end_mask_0 = const()[name = tensor("op_8935_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8935_cast_fp16 = slice_by_index(begin = var_8935_begin_0, end = var_8935_end_0, end_mask = var_8935_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8935_cast_fp16")]; tensor var_8939_begin_0 = const()[name = tensor("op_8939_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_8939_end_0 = const()[name = tensor("op_8939_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_8939_end_mask_0 = const()[name = tensor("op_8939_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8939_cast_fp16 = slice_by_index(begin = var_8939_begin_0, end = var_8939_end_0, end_mask = var_8939_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8939_cast_fp16")]; tensor var_8943_begin_0 = const()[name = tensor("op_8943_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_8943_end_0 = const()[name = tensor("op_8943_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_8943_end_mask_0 = const()[name = tensor("op_8943_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8943_cast_fp16 = slice_by_index(begin = var_8943_begin_0, end = var_8943_end_0, end_mask = var_8943_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8943_cast_fp16")]; tensor var_8947_begin_0 = const()[name = tensor("op_8947_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_8947_end_0 = const()[name = tensor("op_8947_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_8947_end_mask_0 = const()[name = tensor("op_8947_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8947_cast_fp16 = slice_by_index(begin = var_8947_begin_0, end = var_8947_end_0, end_mask = var_8947_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8947_cast_fp16")]; tensor var_8951_begin_0 = const()[name = tensor("op_8951_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_8951_end_0 = const()[name = tensor("op_8951_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_8951_end_mask_0 = const()[name = tensor("op_8951_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8951_cast_fp16 = slice_by_index(begin = var_8951_begin_0, end = var_8951_end_0, end_mask = var_8951_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8951_cast_fp16")]; tensor var_8955_begin_0 = const()[name = tensor("op_8955_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_8955_end_0 = const()[name = tensor("op_8955_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_8955_end_mask_0 = const()[name = tensor("op_8955_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8955_cast_fp16 = slice_by_index(begin = var_8955_begin_0, end = var_8955_end_0, end_mask = var_8955_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8955_cast_fp16")]; tensor var_8959_begin_0 = const()[name = tensor("op_8959_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_8959_end_0 = const()[name = tensor("op_8959_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_8959_end_mask_0 = const()[name = tensor("op_8959_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8959_cast_fp16 = slice_by_index(begin = var_8959_begin_0, end = var_8959_end_0, end_mask = var_8959_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8959_cast_fp16")]; tensor var_8963_begin_0 = const()[name = tensor("op_8963_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_8963_end_0 = const()[name = tensor("op_8963_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_8963_end_mask_0 = const()[name = tensor("op_8963_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8963_cast_fp16 = slice_by_index(begin = var_8963_begin_0, end = var_8963_end_0, end_mask = var_8963_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8963_cast_fp16")]; tensor var_8967_begin_0 = const()[name = tensor("op_8967_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_8967_end_0 = const()[name = tensor("op_8967_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_8967_end_mask_0 = const()[name = tensor("op_8967_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8967_cast_fp16 = slice_by_index(begin = var_8967_begin_0, end = var_8967_end_0, end_mask = var_8967_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8967_cast_fp16")]; tensor var_8971_begin_0 = const()[name = tensor("op_8971_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_8971_end_0 = const()[name = tensor("op_8971_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_8971_end_mask_0 = const()[name = tensor("op_8971_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8971_cast_fp16 = slice_by_index(begin = var_8971_begin_0, end = var_8971_end_0, end_mask = var_8971_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8971_cast_fp16")]; tensor var_8975_begin_0 = const()[name = tensor("op_8975_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_8975_end_0 = const()[name = tensor("op_8975_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_8975_end_mask_0 = const()[name = tensor("op_8975_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8975_cast_fp16 = slice_by_index(begin = var_8975_begin_0, end = var_8975_end_0, end_mask = var_8975_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8975_cast_fp16")]; tensor var_8979_begin_0 = const()[name = tensor("op_8979_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_8979_end_0 = const()[name = tensor("op_8979_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_8979_end_mask_0 = const()[name = tensor("op_8979_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8979_cast_fp16 = slice_by_index(begin = var_8979_begin_0, end = var_8979_end_0, end_mask = var_8979_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8979_cast_fp16")]; tensor var_8983_begin_0 = const()[name = tensor("op_8983_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_8983_end_0 = const()[name = tensor("op_8983_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_8983_end_mask_0 = const()[name = tensor("op_8983_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8983_cast_fp16 = slice_by_index(begin = var_8983_begin_0, end = var_8983_end_0, end_mask = var_8983_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8983_cast_fp16")]; tensor var_8987_begin_0 = const()[name = tensor("op_8987_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_8987_end_0 = const()[name = tensor("op_8987_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_8987_end_mask_0 = const()[name = tensor("op_8987_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8987_cast_fp16 = slice_by_index(begin = var_8987_begin_0, end = var_8987_end_0, end_mask = var_8987_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8987_cast_fp16")]; tensor var_8991_begin_0 = const()[name = tensor("op_8991_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_8991_end_0 = const()[name = tensor("op_8991_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_8991_end_mask_0 = const()[name = tensor("op_8991_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8991_cast_fp16 = slice_by_index(begin = var_8991_begin_0, end = var_8991_end_0, end_mask = var_8991_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8991_cast_fp16")]; tensor var_8995_begin_0 = const()[name = tensor("op_8995_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_8995_end_0 = const()[name = tensor("op_8995_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_8995_end_mask_0 = const()[name = tensor("op_8995_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8995_cast_fp16 = slice_by_index(begin = var_8995_begin_0, end = var_8995_end_0, end_mask = var_8995_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8995_cast_fp16")]; tensor var_8999_begin_0 = const()[name = tensor("op_8999_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_8999_end_0 = const()[name = tensor("op_8999_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_8999_end_mask_0 = const()[name = tensor("op_8999_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_8999_cast_fp16 = slice_by_index(begin = var_8999_begin_0, end = var_8999_end_0, end_mask = var_8999_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_8999_cast_fp16")]; tensor var_9003_begin_0 = const()[name = tensor("op_9003_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_9003_end_0 = const()[name = tensor("op_9003_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_9003_end_mask_0 = const()[name = tensor("op_9003_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9003_cast_fp16 = slice_by_index(begin = var_9003_begin_0, end = var_9003_end_0, end_mask = var_9003_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_9003_cast_fp16")]; tensor var_9005_begin_0 = const()[name = tensor("op_9005_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9005_end_0 = const()[name = tensor("op_9005_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_9005_end_mask_0 = const()[name = tensor("op_9005_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9005_cast_fp16 = slice_by_index(begin = var_9005_begin_0, end = var_9005_end_0, end_mask = var_9005_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9005_cast_fp16")]; tensor var_9009_begin_0 = const()[name = tensor("op_9009_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_9009_end_0 = const()[name = tensor("op_9009_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_9009_end_mask_0 = const()[name = tensor("op_9009_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9009_cast_fp16 = slice_by_index(begin = var_9009_begin_0, end = var_9009_end_0, end_mask = var_9009_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9009_cast_fp16")]; tensor var_9013_begin_0 = const()[name = tensor("op_9013_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_9013_end_0 = const()[name = tensor("op_9013_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_9013_end_mask_0 = const()[name = tensor("op_9013_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9013_cast_fp16 = slice_by_index(begin = var_9013_begin_0, end = var_9013_end_0, end_mask = var_9013_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9013_cast_fp16")]; tensor var_9017_begin_0 = const()[name = tensor("op_9017_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_9017_end_0 = const()[name = tensor("op_9017_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_9017_end_mask_0 = const()[name = tensor("op_9017_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9017_cast_fp16 = slice_by_index(begin = var_9017_begin_0, end = var_9017_end_0, end_mask = var_9017_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9017_cast_fp16")]; tensor var_9021_begin_0 = const()[name = tensor("op_9021_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_9021_end_0 = const()[name = tensor("op_9021_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_9021_end_mask_0 = const()[name = tensor("op_9021_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9021_cast_fp16 = slice_by_index(begin = var_9021_begin_0, end = var_9021_end_0, end_mask = var_9021_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9021_cast_fp16")]; tensor var_9025_begin_0 = const()[name = tensor("op_9025_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9025_end_0 = const()[name = tensor("op_9025_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_9025_end_mask_0 = const()[name = tensor("op_9025_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9025_cast_fp16 = slice_by_index(begin = var_9025_begin_0, end = var_9025_end_0, end_mask = var_9025_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9025_cast_fp16")]; tensor var_9029_begin_0 = const()[name = tensor("op_9029_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_9029_end_0 = const()[name = tensor("op_9029_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_9029_end_mask_0 = const()[name = tensor("op_9029_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9029_cast_fp16 = slice_by_index(begin = var_9029_begin_0, end = var_9029_end_0, end_mask = var_9029_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9029_cast_fp16")]; tensor var_9033_begin_0 = const()[name = tensor("op_9033_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_9033_end_0 = const()[name = tensor("op_9033_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_9033_end_mask_0 = const()[name = tensor("op_9033_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9033_cast_fp16 = slice_by_index(begin = var_9033_begin_0, end = var_9033_end_0, end_mask = var_9033_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9033_cast_fp16")]; tensor var_9037_begin_0 = const()[name = tensor("op_9037_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_9037_end_0 = const()[name = tensor("op_9037_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_9037_end_mask_0 = const()[name = tensor("op_9037_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9037_cast_fp16 = slice_by_index(begin = var_9037_begin_0, end = var_9037_end_0, end_mask = var_9037_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9037_cast_fp16")]; tensor var_9041_begin_0 = const()[name = tensor("op_9041_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_9041_end_0 = const()[name = tensor("op_9041_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_9041_end_mask_0 = const()[name = tensor("op_9041_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9041_cast_fp16 = slice_by_index(begin = var_9041_begin_0, end = var_9041_end_0, end_mask = var_9041_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9041_cast_fp16")]; tensor var_9045_begin_0 = const()[name = tensor("op_9045_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_9045_end_0 = const()[name = tensor("op_9045_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_9045_end_mask_0 = const()[name = tensor("op_9045_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9045_cast_fp16 = slice_by_index(begin = var_9045_begin_0, end = var_9045_end_0, end_mask = var_9045_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9045_cast_fp16")]; tensor var_9049_begin_0 = const()[name = tensor("op_9049_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_9049_end_0 = const()[name = tensor("op_9049_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_9049_end_mask_0 = const()[name = tensor("op_9049_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9049_cast_fp16 = slice_by_index(begin = var_9049_begin_0, end = var_9049_end_0, end_mask = var_9049_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9049_cast_fp16")]; tensor var_9053_begin_0 = const()[name = tensor("op_9053_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_9053_end_0 = const()[name = tensor("op_9053_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_9053_end_mask_0 = const()[name = tensor("op_9053_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9053_cast_fp16 = slice_by_index(begin = var_9053_begin_0, end = var_9053_end_0, end_mask = var_9053_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9053_cast_fp16")]; tensor var_9057_begin_0 = const()[name = tensor("op_9057_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_9057_end_0 = const()[name = tensor("op_9057_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_9057_end_mask_0 = const()[name = tensor("op_9057_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9057_cast_fp16 = slice_by_index(begin = var_9057_begin_0, end = var_9057_end_0, end_mask = var_9057_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9057_cast_fp16")]; tensor var_9061_begin_0 = const()[name = tensor("op_9061_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_9061_end_0 = const()[name = tensor("op_9061_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_9061_end_mask_0 = const()[name = tensor("op_9061_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9061_cast_fp16 = slice_by_index(begin = var_9061_begin_0, end = var_9061_end_0, end_mask = var_9061_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9061_cast_fp16")]; tensor var_9065_begin_0 = const()[name = tensor("op_9065_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_9065_end_0 = const()[name = tensor("op_9065_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_9065_end_mask_0 = const()[name = tensor("op_9065_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9065_cast_fp16 = slice_by_index(begin = var_9065_begin_0, end = var_9065_end_0, end_mask = var_9065_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9065_cast_fp16")]; tensor var_9069_begin_0 = const()[name = tensor("op_9069_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_9069_end_0 = const()[name = tensor("op_9069_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_9069_end_mask_0 = const()[name = tensor("op_9069_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9069_cast_fp16 = slice_by_index(begin = var_9069_begin_0, end = var_9069_end_0, end_mask = var_9069_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9069_cast_fp16")]; tensor var_9073_begin_0 = const()[name = tensor("op_9073_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_9073_end_0 = const()[name = tensor("op_9073_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_9073_end_mask_0 = const()[name = tensor("op_9073_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9073_cast_fp16 = slice_by_index(begin = var_9073_begin_0, end = var_9073_end_0, end_mask = var_9073_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9073_cast_fp16")]; tensor var_9077_begin_0 = const()[name = tensor("op_9077_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_9077_end_0 = const()[name = tensor("op_9077_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_9077_end_mask_0 = const()[name = tensor("op_9077_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9077_cast_fp16 = slice_by_index(begin = var_9077_begin_0, end = var_9077_end_0, end_mask = var_9077_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9077_cast_fp16")]; tensor var_9081_begin_0 = const()[name = tensor("op_9081_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_9081_end_0 = const()[name = tensor("op_9081_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_9081_end_mask_0 = const()[name = tensor("op_9081_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9081_cast_fp16 = slice_by_index(begin = var_9081_begin_0, end = var_9081_end_0, end_mask = var_9081_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_9081_cast_fp16")]; tensor _SplitHeadsQ__mh_w_801_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_801_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_801_equation_0, values = (var_8927_cast_fp16, var_8369_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_801_cast_fp16")]; tensor _SplitHeadsQ__mh_w_803_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_803_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_803_equation_0, values = (var_8927_cast_fp16, var_8376_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_803_cast_fp16")]; tensor _SplitHeadsQ__mh_w_805_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_805_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_805_equation_0, values = (var_8927_cast_fp16, var_8383_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_805_cast_fp16")]; tensor _SplitHeadsQ__mh_w_807_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_807_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_807_equation_0, values = (var_8927_cast_fp16, var_8390_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_807_cast_fp16")]; tensor _SplitHeadsQ__mh_w_809_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_809_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_809_equation_0, values = (var_8931_cast_fp16, var_8397_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_809_cast_fp16")]; tensor _SplitHeadsQ__mh_w_811_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_811_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_811_equation_0, values = (var_8931_cast_fp16, var_8404_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_811_cast_fp16")]; tensor _SplitHeadsQ__mh_w_813_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_813_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_813_equation_0, values = (var_8931_cast_fp16, var_8411_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_813_cast_fp16")]; tensor _SplitHeadsQ__mh_w_815_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_815_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_815_equation_0, values = (var_8931_cast_fp16, var_8418_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_815_cast_fp16")]; tensor _SplitHeadsQ__mh_w_817_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_817_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_817_equation_0, values = (var_8935_cast_fp16, var_8425_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_817_cast_fp16")]; tensor _SplitHeadsQ__mh_w_819_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_819_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_819_equation_0, values = (var_8935_cast_fp16, var_8432_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_819_cast_fp16")]; tensor _SplitHeadsQ__mh_w_821_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_821_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_821_equation_0, values = (var_8935_cast_fp16, var_8439_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_821_cast_fp16")]; tensor _SplitHeadsQ__mh_w_823_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_823_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_823_equation_0, values = (var_8935_cast_fp16, var_8446_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_823_cast_fp16")]; tensor _SplitHeadsQ__mh_w_825_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_825_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_825_equation_0, values = (var_8939_cast_fp16, var_8453_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_825_cast_fp16")]; tensor _SplitHeadsQ__mh_w_827_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_827_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_827_equation_0, values = (var_8939_cast_fp16, var_8460_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_827_cast_fp16")]; tensor _SplitHeadsQ__mh_w_829_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_829_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_829_equation_0, values = (var_8939_cast_fp16, var_8467_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_829_cast_fp16")]; tensor _SplitHeadsQ__mh_w_831_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_831_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_831_equation_0, values = (var_8939_cast_fp16, var_8474_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_831_cast_fp16")]; tensor _SplitHeadsQ__mh_w_833_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_833_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_833_equation_0, values = (var_8943_cast_fp16, var_8481_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_833_cast_fp16")]; tensor _SplitHeadsQ__mh_w_835_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_835_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_835_equation_0, values = (var_8943_cast_fp16, var_8488_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_835_cast_fp16")]; tensor _SplitHeadsQ__mh_w_837_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_837_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_837_equation_0, values = (var_8943_cast_fp16, var_8495_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_837_cast_fp16")]; tensor _SplitHeadsQ__mh_w_839_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_839_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_839_equation_0, values = (var_8943_cast_fp16, var_8502_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_839_cast_fp16")]; tensor _SplitHeadsQ__mh_w_841_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_841_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_841_equation_0, values = (var_8947_cast_fp16, var_8509_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_841_cast_fp16")]; tensor _SplitHeadsQ__mh_w_843_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_843_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_843_equation_0, values = (var_8947_cast_fp16, var_8516_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_843_cast_fp16")]; tensor _SplitHeadsQ__mh_w_845_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_845_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_845_equation_0, values = (var_8947_cast_fp16, var_8523_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_845_cast_fp16")]; tensor _SplitHeadsQ__mh_w_847_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_847_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_847_equation_0, values = (var_8947_cast_fp16, var_8530_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_847_cast_fp16")]; tensor _SplitHeadsQ__mh_w_849_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_849_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_849_equation_0, values = (var_8951_cast_fp16, var_8537_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_849_cast_fp16")]; tensor _SplitHeadsQ__mh_w_851_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_851_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_851_equation_0, values = (var_8951_cast_fp16, var_8544_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_851_cast_fp16")]; tensor _SplitHeadsQ__mh_w_853_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_853_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_853_equation_0, values = (var_8951_cast_fp16, var_8551_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_853_cast_fp16")]; tensor _SplitHeadsQ__mh_w_855_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_855_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_855_equation_0, values = (var_8951_cast_fp16, var_8558_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_855_cast_fp16")]; tensor _SplitHeadsQ__mh_w_857_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_857_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_857_equation_0, values = (var_8955_cast_fp16, var_8565_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_857_cast_fp16")]; tensor _SplitHeadsQ__mh_w_859_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_859_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_859_equation_0, values = (var_8955_cast_fp16, var_8572_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_859_cast_fp16")]; tensor _SplitHeadsQ__mh_w_861_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_861_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_861_equation_0, values = (var_8955_cast_fp16, var_8579_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_861_cast_fp16")]; tensor _SplitHeadsQ__mh_w_863_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_863_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_863_equation_0, values = (var_8955_cast_fp16, var_8586_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_863_cast_fp16")]; tensor _SplitHeadsQ__mh_w_865_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_865_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_865_equation_0, values = (var_8959_cast_fp16, var_8593_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_865_cast_fp16")]; tensor _SplitHeadsQ__mh_w_867_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_867_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_867_equation_0, values = (var_8959_cast_fp16, var_8600_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_867_cast_fp16")]; tensor _SplitHeadsQ__mh_w_869_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_869_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_869_equation_0, values = (var_8959_cast_fp16, var_8607_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_869_cast_fp16")]; tensor _SplitHeadsQ__mh_w_871_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_871_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_871_equation_0, values = (var_8959_cast_fp16, var_8614_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_871_cast_fp16")]; tensor _SplitHeadsQ__mh_w_873_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_873_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_873_equation_0, values = (var_8963_cast_fp16, var_8621_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_873_cast_fp16")]; tensor _SplitHeadsQ__mh_w_875_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_875_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_875_equation_0, values = (var_8963_cast_fp16, var_8628_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_875_cast_fp16")]; tensor _SplitHeadsQ__mh_w_877_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_877_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_877_equation_0, values = (var_8963_cast_fp16, var_8635_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_877_cast_fp16")]; tensor _SplitHeadsQ__mh_w_879_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_879_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_879_equation_0, values = (var_8963_cast_fp16, var_8642_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_879_cast_fp16")]; tensor _SplitHeadsQ__mh_w_881_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_881_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_881_equation_0, values = (var_8967_cast_fp16, var_8649_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_881_cast_fp16")]; tensor _SplitHeadsQ__mh_w_883_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_883_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_883_equation_0, values = (var_8967_cast_fp16, var_8656_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_883_cast_fp16")]; tensor _SplitHeadsQ__mh_w_885_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_885_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_885_equation_0, values = (var_8967_cast_fp16, var_8663_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_885_cast_fp16")]; tensor _SplitHeadsQ__mh_w_887_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_887_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_887_equation_0, values = (var_8967_cast_fp16, var_8670_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_887_cast_fp16")]; tensor _SplitHeadsQ__mh_w_889_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_889_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_889_equation_0, values = (var_8971_cast_fp16, var_8677_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_889_cast_fp16")]; tensor _SplitHeadsQ__mh_w_891_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_891_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_891_equation_0, values = (var_8971_cast_fp16, var_8684_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_891_cast_fp16")]; tensor _SplitHeadsQ__mh_w_893_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_893_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_893_equation_0, values = (var_8971_cast_fp16, var_8691_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_893_cast_fp16")]; tensor _SplitHeadsQ__mh_w_895_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_895_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_895_equation_0, values = (var_8971_cast_fp16, var_8698_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_895_cast_fp16")]; tensor _SplitHeadsQ__mh_w_897_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_897_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_897_equation_0, values = (var_8975_cast_fp16, var_8705_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_897_cast_fp16")]; tensor _SplitHeadsQ__mh_w_899_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_899_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_899_equation_0, values = (var_8975_cast_fp16, var_8712_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_899_cast_fp16")]; tensor _SplitHeadsQ__mh_w_901_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_901_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_901_equation_0, values = (var_8975_cast_fp16, var_8719_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_901_cast_fp16")]; tensor _SplitHeadsQ__mh_w_903_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_903_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_903_equation_0, values = (var_8975_cast_fp16, var_8726_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_903_cast_fp16")]; tensor _SplitHeadsQ__mh_w_905_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_905_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_905_equation_0, values = (var_8979_cast_fp16, var_8733_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_905_cast_fp16")]; tensor _SplitHeadsQ__mh_w_907_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_907_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_907_equation_0, values = (var_8979_cast_fp16, var_8740_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_907_cast_fp16")]; tensor _SplitHeadsQ__mh_w_909_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_909_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_909_equation_0, values = (var_8979_cast_fp16, var_8747_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_909_cast_fp16")]; tensor _SplitHeadsQ__mh_w_911_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_911_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_911_equation_0, values = (var_8979_cast_fp16, var_8754_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_911_cast_fp16")]; tensor _SplitHeadsQ__mh_w_913_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_913_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_913_equation_0, values = (var_8983_cast_fp16, var_8761_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_913_cast_fp16")]; tensor _SplitHeadsQ__mh_w_915_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_915_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_915_equation_0, values = (var_8983_cast_fp16, var_8768_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_915_cast_fp16")]; tensor _SplitHeadsQ__mh_w_917_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_917_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_917_equation_0, values = (var_8983_cast_fp16, var_8775_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_917_cast_fp16")]; tensor _SplitHeadsQ__mh_w_919_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_919_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_919_equation_0, values = (var_8983_cast_fp16, var_8782_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_919_cast_fp16")]; tensor _SplitHeadsQ__mh_w_921_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_921_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_921_equation_0, values = (var_8987_cast_fp16, var_8789_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_921_cast_fp16")]; tensor _SplitHeadsQ__mh_w_923_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_923_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_923_equation_0, values = (var_8987_cast_fp16, var_8796_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_923_cast_fp16")]; tensor _SplitHeadsQ__mh_w_925_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_925_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_925_equation_0, values = (var_8987_cast_fp16, var_8803_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_925_cast_fp16")]; tensor _SplitHeadsQ__mh_w_927_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_927_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_927_equation_0, values = (var_8987_cast_fp16, var_8810_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_927_cast_fp16")]; tensor _SplitHeadsQ__mh_w_929_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_929_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_929_equation_0, values = (var_8991_cast_fp16, var_8817_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_929_cast_fp16")]; tensor _SplitHeadsQ__mh_w_931_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_931_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_931_equation_0, values = (var_8991_cast_fp16, var_8824_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_931_cast_fp16")]; tensor _SplitHeadsQ__mh_w_933_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_933_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_933_equation_0, values = (var_8991_cast_fp16, var_8831_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_933_cast_fp16")]; tensor _SplitHeadsQ__mh_w_935_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_935_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_935_equation_0, values = (var_8991_cast_fp16, var_8838_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_935_cast_fp16")]; tensor _SplitHeadsQ__mh_w_937_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_937_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_937_equation_0, values = (var_8995_cast_fp16, var_8845_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_937_cast_fp16")]; tensor _SplitHeadsQ__mh_w_939_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_939_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_939_equation_0, values = (var_8995_cast_fp16, var_8852_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_939_cast_fp16")]; tensor _SplitHeadsQ__mh_w_941_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_941_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_941_equation_0, values = (var_8995_cast_fp16, var_8859_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_941_cast_fp16")]; tensor _SplitHeadsQ__mh_w_943_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_943_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_943_equation_0, values = (var_8995_cast_fp16, var_8866_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_943_cast_fp16")]; tensor _SplitHeadsQ__mh_w_945_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_945_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_945_equation_0, values = (var_8999_cast_fp16, var_8873_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_945_cast_fp16")]; tensor _SplitHeadsQ__mh_w_947_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_947_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_947_equation_0, values = (var_8999_cast_fp16, var_8880_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_947_cast_fp16")]; tensor _SplitHeadsQ__mh_w_949_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_949_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_949_equation_0, values = (var_8999_cast_fp16, var_8887_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_949_cast_fp16")]; tensor _SplitHeadsQ__mh_w_951_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_951_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_951_equation_0, values = (var_8999_cast_fp16, var_8894_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_951_cast_fp16")]; tensor _SplitHeadsQ__mh_w_953_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_953_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_953_equation_0, values = (var_9003_cast_fp16, var_8901_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_953_cast_fp16")]; tensor _SplitHeadsQ__mh_w_955_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_955_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_955_equation_0, values = (var_9003_cast_fp16, var_8908_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_955_cast_fp16")]; tensor _SplitHeadsQ__mh_w_957_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_957_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_957_equation_0, values = (var_9003_cast_fp16, var_8915_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_957_cast_fp16")]; tensor _SplitHeadsQ__mh_w_959_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_959_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_959_equation_0, values = (var_9003_cast_fp16, var_8922_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_959_cast_fp16")]; tensor var_9244_to_fp16 = const()[name = tensor("op_9244_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_801_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_801_cast_fp16, y = var_9244_to_fp16)[name = tensor("aw_chunk_801_cast_fp16")]; tensor var_9246_to_fp16 = const()[name = tensor("op_9246_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_803_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_803_cast_fp16, y = var_9246_to_fp16)[name = tensor("aw_chunk_803_cast_fp16")]; tensor var_9248_to_fp16 = const()[name = tensor("op_9248_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_805_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_805_cast_fp16, y = var_9248_to_fp16)[name = tensor("aw_chunk_805_cast_fp16")]; tensor var_9250_to_fp16 = const()[name = tensor("op_9250_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_807_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_807_cast_fp16, y = var_9250_to_fp16)[name = tensor("aw_chunk_807_cast_fp16")]; tensor var_9252_to_fp16 = const()[name = tensor("op_9252_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_809_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_809_cast_fp16, y = var_9252_to_fp16)[name = tensor("aw_chunk_809_cast_fp16")]; tensor var_9254_to_fp16 = const()[name = tensor("op_9254_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_811_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_811_cast_fp16, y = var_9254_to_fp16)[name = tensor("aw_chunk_811_cast_fp16")]; tensor var_9256_to_fp16 = const()[name = tensor("op_9256_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_813_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_813_cast_fp16, y = var_9256_to_fp16)[name = tensor("aw_chunk_813_cast_fp16")]; tensor var_9258_to_fp16 = const()[name = tensor("op_9258_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_815_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_815_cast_fp16, y = var_9258_to_fp16)[name = tensor("aw_chunk_815_cast_fp16")]; tensor var_9260_to_fp16 = const()[name = tensor("op_9260_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_817_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_817_cast_fp16, y = var_9260_to_fp16)[name = tensor("aw_chunk_817_cast_fp16")]; tensor var_9262_to_fp16 = const()[name = tensor("op_9262_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_819_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_819_cast_fp16, y = var_9262_to_fp16)[name = tensor("aw_chunk_819_cast_fp16")]; tensor var_9264_to_fp16 = const()[name = tensor("op_9264_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_821_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_821_cast_fp16, y = var_9264_to_fp16)[name = tensor("aw_chunk_821_cast_fp16")]; tensor var_9266_to_fp16 = const()[name = tensor("op_9266_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_823_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_823_cast_fp16, y = var_9266_to_fp16)[name = tensor("aw_chunk_823_cast_fp16")]; tensor var_9268_to_fp16 = const()[name = tensor("op_9268_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_825_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_825_cast_fp16, y = var_9268_to_fp16)[name = tensor("aw_chunk_825_cast_fp16")]; tensor var_9270_to_fp16 = const()[name = tensor("op_9270_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_827_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_827_cast_fp16, y = var_9270_to_fp16)[name = tensor("aw_chunk_827_cast_fp16")]; tensor var_9272_to_fp16 = const()[name = tensor("op_9272_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_829_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_829_cast_fp16, y = var_9272_to_fp16)[name = tensor("aw_chunk_829_cast_fp16")]; tensor var_9274_to_fp16 = const()[name = tensor("op_9274_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_831_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_831_cast_fp16, y = var_9274_to_fp16)[name = tensor("aw_chunk_831_cast_fp16")]; tensor var_9276_to_fp16 = const()[name = tensor("op_9276_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_833_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_833_cast_fp16, y = var_9276_to_fp16)[name = tensor("aw_chunk_833_cast_fp16")]; tensor var_9278_to_fp16 = const()[name = tensor("op_9278_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_835_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_835_cast_fp16, y = var_9278_to_fp16)[name = tensor("aw_chunk_835_cast_fp16")]; tensor var_9280_to_fp16 = const()[name = tensor("op_9280_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_837_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_837_cast_fp16, y = var_9280_to_fp16)[name = tensor("aw_chunk_837_cast_fp16")]; tensor var_9282_to_fp16 = const()[name = tensor("op_9282_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_839_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_839_cast_fp16, y = var_9282_to_fp16)[name = tensor("aw_chunk_839_cast_fp16")]; tensor var_9284_to_fp16 = const()[name = tensor("op_9284_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_841_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_841_cast_fp16, y = var_9284_to_fp16)[name = tensor("aw_chunk_841_cast_fp16")]; tensor var_9286_to_fp16 = const()[name = tensor("op_9286_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_843_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_843_cast_fp16, y = var_9286_to_fp16)[name = tensor("aw_chunk_843_cast_fp16")]; tensor var_9288_to_fp16 = const()[name = tensor("op_9288_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_845_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_845_cast_fp16, y = var_9288_to_fp16)[name = tensor("aw_chunk_845_cast_fp16")]; tensor var_9290_to_fp16 = const()[name = tensor("op_9290_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_847_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_847_cast_fp16, y = var_9290_to_fp16)[name = tensor("aw_chunk_847_cast_fp16")]; tensor var_9292_to_fp16 = const()[name = tensor("op_9292_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_849_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_849_cast_fp16, y = var_9292_to_fp16)[name = tensor("aw_chunk_849_cast_fp16")]; tensor var_9294_to_fp16 = const()[name = tensor("op_9294_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_851_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_851_cast_fp16, y = var_9294_to_fp16)[name = tensor("aw_chunk_851_cast_fp16")]; tensor var_9296_to_fp16 = const()[name = tensor("op_9296_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_853_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_853_cast_fp16, y = var_9296_to_fp16)[name = tensor("aw_chunk_853_cast_fp16")]; tensor var_9298_to_fp16 = const()[name = tensor("op_9298_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_855_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_855_cast_fp16, y = var_9298_to_fp16)[name = tensor("aw_chunk_855_cast_fp16")]; tensor var_9300_to_fp16 = const()[name = tensor("op_9300_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_857_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_857_cast_fp16, y = var_9300_to_fp16)[name = tensor("aw_chunk_857_cast_fp16")]; tensor var_9302_to_fp16 = const()[name = tensor("op_9302_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_859_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_859_cast_fp16, y = var_9302_to_fp16)[name = tensor("aw_chunk_859_cast_fp16")]; tensor var_9304_to_fp16 = const()[name = tensor("op_9304_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_861_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_861_cast_fp16, y = var_9304_to_fp16)[name = tensor("aw_chunk_861_cast_fp16")]; tensor var_9306_to_fp16 = const()[name = tensor("op_9306_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_863_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_863_cast_fp16, y = var_9306_to_fp16)[name = tensor("aw_chunk_863_cast_fp16")]; tensor var_9308_to_fp16 = const()[name = tensor("op_9308_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_865_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_865_cast_fp16, y = var_9308_to_fp16)[name = tensor("aw_chunk_865_cast_fp16")]; tensor var_9310_to_fp16 = const()[name = tensor("op_9310_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_867_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_867_cast_fp16, y = var_9310_to_fp16)[name = tensor("aw_chunk_867_cast_fp16")]; tensor var_9312_to_fp16 = const()[name = tensor("op_9312_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_869_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_869_cast_fp16, y = var_9312_to_fp16)[name = tensor("aw_chunk_869_cast_fp16")]; tensor var_9314_to_fp16 = const()[name = tensor("op_9314_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_871_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_871_cast_fp16, y = var_9314_to_fp16)[name = tensor("aw_chunk_871_cast_fp16")]; tensor var_9316_to_fp16 = const()[name = tensor("op_9316_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_873_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_873_cast_fp16, y = var_9316_to_fp16)[name = tensor("aw_chunk_873_cast_fp16")]; tensor var_9318_to_fp16 = const()[name = tensor("op_9318_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_875_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_875_cast_fp16, y = var_9318_to_fp16)[name = tensor("aw_chunk_875_cast_fp16")]; tensor var_9320_to_fp16 = const()[name = tensor("op_9320_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_877_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_877_cast_fp16, y = var_9320_to_fp16)[name = tensor("aw_chunk_877_cast_fp16")]; tensor var_9322_to_fp16 = const()[name = tensor("op_9322_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_879_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_879_cast_fp16, y = var_9322_to_fp16)[name = tensor("aw_chunk_879_cast_fp16")]; tensor var_9324_to_fp16 = const()[name = tensor("op_9324_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_881_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_881_cast_fp16, y = var_9324_to_fp16)[name = tensor("aw_chunk_881_cast_fp16")]; tensor var_9326_to_fp16 = const()[name = tensor("op_9326_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_883_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_883_cast_fp16, y = var_9326_to_fp16)[name = tensor("aw_chunk_883_cast_fp16")]; tensor var_9328_to_fp16 = const()[name = tensor("op_9328_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_885_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_885_cast_fp16, y = var_9328_to_fp16)[name = tensor("aw_chunk_885_cast_fp16")]; tensor var_9330_to_fp16 = const()[name = tensor("op_9330_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_887_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_887_cast_fp16, y = var_9330_to_fp16)[name = tensor("aw_chunk_887_cast_fp16")]; tensor var_9332_to_fp16 = const()[name = tensor("op_9332_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_889_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_889_cast_fp16, y = var_9332_to_fp16)[name = tensor("aw_chunk_889_cast_fp16")]; tensor var_9334_to_fp16 = const()[name = tensor("op_9334_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_891_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_891_cast_fp16, y = var_9334_to_fp16)[name = tensor("aw_chunk_891_cast_fp16")]; tensor var_9336_to_fp16 = const()[name = tensor("op_9336_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_893_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_893_cast_fp16, y = var_9336_to_fp16)[name = tensor("aw_chunk_893_cast_fp16")]; tensor var_9338_to_fp16 = const()[name = tensor("op_9338_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_895_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_895_cast_fp16, y = var_9338_to_fp16)[name = tensor("aw_chunk_895_cast_fp16")]; tensor var_9340_to_fp16 = const()[name = tensor("op_9340_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_897_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_897_cast_fp16, y = var_9340_to_fp16)[name = tensor("aw_chunk_897_cast_fp16")]; tensor var_9342_to_fp16 = const()[name = tensor("op_9342_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_899_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_899_cast_fp16, y = var_9342_to_fp16)[name = tensor("aw_chunk_899_cast_fp16")]; tensor var_9344_to_fp16 = const()[name = tensor("op_9344_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_901_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_901_cast_fp16, y = var_9344_to_fp16)[name = tensor("aw_chunk_901_cast_fp16")]; tensor var_9346_to_fp16 = const()[name = tensor("op_9346_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_903_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_903_cast_fp16, y = var_9346_to_fp16)[name = tensor("aw_chunk_903_cast_fp16")]; tensor var_9348_to_fp16 = const()[name = tensor("op_9348_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_905_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_905_cast_fp16, y = var_9348_to_fp16)[name = tensor("aw_chunk_905_cast_fp16")]; tensor var_9350_to_fp16 = const()[name = tensor("op_9350_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_907_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_907_cast_fp16, y = var_9350_to_fp16)[name = tensor("aw_chunk_907_cast_fp16")]; tensor var_9352_to_fp16 = const()[name = tensor("op_9352_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_909_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_909_cast_fp16, y = var_9352_to_fp16)[name = tensor("aw_chunk_909_cast_fp16")]; tensor var_9354_to_fp16 = const()[name = tensor("op_9354_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_911_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_911_cast_fp16, y = var_9354_to_fp16)[name = tensor("aw_chunk_911_cast_fp16")]; tensor var_9356_to_fp16 = const()[name = tensor("op_9356_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_913_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_913_cast_fp16, y = var_9356_to_fp16)[name = tensor("aw_chunk_913_cast_fp16")]; tensor var_9358_to_fp16 = const()[name = tensor("op_9358_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_915_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_915_cast_fp16, y = var_9358_to_fp16)[name = tensor("aw_chunk_915_cast_fp16")]; tensor var_9360_to_fp16 = const()[name = tensor("op_9360_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_917_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_917_cast_fp16, y = var_9360_to_fp16)[name = tensor("aw_chunk_917_cast_fp16")]; tensor var_9362_to_fp16 = const()[name = tensor("op_9362_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_919_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_919_cast_fp16, y = var_9362_to_fp16)[name = tensor("aw_chunk_919_cast_fp16")]; tensor var_9364_to_fp16 = const()[name = tensor("op_9364_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_921_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_921_cast_fp16, y = var_9364_to_fp16)[name = tensor("aw_chunk_921_cast_fp16")]; tensor var_9366_to_fp16 = const()[name = tensor("op_9366_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_923_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_923_cast_fp16, y = var_9366_to_fp16)[name = tensor("aw_chunk_923_cast_fp16")]; tensor var_9368_to_fp16 = const()[name = tensor("op_9368_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_925_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_925_cast_fp16, y = var_9368_to_fp16)[name = tensor("aw_chunk_925_cast_fp16")]; tensor var_9370_to_fp16 = const()[name = tensor("op_9370_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_927_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_927_cast_fp16, y = var_9370_to_fp16)[name = tensor("aw_chunk_927_cast_fp16")]; tensor var_9372_to_fp16 = const()[name = tensor("op_9372_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_929_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_929_cast_fp16, y = var_9372_to_fp16)[name = tensor("aw_chunk_929_cast_fp16")]; tensor var_9374_to_fp16 = const()[name = tensor("op_9374_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_931_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_931_cast_fp16, y = var_9374_to_fp16)[name = tensor("aw_chunk_931_cast_fp16")]; tensor var_9376_to_fp16 = const()[name = tensor("op_9376_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_933_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_933_cast_fp16, y = var_9376_to_fp16)[name = tensor("aw_chunk_933_cast_fp16")]; tensor var_9378_to_fp16 = const()[name = tensor("op_9378_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_935_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_935_cast_fp16, y = var_9378_to_fp16)[name = tensor("aw_chunk_935_cast_fp16")]; tensor var_9380_to_fp16 = const()[name = tensor("op_9380_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_937_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_937_cast_fp16, y = var_9380_to_fp16)[name = tensor("aw_chunk_937_cast_fp16")]; tensor var_9382_to_fp16 = const()[name = tensor("op_9382_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_939_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_939_cast_fp16, y = var_9382_to_fp16)[name = tensor("aw_chunk_939_cast_fp16")]; tensor var_9384_to_fp16 = const()[name = tensor("op_9384_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_941_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_941_cast_fp16, y = var_9384_to_fp16)[name = tensor("aw_chunk_941_cast_fp16")]; tensor var_9386_to_fp16 = const()[name = tensor("op_9386_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_943_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_943_cast_fp16, y = var_9386_to_fp16)[name = tensor("aw_chunk_943_cast_fp16")]; tensor var_9388_to_fp16 = const()[name = tensor("op_9388_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_945_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_945_cast_fp16, y = var_9388_to_fp16)[name = tensor("aw_chunk_945_cast_fp16")]; tensor var_9390_to_fp16 = const()[name = tensor("op_9390_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_947_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_947_cast_fp16, y = var_9390_to_fp16)[name = tensor("aw_chunk_947_cast_fp16")]; tensor var_9392_to_fp16 = const()[name = tensor("op_9392_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_949_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_949_cast_fp16, y = var_9392_to_fp16)[name = tensor("aw_chunk_949_cast_fp16")]; tensor var_9394_to_fp16 = const()[name = tensor("op_9394_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_951_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_951_cast_fp16, y = var_9394_to_fp16)[name = tensor("aw_chunk_951_cast_fp16")]; tensor var_9396_to_fp16 = const()[name = tensor("op_9396_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_953_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_953_cast_fp16, y = var_9396_to_fp16)[name = tensor("aw_chunk_953_cast_fp16")]; tensor var_9398_to_fp16 = const()[name = tensor("op_9398_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_955_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_955_cast_fp16, y = var_9398_to_fp16)[name = tensor("aw_chunk_955_cast_fp16")]; tensor var_9400_to_fp16 = const()[name = tensor("op_9400_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_957_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_957_cast_fp16, y = var_9400_to_fp16)[name = tensor("aw_chunk_957_cast_fp16")]; tensor var_9402_to_fp16 = const()[name = tensor("op_9402_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_959_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_959_cast_fp16, y = var_9402_to_fp16)[name = tensor("aw_chunk_959_cast_fp16")]; tensor var_9404_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_801_cast_fp16)[name = tensor("op_9404_cast_fp16")]; tensor var_9405_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_803_cast_fp16)[name = tensor("op_9405_cast_fp16")]; tensor var_9406_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_805_cast_fp16)[name = tensor("op_9406_cast_fp16")]; tensor var_9407_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_807_cast_fp16)[name = tensor("op_9407_cast_fp16")]; tensor var_9408_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_809_cast_fp16)[name = tensor("op_9408_cast_fp16")]; tensor var_9409_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_811_cast_fp16)[name = tensor("op_9409_cast_fp16")]; tensor var_9410_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_813_cast_fp16)[name = tensor("op_9410_cast_fp16")]; tensor var_9411_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_815_cast_fp16)[name = tensor("op_9411_cast_fp16")]; tensor var_9412_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_817_cast_fp16)[name = tensor("op_9412_cast_fp16")]; tensor var_9413_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_819_cast_fp16)[name = tensor("op_9413_cast_fp16")]; tensor var_9414_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_821_cast_fp16)[name = tensor("op_9414_cast_fp16")]; tensor var_9415_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_823_cast_fp16)[name = tensor("op_9415_cast_fp16")]; tensor var_9416_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_825_cast_fp16)[name = tensor("op_9416_cast_fp16")]; tensor var_9417_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_827_cast_fp16)[name = tensor("op_9417_cast_fp16")]; tensor var_9418_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_829_cast_fp16)[name = tensor("op_9418_cast_fp16")]; tensor var_9419_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_831_cast_fp16)[name = tensor("op_9419_cast_fp16")]; tensor var_9420_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_833_cast_fp16)[name = tensor("op_9420_cast_fp16")]; tensor var_9421_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_835_cast_fp16)[name = tensor("op_9421_cast_fp16")]; tensor var_9422_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_837_cast_fp16)[name = tensor("op_9422_cast_fp16")]; tensor var_9423_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_839_cast_fp16)[name = tensor("op_9423_cast_fp16")]; tensor var_9424_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_841_cast_fp16)[name = tensor("op_9424_cast_fp16")]; tensor var_9425_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_843_cast_fp16)[name = tensor("op_9425_cast_fp16")]; tensor var_9426_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_845_cast_fp16)[name = tensor("op_9426_cast_fp16")]; tensor var_9427_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_847_cast_fp16)[name = tensor("op_9427_cast_fp16")]; tensor var_9428_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_849_cast_fp16)[name = tensor("op_9428_cast_fp16")]; tensor var_9429_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_851_cast_fp16)[name = tensor("op_9429_cast_fp16")]; tensor var_9430_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_853_cast_fp16)[name = tensor("op_9430_cast_fp16")]; tensor var_9431_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_855_cast_fp16)[name = tensor("op_9431_cast_fp16")]; tensor var_9432_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_857_cast_fp16)[name = tensor("op_9432_cast_fp16")]; tensor var_9433_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_859_cast_fp16)[name = tensor("op_9433_cast_fp16")]; tensor var_9434_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_861_cast_fp16)[name = tensor("op_9434_cast_fp16")]; tensor var_9435_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_863_cast_fp16)[name = tensor("op_9435_cast_fp16")]; tensor var_9436_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_865_cast_fp16)[name = tensor("op_9436_cast_fp16")]; tensor var_9437_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_867_cast_fp16)[name = tensor("op_9437_cast_fp16")]; tensor var_9438_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_869_cast_fp16)[name = tensor("op_9438_cast_fp16")]; tensor var_9439_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_871_cast_fp16)[name = tensor("op_9439_cast_fp16")]; tensor var_9440_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_873_cast_fp16)[name = tensor("op_9440_cast_fp16")]; tensor var_9441_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_875_cast_fp16)[name = tensor("op_9441_cast_fp16")]; tensor var_9442_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_877_cast_fp16)[name = tensor("op_9442_cast_fp16")]; tensor var_9443_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_879_cast_fp16)[name = tensor("op_9443_cast_fp16")]; tensor var_9444_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_881_cast_fp16)[name = tensor("op_9444_cast_fp16")]; tensor var_9445_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_883_cast_fp16)[name = tensor("op_9445_cast_fp16")]; tensor var_9446_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_885_cast_fp16)[name = tensor("op_9446_cast_fp16")]; tensor var_9447_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_887_cast_fp16)[name = tensor("op_9447_cast_fp16")]; tensor var_9448_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_889_cast_fp16)[name = tensor("op_9448_cast_fp16")]; tensor var_9449_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_891_cast_fp16)[name = tensor("op_9449_cast_fp16")]; tensor var_9450_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_893_cast_fp16)[name = tensor("op_9450_cast_fp16")]; tensor var_9451_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_895_cast_fp16)[name = tensor("op_9451_cast_fp16")]; tensor var_9452_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_897_cast_fp16)[name = tensor("op_9452_cast_fp16")]; tensor var_9453_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_899_cast_fp16)[name = tensor("op_9453_cast_fp16")]; tensor var_9454_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_901_cast_fp16)[name = tensor("op_9454_cast_fp16")]; tensor var_9455_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_903_cast_fp16)[name = tensor("op_9455_cast_fp16")]; tensor var_9456_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_905_cast_fp16)[name = tensor("op_9456_cast_fp16")]; tensor var_9457_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_907_cast_fp16)[name = tensor("op_9457_cast_fp16")]; tensor var_9458_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_909_cast_fp16)[name = tensor("op_9458_cast_fp16")]; tensor var_9459_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_911_cast_fp16)[name = tensor("op_9459_cast_fp16")]; tensor var_9460_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_913_cast_fp16)[name = tensor("op_9460_cast_fp16")]; tensor var_9461_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_915_cast_fp16)[name = tensor("op_9461_cast_fp16")]; tensor var_9462_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_917_cast_fp16)[name = tensor("op_9462_cast_fp16")]; tensor var_9463_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_919_cast_fp16)[name = tensor("op_9463_cast_fp16")]; tensor var_9464_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_921_cast_fp16)[name = tensor("op_9464_cast_fp16")]; tensor var_9465_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_923_cast_fp16)[name = tensor("op_9465_cast_fp16")]; tensor var_9466_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_925_cast_fp16)[name = tensor("op_9466_cast_fp16")]; tensor var_9467_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_927_cast_fp16)[name = tensor("op_9467_cast_fp16")]; tensor var_9468_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_929_cast_fp16)[name = tensor("op_9468_cast_fp16")]; tensor var_9469_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_931_cast_fp16)[name = tensor("op_9469_cast_fp16")]; tensor var_9470_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_933_cast_fp16)[name = tensor("op_9470_cast_fp16")]; tensor var_9471_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_935_cast_fp16)[name = tensor("op_9471_cast_fp16")]; tensor var_9472_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_937_cast_fp16)[name = tensor("op_9472_cast_fp16")]; tensor var_9473_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_939_cast_fp16)[name = tensor("op_9473_cast_fp16")]; tensor var_9474_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_941_cast_fp16)[name = tensor("op_9474_cast_fp16")]; tensor var_9475_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_943_cast_fp16)[name = tensor("op_9475_cast_fp16")]; tensor var_9476_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_945_cast_fp16)[name = tensor("op_9476_cast_fp16")]; tensor var_9477_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_947_cast_fp16)[name = tensor("op_9477_cast_fp16")]; tensor var_9478_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_949_cast_fp16)[name = tensor("op_9478_cast_fp16")]; tensor var_9479_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_951_cast_fp16)[name = tensor("op_9479_cast_fp16")]; tensor var_9480_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_953_cast_fp16)[name = tensor("op_9480_cast_fp16")]; tensor var_9481_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_955_cast_fp16)[name = tensor("op_9481_cast_fp16")]; tensor var_9482_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_957_cast_fp16)[name = tensor("op_9482_cast_fp16")]; tensor var_9483_cast_fp16 = softmax(axis = var_8202, x = aw_chunk_959_cast_fp16)[name = tensor("op_9483_cast_fp16")]; tensor var_9485_equation_0 = const()[name = tensor("op_9485_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9485_cast_fp16 = einsum(equation = var_9485_equation_0, values = (var_9005_cast_fp16, var_9404_cast_fp16))[name = tensor("op_9485_cast_fp16")]; tensor var_9487_equation_0 = const()[name = tensor("op_9487_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9487_cast_fp16 = einsum(equation = var_9487_equation_0, values = (var_9005_cast_fp16, var_9405_cast_fp16))[name = tensor("op_9487_cast_fp16")]; tensor var_9489_equation_0 = const()[name = tensor("op_9489_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9489_cast_fp16 = einsum(equation = var_9489_equation_0, values = (var_9005_cast_fp16, var_9406_cast_fp16))[name = tensor("op_9489_cast_fp16")]; tensor var_9491_equation_0 = const()[name = tensor("op_9491_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9491_cast_fp16 = einsum(equation = var_9491_equation_0, values = (var_9005_cast_fp16, var_9407_cast_fp16))[name = tensor("op_9491_cast_fp16")]; tensor var_9493_equation_0 = const()[name = tensor("op_9493_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9493_cast_fp16 = einsum(equation = var_9493_equation_0, values = (var_9009_cast_fp16, var_9408_cast_fp16))[name = tensor("op_9493_cast_fp16")]; tensor var_9495_equation_0 = const()[name = tensor("op_9495_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9495_cast_fp16 = einsum(equation = var_9495_equation_0, values = (var_9009_cast_fp16, var_9409_cast_fp16))[name = tensor("op_9495_cast_fp16")]; tensor var_9497_equation_0 = const()[name = tensor("op_9497_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9497_cast_fp16 = einsum(equation = var_9497_equation_0, values = (var_9009_cast_fp16, var_9410_cast_fp16))[name = tensor("op_9497_cast_fp16")]; tensor var_9499_equation_0 = const()[name = tensor("op_9499_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9499_cast_fp16 = einsum(equation = var_9499_equation_0, values = (var_9009_cast_fp16, var_9411_cast_fp16))[name = tensor("op_9499_cast_fp16")]; tensor var_9501_equation_0 = const()[name = tensor("op_9501_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9501_cast_fp16 = einsum(equation = var_9501_equation_0, values = (var_9013_cast_fp16, var_9412_cast_fp16))[name = tensor("op_9501_cast_fp16")]; tensor var_9503_equation_0 = const()[name = tensor("op_9503_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9503_cast_fp16 = einsum(equation = var_9503_equation_0, values = (var_9013_cast_fp16, var_9413_cast_fp16))[name = tensor("op_9503_cast_fp16")]; tensor var_9505_equation_0 = const()[name = tensor("op_9505_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9505_cast_fp16 = einsum(equation = var_9505_equation_0, values = (var_9013_cast_fp16, var_9414_cast_fp16))[name = tensor("op_9505_cast_fp16")]; tensor var_9507_equation_0 = const()[name = tensor("op_9507_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9507_cast_fp16 = einsum(equation = var_9507_equation_0, values = (var_9013_cast_fp16, var_9415_cast_fp16))[name = tensor("op_9507_cast_fp16")]; tensor var_9509_equation_0 = const()[name = tensor("op_9509_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9509_cast_fp16 = einsum(equation = var_9509_equation_0, values = (var_9017_cast_fp16, var_9416_cast_fp16))[name = tensor("op_9509_cast_fp16")]; tensor var_9511_equation_0 = const()[name = tensor("op_9511_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9511_cast_fp16 = einsum(equation = var_9511_equation_0, values = (var_9017_cast_fp16, var_9417_cast_fp16))[name = tensor("op_9511_cast_fp16")]; tensor var_9513_equation_0 = const()[name = tensor("op_9513_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9513_cast_fp16 = einsum(equation = var_9513_equation_0, values = (var_9017_cast_fp16, var_9418_cast_fp16))[name = tensor("op_9513_cast_fp16")]; tensor var_9515_equation_0 = const()[name = tensor("op_9515_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9515_cast_fp16 = einsum(equation = var_9515_equation_0, values = (var_9017_cast_fp16, var_9419_cast_fp16))[name = tensor("op_9515_cast_fp16")]; tensor var_9517_equation_0 = const()[name = tensor("op_9517_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9517_cast_fp16 = einsum(equation = var_9517_equation_0, values = (var_9021_cast_fp16, var_9420_cast_fp16))[name = tensor("op_9517_cast_fp16")]; tensor var_9519_equation_0 = const()[name = tensor("op_9519_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9519_cast_fp16 = einsum(equation = var_9519_equation_0, values = (var_9021_cast_fp16, var_9421_cast_fp16))[name = tensor("op_9519_cast_fp16")]; tensor var_9521_equation_0 = const()[name = tensor("op_9521_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9521_cast_fp16 = einsum(equation = var_9521_equation_0, values = (var_9021_cast_fp16, var_9422_cast_fp16))[name = tensor("op_9521_cast_fp16")]; tensor var_9523_equation_0 = const()[name = tensor("op_9523_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9523_cast_fp16 = einsum(equation = var_9523_equation_0, values = (var_9021_cast_fp16, var_9423_cast_fp16))[name = tensor("op_9523_cast_fp16")]; tensor var_9525_equation_0 = const()[name = tensor("op_9525_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9525_cast_fp16 = einsum(equation = var_9525_equation_0, values = (var_9025_cast_fp16, var_9424_cast_fp16))[name = tensor("op_9525_cast_fp16")]; tensor var_9527_equation_0 = const()[name = tensor("op_9527_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9527_cast_fp16 = einsum(equation = var_9527_equation_0, values = (var_9025_cast_fp16, var_9425_cast_fp16))[name = tensor("op_9527_cast_fp16")]; tensor var_9529_equation_0 = const()[name = tensor("op_9529_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9529_cast_fp16 = einsum(equation = var_9529_equation_0, values = (var_9025_cast_fp16, var_9426_cast_fp16))[name = tensor("op_9529_cast_fp16")]; tensor var_9531_equation_0 = const()[name = tensor("op_9531_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9531_cast_fp16 = einsum(equation = var_9531_equation_0, values = (var_9025_cast_fp16, var_9427_cast_fp16))[name = tensor("op_9531_cast_fp16")]; tensor var_9533_equation_0 = const()[name = tensor("op_9533_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9533_cast_fp16 = einsum(equation = var_9533_equation_0, values = (var_9029_cast_fp16, var_9428_cast_fp16))[name = tensor("op_9533_cast_fp16")]; tensor var_9535_equation_0 = const()[name = tensor("op_9535_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9535_cast_fp16 = einsum(equation = var_9535_equation_0, values = (var_9029_cast_fp16, var_9429_cast_fp16))[name = tensor("op_9535_cast_fp16")]; tensor var_9537_equation_0 = const()[name = tensor("op_9537_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9537_cast_fp16 = einsum(equation = var_9537_equation_0, values = (var_9029_cast_fp16, var_9430_cast_fp16))[name = tensor("op_9537_cast_fp16")]; tensor var_9539_equation_0 = const()[name = tensor("op_9539_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9539_cast_fp16 = einsum(equation = var_9539_equation_0, values = (var_9029_cast_fp16, var_9431_cast_fp16))[name = tensor("op_9539_cast_fp16")]; tensor var_9541_equation_0 = const()[name = tensor("op_9541_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9541_cast_fp16 = einsum(equation = var_9541_equation_0, values = (var_9033_cast_fp16, var_9432_cast_fp16))[name = tensor("op_9541_cast_fp16")]; tensor var_9543_equation_0 = const()[name = tensor("op_9543_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9543_cast_fp16 = einsum(equation = var_9543_equation_0, values = (var_9033_cast_fp16, var_9433_cast_fp16))[name = tensor("op_9543_cast_fp16")]; tensor var_9545_equation_0 = const()[name = tensor("op_9545_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9545_cast_fp16 = einsum(equation = var_9545_equation_0, values = (var_9033_cast_fp16, var_9434_cast_fp16))[name = tensor("op_9545_cast_fp16")]; tensor var_9547_equation_0 = const()[name = tensor("op_9547_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9547_cast_fp16 = einsum(equation = var_9547_equation_0, values = (var_9033_cast_fp16, var_9435_cast_fp16))[name = tensor("op_9547_cast_fp16")]; tensor var_9549_equation_0 = const()[name = tensor("op_9549_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9549_cast_fp16 = einsum(equation = var_9549_equation_0, values = (var_9037_cast_fp16, var_9436_cast_fp16))[name = tensor("op_9549_cast_fp16")]; tensor var_9551_equation_0 = const()[name = tensor("op_9551_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9551_cast_fp16 = einsum(equation = var_9551_equation_0, values = (var_9037_cast_fp16, var_9437_cast_fp16))[name = tensor("op_9551_cast_fp16")]; tensor var_9553_equation_0 = const()[name = tensor("op_9553_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9553_cast_fp16 = einsum(equation = var_9553_equation_0, values = (var_9037_cast_fp16, var_9438_cast_fp16))[name = tensor("op_9553_cast_fp16")]; tensor var_9555_equation_0 = const()[name = tensor("op_9555_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9555_cast_fp16 = einsum(equation = var_9555_equation_0, values = (var_9037_cast_fp16, var_9439_cast_fp16))[name = tensor("op_9555_cast_fp16")]; tensor var_9557_equation_0 = const()[name = tensor("op_9557_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9557_cast_fp16 = einsum(equation = var_9557_equation_0, values = (var_9041_cast_fp16, var_9440_cast_fp16))[name = tensor("op_9557_cast_fp16")]; tensor var_9559_equation_0 = const()[name = tensor("op_9559_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9559_cast_fp16 = einsum(equation = var_9559_equation_0, values = (var_9041_cast_fp16, var_9441_cast_fp16))[name = tensor("op_9559_cast_fp16")]; tensor var_9561_equation_0 = const()[name = tensor("op_9561_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9561_cast_fp16 = einsum(equation = var_9561_equation_0, values = (var_9041_cast_fp16, var_9442_cast_fp16))[name = tensor("op_9561_cast_fp16")]; tensor var_9563_equation_0 = const()[name = tensor("op_9563_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9563_cast_fp16 = einsum(equation = var_9563_equation_0, values = (var_9041_cast_fp16, var_9443_cast_fp16))[name = tensor("op_9563_cast_fp16")]; tensor var_9565_equation_0 = const()[name = tensor("op_9565_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9565_cast_fp16 = einsum(equation = var_9565_equation_0, values = (var_9045_cast_fp16, var_9444_cast_fp16))[name = tensor("op_9565_cast_fp16")]; tensor var_9567_equation_0 = const()[name = tensor("op_9567_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9567_cast_fp16 = einsum(equation = var_9567_equation_0, values = (var_9045_cast_fp16, var_9445_cast_fp16))[name = tensor("op_9567_cast_fp16")]; tensor var_9569_equation_0 = const()[name = tensor("op_9569_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9569_cast_fp16 = einsum(equation = var_9569_equation_0, values = (var_9045_cast_fp16, var_9446_cast_fp16))[name = tensor("op_9569_cast_fp16")]; tensor var_9571_equation_0 = const()[name = tensor("op_9571_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9571_cast_fp16 = einsum(equation = var_9571_equation_0, values = (var_9045_cast_fp16, var_9447_cast_fp16))[name = tensor("op_9571_cast_fp16")]; tensor var_9573_equation_0 = const()[name = tensor("op_9573_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9573_cast_fp16 = einsum(equation = var_9573_equation_0, values = (var_9049_cast_fp16, var_9448_cast_fp16))[name = tensor("op_9573_cast_fp16")]; tensor var_9575_equation_0 = const()[name = tensor("op_9575_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9575_cast_fp16 = einsum(equation = var_9575_equation_0, values = (var_9049_cast_fp16, var_9449_cast_fp16))[name = tensor("op_9575_cast_fp16")]; tensor var_9577_equation_0 = const()[name = tensor("op_9577_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9577_cast_fp16 = einsum(equation = var_9577_equation_0, values = (var_9049_cast_fp16, var_9450_cast_fp16))[name = tensor("op_9577_cast_fp16")]; tensor var_9579_equation_0 = const()[name = tensor("op_9579_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9579_cast_fp16 = einsum(equation = var_9579_equation_0, values = (var_9049_cast_fp16, var_9451_cast_fp16))[name = tensor("op_9579_cast_fp16")]; tensor var_9581_equation_0 = const()[name = tensor("op_9581_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9581_cast_fp16 = einsum(equation = var_9581_equation_0, values = (var_9053_cast_fp16, var_9452_cast_fp16))[name = tensor("op_9581_cast_fp16")]; tensor var_9583_equation_0 = const()[name = tensor("op_9583_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9583_cast_fp16 = einsum(equation = var_9583_equation_0, values = (var_9053_cast_fp16, var_9453_cast_fp16))[name = tensor("op_9583_cast_fp16")]; tensor var_9585_equation_0 = const()[name = tensor("op_9585_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9585_cast_fp16 = einsum(equation = var_9585_equation_0, values = (var_9053_cast_fp16, var_9454_cast_fp16))[name = tensor("op_9585_cast_fp16")]; tensor var_9587_equation_0 = const()[name = tensor("op_9587_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9587_cast_fp16 = einsum(equation = var_9587_equation_0, values = (var_9053_cast_fp16, var_9455_cast_fp16))[name = tensor("op_9587_cast_fp16")]; tensor var_9589_equation_0 = const()[name = tensor("op_9589_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9589_cast_fp16 = einsum(equation = var_9589_equation_0, values = (var_9057_cast_fp16, var_9456_cast_fp16))[name = tensor("op_9589_cast_fp16")]; tensor var_9591_equation_0 = const()[name = tensor("op_9591_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9591_cast_fp16 = einsum(equation = var_9591_equation_0, values = (var_9057_cast_fp16, var_9457_cast_fp16))[name = tensor("op_9591_cast_fp16")]; tensor var_9593_equation_0 = const()[name = tensor("op_9593_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9593_cast_fp16 = einsum(equation = var_9593_equation_0, values = (var_9057_cast_fp16, var_9458_cast_fp16))[name = tensor("op_9593_cast_fp16")]; tensor var_9595_equation_0 = const()[name = tensor("op_9595_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9595_cast_fp16 = einsum(equation = var_9595_equation_0, values = (var_9057_cast_fp16, var_9459_cast_fp16))[name = tensor("op_9595_cast_fp16")]; tensor var_9597_equation_0 = const()[name = tensor("op_9597_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9597_cast_fp16 = einsum(equation = var_9597_equation_0, values = (var_9061_cast_fp16, var_9460_cast_fp16))[name = tensor("op_9597_cast_fp16")]; tensor var_9599_equation_0 = const()[name = tensor("op_9599_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9599_cast_fp16 = einsum(equation = var_9599_equation_0, values = (var_9061_cast_fp16, var_9461_cast_fp16))[name = tensor("op_9599_cast_fp16")]; tensor var_9601_equation_0 = const()[name = tensor("op_9601_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9601_cast_fp16 = einsum(equation = var_9601_equation_0, values = (var_9061_cast_fp16, var_9462_cast_fp16))[name = tensor("op_9601_cast_fp16")]; tensor var_9603_equation_0 = const()[name = tensor("op_9603_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9603_cast_fp16 = einsum(equation = var_9603_equation_0, values = (var_9061_cast_fp16, var_9463_cast_fp16))[name = tensor("op_9603_cast_fp16")]; tensor var_9605_equation_0 = const()[name = tensor("op_9605_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9605_cast_fp16 = einsum(equation = var_9605_equation_0, values = (var_9065_cast_fp16, var_9464_cast_fp16))[name = tensor("op_9605_cast_fp16")]; tensor var_9607_equation_0 = const()[name = tensor("op_9607_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9607_cast_fp16 = einsum(equation = var_9607_equation_0, values = (var_9065_cast_fp16, var_9465_cast_fp16))[name = tensor("op_9607_cast_fp16")]; tensor var_9609_equation_0 = const()[name = tensor("op_9609_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9609_cast_fp16 = einsum(equation = var_9609_equation_0, values = (var_9065_cast_fp16, var_9466_cast_fp16))[name = tensor("op_9609_cast_fp16")]; tensor var_9611_equation_0 = const()[name = tensor("op_9611_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9611_cast_fp16 = einsum(equation = var_9611_equation_0, values = (var_9065_cast_fp16, var_9467_cast_fp16))[name = tensor("op_9611_cast_fp16")]; tensor var_9613_equation_0 = const()[name = tensor("op_9613_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9613_cast_fp16 = einsum(equation = var_9613_equation_0, values = (var_9069_cast_fp16, var_9468_cast_fp16))[name = tensor("op_9613_cast_fp16")]; tensor var_9615_equation_0 = const()[name = tensor("op_9615_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9615_cast_fp16 = einsum(equation = var_9615_equation_0, values = (var_9069_cast_fp16, var_9469_cast_fp16))[name = tensor("op_9615_cast_fp16")]; tensor var_9617_equation_0 = const()[name = tensor("op_9617_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9617_cast_fp16 = einsum(equation = var_9617_equation_0, values = (var_9069_cast_fp16, var_9470_cast_fp16))[name = tensor("op_9617_cast_fp16")]; tensor var_9619_equation_0 = const()[name = tensor("op_9619_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9619_cast_fp16 = einsum(equation = var_9619_equation_0, values = (var_9069_cast_fp16, var_9471_cast_fp16))[name = tensor("op_9619_cast_fp16")]; tensor var_9621_equation_0 = const()[name = tensor("op_9621_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9621_cast_fp16 = einsum(equation = var_9621_equation_0, values = (var_9073_cast_fp16, var_9472_cast_fp16))[name = tensor("op_9621_cast_fp16")]; tensor var_9623_equation_0 = const()[name = tensor("op_9623_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9623_cast_fp16 = einsum(equation = var_9623_equation_0, values = (var_9073_cast_fp16, var_9473_cast_fp16))[name = tensor("op_9623_cast_fp16")]; tensor var_9625_equation_0 = const()[name = tensor("op_9625_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9625_cast_fp16 = einsum(equation = var_9625_equation_0, values = (var_9073_cast_fp16, var_9474_cast_fp16))[name = tensor("op_9625_cast_fp16")]; tensor var_9627_equation_0 = const()[name = tensor("op_9627_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9627_cast_fp16 = einsum(equation = var_9627_equation_0, values = (var_9073_cast_fp16, var_9475_cast_fp16))[name = tensor("op_9627_cast_fp16")]; tensor var_9629_equation_0 = const()[name = tensor("op_9629_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9629_cast_fp16 = einsum(equation = var_9629_equation_0, values = (var_9077_cast_fp16, var_9476_cast_fp16))[name = tensor("op_9629_cast_fp16")]; tensor var_9631_equation_0 = const()[name = tensor("op_9631_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9631_cast_fp16 = einsum(equation = var_9631_equation_0, values = (var_9077_cast_fp16, var_9477_cast_fp16))[name = tensor("op_9631_cast_fp16")]; tensor var_9633_equation_0 = const()[name = tensor("op_9633_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9633_cast_fp16 = einsum(equation = var_9633_equation_0, values = (var_9077_cast_fp16, var_9478_cast_fp16))[name = tensor("op_9633_cast_fp16")]; tensor var_9635_equation_0 = const()[name = tensor("op_9635_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9635_cast_fp16 = einsum(equation = var_9635_equation_0, values = (var_9077_cast_fp16, var_9479_cast_fp16))[name = tensor("op_9635_cast_fp16")]; tensor var_9637_equation_0 = const()[name = tensor("op_9637_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9637_cast_fp16 = einsum(equation = var_9637_equation_0, values = (var_9081_cast_fp16, var_9480_cast_fp16))[name = tensor("op_9637_cast_fp16")]; tensor var_9639_equation_0 = const()[name = tensor("op_9639_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9639_cast_fp16 = einsum(equation = var_9639_equation_0, values = (var_9081_cast_fp16, var_9481_cast_fp16))[name = tensor("op_9639_cast_fp16")]; tensor var_9641_equation_0 = const()[name = tensor("op_9641_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9641_cast_fp16 = einsum(equation = var_9641_equation_0, values = (var_9081_cast_fp16, var_9482_cast_fp16))[name = tensor("op_9641_cast_fp16")]; tensor var_9643_equation_0 = const()[name = tensor("op_9643_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_9643_cast_fp16 = einsum(equation = var_9643_equation_0, values = (var_9081_cast_fp16, var_9483_cast_fp16))[name = tensor("op_9643_cast_fp16")]; tensor var_9645_interleave_0 = const()[name = tensor("op_9645_interleave_0"), val = tensor(false)]; tensor var_9645_cast_fp16 = concat(axis = var_8177, interleave = var_9645_interleave_0, values = (var_9485_cast_fp16, var_9487_cast_fp16, var_9489_cast_fp16, var_9491_cast_fp16))[name = tensor("op_9645_cast_fp16")]; tensor var_9647_interleave_0 = const()[name = tensor("op_9647_interleave_0"), val = tensor(false)]; tensor var_9647_cast_fp16 = concat(axis = var_8177, interleave = var_9647_interleave_0, values = (var_9493_cast_fp16, var_9495_cast_fp16, var_9497_cast_fp16, var_9499_cast_fp16))[name = tensor("op_9647_cast_fp16")]; tensor var_9649_interleave_0 = const()[name = tensor("op_9649_interleave_0"), val = tensor(false)]; tensor var_9649_cast_fp16 = concat(axis = var_8177, interleave = var_9649_interleave_0, values = (var_9501_cast_fp16, var_9503_cast_fp16, var_9505_cast_fp16, var_9507_cast_fp16))[name = tensor("op_9649_cast_fp16")]; tensor var_9651_interleave_0 = const()[name = tensor("op_9651_interleave_0"), val = tensor(false)]; tensor var_9651_cast_fp16 = concat(axis = var_8177, interleave = var_9651_interleave_0, values = (var_9509_cast_fp16, var_9511_cast_fp16, var_9513_cast_fp16, var_9515_cast_fp16))[name = tensor("op_9651_cast_fp16")]; tensor var_9653_interleave_0 = const()[name = tensor("op_9653_interleave_0"), val = tensor(false)]; tensor var_9653_cast_fp16 = concat(axis = var_8177, interleave = var_9653_interleave_0, values = (var_9517_cast_fp16, var_9519_cast_fp16, var_9521_cast_fp16, var_9523_cast_fp16))[name = tensor("op_9653_cast_fp16")]; tensor var_9655_interleave_0 = const()[name = tensor("op_9655_interleave_0"), val = tensor(false)]; tensor var_9655_cast_fp16 = concat(axis = var_8177, interleave = var_9655_interleave_0, values = (var_9525_cast_fp16, var_9527_cast_fp16, var_9529_cast_fp16, var_9531_cast_fp16))[name = tensor("op_9655_cast_fp16")]; tensor var_9657_interleave_0 = const()[name = tensor("op_9657_interleave_0"), val = tensor(false)]; tensor var_9657_cast_fp16 = concat(axis = var_8177, interleave = var_9657_interleave_0, values = (var_9533_cast_fp16, var_9535_cast_fp16, var_9537_cast_fp16, var_9539_cast_fp16))[name = tensor("op_9657_cast_fp16")]; tensor var_9659_interleave_0 = const()[name = tensor("op_9659_interleave_0"), val = tensor(false)]; tensor var_9659_cast_fp16 = concat(axis = var_8177, interleave = var_9659_interleave_0, values = (var_9541_cast_fp16, var_9543_cast_fp16, var_9545_cast_fp16, var_9547_cast_fp16))[name = tensor("op_9659_cast_fp16")]; tensor var_9661_interleave_0 = const()[name = tensor("op_9661_interleave_0"), val = tensor(false)]; tensor var_9661_cast_fp16 = concat(axis = var_8177, interleave = var_9661_interleave_0, values = (var_9549_cast_fp16, var_9551_cast_fp16, var_9553_cast_fp16, var_9555_cast_fp16))[name = tensor("op_9661_cast_fp16")]; tensor var_9663_interleave_0 = const()[name = tensor("op_9663_interleave_0"), val = tensor(false)]; tensor var_9663_cast_fp16 = concat(axis = var_8177, interleave = var_9663_interleave_0, values = (var_9557_cast_fp16, var_9559_cast_fp16, var_9561_cast_fp16, var_9563_cast_fp16))[name = tensor("op_9663_cast_fp16")]; tensor var_9665_interleave_0 = const()[name = tensor("op_9665_interleave_0"), val = tensor(false)]; tensor var_9665_cast_fp16 = concat(axis = var_8177, interleave = var_9665_interleave_0, values = (var_9565_cast_fp16, var_9567_cast_fp16, var_9569_cast_fp16, var_9571_cast_fp16))[name = tensor("op_9665_cast_fp16")]; tensor var_9667_interleave_0 = const()[name = tensor("op_9667_interleave_0"), val = tensor(false)]; tensor var_9667_cast_fp16 = concat(axis = var_8177, interleave = var_9667_interleave_0, values = (var_9573_cast_fp16, var_9575_cast_fp16, var_9577_cast_fp16, var_9579_cast_fp16))[name = tensor("op_9667_cast_fp16")]; tensor var_9669_interleave_0 = const()[name = tensor("op_9669_interleave_0"), val = tensor(false)]; tensor var_9669_cast_fp16 = concat(axis = var_8177, interleave = var_9669_interleave_0, values = (var_9581_cast_fp16, var_9583_cast_fp16, var_9585_cast_fp16, var_9587_cast_fp16))[name = tensor("op_9669_cast_fp16")]; tensor var_9671_interleave_0 = const()[name = tensor("op_9671_interleave_0"), val = tensor(false)]; tensor var_9671_cast_fp16 = concat(axis = var_8177, interleave = var_9671_interleave_0, values = (var_9589_cast_fp16, var_9591_cast_fp16, var_9593_cast_fp16, var_9595_cast_fp16))[name = tensor("op_9671_cast_fp16")]; tensor var_9673_interleave_0 = const()[name = tensor("op_9673_interleave_0"), val = tensor(false)]; tensor var_9673_cast_fp16 = concat(axis = var_8177, interleave = var_9673_interleave_0, values = (var_9597_cast_fp16, var_9599_cast_fp16, var_9601_cast_fp16, var_9603_cast_fp16))[name = tensor("op_9673_cast_fp16")]; tensor var_9675_interleave_0 = const()[name = tensor("op_9675_interleave_0"), val = tensor(false)]; tensor var_9675_cast_fp16 = concat(axis = var_8177, interleave = var_9675_interleave_0, values = (var_9605_cast_fp16, var_9607_cast_fp16, var_9609_cast_fp16, var_9611_cast_fp16))[name = tensor("op_9675_cast_fp16")]; tensor var_9677_interleave_0 = const()[name = tensor("op_9677_interleave_0"), val = tensor(false)]; tensor var_9677_cast_fp16 = concat(axis = var_8177, interleave = var_9677_interleave_0, values = (var_9613_cast_fp16, var_9615_cast_fp16, var_9617_cast_fp16, var_9619_cast_fp16))[name = tensor("op_9677_cast_fp16")]; tensor var_9679_interleave_0 = const()[name = tensor("op_9679_interleave_0"), val = tensor(false)]; tensor var_9679_cast_fp16 = concat(axis = var_8177, interleave = var_9679_interleave_0, values = (var_9621_cast_fp16, var_9623_cast_fp16, var_9625_cast_fp16, var_9627_cast_fp16))[name = tensor("op_9679_cast_fp16")]; tensor var_9681_interleave_0 = const()[name = tensor("op_9681_interleave_0"), val = tensor(false)]; tensor var_9681_cast_fp16 = concat(axis = var_8177, interleave = var_9681_interleave_0, values = (var_9629_cast_fp16, var_9631_cast_fp16, var_9633_cast_fp16, var_9635_cast_fp16))[name = tensor("op_9681_cast_fp16")]; tensor var_9683_interleave_0 = const()[name = tensor("op_9683_interleave_0"), val = tensor(false)]; tensor var_9683_cast_fp16 = concat(axis = var_8177, interleave = var_9683_interleave_0, values = (var_9637_cast_fp16, var_9639_cast_fp16, var_9641_cast_fp16, var_9643_cast_fp16))[name = tensor("op_9683_cast_fp16")]; tensor input_41_interleave_0 = const()[name = tensor("input_41_interleave_0"), val = tensor(false)]; tensor input_41_cast_fp16 = concat(axis = var_8202, interleave = input_41_interleave_0, values = (var_9645_cast_fp16, var_9647_cast_fp16, var_9649_cast_fp16, var_9651_cast_fp16, var_9653_cast_fp16, var_9655_cast_fp16, var_9657_cast_fp16, var_9659_cast_fp16, var_9661_cast_fp16, var_9663_cast_fp16, var_9665_cast_fp16, var_9667_cast_fp16, var_9669_cast_fp16, var_9671_cast_fp16, var_9673_cast_fp16, var_9675_cast_fp16, var_9677_cast_fp16, var_9679_cast_fp16, var_9681_cast_fp16, var_9683_cast_fp16))[name = tensor("input_41_cast_fp16")]; tensor var_9694_pad_type_0 = const()[name = tensor("op_9694_pad_type_0"), val = tensor("valid")]; tensor var_9694_strides_0 = const()[name = tensor("op_9694_strides_0"), val = tensor([1, 1])]; tensor var_9694_pad_0 = const()[name = tensor("op_9694_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9694_dilations_0 = const()[name = tensor("op_9694_dilations_0"), val = tensor([1, 1])]; tensor var_9694_groups_0 = const()[name = tensor("op_9694_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(84791488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85610752))), name = tensor("layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_5_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85610880)))]; tensor var_9694_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_9694_dilations_0, groups = var_9694_groups_0, pad = var_9694_pad_0, pad_type = var_9694_pad_type_0, strides = var_9694_strides_0, weight = layers_5_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_41_cast_fp16)[name = tensor("op_9694_cast_fp16")]; tensor var_9700_pad_type_0 = const()[name = tensor("op_9700_pad_type_0"), val = tensor("valid")]; tensor var_9700_strides_0 = const()[name = tensor("op_9700_strides_0"), val = tensor([1, 1])]; tensor var_9700_pad_0 = const()[name = tensor("op_9700_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9700_dilations_0 = const()[name = tensor("op_9700_dilations_0"), val = tensor([1, 1])]; tensor var_9700_groups_0 = const()[name = tensor("op_9700_groups_0"), val = tensor(1)]; tensor layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85634944))), name = tensor("layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85613504))), shape = tensor([1280, 1280, 1, 1])]; tensor var_9700_cast_fp16 = conv(dilations = var_9700_dilations_0, groups = var_9700_groups_0, pad = var_9700_pad_0, pad_type = var_9700_pad_type_0, strides = var_9700_strides_0, weight = layers_5_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_41_cast_fp16)[name = tensor("op_9700_cast_fp16")]; tensor obj_23_cast_fp16 = add(x = var_9694_cast_fp16, y = var_9700_cast_fp16)[name = tensor("obj_23_cast_fp16")]; tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; tensor out_23_axes_0 = const()[name = tensor("out_23_axes_0"), val = tensor([1])]; tensor var_9711_to_fp16 = const()[name = tensor("op_9711_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_23_cast_fp16 = layer_norm(axes = out_23_axes_0, epsilon = var_9711_to_fp16, x = inputs_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85839808)))]; tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85842432)))]; tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; tensor var_9729_pad_type_0 = const()[name = tensor("op_9729_pad_type_0"), val = tensor("valid")]; tensor var_9729_strides_0 = const()[name = tensor("op_9729_strides_0"), val = tensor([1, 1])]; tensor var_9729_pad_0 = const()[name = tensor("op_9729_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9729_dilations_0 = const()[name = tensor("op_9729_dilations_0"), val = tensor([1, 1])]; tensor var_9729_groups_0 = const()[name = tensor("op_9729_groups_0"), val = tensor(1)]; tensor layers_5_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(85845056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89121920))), name = tensor("layers_5_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_5_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89122048)))]; tensor var_9729_cast_fp16 = conv(bias = layers_5_fc1_inlier_module_bias_to_fp16, dilations = var_9729_dilations_0, groups = var_9729_groups_0, pad = var_9729_pad_0, pad_type = var_9729_pad_type_0, strides = var_9729_strides_0, weight = layers_5_fc1_inlier_module_weight_to_fp16_palettized, x = input_43_cast_fp16)[name = tensor("op_9729_cast_fp16")]; tensor var_9735_pad_type_0 = const()[name = tensor("op_9735_pad_type_0"), val = tensor("valid")]; tensor var_9735_strides_0 = const()[name = tensor("op_9735_strides_0"), val = tensor([1, 1])]; tensor var_9735_pad_0 = const()[name = tensor("op_9735_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9735_dilations_0 = const()[name = tensor("op_9735_dilations_0"), val = tensor([1, 1])]; tensor var_9735_groups_0 = const()[name = tensor("op_9735_groups_0"), val = tensor(1)]; tensor layers_5_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89166464))), name = tensor("layers_5_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89132352))), shape = tensor([5120, 1280, 1, 1])]; tensor var_9735_cast_fp16 = conv(dilations = var_9735_dilations_0, groups = var_9735_groups_0, pad = var_9735_pad_0, pad_type = var_9735_pad_type_0, strides = var_9735_strides_0, weight = layers_5_fc1_outlier_module_weight_to_fp16_sparsified, x = input_43_cast_fp16)[name = tensor("op_9735_cast_fp16")]; tensor input_45_cast_fp16 = add(x = var_9729_cast_fp16, y = var_9735_cast_fp16)[name = tensor("input_45_cast_fp16")]; tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; tensor var_9746_pad_type_0 = const()[name = tensor("op_9746_pad_type_0"), val = tensor("valid")]; tensor var_9746_strides_0 = const()[name = tensor("op_9746_strides_0"), val = tensor([1, 1])]; tensor var_9746_pad_0 = const()[name = tensor("op_9746_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9746_dilations_0 = const()[name = tensor("op_9746_dilations_0"), val = tensor([1, 1])]; tensor var_9746_groups_0 = const()[name = tensor("op_9746_groups_0"), val = tensor(1)]; tensor layers_5_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(89985728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93262592))), name = tensor("layers_5_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_5_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_5_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93262720)))]; tensor var_9746_cast_fp16 = conv(bias = layers_5_fc2_inlier_module_bias_to_fp16, dilations = var_9746_dilations_0, groups = var_9746_groups_0, pad = var_9746_pad_0, pad_type = var_9746_pad_type_0, strides = var_9746_strides_0, weight = layers_5_fc2_inlier_module_weight_to_fp16_palettized, x = input_47_cast_fp16)[name = tensor("op_9746_cast_fp16")]; tensor var_9752_pad_type_0 = const()[name = tensor("op_9752_pad_type_0"), val = tensor("valid")]; tensor var_9752_strides_0 = const()[name = tensor("op_9752_strides_0"), val = tensor([1, 1])]; tensor var_9752_pad_0 = const()[name = tensor("op_9752_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9752_dilations_0 = const()[name = tensor("op_9752_dilations_0"), val = tensor([1, 1])]; tensor var_9752_groups_0 = const()[name = tensor("op_9752_groups_0"), val = tensor(1)]; tensor layers_5_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93508032))), name = tensor("layers_5_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93265344))), shape = tensor([1280, 5120, 1, 1])]; tensor var_9752_cast_fp16 = conv(dilations = var_9752_dilations_0, groups = var_9752_groups_0, pad = var_9752_pad_0, pad_type = var_9752_pad_type_0, strides = var_9752_strides_0, weight = layers_5_fc2_outlier_module_weight_to_fp16_sparsified, x = input_47_cast_fp16)[name = tensor("op_9752_cast_fp16")]; tensor hidden_states_15_cast_fp16 = add(x = var_9746_cast_fp16, y = var_9752_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; tensor var_9758 = const()[name = tensor("op_9758"), val = tensor(3)]; tensor var_9783 = const()[name = tensor("op_9783"), val = tensor(1)]; tensor out_25_axes_0 = const()[name = tensor("out_25_axes_0"), val = tensor([1])]; tensor var_9800_to_fp16 = const()[name = tensor("op_9800_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_25_cast_fp16 = layer_norm(axes = out_25_axes_0, epsilon = var_9800_to_fp16, x = inputs_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94327296)))]; tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94329920)))]; tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; tensor var_9822_pad_type_0 = const()[name = tensor("op_9822_pad_type_0"), val = tensor("valid")]; tensor var_9822_strides_0 = const()[name = tensor("op_9822_strides_0"), val = tensor([1, 1])]; tensor var_9822_pad_0 = const()[name = tensor("op_9822_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9822_dilations_0 = const()[name = tensor("op_9822_dilations_0"), val = tensor([1, 1])]; tensor var_9822_groups_0 = const()[name = tensor("op_9822_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94332544))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95151808))), name = tensor("layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_6_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95151936)))]; tensor var_9822_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_9822_dilations_0, groups = var_9822_groups_0, pad = var_9822_pad_0, pad_type = var_9822_pad_type_0, strides = var_9822_strides_0, weight = layers_6_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_9822_cast_fp16")]; tensor var_9828_pad_type_0 = const()[name = tensor("op_9828_pad_type_0"), val = tensor("valid")]; tensor var_9828_strides_0 = const()[name = tensor("op_9828_strides_0"), val = tensor([1, 1])]; tensor var_9828_pad_0 = const()[name = tensor("op_9828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9828_dilations_0 = const()[name = tensor("op_9828_dilations_0"), val = tensor([1, 1])]; tensor var_9828_groups_0 = const()[name = tensor("op_9828_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95204224))), name = tensor("layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95154560))), shape = tensor([1280, 1280, 1, 1])]; tensor var_9828_cast_fp16 = conv(dilations = var_9828_dilations_0, groups = var_9828_groups_0, pad = var_9828_pad_0, pad_type = var_9828_pad_type_0, strides = var_9828_strides_0, weight = layers_6_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_9828_cast_fp16")]; tensor query_13_cast_fp16 = add(x = var_9822_cast_fp16, y = var_9828_cast_fp16)[name = tensor("query_13_cast_fp16")]; tensor var_9837_pad_type_0 = const()[name = tensor("op_9837_pad_type_0"), val = tensor("valid")]; tensor var_9837_strides_0 = const()[name = tensor("op_9837_strides_0"), val = tensor([1, 1])]; tensor var_9837_pad_0 = const()[name = tensor("op_9837_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9837_dilations_0 = const()[name = tensor("op_9837_dilations_0"), val = tensor([1, 1])]; tensor var_9837_groups_0 = const()[name = tensor("op_9837_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95409088))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96228352))), name = tensor("layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_9837_cast_fp16 = conv(dilations = var_9837_dilations_0, groups = var_9837_groups_0, pad = var_9837_pad_0, pad_type = var_9837_pad_type_0, strides = var_9837_strides_0, weight = layers_6_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_9837_cast_fp16")]; tensor var_9843_pad_type_0 = const()[name = tensor("op_9843_pad_type_0"), val = tensor("valid")]; tensor var_9843_strides_0 = const()[name = tensor("op_9843_strides_0"), val = tensor([1, 1])]; tensor var_9843_pad_0 = const()[name = tensor("op_9843_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9843_dilations_0 = const()[name = tensor("op_9843_dilations_0"), val = tensor([1, 1])]; tensor var_9843_groups_0 = const()[name = tensor("op_9843_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96263168))), name = tensor("layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96228480))), shape = tensor([1280, 1280, 1, 1])]; tensor var_9843_cast_fp16 = conv(dilations = var_9843_dilations_0, groups = var_9843_groups_0, pad = var_9843_pad_0, pad_type = var_9843_pad_type_0, strides = var_9843_strides_0, weight = layers_6_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_9843_cast_fp16")]; tensor key_13_cast_fp16 = add(x = var_9837_cast_fp16, y = var_9843_cast_fp16)[name = tensor("key_13_cast_fp16")]; tensor var_9853_pad_type_0 = const()[name = tensor("op_9853_pad_type_0"), val = tensor("valid")]; tensor var_9853_strides_0 = const()[name = tensor("op_9853_strides_0"), val = tensor([1, 1])]; tensor var_9853_pad_0 = const()[name = tensor("op_9853_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9853_dilations_0 = const()[name = tensor("op_9853_dilations_0"), val = tensor([1, 1])]; tensor var_9853_groups_0 = const()[name = tensor("op_9853_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96468032))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97287296))), name = tensor("layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_6_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97287424)))]; tensor var_9853_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_9853_dilations_0, groups = var_9853_groups_0, pad = var_9853_pad_0, pad_type = var_9853_pad_type_0, strides = var_9853_strides_0, weight = layers_6_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_25_cast_fp16)[name = tensor("op_9853_cast_fp16")]; tensor var_9859_pad_type_0 = const()[name = tensor("op_9859_pad_type_0"), val = tensor("valid")]; tensor var_9859_strides_0 = const()[name = tensor("op_9859_strides_0"), val = tensor([1, 1])]; tensor var_9859_pad_0 = const()[name = tensor("op_9859_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_9859_dilations_0 = const()[name = tensor("op_9859_dilations_0"), val = tensor([1, 1])]; tensor var_9859_groups_0 = const()[name = tensor("op_9859_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97313920))), name = tensor("layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97290048))), shape = tensor([1280, 1280, 1, 1])]; tensor var_9859_cast_fp16 = conv(dilations = var_9859_dilations_0, groups = var_9859_groups_0, pad = var_9859_pad_0, pad_type = var_9859_pad_type_0, strides = var_9859_strides_0, weight = layers_6_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_25_cast_fp16)[name = tensor("op_9859_cast_fp16")]; tensor value_13_cast_fp16 = add(x = var_9853_cast_fp16, y = var_9859_cast_fp16)[name = tensor("value_13_cast_fp16")]; tensor var_9865_begin_0 = const()[name = tensor("op_9865_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9865_end_0 = const()[name = tensor("op_9865_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_9865_end_mask_0 = const()[name = tensor("op_9865_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9865_cast_fp16 = slice_by_index(begin = var_9865_begin_0, end = var_9865_end_0, end_mask = var_9865_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9865_cast_fp16")]; tensor var_9869_begin_0 = const()[name = tensor("op_9869_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_9869_end_0 = const()[name = tensor("op_9869_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_9869_end_mask_0 = const()[name = tensor("op_9869_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9869_cast_fp16 = slice_by_index(begin = var_9869_begin_0, end = var_9869_end_0, end_mask = var_9869_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9869_cast_fp16")]; tensor var_9873_begin_0 = const()[name = tensor("op_9873_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_9873_end_0 = const()[name = tensor("op_9873_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_9873_end_mask_0 = const()[name = tensor("op_9873_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9873_cast_fp16 = slice_by_index(begin = var_9873_begin_0, end = var_9873_end_0, end_mask = var_9873_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9873_cast_fp16")]; tensor var_9877_begin_0 = const()[name = tensor("op_9877_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_9877_end_0 = const()[name = tensor("op_9877_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_9877_end_mask_0 = const()[name = tensor("op_9877_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9877_cast_fp16 = slice_by_index(begin = var_9877_begin_0, end = var_9877_end_0, end_mask = var_9877_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9877_cast_fp16")]; tensor var_9881_begin_0 = const()[name = tensor("op_9881_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_9881_end_0 = const()[name = tensor("op_9881_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_9881_end_mask_0 = const()[name = tensor("op_9881_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9881_cast_fp16 = slice_by_index(begin = var_9881_begin_0, end = var_9881_end_0, end_mask = var_9881_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9881_cast_fp16")]; tensor var_9885_begin_0 = const()[name = tensor("op_9885_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_9885_end_0 = const()[name = tensor("op_9885_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_9885_end_mask_0 = const()[name = tensor("op_9885_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9885_cast_fp16 = slice_by_index(begin = var_9885_begin_0, end = var_9885_end_0, end_mask = var_9885_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9885_cast_fp16")]; tensor var_9889_begin_0 = const()[name = tensor("op_9889_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_9889_end_0 = const()[name = tensor("op_9889_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_9889_end_mask_0 = const()[name = tensor("op_9889_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9889_cast_fp16 = slice_by_index(begin = var_9889_begin_0, end = var_9889_end_0, end_mask = var_9889_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9889_cast_fp16")]; tensor var_9893_begin_0 = const()[name = tensor("op_9893_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_9893_end_0 = const()[name = tensor("op_9893_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_9893_end_mask_0 = const()[name = tensor("op_9893_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9893_cast_fp16 = slice_by_index(begin = var_9893_begin_0, end = var_9893_end_0, end_mask = var_9893_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9893_cast_fp16")]; tensor var_9897_begin_0 = const()[name = tensor("op_9897_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_9897_end_0 = const()[name = tensor("op_9897_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_9897_end_mask_0 = const()[name = tensor("op_9897_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9897_cast_fp16 = slice_by_index(begin = var_9897_begin_0, end = var_9897_end_0, end_mask = var_9897_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9897_cast_fp16")]; tensor var_9901_begin_0 = const()[name = tensor("op_9901_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_9901_end_0 = const()[name = tensor("op_9901_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_9901_end_mask_0 = const()[name = tensor("op_9901_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9901_cast_fp16 = slice_by_index(begin = var_9901_begin_0, end = var_9901_end_0, end_mask = var_9901_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9901_cast_fp16")]; tensor var_9905_begin_0 = const()[name = tensor("op_9905_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_9905_end_0 = const()[name = tensor("op_9905_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_9905_end_mask_0 = const()[name = tensor("op_9905_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9905_cast_fp16 = slice_by_index(begin = var_9905_begin_0, end = var_9905_end_0, end_mask = var_9905_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9905_cast_fp16")]; tensor var_9909_begin_0 = const()[name = tensor("op_9909_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_9909_end_0 = const()[name = tensor("op_9909_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_9909_end_mask_0 = const()[name = tensor("op_9909_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9909_cast_fp16 = slice_by_index(begin = var_9909_begin_0, end = var_9909_end_0, end_mask = var_9909_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9909_cast_fp16")]; tensor var_9913_begin_0 = const()[name = tensor("op_9913_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_9913_end_0 = const()[name = tensor("op_9913_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_9913_end_mask_0 = const()[name = tensor("op_9913_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9913_cast_fp16 = slice_by_index(begin = var_9913_begin_0, end = var_9913_end_0, end_mask = var_9913_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9913_cast_fp16")]; tensor var_9917_begin_0 = const()[name = tensor("op_9917_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_9917_end_0 = const()[name = tensor("op_9917_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_9917_end_mask_0 = const()[name = tensor("op_9917_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9917_cast_fp16 = slice_by_index(begin = var_9917_begin_0, end = var_9917_end_0, end_mask = var_9917_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9917_cast_fp16")]; tensor var_9921_begin_0 = const()[name = tensor("op_9921_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_9921_end_0 = const()[name = tensor("op_9921_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_9921_end_mask_0 = const()[name = tensor("op_9921_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9921_cast_fp16 = slice_by_index(begin = var_9921_begin_0, end = var_9921_end_0, end_mask = var_9921_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9921_cast_fp16")]; tensor var_9925_begin_0 = const()[name = tensor("op_9925_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_9925_end_0 = const()[name = tensor("op_9925_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_9925_end_mask_0 = const()[name = tensor("op_9925_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9925_cast_fp16 = slice_by_index(begin = var_9925_begin_0, end = var_9925_end_0, end_mask = var_9925_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9925_cast_fp16")]; tensor var_9929_begin_0 = const()[name = tensor("op_9929_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_9929_end_0 = const()[name = tensor("op_9929_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_9929_end_mask_0 = const()[name = tensor("op_9929_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9929_cast_fp16 = slice_by_index(begin = var_9929_begin_0, end = var_9929_end_0, end_mask = var_9929_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9929_cast_fp16")]; tensor var_9933_begin_0 = const()[name = tensor("op_9933_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_9933_end_0 = const()[name = tensor("op_9933_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_9933_end_mask_0 = const()[name = tensor("op_9933_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9933_cast_fp16 = slice_by_index(begin = var_9933_begin_0, end = var_9933_end_0, end_mask = var_9933_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9933_cast_fp16")]; tensor var_9937_begin_0 = const()[name = tensor("op_9937_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_9937_end_0 = const()[name = tensor("op_9937_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_9937_end_mask_0 = const()[name = tensor("op_9937_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9937_cast_fp16 = slice_by_index(begin = var_9937_begin_0, end = var_9937_end_0, end_mask = var_9937_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9937_cast_fp16")]; tensor var_9941_begin_0 = const()[name = tensor("op_9941_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_9941_end_0 = const()[name = tensor("op_9941_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_9941_end_mask_0 = const()[name = tensor("op_9941_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_9941_cast_fp16 = slice_by_index(begin = var_9941_begin_0, end = var_9941_end_0, end_mask = var_9941_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9941_cast_fp16")]; tensor var_9950_begin_0 = const()[name = tensor("op_9950_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9950_end_0 = const()[name = tensor("op_9950_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_9950_end_mask_0 = const()[name = tensor("op_9950_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9950_cast_fp16 = slice_by_index(begin = var_9950_begin_0, end = var_9950_end_0, end_mask = var_9950_end_mask_0, x = var_9865_cast_fp16)[name = tensor("op_9950_cast_fp16")]; tensor var_9957_begin_0 = const()[name = tensor("op_9957_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_9957_end_0 = const()[name = tensor("op_9957_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_9957_end_mask_0 = const()[name = tensor("op_9957_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9957_cast_fp16 = slice_by_index(begin = var_9957_begin_0, end = var_9957_end_0, end_mask = var_9957_end_mask_0, x = var_9865_cast_fp16)[name = tensor("op_9957_cast_fp16")]; tensor var_9964_begin_0 = const()[name = tensor("op_9964_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_9964_end_0 = const()[name = tensor("op_9964_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_9964_end_mask_0 = const()[name = tensor("op_9964_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9964_cast_fp16 = slice_by_index(begin = var_9964_begin_0, end = var_9964_end_0, end_mask = var_9964_end_mask_0, x = var_9865_cast_fp16)[name = tensor("op_9964_cast_fp16")]; tensor var_9971_begin_0 = const()[name = tensor("op_9971_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_9971_end_0 = const()[name = tensor("op_9971_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_9971_end_mask_0 = const()[name = tensor("op_9971_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9971_cast_fp16 = slice_by_index(begin = var_9971_begin_0, end = var_9971_end_0, end_mask = var_9971_end_mask_0, x = var_9865_cast_fp16)[name = tensor("op_9971_cast_fp16")]; tensor var_9978_begin_0 = const()[name = tensor("op_9978_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_9978_end_0 = const()[name = tensor("op_9978_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_9978_end_mask_0 = const()[name = tensor("op_9978_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9978_cast_fp16 = slice_by_index(begin = var_9978_begin_0, end = var_9978_end_0, end_mask = var_9978_end_mask_0, x = var_9869_cast_fp16)[name = tensor("op_9978_cast_fp16")]; tensor var_9985_begin_0 = const()[name = tensor("op_9985_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_9985_end_0 = const()[name = tensor("op_9985_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_9985_end_mask_0 = const()[name = tensor("op_9985_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9985_cast_fp16 = slice_by_index(begin = var_9985_begin_0, end = var_9985_end_0, end_mask = var_9985_end_mask_0, x = var_9869_cast_fp16)[name = tensor("op_9985_cast_fp16")]; tensor var_9992_begin_0 = const()[name = tensor("op_9992_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_9992_end_0 = const()[name = tensor("op_9992_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_9992_end_mask_0 = const()[name = tensor("op_9992_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9992_cast_fp16 = slice_by_index(begin = var_9992_begin_0, end = var_9992_end_0, end_mask = var_9992_end_mask_0, x = var_9869_cast_fp16)[name = tensor("op_9992_cast_fp16")]; tensor var_9999_begin_0 = const()[name = tensor("op_9999_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_9999_end_0 = const()[name = tensor("op_9999_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_9999_end_mask_0 = const()[name = tensor("op_9999_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_9999_cast_fp16 = slice_by_index(begin = var_9999_begin_0, end = var_9999_end_0, end_mask = var_9999_end_mask_0, x = var_9869_cast_fp16)[name = tensor("op_9999_cast_fp16")]; tensor var_10006_begin_0 = const()[name = tensor("op_10006_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10006_end_0 = const()[name = tensor("op_10006_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10006_end_mask_0 = const()[name = tensor("op_10006_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10006_cast_fp16 = slice_by_index(begin = var_10006_begin_0, end = var_10006_end_0, end_mask = var_10006_end_mask_0, x = var_9873_cast_fp16)[name = tensor("op_10006_cast_fp16")]; tensor var_10013_begin_0 = const()[name = tensor("op_10013_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10013_end_0 = const()[name = tensor("op_10013_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10013_end_mask_0 = const()[name = tensor("op_10013_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10013_cast_fp16 = slice_by_index(begin = var_10013_begin_0, end = var_10013_end_0, end_mask = var_10013_end_mask_0, x = var_9873_cast_fp16)[name = tensor("op_10013_cast_fp16")]; tensor var_10020_begin_0 = const()[name = tensor("op_10020_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10020_end_0 = const()[name = tensor("op_10020_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10020_end_mask_0 = const()[name = tensor("op_10020_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10020_cast_fp16 = slice_by_index(begin = var_10020_begin_0, end = var_10020_end_0, end_mask = var_10020_end_mask_0, x = var_9873_cast_fp16)[name = tensor("op_10020_cast_fp16")]; tensor var_10027_begin_0 = const()[name = tensor("op_10027_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10027_end_0 = const()[name = tensor("op_10027_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10027_end_mask_0 = const()[name = tensor("op_10027_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10027_cast_fp16 = slice_by_index(begin = var_10027_begin_0, end = var_10027_end_0, end_mask = var_10027_end_mask_0, x = var_9873_cast_fp16)[name = tensor("op_10027_cast_fp16")]; tensor var_10034_begin_0 = const()[name = tensor("op_10034_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10034_end_0 = const()[name = tensor("op_10034_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10034_end_mask_0 = const()[name = tensor("op_10034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10034_cast_fp16 = slice_by_index(begin = var_10034_begin_0, end = var_10034_end_0, end_mask = var_10034_end_mask_0, x = var_9877_cast_fp16)[name = tensor("op_10034_cast_fp16")]; tensor var_10041_begin_0 = const()[name = tensor("op_10041_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10041_end_0 = const()[name = tensor("op_10041_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10041_end_mask_0 = const()[name = tensor("op_10041_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10041_cast_fp16 = slice_by_index(begin = var_10041_begin_0, end = var_10041_end_0, end_mask = var_10041_end_mask_0, x = var_9877_cast_fp16)[name = tensor("op_10041_cast_fp16")]; tensor var_10048_begin_0 = const()[name = tensor("op_10048_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10048_end_0 = const()[name = tensor("op_10048_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10048_end_mask_0 = const()[name = tensor("op_10048_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10048_cast_fp16 = slice_by_index(begin = var_10048_begin_0, end = var_10048_end_0, end_mask = var_10048_end_mask_0, x = var_9877_cast_fp16)[name = tensor("op_10048_cast_fp16")]; tensor var_10055_begin_0 = const()[name = tensor("op_10055_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10055_end_0 = const()[name = tensor("op_10055_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10055_end_mask_0 = const()[name = tensor("op_10055_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10055_cast_fp16 = slice_by_index(begin = var_10055_begin_0, end = var_10055_end_0, end_mask = var_10055_end_mask_0, x = var_9877_cast_fp16)[name = tensor("op_10055_cast_fp16")]; tensor var_10062_begin_0 = const()[name = tensor("op_10062_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10062_end_0 = const()[name = tensor("op_10062_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10062_end_mask_0 = const()[name = tensor("op_10062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10062_cast_fp16 = slice_by_index(begin = var_10062_begin_0, end = var_10062_end_0, end_mask = var_10062_end_mask_0, x = var_9881_cast_fp16)[name = tensor("op_10062_cast_fp16")]; tensor var_10069_begin_0 = const()[name = tensor("op_10069_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10069_end_0 = const()[name = tensor("op_10069_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10069_end_mask_0 = const()[name = tensor("op_10069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10069_cast_fp16 = slice_by_index(begin = var_10069_begin_0, end = var_10069_end_0, end_mask = var_10069_end_mask_0, x = var_9881_cast_fp16)[name = tensor("op_10069_cast_fp16")]; tensor var_10076_begin_0 = const()[name = tensor("op_10076_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10076_end_0 = const()[name = tensor("op_10076_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10076_end_mask_0 = const()[name = tensor("op_10076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10076_cast_fp16 = slice_by_index(begin = var_10076_begin_0, end = var_10076_end_0, end_mask = var_10076_end_mask_0, x = var_9881_cast_fp16)[name = tensor("op_10076_cast_fp16")]; tensor var_10083_begin_0 = const()[name = tensor("op_10083_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10083_end_0 = const()[name = tensor("op_10083_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10083_end_mask_0 = const()[name = tensor("op_10083_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10083_cast_fp16 = slice_by_index(begin = var_10083_begin_0, end = var_10083_end_0, end_mask = var_10083_end_mask_0, x = var_9881_cast_fp16)[name = tensor("op_10083_cast_fp16")]; tensor var_10090_begin_0 = const()[name = tensor("op_10090_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10090_end_0 = const()[name = tensor("op_10090_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10090_end_mask_0 = const()[name = tensor("op_10090_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10090_cast_fp16 = slice_by_index(begin = var_10090_begin_0, end = var_10090_end_0, end_mask = var_10090_end_mask_0, x = var_9885_cast_fp16)[name = tensor("op_10090_cast_fp16")]; tensor var_10097_begin_0 = const()[name = tensor("op_10097_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10097_end_0 = const()[name = tensor("op_10097_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10097_end_mask_0 = const()[name = tensor("op_10097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10097_cast_fp16 = slice_by_index(begin = var_10097_begin_0, end = var_10097_end_0, end_mask = var_10097_end_mask_0, x = var_9885_cast_fp16)[name = tensor("op_10097_cast_fp16")]; tensor var_10104_begin_0 = const()[name = tensor("op_10104_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10104_end_0 = const()[name = tensor("op_10104_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10104_end_mask_0 = const()[name = tensor("op_10104_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10104_cast_fp16 = slice_by_index(begin = var_10104_begin_0, end = var_10104_end_0, end_mask = var_10104_end_mask_0, x = var_9885_cast_fp16)[name = tensor("op_10104_cast_fp16")]; tensor var_10111_begin_0 = const()[name = tensor("op_10111_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10111_end_0 = const()[name = tensor("op_10111_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10111_end_mask_0 = const()[name = tensor("op_10111_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10111_cast_fp16 = slice_by_index(begin = var_10111_begin_0, end = var_10111_end_0, end_mask = var_10111_end_mask_0, x = var_9885_cast_fp16)[name = tensor("op_10111_cast_fp16")]; tensor var_10118_begin_0 = const()[name = tensor("op_10118_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10118_end_0 = const()[name = tensor("op_10118_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10118_end_mask_0 = const()[name = tensor("op_10118_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10118_cast_fp16 = slice_by_index(begin = var_10118_begin_0, end = var_10118_end_0, end_mask = var_10118_end_mask_0, x = var_9889_cast_fp16)[name = tensor("op_10118_cast_fp16")]; tensor var_10125_begin_0 = const()[name = tensor("op_10125_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10125_end_0 = const()[name = tensor("op_10125_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10125_end_mask_0 = const()[name = tensor("op_10125_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10125_cast_fp16 = slice_by_index(begin = var_10125_begin_0, end = var_10125_end_0, end_mask = var_10125_end_mask_0, x = var_9889_cast_fp16)[name = tensor("op_10125_cast_fp16")]; tensor var_10132_begin_0 = const()[name = tensor("op_10132_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10132_end_0 = const()[name = tensor("op_10132_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10132_end_mask_0 = const()[name = tensor("op_10132_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10132_cast_fp16 = slice_by_index(begin = var_10132_begin_0, end = var_10132_end_0, end_mask = var_10132_end_mask_0, x = var_9889_cast_fp16)[name = tensor("op_10132_cast_fp16")]; tensor var_10139_begin_0 = const()[name = tensor("op_10139_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10139_end_0 = const()[name = tensor("op_10139_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10139_end_mask_0 = const()[name = tensor("op_10139_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10139_cast_fp16 = slice_by_index(begin = var_10139_begin_0, end = var_10139_end_0, end_mask = var_10139_end_mask_0, x = var_9889_cast_fp16)[name = tensor("op_10139_cast_fp16")]; tensor var_10146_begin_0 = const()[name = tensor("op_10146_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10146_end_0 = const()[name = tensor("op_10146_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10146_end_mask_0 = const()[name = tensor("op_10146_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10146_cast_fp16 = slice_by_index(begin = var_10146_begin_0, end = var_10146_end_0, end_mask = var_10146_end_mask_0, x = var_9893_cast_fp16)[name = tensor("op_10146_cast_fp16")]; tensor var_10153_begin_0 = const()[name = tensor("op_10153_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10153_end_0 = const()[name = tensor("op_10153_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10153_end_mask_0 = const()[name = tensor("op_10153_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10153_cast_fp16 = slice_by_index(begin = var_10153_begin_0, end = var_10153_end_0, end_mask = var_10153_end_mask_0, x = var_9893_cast_fp16)[name = tensor("op_10153_cast_fp16")]; tensor var_10160_begin_0 = const()[name = tensor("op_10160_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10160_end_0 = const()[name = tensor("op_10160_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10160_end_mask_0 = const()[name = tensor("op_10160_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10160_cast_fp16 = slice_by_index(begin = var_10160_begin_0, end = var_10160_end_0, end_mask = var_10160_end_mask_0, x = var_9893_cast_fp16)[name = tensor("op_10160_cast_fp16")]; tensor var_10167_begin_0 = const()[name = tensor("op_10167_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10167_end_0 = const()[name = tensor("op_10167_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10167_end_mask_0 = const()[name = tensor("op_10167_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10167_cast_fp16 = slice_by_index(begin = var_10167_begin_0, end = var_10167_end_0, end_mask = var_10167_end_mask_0, x = var_9893_cast_fp16)[name = tensor("op_10167_cast_fp16")]; tensor var_10174_begin_0 = const()[name = tensor("op_10174_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10174_end_0 = const()[name = tensor("op_10174_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10174_end_mask_0 = const()[name = tensor("op_10174_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10174_cast_fp16 = slice_by_index(begin = var_10174_begin_0, end = var_10174_end_0, end_mask = var_10174_end_mask_0, x = var_9897_cast_fp16)[name = tensor("op_10174_cast_fp16")]; tensor var_10181_begin_0 = const()[name = tensor("op_10181_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10181_end_0 = const()[name = tensor("op_10181_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10181_end_mask_0 = const()[name = tensor("op_10181_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10181_cast_fp16 = slice_by_index(begin = var_10181_begin_0, end = var_10181_end_0, end_mask = var_10181_end_mask_0, x = var_9897_cast_fp16)[name = tensor("op_10181_cast_fp16")]; tensor var_10188_begin_0 = const()[name = tensor("op_10188_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10188_end_0 = const()[name = tensor("op_10188_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10188_end_mask_0 = const()[name = tensor("op_10188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10188_cast_fp16 = slice_by_index(begin = var_10188_begin_0, end = var_10188_end_0, end_mask = var_10188_end_mask_0, x = var_9897_cast_fp16)[name = tensor("op_10188_cast_fp16")]; tensor var_10195_begin_0 = const()[name = tensor("op_10195_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10195_end_0 = const()[name = tensor("op_10195_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10195_end_mask_0 = const()[name = tensor("op_10195_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10195_cast_fp16 = slice_by_index(begin = var_10195_begin_0, end = var_10195_end_0, end_mask = var_10195_end_mask_0, x = var_9897_cast_fp16)[name = tensor("op_10195_cast_fp16")]; tensor var_10202_begin_0 = const()[name = tensor("op_10202_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10202_end_0 = const()[name = tensor("op_10202_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10202_end_mask_0 = const()[name = tensor("op_10202_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10202_cast_fp16 = slice_by_index(begin = var_10202_begin_0, end = var_10202_end_0, end_mask = var_10202_end_mask_0, x = var_9901_cast_fp16)[name = tensor("op_10202_cast_fp16")]; tensor var_10209_begin_0 = const()[name = tensor("op_10209_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10209_end_0 = const()[name = tensor("op_10209_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10209_end_mask_0 = const()[name = tensor("op_10209_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10209_cast_fp16 = slice_by_index(begin = var_10209_begin_0, end = var_10209_end_0, end_mask = var_10209_end_mask_0, x = var_9901_cast_fp16)[name = tensor("op_10209_cast_fp16")]; tensor var_10216_begin_0 = const()[name = tensor("op_10216_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10216_end_0 = const()[name = tensor("op_10216_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10216_end_mask_0 = const()[name = tensor("op_10216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10216_cast_fp16 = slice_by_index(begin = var_10216_begin_0, end = var_10216_end_0, end_mask = var_10216_end_mask_0, x = var_9901_cast_fp16)[name = tensor("op_10216_cast_fp16")]; tensor var_10223_begin_0 = const()[name = tensor("op_10223_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10223_end_0 = const()[name = tensor("op_10223_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10223_end_mask_0 = const()[name = tensor("op_10223_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10223_cast_fp16 = slice_by_index(begin = var_10223_begin_0, end = var_10223_end_0, end_mask = var_10223_end_mask_0, x = var_9901_cast_fp16)[name = tensor("op_10223_cast_fp16")]; tensor var_10230_begin_0 = const()[name = tensor("op_10230_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10230_end_0 = const()[name = tensor("op_10230_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10230_end_mask_0 = const()[name = tensor("op_10230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10230_cast_fp16 = slice_by_index(begin = var_10230_begin_0, end = var_10230_end_0, end_mask = var_10230_end_mask_0, x = var_9905_cast_fp16)[name = tensor("op_10230_cast_fp16")]; tensor var_10237_begin_0 = const()[name = tensor("op_10237_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10237_end_0 = const()[name = tensor("op_10237_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10237_end_mask_0 = const()[name = tensor("op_10237_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10237_cast_fp16 = slice_by_index(begin = var_10237_begin_0, end = var_10237_end_0, end_mask = var_10237_end_mask_0, x = var_9905_cast_fp16)[name = tensor("op_10237_cast_fp16")]; tensor var_10244_begin_0 = const()[name = tensor("op_10244_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10244_end_0 = const()[name = tensor("op_10244_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10244_end_mask_0 = const()[name = tensor("op_10244_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10244_cast_fp16 = slice_by_index(begin = var_10244_begin_0, end = var_10244_end_0, end_mask = var_10244_end_mask_0, x = var_9905_cast_fp16)[name = tensor("op_10244_cast_fp16")]; tensor var_10251_begin_0 = const()[name = tensor("op_10251_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10251_end_0 = const()[name = tensor("op_10251_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10251_end_mask_0 = const()[name = tensor("op_10251_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10251_cast_fp16 = slice_by_index(begin = var_10251_begin_0, end = var_10251_end_0, end_mask = var_10251_end_mask_0, x = var_9905_cast_fp16)[name = tensor("op_10251_cast_fp16")]; tensor var_10258_begin_0 = const()[name = tensor("op_10258_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10258_end_0 = const()[name = tensor("op_10258_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10258_end_mask_0 = const()[name = tensor("op_10258_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10258_cast_fp16 = slice_by_index(begin = var_10258_begin_0, end = var_10258_end_0, end_mask = var_10258_end_mask_0, x = var_9909_cast_fp16)[name = tensor("op_10258_cast_fp16")]; tensor var_10265_begin_0 = const()[name = tensor("op_10265_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10265_end_0 = const()[name = tensor("op_10265_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10265_end_mask_0 = const()[name = tensor("op_10265_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10265_cast_fp16 = slice_by_index(begin = var_10265_begin_0, end = var_10265_end_0, end_mask = var_10265_end_mask_0, x = var_9909_cast_fp16)[name = tensor("op_10265_cast_fp16")]; tensor var_10272_begin_0 = const()[name = tensor("op_10272_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10272_end_0 = const()[name = tensor("op_10272_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10272_end_mask_0 = const()[name = tensor("op_10272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10272_cast_fp16 = slice_by_index(begin = var_10272_begin_0, end = var_10272_end_0, end_mask = var_10272_end_mask_0, x = var_9909_cast_fp16)[name = tensor("op_10272_cast_fp16")]; tensor var_10279_begin_0 = const()[name = tensor("op_10279_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10279_end_0 = const()[name = tensor("op_10279_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10279_end_mask_0 = const()[name = tensor("op_10279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10279_cast_fp16 = slice_by_index(begin = var_10279_begin_0, end = var_10279_end_0, end_mask = var_10279_end_mask_0, x = var_9909_cast_fp16)[name = tensor("op_10279_cast_fp16")]; tensor var_10286_begin_0 = const()[name = tensor("op_10286_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10286_end_0 = const()[name = tensor("op_10286_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10286_end_mask_0 = const()[name = tensor("op_10286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10286_cast_fp16 = slice_by_index(begin = var_10286_begin_0, end = var_10286_end_0, end_mask = var_10286_end_mask_0, x = var_9913_cast_fp16)[name = tensor("op_10286_cast_fp16")]; tensor var_10293_begin_0 = const()[name = tensor("op_10293_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10293_end_0 = const()[name = tensor("op_10293_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10293_end_mask_0 = const()[name = tensor("op_10293_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10293_cast_fp16 = slice_by_index(begin = var_10293_begin_0, end = var_10293_end_0, end_mask = var_10293_end_mask_0, x = var_9913_cast_fp16)[name = tensor("op_10293_cast_fp16")]; tensor var_10300_begin_0 = const()[name = tensor("op_10300_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10300_end_0 = const()[name = tensor("op_10300_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10300_end_mask_0 = const()[name = tensor("op_10300_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10300_cast_fp16 = slice_by_index(begin = var_10300_begin_0, end = var_10300_end_0, end_mask = var_10300_end_mask_0, x = var_9913_cast_fp16)[name = tensor("op_10300_cast_fp16")]; tensor var_10307_begin_0 = const()[name = tensor("op_10307_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10307_end_0 = const()[name = tensor("op_10307_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10307_end_mask_0 = const()[name = tensor("op_10307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10307_cast_fp16 = slice_by_index(begin = var_10307_begin_0, end = var_10307_end_0, end_mask = var_10307_end_mask_0, x = var_9913_cast_fp16)[name = tensor("op_10307_cast_fp16")]; tensor var_10314_begin_0 = const()[name = tensor("op_10314_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10314_end_0 = const()[name = tensor("op_10314_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10314_end_mask_0 = const()[name = tensor("op_10314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10314_cast_fp16 = slice_by_index(begin = var_10314_begin_0, end = var_10314_end_0, end_mask = var_10314_end_mask_0, x = var_9917_cast_fp16)[name = tensor("op_10314_cast_fp16")]; tensor var_10321_begin_0 = const()[name = tensor("op_10321_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10321_end_0 = const()[name = tensor("op_10321_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10321_end_mask_0 = const()[name = tensor("op_10321_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10321_cast_fp16 = slice_by_index(begin = var_10321_begin_0, end = var_10321_end_0, end_mask = var_10321_end_mask_0, x = var_9917_cast_fp16)[name = tensor("op_10321_cast_fp16")]; tensor var_10328_begin_0 = const()[name = tensor("op_10328_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10328_end_0 = const()[name = tensor("op_10328_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10328_end_mask_0 = const()[name = tensor("op_10328_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10328_cast_fp16 = slice_by_index(begin = var_10328_begin_0, end = var_10328_end_0, end_mask = var_10328_end_mask_0, x = var_9917_cast_fp16)[name = tensor("op_10328_cast_fp16")]; tensor var_10335_begin_0 = const()[name = tensor("op_10335_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10335_end_0 = const()[name = tensor("op_10335_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10335_end_mask_0 = const()[name = tensor("op_10335_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10335_cast_fp16 = slice_by_index(begin = var_10335_begin_0, end = var_10335_end_0, end_mask = var_10335_end_mask_0, x = var_9917_cast_fp16)[name = tensor("op_10335_cast_fp16")]; tensor var_10342_begin_0 = const()[name = tensor("op_10342_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10342_end_0 = const()[name = tensor("op_10342_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10342_end_mask_0 = const()[name = tensor("op_10342_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10342_cast_fp16 = slice_by_index(begin = var_10342_begin_0, end = var_10342_end_0, end_mask = var_10342_end_mask_0, x = var_9921_cast_fp16)[name = tensor("op_10342_cast_fp16")]; tensor var_10349_begin_0 = const()[name = tensor("op_10349_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10349_end_0 = const()[name = tensor("op_10349_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10349_end_mask_0 = const()[name = tensor("op_10349_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10349_cast_fp16 = slice_by_index(begin = var_10349_begin_0, end = var_10349_end_0, end_mask = var_10349_end_mask_0, x = var_9921_cast_fp16)[name = tensor("op_10349_cast_fp16")]; tensor var_10356_begin_0 = const()[name = tensor("op_10356_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10356_end_0 = const()[name = tensor("op_10356_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10356_end_mask_0 = const()[name = tensor("op_10356_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10356_cast_fp16 = slice_by_index(begin = var_10356_begin_0, end = var_10356_end_0, end_mask = var_10356_end_mask_0, x = var_9921_cast_fp16)[name = tensor("op_10356_cast_fp16")]; tensor var_10363_begin_0 = const()[name = tensor("op_10363_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10363_end_0 = const()[name = tensor("op_10363_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10363_end_mask_0 = const()[name = tensor("op_10363_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10363_cast_fp16 = slice_by_index(begin = var_10363_begin_0, end = var_10363_end_0, end_mask = var_10363_end_mask_0, x = var_9921_cast_fp16)[name = tensor("op_10363_cast_fp16")]; tensor var_10370_begin_0 = const()[name = tensor("op_10370_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10370_end_0 = const()[name = tensor("op_10370_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10370_end_mask_0 = const()[name = tensor("op_10370_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10370_cast_fp16 = slice_by_index(begin = var_10370_begin_0, end = var_10370_end_0, end_mask = var_10370_end_mask_0, x = var_9925_cast_fp16)[name = tensor("op_10370_cast_fp16")]; tensor var_10377_begin_0 = const()[name = tensor("op_10377_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10377_end_0 = const()[name = tensor("op_10377_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10377_end_mask_0 = const()[name = tensor("op_10377_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10377_cast_fp16 = slice_by_index(begin = var_10377_begin_0, end = var_10377_end_0, end_mask = var_10377_end_mask_0, x = var_9925_cast_fp16)[name = tensor("op_10377_cast_fp16")]; tensor var_10384_begin_0 = const()[name = tensor("op_10384_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10384_end_0 = const()[name = tensor("op_10384_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10384_end_mask_0 = const()[name = tensor("op_10384_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10384_cast_fp16 = slice_by_index(begin = var_10384_begin_0, end = var_10384_end_0, end_mask = var_10384_end_mask_0, x = var_9925_cast_fp16)[name = tensor("op_10384_cast_fp16")]; tensor var_10391_begin_0 = const()[name = tensor("op_10391_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10391_end_0 = const()[name = tensor("op_10391_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10391_end_mask_0 = const()[name = tensor("op_10391_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10391_cast_fp16 = slice_by_index(begin = var_10391_begin_0, end = var_10391_end_0, end_mask = var_10391_end_mask_0, x = var_9925_cast_fp16)[name = tensor("op_10391_cast_fp16")]; tensor var_10398_begin_0 = const()[name = tensor("op_10398_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10398_end_0 = const()[name = tensor("op_10398_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10398_end_mask_0 = const()[name = tensor("op_10398_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10398_cast_fp16 = slice_by_index(begin = var_10398_begin_0, end = var_10398_end_0, end_mask = var_10398_end_mask_0, x = var_9929_cast_fp16)[name = tensor("op_10398_cast_fp16")]; tensor var_10405_begin_0 = const()[name = tensor("op_10405_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10405_end_0 = const()[name = tensor("op_10405_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10405_end_mask_0 = const()[name = tensor("op_10405_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10405_cast_fp16 = slice_by_index(begin = var_10405_begin_0, end = var_10405_end_0, end_mask = var_10405_end_mask_0, x = var_9929_cast_fp16)[name = tensor("op_10405_cast_fp16")]; tensor var_10412_begin_0 = const()[name = tensor("op_10412_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10412_end_0 = const()[name = tensor("op_10412_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10412_end_mask_0 = const()[name = tensor("op_10412_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10412_cast_fp16 = slice_by_index(begin = var_10412_begin_0, end = var_10412_end_0, end_mask = var_10412_end_mask_0, x = var_9929_cast_fp16)[name = tensor("op_10412_cast_fp16")]; tensor var_10419_begin_0 = const()[name = tensor("op_10419_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10419_end_0 = const()[name = tensor("op_10419_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10419_end_mask_0 = const()[name = tensor("op_10419_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10419_cast_fp16 = slice_by_index(begin = var_10419_begin_0, end = var_10419_end_0, end_mask = var_10419_end_mask_0, x = var_9929_cast_fp16)[name = tensor("op_10419_cast_fp16")]; tensor var_10426_begin_0 = const()[name = tensor("op_10426_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10426_end_0 = const()[name = tensor("op_10426_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10426_end_mask_0 = const()[name = tensor("op_10426_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10426_cast_fp16 = slice_by_index(begin = var_10426_begin_0, end = var_10426_end_0, end_mask = var_10426_end_mask_0, x = var_9933_cast_fp16)[name = tensor("op_10426_cast_fp16")]; tensor var_10433_begin_0 = const()[name = tensor("op_10433_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10433_end_0 = const()[name = tensor("op_10433_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10433_end_mask_0 = const()[name = tensor("op_10433_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10433_cast_fp16 = slice_by_index(begin = var_10433_begin_0, end = var_10433_end_0, end_mask = var_10433_end_mask_0, x = var_9933_cast_fp16)[name = tensor("op_10433_cast_fp16")]; tensor var_10440_begin_0 = const()[name = tensor("op_10440_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10440_end_0 = const()[name = tensor("op_10440_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10440_end_mask_0 = const()[name = tensor("op_10440_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10440_cast_fp16 = slice_by_index(begin = var_10440_begin_0, end = var_10440_end_0, end_mask = var_10440_end_mask_0, x = var_9933_cast_fp16)[name = tensor("op_10440_cast_fp16")]; tensor var_10447_begin_0 = const()[name = tensor("op_10447_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10447_end_0 = const()[name = tensor("op_10447_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10447_end_mask_0 = const()[name = tensor("op_10447_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10447_cast_fp16 = slice_by_index(begin = var_10447_begin_0, end = var_10447_end_0, end_mask = var_10447_end_mask_0, x = var_9933_cast_fp16)[name = tensor("op_10447_cast_fp16")]; tensor var_10454_begin_0 = const()[name = tensor("op_10454_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10454_end_0 = const()[name = tensor("op_10454_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10454_end_mask_0 = const()[name = tensor("op_10454_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10454_cast_fp16 = slice_by_index(begin = var_10454_begin_0, end = var_10454_end_0, end_mask = var_10454_end_mask_0, x = var_9937_cast_fp16)[name = tensor("op_10454_cast_fp16")]; tensor var_10461_begin_0 = const()[name = tensor("op_10461_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10461_end_0 = const()[name = tensor("op_10461_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10461_end_mask_0 = const()[name = tensor("op_10461_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10461_cast_fp16 = slice_by_index(begin = var_10461_begin_0, end = var_10461_end_0, end_mask = var_10461_end_mask_0, x = var_9937_cast_fp16)[name = tensor("op_10461_cast_fp16")]; tensor var_10468_begin_0 = const()[name = tensor("op_10468_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10468_end_0 = const()[name = tensor("op_10468_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10468_end_mask_0 = const()[name = tensor("op_10468_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10468_cast_fp16 = slice_by_index(begin = var_10468_begin_0, end = var_10468_end_0, end_mask = var_10468_end_mask_0, x = var_9937_cast_fp16)[name = tensor("op_10468_cast_fp16")]; tensor var_10475_begin_0 = const()[name = tensor("op_10475_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10475_end_0 = const()[name = tensor("op_10475_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10475_end_mask_0 = const()[name = tensor("op_10475_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10475_cast_fp16 = slice_by_index(begin = var_10475_begin_0, end = var_10475_end_0, end_mask = var_10475_end_mask_0, x = var_9937_cast_fp16)[name = tensor("op_10475_cast_fp16")]; tensor var_10482_begin_0 = const()[name = tensor("op_10482_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10482_end_0 = const()[name = tensor("op_10482_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_10482_end_mask_0 = const()[name = tensor("op_10482_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10482_cast_fp16 = slice_by_index(begin = var_10482_begin_0, end = var_10482_end_0, end_mask = var_10482_end_mask_0, x = var_9941_cast_fp16)[name = tensor("op_10482_cast_fp16")]; tensor var_10489_begin_0 = const()[name = tensor("op_10489_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_10489_end_0 = const()[name = tensor("op_10489_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_10489_end_mask_0 = const()[name = tensor("op_10489_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10489_cast_fp16 = slice_by_index(begin = var_10489_begin_0, end = var_10489_end_0, end_mask = var_10489_end_mask_0, x = var_9941_cast_fp16)[name = tensor("op_10489_cast_fp16")]; tensor var_10496_begin_0 = const()[name = tensor("op_10496_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_10496_end_0 = const()[name = tensor("op_10496_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_10496_end_mask_0 = const()[name = tensor("op_10496_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10496_cast_fp16 = slice_by_index(begin = var_10496_begin_0, end = var_10496_end_0, end_mask = var_10496_end_mask_0, x = var_9941_cast_fp16)[name = tensor("op_10496_cast_fp16")]; tensor var_10503_begin_0 = const()[name = tensor("op_10503_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_10503_end_0 = const()[name = tensor("op_10503_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10503_end_mask_0 = const()[name = tensor("op_10503_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10503_cast_fp16 = slice_by_index(begin = var_10503_begin_0, end = var_10503_end_0, end_mask = var_10503_end_mask_0, x = var_9941_cast_fp16)[name = tensor("op_10503_cast_fp16")]; tensor k_13_perm_0 = const()[name = tensor("k_13_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_10508_begin_0 = const()[name = tensor("op_10508_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10508_end_0 = const()[name = tensor("op_10508_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_10508_end_mask_0 = const()[name = tensor("op_10508_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = key_13_cast_fp16)[name = tensor("transpose_25")]; tensor var_10508_cast_fp16 = slice_by_index(begin = var_10508_begin_0, end = var_10508_end_0, end_mask = var_10508_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10508_cast_fp16")]; tensor var_10512_begin_0 = const()[name = tensor("op_10512_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_10512_end_0 = const()[name = tensor("op_10512_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_10512_end_mask_0 = const()[name = tensor("op_10512_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10512_cast_fp16 = slice_by_index(begin = var_10512_begin_0, end = var_10512_end_0, end_mask = var_10512_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10512_cast_fp16")]; tensor var_10516_begin_0 = const()[name = tensor("op_10516_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_10516_end_0 = const()[name = tensor("op_10516_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_10516_end_mask_0 = const()[name = tensor("op_10516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10516_cast_fp16 = slice_by_index(begin = var_10516_begin_0, end = var_10516_end_0, end_mask = var_10516_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10516_cast_fp16")]; tensor var_10520_begin_0 = const()[name = tensor("op_10520_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_10520_end_0 = const()[name = tensor("op_10520_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_10520_end_mask_0 = const()[name = tensor("op_10520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10520_cast_fp16 = slice_by_index(begin = var_10520_begin_0, end = var_10520_end_0, end_mask = var_10520_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10520_cast_fp16")]; tensor var_10524_begin_0 = const()[name = tensor("op_10524_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_10524_end_0 = const()[name = tensor("op_10524_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_10524_end_mask_0 = const()[name = tensor("op_10524_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10524_cast_fp16 = slice_by_index(begin = var_10524_begin_0, end = var_10524_end_0, end_mask = var_10524_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10524_cast_fp16")]; tensor var_10528_begin_0 = const()[name = tensor("op_10528_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_10528_end_0 = const()[name = tensor("op_10528_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_10528_end_mask_0 = const()[name = tensor("op_10528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10528_cast_fp16 = slice_by_index(begin = var_10528_begin_0, end = var_10528_end_0, end_mask = var_10528_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10528_cast_fp16")]; tensor var_10532_begin_0 = const()[name = tensor("op_10532_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_10532_end_0 = const()[name = tensor("op_10532_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_10532_end_mask_0 = const()[name = tensor("op_10532_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10532_cast_fp16 = slice_by_index(begin = var_10532_begin_0, end = var_10532_end_0, end_mask = var_10532_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10532_cast_fp16")]; tensor var_10536_begin_0 = const()[name = tensor("op_10536_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_10536_end_0 = const()[name = tensor("op_10536_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_10536_end_mask_0 = const()[name = tensor("op_10536_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10536_cast_fp16 = slice_by_index(begin = var_10536_begin_0, end = var_10536_end_0, end_mask = var_10536_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10536_cast_fp16")]; tensor var_10540_begin_0 = const()[name = tensor("op_10540_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_10540_end_0 = const()[name = tensor("op_10540_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_10540_end_mask_0 = const()[name = tensor("op_10540_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10540_cast_fp16 = slice_by_index(begin = var_10540_begin_0, end = var_10540_end_0, end_mask = var_10540_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10540_cast_fp16")]; tensor var_10544_begin_0 = const()[name = tensor("op_10544_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_10544_end_0 = const()[name = tensor("op_10544_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_10544_end_mask_0 = const()[name = tensor("op_10544_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10544_cast_fp16 = slice_by_index(begin = var_10544_begin_0, end = var_10544_end_0, end_mask = var_10544_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10544_cast_fp16")]; tensor var_10548_begin_0 = const()[name = tensor("op_10548_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_10548_end_0 = const()[name = tensor("op_10548_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_10548_end_mask_0 = const()[name = tensor("op_10548_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10548_cast_fp16 = slice_by_index(begin = var_10548_begin_0, end = var_10548_end_0, end_mask = var_10548_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10548_cast_fp16")]; tensor var_10552_begin_0 = const()[name = tensor("op_10552_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_10552_end_0 = const()[name = tensor("op_10552_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_10552_end_mask_0 = const()[name = tensor("op_10552_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10552_cast_fp16 = slice_by_index(begin = var_10552_begin_0, end = var_10552_end_0, end_mask = var_10552_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10552_cast_fp16")]; tensor var_10556_begin_0 = const()[name = tensor("op_10556_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_10556_end_0 = const()[name = tensor("op_10556_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_10556_end_mask_0 = const()[name = tensor("op_10556_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10556_cast_fp16 = slice_by_index(begin = var_10556_begin_0, end = var_10556_end_0, end_mask = var_10556_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10556_cast_fp16")]; tensor var_10560_begin_0 = const()[name = tensor("op_10560_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_10560_end_0 = const()[name = tensor("op_10560_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_10560_end_mask_0 = const()[name = tensor("op_10560_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10560_cast_fp16 = slice_by_index(begin = var_10560_begin_0, end = var_10560_end_0, end_mask = var_10560_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10560_cast_fp16")]; tensor var_10564_begin_0 = const()[name = tensor("op_10564_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_10564_end_0 = const()[name = tensor("op_10564_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_10564_end_mask_0 = const()[name = tensor("op_10564_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10564_cast_fp16 = slice_by_index(begin = var_10564_begin_0, end = var_10564_end_0, end_mask = var_10564_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10564_cast_fp16")]; tensor var_10568_begin_0 = const()[name = tensor("op_10568_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_10568_end_0 = const()[name = tensor("op_10568_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_10568_end_mask_0 = const()[name = tensor("op_10568_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10568_cast_fp16 = slice_by_index(begin = var_10568_begin_0, end = var_10568_end_0, end_mask = var_10568_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10568_cast_fp16")]; tensor var_10572_begin_0 = const()[name = tensor("op_10572_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_10572_end_0 = const()[name = tensor("op_10572_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_10572_end_mask_0 = const()[name = tensor("op_10572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10572_cast_fp16 = slice_by_index(begin = var_10572_begin_0, end = var_10572_end_0, end_mask = var_10572_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10572_cast_fp16")]; tensor var_10576_begin_0 = const()[name = tensor("op_10576_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_10576_end_0 = const()[name = tensor("op_10576_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_10576_end_mask_0 = const()[name = tensor("op_10576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10576_cast_fp16 = slice_by_index(begin = var_10576_begin_0, end = var_10576_end_0, end_mask = var_10576_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10576_cast_fp16")]; tensor var_10580_begin_0 = const()[name = tensor("op_10580_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_10580_end_0 = const()[name = tensor("op_10580_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_10580_end_mask_0 = const()[name = tensor("op_10580_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10580_cast_fp16 = slice_by_index(begin = var_10580_begin_0, end = var_10580_end_0, end_mask = var_10580_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10580_cast_fp16")]; tensor var_10584_begin_0 = const()[name = tensor("op_10584_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_10584_end_0 = const()[name = tensor("op_10584_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_10584_end_mask_0 = const()[name = tensor("op_10584_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_10584_cast_fp16 = slice_by_index(begin = var_10584_begin_0, end = var_10584_end_0, end_mask = var_10584_end_mask_0, x = k_13_cast_fp16)[name = tensor("op_10584_cast_fp16")]; tensor var_10586_begin_0 = const()[name = tensor("op_10586_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_10586_end_0 = const()[name = tensor("op_10586_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_10586_end_mask_0 = const()[name = tensor("op_10586_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10586_cast_fp16 = slice_by_index(begin = var_10586_begin_0, end = var_10586_end_0, end_mask = var_10586_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10586_cast_fp16")]; tensor var_10590_begin_0 = const()[name = tensor("op_10590_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_10590_end_0 = const()[name = tensor("op_10590_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_10590_end_mask_0 = const()[name = tensor("op_10590_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10590_cast_fp16 = slice_by_index(begin = var_10590_begin_0, end = var_10590_end_0, end_mask = var_10590_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10590_cast_fp16")]; tensor var_10594_begin_0 = const()[name = tensor("op_10594_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_10594_end_0 = const()[name = tensor("op_10594_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_10594_end_mask_0 = const()[name = tensor("op_10594_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10594_cast_fp16 = slice_by_index(begin = var_10594_begin_0, end = var_10594_end_0, end_mask = var_10594_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10594_cast_fp16")]; tensor var_10598_begin_0 = const()[name = tensor("op_10598_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_10598_end_0 = const()[name = tensor("op_10598_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_10598_end_mask_0 = const()[name = tensor("op_10598_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10598_cast_fp16 = slice_by_index(begin = var_10598_begin_0, end = var_10598_end_0, end_mask = var_10598_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10598_cast_fp16")]; tensor var_10602_begin_0 = const()[name = tensor("op_10602_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_10602_end_0 = const()[name = tensor("op_10602_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_10602_end_mask_0 = const()[name = tensor("op_10602_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10602_cast_fp16 = slice_by_index(begin = var_10602_begin_0, end = var_10602_end_0, end_mask = var_10602_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10602_cast_fp16")]; tensor var_10606_begin_0 = const()[name = tensor("op_10606_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_10606_end_0 = const()[name = tensor("op_10606_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_10606_end_mask_0 = const()[name = tensor("op_10606_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10606_cast_fp16 = slice_by_index(begin = var_10606_begin_0, end = var_10606_end_0, end_mask = var_10606_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10606_cast_fp16")]; tensor var_10610_begin_0 = const()[name = tensor("op_10610_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_10610_end_0 = const()[name = tensor("op_10610_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_10610_end_mask_0 = const()[name = tensor("op_10610_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10610_cast_fp16 = slice_by_index(begin = var_10610_begin_0, end = var_10610_end_0, end_mask = var_10610_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10610_cast_fp16")]; tensor var_10614_begin_0 = const()[name = tensor("op_10614_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_10614_end_0 = const()[name = tensor("op_10614_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_10614_end_mask_0 = const()[name = tensor("op_10614_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10614_cast_fp16 = slice_by_index(begin = var_10614_begin_0, end = var_10614_end_0, end_mask = var_10614_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10614_cast_fp16")]; tensor var_10618_begin_0 = const()[name = tensor("op_10618_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_10618_end_0 = const()[name = tensor("op_10618_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_10618_end_mask_0 = const()[name = tensor("op_10618_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10618_cast_fp16 = slice_by_index(begin = var_10618_begin_0, end = var_10618_end_0, end_mask = var_10618_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10618_cast_fp16")]; tensor var_10622_begin_0 = const()[name = tensor("op_10622_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_10622_end_0 = const()[name = tensor("op_10622_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_10622_end_mask_0 = const()[name = tensor("op_10622_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10622_cast_fp16 = slice_by_index(begin = var_10622_begin_0, end = var_10622_end_0, end_mask = var_10622_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10622_cast_fp16")]; tensor var_10626_begin_0 = const()[name = tensor("op_10626_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_10626_end_0 = const()[name = tensor("op_10626_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_10626_end_mask_0 = const()[name = tensor("op_10626_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10626_cast_fp16 = slice_by_index(begin = var_10626_begin_0, end = var_10626_end_0, end_mask = var_10626_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10626_cast_fp16")]; tensor var_10630_begin_0 = const()[name = tensor("op_10630_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_10630_end_0 = const()[name = tensor("op_10630_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_10630_end_mask_0 = const()[name = tensor("op_10630_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10630_cast_fp16 = slice_by_index(begin = var_10630_begin_0, end = var_10630_end_0, end_mask = var_10630_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10630_cast_fp16")]; tensor var_10634_begin_0 = const()[name = tensor("op_10634_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_10634_end_0 = const()[name = tensor("op_10634_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_10634_end_mask_0 = const()[name = tensor("op_10634_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10634_cast_fp16 = slice_by_index(begin = var_10634_begin_0, end = var_10634_end_0, end_mask = var_10634_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10634_cast_fp16")]; tensor var_10638_begin_0 = const()[name = tensor("op_10638_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_10638_end_0 = const()[name = tensor("op_10638_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_10638_end_mask_0 = const()[name = tensor("op_10638_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10638_cast_fp16 = slice_by_index(begin = var_10638_begin_0, end = var_10638_end_0, end_mask = var_10638_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10638_cast_fp16")]; tensor var_10642_begin_0 = const()[name = tensor("op_10642_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_10642_end_0 = const()[name = tensor("op_10642_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_10642_end_mask_0 = const()[name = tensor("op_10642_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10642_cast_fp16 = slice_by_index(begin = var_10642_begin_0, end = var_10642_end_0, end_mask = var_10642_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10642_cast_fp16")]; tensor var_10646_begin_0 = const()[name = tensor("op_10646_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_10646_end_0 = const()[name = tensor("op_10646_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_10646_end_mask_0 = const()[name = tensor("op_10646_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10646_cast_fp16 = slice_by_index(begin = var_10646_begin_0, end = var_10646_end_0, end_mask = var_10646_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10646_cast_fp16")]; tensor var_10650_begin_0 = const()[name = tensor("op_10650_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_10650_end_0 = const()[name = tensor("op_10650_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_10650_end_mask_0 = const()[name = tensor("op_10650_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10650_cast_fp16 = slice_by_index(begin = var_10650_begin_0, end = var_10650_end_0, end_mask = var_10650_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10650_cast_fp16")]; tensor var_10654_begin_0 = const()[name = tensor("op_10654_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_10654_end_0 = const()[name = tensor("op_10654_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_10654_end_mask_0 = const()[name = tensor("op_10654_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10654_cast_fp16 = slice_by_index(begin = var_10654_begin_0, end = var_10654_end_0, end_mask = var_10654_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10654_cast_fp16")]; tensor var_10658_begin_0 = const()[name = tensor("op_10658_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_10658_end_0 = const()[name = tensor("op_10658_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_10658_end_mask_0 = const()[name = tensor("op_10658_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10658_cast_fp16 = slice_by_index(begin = var_10658_begin_0, end = var_10658_end_0, end_mask = var_10658_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10658_cast_fp16")]; tensor var_10662_begin_0 = const()[name = tensor("op_10662_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_10662_end_0 = const()[name = tensor("op_10662_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_10662_end_mask_0 = const()[name = tensor("op_10662_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_10662_cast_fp16 = slice_by_index(begin = var_10662_begin_0, end = var_10662_end_0, end_mask = var_10662_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10662_cast_fp16")]; tensor _SplitHeadsQ__mh_w_961_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_961_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_961_equation_0, values = (var_10508_cast_fp16, var_9950_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_961_cast_fp16")]; tensor _SplitHeadsQ__mh_w_963_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_963_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_963_equation_0, values = (var_10508_cast_fp16, var_9957_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_963_cast_fp16")]; tensor _SplitHeadsQ__mh_w_965_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_965_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_965_equation_0, values = (var_10508_cast_fp16, var_9964_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_965_cast_fp16")]; tensor _SplitHeadsQ__mh_w_967_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_967_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_967_equation_0, values = (var_10508_cast_fp16, var_9971_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_967_cast_fp16")]; tensor _SplitHeadsQ__mh_w_969_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_969_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_969_equation_0, values = (var_10512_cast_fp16, var_9978_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_969_cast_fp16")]; tensor _SplitHeadsQ__mh_w_971_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_971_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_971_equation_0, values = (var_10512_cast_fp16, var_9985_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_971_cast_fp16")]; tensor _SplitHeadsQ__mh_w_973_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_973_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_973_equation_0, values = (var_10512_cast_fp16, var_9992_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_973_cast_fp16")]; tensor _SplitHeadsQ__mh_w_975_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_975_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_975_equation_0, values = (var_10512_cast_fp16, var_9999_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_975_cast_fp16")]; tensor _SplitHeadsQ__mh_w_977_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_977_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_977_equation_0, values = (var_10516_cast_fp16, var_10006_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_977_cast_fp16")]; tensor _SplitHeadsQ__mh_w_979_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_979_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_979_equation_0, values = (var_10516_cast_fp16, var_10013_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_979_cast_fp16")]; tensor _SplitHeadsQ__mh_w_981_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_981_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_981_equation_0, values = (var_10516_cast_fp16, var_10020_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_981_cast_fp16")]; tensor _SplitHeadsQ__mh_w_983_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_983_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_983_equation_0, values = (var_10516_cast_fp16, var_10027_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_983_cast_fp16")]; tensor _SplitHeadsQ__mh_w_985_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_985_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_985_equation_0, values = (var_10520_cast_fp16, var_10034_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_985_cast_fp16")]; tensor _SplitHeadsQ__mh_w_987_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_987_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_987_equation_0, values = (var_10520_cast_fp16, var_10041_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_987_cast_fp16")]; tensor _SplitHeadsQ__mh_w_989_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_989_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_989_equation_0, values = (var_10520_cast_fp16, var_10048_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_989_cast_fp16")]; tensor _SplitHeadsQ__mh_w_991_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_991_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_991_equation_0, values = (var_10520_cast_fp16, var_10055_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_991_cast_fp16")]; tensor _SplitHeadsQ__mh_w_993_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_993_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_993_equation_0, values = (var_10524_cast_fp16, var_10062_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_993_cast_fp16")]; tensor _SplitHeadsQ__mh_w_995_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_995_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_995_equation_0, values = (var_10524_cast_fp16, var_10069_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_995_cast_fp16")]; tensor _SplitHeadsQ__mh_w_997_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_997_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_997_equation_0, values = (var_10524_cast_fp16, var_10076_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_997_cast_fp16")]; tensor _SplitHeadsQ__mh_w_999_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_999_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_999_equation_0, values = (var_10524_cast_fp16, var_10083_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_999_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1001_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1001_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1001_equation_0, values = (var_10528_cast_fp16, var_10090_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1001_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1003_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1003_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1003_equation_0, values = (var_10528_cast_fp16, var_10097_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1003_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1005_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1005_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1005_equation_0, values = (var_10528_cast_fp16, var_10104_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1005_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1007_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1007_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1007_equation_0, values = (var_10528_cast_fp16, var_10111_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1007_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1009_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1009_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1009_equation_0, values = (var_10532_cast_fp16, var_10118_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1009_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1011_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1011_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1011_equation_0, values = (var_10532_cast_fp16, var_10125_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1011_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1013_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1013_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1013_equation_0, values = (var_10532_cast_fp16, var_10132_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1013_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1015_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1015_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1015_equation_0, values = (var_10532_cast_fp16, var_10139_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1015_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1017_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1017_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1017_equation_0, values = (var_10536_cast_fp16, var_10146_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1017_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1019_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1019_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1019_equation_0, values = (var_10536_cast_fp16, var_10153_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1019_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1021_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1021_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1021_equation_0, values = (var_10536_cast_fp16, var_10160_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1021_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1023_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1023_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1023_equation_0, values = (var_10536_cast_fp16, var_10167_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1023_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1025_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1025_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1025_equation_0, values = (var_10540_cast_fp16, var_10174_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1025_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1027_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1027_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1027_equation_0, values = (var_10540_cast_fp16, var_10181_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1027_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1029_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1029_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1029_equation_0, values = (var_10540_cast_fp16, var_10188_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1029_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1031_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1031_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1031_equation_0, values = (var_10540_cast_fp16, var_10195_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1031_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1033_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1033_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1033_equation_0, values = (var_10544_cast_fp16, var_10202_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1033_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1035_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1035_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1035_equation_0, values = (var_10544_cast_fp16, var_10209_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1035_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1037_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1037_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1037_equation_0, values = (var_10544_cast_fp16, var_10216_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1037_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1039_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1039_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1039_equation_0, values = (var_10544_cast_fp16, var_10223_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1039_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1041_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1041_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1041_equation_0, values = (var_10548_cast_fp16, var_10230_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1041_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1043_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1043_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1043_equation_0, values = (var_10548_cast_fp16, var_10237_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1043_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1045_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1045_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1045_equation_0, values = (var_10548_cast_fp16, var_10244_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1045_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1047_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1047_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1047_equation_0, values = (var_10548_cast_fp16, var_10251_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1047_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1049_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1049_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1049_equation_0, values = (var_10552_cast_fp16, var_10258_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1049_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1051_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1051_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1051_equation_0, values = (var_10552_cast_fp16, var_10265_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1051_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1053_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1053_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1053_equation_0, values = (var_10552_cast_fp16, var_10272_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1053_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1055_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1055_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1055_equation_0, values = (var_10552_cast_fp16, var_10279_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1055_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1057_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1057_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1057_equation_0, values = (var_10556_cast_fp16, var_10286_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1057_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1059_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1059_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1059_equation_0, values = (var_10556_cast_fp16, var_10293_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1059_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1061_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1061_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1061_equation_0, values = (var_10556_cast_fp16, var_10300_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1061_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1063_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1063_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1063_equation_0, values = (var_10556_cast_fp16, var_10307_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1063_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1065_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1065_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1065_equation_0, values = (var_10560_cast_fp16, var_10314_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1065_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1067_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1067_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1067_equation_0, values = (var_10560_cast_fp16, var_10321_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1067_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1069_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1069_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1069_equation_0, values = (var_10560_cast_fp16, var_10328_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1069_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1071_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1071_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1071_equation_0, values = (var_10560_cast_fp16, var_10335_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1071_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1073_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1073_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1073_equation_0, values = (var_10564_cast_fp16, var_10342_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1073_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1075_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1075_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1075_equation_0, values = (var_10564_cast_fp16, var_10349_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1075_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1077_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1077_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1077_equation_0, values = (var_10564_cast_fp16, var_10356_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1077_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1079_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1079_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1079_equation_0, values = (var_10564_cast_fp16, var_10363_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1079_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1081_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1081_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1081_equation_0, values = (var_10568_cast_fp16, var_10370_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1081_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1083_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1083_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1083_equation_0, values = (var_10568_cast_fp16, var_10377_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1083_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1085_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1085_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1085_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1085_equation_0, values = (var_10568_cast_fp16, var_10384_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1085_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1087_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1087_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1087_equation_0, values = (var_10568_cast_fp16, var_10391_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1087_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1089_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1089_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1089_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1089_equation_0, values = (var_10572_cast_fp16, var_10398_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1089_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1091_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1091_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1091_equation_0, values = (var_10572_cast_fp16, var_10405_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1091_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1093_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1093_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1093_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1093_equation_0, values = (var_10572_cast_fp16, var_10412_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1093_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1095_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1095_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1095_equation_0, values = (var_10572_cast_fp16, var_10419_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1095_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1097_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1097_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1097_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1097_equation_0, values = (var_10576_cast_fp16, var_10426_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1097_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1099_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1099_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1099_equation_0, values = (var_10576_cast_fp16, var_10433_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1099_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1101_equation_0, values = (var_10576_cast_fp16, var_10440_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1103_equation_0, values = (var_10576_cast_fp16, var_10447_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1105_equation_0, values = (var_10580_cast_fp16, var_10454_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1107_equation_0, values = (var_10580_cast_fp16, var_10461_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1109_equation_0, values = (var_10580_cast_fp16, var_10468_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1111_equation_0, values = (var_10580_cast_fp16, var_10475_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1113_equation_0, values = (var_10584_cast_fp16, var_10482_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1115_equation_0, values = (var_10584_cast_fp16, var_10489_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1117_equation_0, values = (var_10584_cast_fp16, var_10496_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1119_equation_0, values = (var_10584_cast_fp16, var_10503_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1119_cast_fp16")]; tensor var_10825_to_fp16 = const()[name = tensor("op_10825_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_961_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_961_cast_fp16, y = var_10825_to_fp16)[name = tensor("aw_chunk_961_cast_fp16")]; tensor var_10827_to_fp16 = const()[name = tensor("op_10827_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_963_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_963_cast_fp16, y = var_10827_to_fp16)[name = tensor("aw_chunk_963_cast_fp16")]; tensor var_10829_to_fp16 = const()[name = tensor("op_10829_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_965_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_965_cast_fp16, y = var_10829_to_fp16)[name = tensor("aw_chunk_965_cast_fp16")]; tensor var_10831_to_fp16 = const()[name = tensor("op_10831_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_967_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_967_cast_fp16, y = var_10831_to_fp16)[name = tensor("aw_chunk_967_cast_fp16")]; tensor var_10833_to_fp16 = const()[name = tensor("op_10833_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_969_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_969_cast_fp16, y = var_10833_to_fp16)[name = tensor("aw_chunk_969_cast_fp16")]; tensor var_10835_to_fp16 = const()[name = tensor("op_10835_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_971_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_971_cast_fp16, y = var_10835_to_fp16)[name = tensor("aw_chunk_971_cast_fp16")]; tensor var_10837_to_fp16 = const()[name = tensor("op_10837_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_973_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_973_cast_fp16, y = var_10837_to_fp16)[name = tensor("aw_chunk_973_cast_fp16")]; tensor var_10839_to_fp16 = const()[name = tensor("op_10839_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_975_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_975_cast_fp16, y = var_10839_to_fp16)[name = tensor("aw_chunk_975_cast_fp16")]; tensor var_10841_to_fp16 = const()[name = tensor("op_10841_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_977_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_977_cast_fp16, y = var_10841_to_fp16)[name = tensor("aw_chunk_977_cast_fp16")]; tensor var_10843_to_fp16 = const()[name = tensor("op_10843_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_979_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_979_cast_fp16, y = var_10843_to_fp16)[name = tensor("aw_chunk_979_cast_fp16")]; tensor var_10845_to_fp16 = const()[name = tensor("op_10845_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_981_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_981_cast_fp16, y = var_10845_to_fp16)[name = tensor("aw_chunk_981_cast_fp16")]; tensor var_10847_to_fp16 = const()[name = tensor("op_10847_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_983_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_983_cast_fp16, y = var_10847_to_fp16)[name = tensor("aw_chunk_983_cast_fp16")]; tensor var_10849_to_fp16 = const()[name = tensor("op_10849_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_985_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_985_cast_fp16, y = var_10849_to_fp16)[name = tensor("aw_chunk_985_cast_fp16")]; tensor var_10851_to_fp16 = const()[name = tensor("op_10851_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_987_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_987_cast_fp16, y = var_10851_to_fp16)[name = tensor("aw_chunk_987_cast_fp16")]; tensor var_10853_to_fp16 = const()[name = tensor("op_10853_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_989_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_989_cast_fp16, y = var_10853_to_fp16)[name = tensor("aw_chunk_989_cast_fp16")]; tensor var_10855_to_fp16 = const()[name = tensor("op_10855_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_991_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_991_cast_fp16, y = var_10855_to_fp16)[name = tensor("aw_chunk_991_cast_fp16")]; tensor var_10857_to_fp16 = const()[name = tensor("op_10857_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_993_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_993_cast_fp16, y = var_10857_to_fp16)[name = tensor("aw_chunk_993_cast_fp16")]; tensor var_10859_to_fp16 = const()[name = tensor("op_10859_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_995_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_995_cast_fp16, y = var_10859_to_fp16)[name = tensor("aw_chunk_995_cast_fp16")]; tensor var_10861_to_fp16 = const()[name = tensor("op_10861_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_997_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_997_cast_fp16, y = var_10861_to_fp16)[name = tensor("aw_chunk_997_cast_fp16")]; tensor var_10863_to_fp16 = const()[name = tensor("op_10863_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_999_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_999_cast_fp16, y = var_10863_to_fp16)[name = tensor("aw_chunk_999_cast_fp16")]; tensor var_10865_to_fp16 = const()[name = tensor("op_10865_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1001_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1001_cast_fp16, y = var_10865_to_fp16)[name = tensor("aw_chunk_1001_cast_fp16")]; tensor var_10867_to_fp16 = const()[name = tensor("op_10867_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1003_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1003_cast_fp16, y = var_10867_to_fp16)[name = tensor("aw_chunk_1003_cast_fp16")]; tensor var_10869_to_fp16 = const()[name = tensor("op_10869_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1005_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1005_cast_fp16, y = var_10869_to_fp16)[name = tensor("aw_chunk_1005_cast_fp16")]; tensor var_10871_to_fp16 = const()[name = tensor("op_10871_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1007_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1007_cast_fp16, y = var_10871_to_fp16)[name = tensor("aw_chunk_1007_cast_fp16")]; tensor var_10873_to_fp16 = const()[name = tensor("op_10873_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1009_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1009_cast_fp16, y = var_10873_to_fp16)[name = tensor("aw_chunk_1009_cast_fp16")]; tensor var_10875_to_fp16 = const()[name = tensor("op_10875_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1011_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1011_cast_fp16, y = var_10875_to_fp16)[name = tensor("aw_chunk_1011_cast_fp16")]; tensor var_10877_to_fp16 = const()[name = tensor("op_10877_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1013_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1013_cast_fp16, y = var_10877_to_fp16)[name = tensor("aw_chunk_1013_cast_fp16")]; tensor var_10879_to_fp16 = const()[name = tensor("op_10879_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1015_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1015_cast_fp16, y = var_10879_to_fp16)[name = tensor("aw_chunk_1015_cast_fp16")]; tensor var_10881_to_fp16 = const()[name = tensor("op_10881_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1017_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1017_cast_fp16, y = var_10881_to_fp16)[name = tensor("aw_chunk_1017_cast_fp16")]; tensor var_10883_to_fp16 = const()[name = tensor("op_10883_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1019_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1019_cast_fp16, y = var_10883_to_fp16)[name = tensor("aw_chunk_1019_cast_fp16")]; tensor var_10885_to_fp16 = const()[name = tensor("op_10885_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1021_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1021_cast_fp16, y = var_10885_to_fp16)[name = tensor("aw_chunk_1021_cast_fp16")]; tensor var_10887_to_fp16 = const()[name = tensor("op_10887_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1023_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1023_cast_fp16, y = var_10887_to_fp16)[name = tensor("aw_chunk_1023_cast_fp16")]; tensor var_10889_to_fp16 = const()[name = tensor("op_10889_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1025_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1025_cast_fp16, y = var_10889_to_fp16)[name = tensor("aw_chunk_1025_cast_fp16")]; tensor var_10891_to_fp16 = const()[name = tensor("op_10891_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1027_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1027_cast_fp16, y = var_10891_to_fp16)[name = tensor("aw_chunk_1027_cast_fp16")]; tensor var_10893_to_fp16 = const()[name = tensor("op_10893_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1029_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1029_cast_fp16, y = var_10893_to_fp16)[name = tensor("aw_chunk_1029_cast_fp16")]; tensor var_10895_to_fp16 = const()[name = tensor("op_10895_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1031_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1031_cast_fp16, y = var_10895_to_fp16)[name = tensor("aw_chunk_1031_cast_fp16")]; tensor var_10897_to_fp16 = const()[name = tensor("op_10897_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1033_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1033_cast_fp16, y = var_10897_to_fp16)[name = tensor("aw_chunk_1033_cast_fp16")]; tensor var_10899_to_fp16 = const()[name = tensor("op_10899_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1035_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1035_cast_fp16, y = var_10899_to_fp16)[name = tensor("aw_chunk_1035_cast_fp16")]; tensor var_10901_to_fp16 = const()[name = tensor("op_10901_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1037_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1037_cast_fp16, y = var_10901_to_fp16)[name = tensor("aw_chunk_1037_cast_fp16")]; tensor var_10903_to_fp16 = const()[name = tensor("op_10903_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1039_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1039_cast_fp16, y = var_10903_to_fp16)[name = tensor("aw_chunk_1039_cast_fp16")]; tensor var_10905_to_fp16 = const()[name = tensor("op_10905_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1041_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1041_cast_fp16, y = var_10905_to_fp16)[name = tensor("aw_chunk_1041_cast_fp16")]; tensor var_10907_to_fp16 = const()[name = tensor("op_10907_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1043_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1043_cast_fp16, y = var_10907_to_fp16)[name = tensor("aw_chunk_1043_cast_fp16")]; tensor var_10909_to_fp16 = const()[name = tensor("op_10909_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1045_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1045_cast_fp16, y = var_10909_to_fp16)[name = tensor("aw_chunk_1045_cast_fp16")]; tensor var_10911_to_fp16 = const()[name = tensor("op_10911_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1047_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1047_cast_fp16, y = var_10911_to_fp16)[name = tensor("aw_chunk_1047_cast_fp16")]; tensor var_10913_to_fp16 = const()[name = tensor("op_10913_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1049_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1049_cast_fp16, y = var_10913_to_fp16)[name = tensor("aw_chunk_1049_cast_fp16")]; tensor var_10915_to_fp16 = const()[name = tensor("op_10915_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1051_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1051_cast_fp16, y = var_10915_to_fp16)[name = tensor("aw_chunk_1051_cast_fp16")]; tensor var_10917_to_fp16 = const()[name = tensor("op_10917_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1053_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1053_cast_fp16, y = var_10917_to_fp16)[name = tensor("aw_chunk_1053_cast_fp16")]; tensor var_10919_to_fp16 = const()[name = tensor("op_10919_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1055_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1055_cast_fp16, y = var_10919_to_fp16)[name = tensor("aw_chunk_1055_cast_fp16")]; tensor var_10921_to_fp16 = const()[name = tensor("op_10921_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1057_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1057_cast_fp16, y = var_10921_to_fp16)[name = tensor("aw_chunk_1057_cast_fp16")]; tensor var_10923_to_fp16 = const()[name = tensor("op_10923_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1059_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1059_cast_fp16, y = var_10923_to_fp16)[name = tensor("aw_chunk_1059_cast_fp16")]; tensor var_10925_to_fp16 = const()[name = tensor("op_10925_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1061_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1061_cast_fp16, y = var_10925_to_fp16)[name = tensor("aw_chunk_1061_cast_fp16")]; tensor var_10927_to_fp16 = const()[name = tensor("op_10927_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1063_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1063_cast_fp16, y = var_10927_to_fp16)[name = tensor("aw_chunk_1063_cast_fp16")]; tensor var_10929_to_fp16 = const()[name = tensor("op_10929_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1065_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1065_cast_fp16, y = var_10929_to_fp16)[name = tensor("aw_chunk_1065_cast_fp16")]; tensor var_10931_to_fp16 = const()[name = tensor("op_10931_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1067_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1067_cast_fp16, y = var_10931_to_fp16)[name = tensor("aw_chunk_1067_cast_fp16")]; tensor var_10933_to_fp16 = const()[name = tensor("op_10933_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1069_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1069_cast_fp16, y = var_10933_to_fp16)[name = tensor("aw_chunk_1069_cast_fp16")]; tensor var_10935_to_fp16 = const()[name = tensor("op_10935_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1071_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1071_cast_fp16, y = var_10935_to_fp16)[name = tensor("aw_chunk_1071_cast_fp16")]; tensor var_10937_to_fp16 = const()[name = tensor("op_10937_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1073_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1073_cast_fp16, y = var_10937_to_fp16)[name = tensor("aw_chunk_1073_cast_fp16")]; tensor var_10939_to_fp16 = const()[name = tensor("op_10939_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1075_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1075_cast_fp16, y = var_10939_to_fp16)[name = tensor("aw_chunk_1075_cast_fp16")]; tensor var_10941_to_fp16 = const()[name = tensor("op_10941_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1077_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1077_cast_fp16, y = var_10941_to_fp16)[name = tensor("aw_chunk_1077_cast_fp16")]; tensor var_10943_to_fp16 = const()[name = tensor("op_10943_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1079_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1079_cast_fp16, y = var_10943_to_fp16)[name = tensor("aw_chunk_1079_cast_fp16")]; tensor var_10945_to_fp16 = const()[name = tensor("op_10945_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1081_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1081_cast_fp16, y = var_10945_to_fp16)[name = tensor("aw_chunk_1081_cast_fp16")]; tensor var_10947_to_fp16 = const()[name = tensor("op_10947_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1083_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1083_cast_fp16, y = var_10947_to_fp16)[name = tensor("aw_chunk_1083_cast_fp16")]; tensor var_10949_to_fp16 = const()[name = tensor("op_10949_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1085_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1085_cast_fp16, y = var_10949_to_fp16)[name = tensor("aw_chunk_1085_cast_fp16")]; tensor var_10951_to_fp16 = const()[name = tensor("op_10951_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1087_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1087_cast_fp16, y = var_10951_to_fp16)[name = tensor("aw_chunk_1087_cast_fp16")]; tensor var_10953_to_fp16 = const()[name = tensor("op_10953_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1089_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1089_cast_fp16, y = var_10953_to_fp16)[name = tensor("aw_chunk_1089_cast_fp16")]; tensor var_10955_to_fp16 = const()[name = tensor("op_10955_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1091_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1091_cast_fp16, y = var_10955_to_fp16)[name = tensor("aw_chunk_1091_cast_fp16")]; tensor var_10957_to_fp16 = const()[name = tensor("op_10957_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1093_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1093_cast_fp16, y = var_10957_to_fp16)[name = tensor("aw_chunk_1093_cast_fp16")]; tensor var_10959_to_fp16 = const()[name = tensor("op_10959_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1095_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1095_cast_fp16, y = var_10959_to_fp16)[name = tensor("aw_chunk_1095_cast_fp16")]; tensor var_10961_to_fp16 = const()[name = tensor("op_10961_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1097_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1097_cast_fp16, y = var_10961_to_fp16)[name = tensor("aw_chunk_1097_cast_fp16")]; tensor var_10963_to_fp16 = const()[name = tensor("op_10963_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1099_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1099_cast_fp16, y = var_10963_to_fp16)[name = tensor("aw_chunk_1099_cast_fp16")]; tensor var_10965_to_fp16 = const()[name = tensor("op_10965_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1101_cast_fp16, y = var_10965_to_fp16)[name = tensor("aw_chunk_1101_cast_fp16")]; tensor var_10967_to_fp16 = const()[name = tensor("op_10967_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1103_cast_fp16, y = var_10967_to_fp16)[name = tensor("aw_chunk_1103_cast_fp16")]; tensor var_10969_to_fp16 = const()[name = tensor("op_10969_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1105_cast_fp16, y = var_10969_to_fp16)[name = tensor("aw_chunk_1105_cast_fp16")]; tensor var_10971_to_fp16 = const()[name = tensor("op_10971_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1107_cast_fp16, y = var_10971_to_fp16)[name = tensor("aw_chunk_1107_cast_fp16")]; tensor var_10973_to_fp16 = const()[name = tensor("op_10973_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1109_cast_fp16, y = var_10973_to_fp16)[name = tensor("aw_chunk_1109_cast_fp16")]; tensor var_10975_to_fp16 = const()[name = tensor("op_10975_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1111_cast_fp16, y = var_10975_to_fp16)[name = tensor("aw_chunk_1111_cast_fp16")]; tensor var_10977_to_fp16 = const()[name = tensor("op_10977_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1113_cast_fp16, y = var_10977_to_fp16)[name = tensor("aw_chunk_1113_cast_fp16")]; tensor var_10979_to_fp16 = const()[name = tensor("op_10979_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1115_cast_fp16, y = var_10979_to_fp16)[name = tensor("aw_chunk_1115_cast_fp16")]; tensor var_10981_to_fp16 = const()[name = tensor("op_10981_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1117_cast_fp16, y = var_10981_to_fp16)[name = tensor("aw_chunk_1117_cast_fp16")]; tensor var_10983_to_fp16 = const()[name = tensor("op_10983_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1119_cast_fp16, y = var_10983_to_fp16)[name = tensor("aw_chunk_1119_cast_fp16")]; tensor var_10985_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_961_cast_fp16)[name = tensor("op_10985_cast_fp16")]; tensor var_10986_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_963_cast_fp16)[name = tensor("op_10986_cast_fp16")]; tensor var_10987_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_965_cast_fp16)[name = tensor("op_10987_cast_fp16")]; tensor var_10988_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_967_cast_fp16)[name = tensor("op_10988_cast_fp16")]; tensor var_10989_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_969_cast_fp16)[name = tensor("op_10989_cast_fp16")]; tensor var_10990_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_971_cast_fp16)[name = tensor("op_10990_cast_fp16")]; tensor var_10991_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_973_cast_fp16)[name = tensor("op_10991_cast_fp16")]; tensor var_10992_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_975_cast_fp16)[name = tensor("op_10992_cast_fp16")]; tensor var_10993_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_977_cast_fp16)[name = tensor("op_10993_cast_fp16")]; tensor var_10994_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_979_cast_fp16)[name = tensor("op_10994_cast_fp16")]; tensor var_10995_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_981_cast_fp16)[name = tensor("op_10995_cast_fp16")]; tensor var_10996_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_983_cast_fp16)[name = tensor("op_10996_cast_fp16")]; tensor var_10997_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_985_cast_fp16)[name = tensor("op_10997_cast_fp16")]; tensor var_10998_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_987_cast_fp16)[name = tensor("op_10998_cast_fp16")]; tensor var_10999_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_989_cast_fp16)[name = tensor("op_10999_cast_fp16")]; tensor var_11000_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_991_cast_fp16)[name = tensor("op_11000_cast_fp16")]; tensor var_11001_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_993_cast_fp16)[name = tensor("op_11001_cast_fp16")]; tensor var_11002_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_995_cast_fp16)[name = tensor("op_11002_cast_fp16")]; tensor var_11003_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_997_cast_fp16)[name = tensor("op_11003_cast_fp16")]; tensor var_11004_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_999_cast_fp16)[name = tensor("op_11004_cast_fp16")]; tensor var_11005_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1001_cast_fp16)[name = tensor("op_11005_cast_fp16")]; tensor var_11006_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1003_cast_fp16)[name = tensor("op_11006_cast_fp16")]; tensor var_11007_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1005_cast_fp16)[name = tensor("op_11007_cast_fp16")]; tensor var_11008_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1007_cast_fp16)[name = tensor("op_11008_cast_fp16")]; tensor var_11009_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1009_cast_fp16)[name = tensor("op_11009_cast_fp16")]; tensor var_11010_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1011_cast_fp16)[name = tensor("op_11010_cast_fp16")]; tensor var_11011_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1013_cast_fp16)[name = tensor("op_11011_cast_fp16")]; tensor var_11012_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1015_cast_fp16)[name = tensor("op_11012_cast_fp16")]; tensor var_11013_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1017_cast_fp16)[name = tensor("op_11013_cast_fp16")]; tensor var_11014_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1019_cast_fp16)[name = tensor("op_11014_cast_fp16")]; tensor var_11015_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1021_cast_fp16)[name = tensor("op_11015_cast_fp16")]; tensor var_11016_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1023_cast_fp16)[name = tensor("op_11016_cast_fp16")]; tensor var_11017_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1025_cast_fp16)[name = tensor("op_11017_cast_fp16")]; tensor var_11018_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1027_cast_fp16)[name = tensor("op_11018_cast_fp16")]; tensor var_11019_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1029_cast_fp16)[name = tensor("op_11019_cast_fp16")]; tensor var_11020_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1031_cast_fp16)[name = tensor("op_11020_cast_fp16")]; tensor var_11021_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1033_cast_fp16)[name = tensor("op_11021_cast_fp16")]; tensor var_11022_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1035_cast_fp16)[name = tensor("op_11022_cast_fp16")]; tensor var_11023_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1037_cast_fp16)[name = tensor("op_11023_cast_fp16")]; tensor var_11024_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1039_cast_fp16)[name = tensor("op_11024_cast_fp16")]; tensor var_11025_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1041_cast_fp16)[name = tensor("op_11025_cast_fp16")]; tensor var_11026_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1043_cast_fp16)[name = tensor("op_11026_cast_fp16")]; tensor var_11027_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1045_cast_fp16)[name = tensor("op_11027_cast_fp16")]; tensor var_11028_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1047_cast_fp16)[name = tensor("op_11028_cast_fp16")]; tensor var_11029_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1049_cast_fp16)[name = tensor("op_11029_cast_fp16")]; tensor var_11030_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1051_cast_fp16)[name = tensor("op_11030_cast_fp16")]; tensor var_11031_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1053_cast_fp16)[name = tensor("op_11031_cast_fp16")]; tensor var_11032_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1055_cast_fp16)[name = tensor("op_11032_cast_fp16")]; tensor var_11033_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1057_cast_fp16)[name = tensor("op_11033_cast_fp16")]; tensor var_11034_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1059_cast_fp16)[name = tensor("op_11034_cast_fp16")]; tensor var_11035_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1061_cast_fp16)[name = tensor("op_11035_cast_fp16")]; tensor var_11036_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1063_cast_fp16)[name = tensor("op_11036_cast_fp16")]; tensor var_11037_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1065_cast_fp16)[name = tensor("op_11037_cast_fp16")]; tensor var_11038_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1067_cast_fp16)[name = tensor("op_11038_cast_fp16")]; tensor var_11039_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1069_cast_fp16)[name = tensor("op_11039_cast_fp16")]; tensor var_11040_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1071_cast_fp16)[name = tensor("op_11040_cast_fp16")]; tensor var_11041_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1073_cast_fp16)[name = tensor("op_11041_cast_fp16")]; tensor var_11042_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1075_cast_fp16)[name = tensor("op_11042_cast_fp16")]; tensor var_11043_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1077_cast_fp16)[name = tensor("op_11043_cast_fp16")]; tensor var_11044_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1079_cast_fp16)[name = tensor("op_11044_cast_fp16")]; tensor var_11045_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1081_cast_fp16)[name = tensor("op_11045_cast_fp16")]; tensor var_11046_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1083_cast_fp16)[name = tensor("op_11046_cast_fp16")]; tensor var_11047_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1085_cast_fp16)[name = tensor("op_11047_cast_fp16")]; tensor var_11048_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1087_cast_fp16)[name = tensor("op_11048_cast_fp16")]; tensor var_11049_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1089_cast_fp16)[name = tensor("op_11049_cast_fp16")]; tensor var_11050_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1091_cast_fp16)[name = tensor("op_11050_cast_fp16")]; tensor var_11051_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1093_cast_fp16)[name = tensor("op_11051_cast_fp16")]; tensor var_11052_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1095_cast_fp16)[name = tensor("op_11052_cast_fp16")]; tensor var_11053_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1097_cast_fp16)[name = tensor("op_11053_cast_fp16")]; tensor var_11054_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1099_cast_fp16)[name = tensor("op_11054_cast_fp16")]; tensor var_11055_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1101_cast_fp16)[name = tensor("op_11055_cast_fp16")]; tensor var_11056_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1103_cast_fp16)[name = tensor("op_11056_cast_fp16")]; tensor var_11057_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1105_cast_fp16)[name = tensor("op_11057_cast_fp16")]; tensor var_11058_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1107_cast_fp16)[name = tensor("op_11058_cast_fp16")]; tensor var_11059_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1109_cast_fp16)[name = tensor("op_11059_cast_fp16")]; tensor var_11060_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1111_cast_fp16)[name = tensor("op_11060_cast_fp16")]; tensor var_11061_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1113_cast_fp16)[name = tensor("op_11061_cast_fp16")]; tensor var_11062_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1115_cast_fp16)[name = tensor("op_11062_cast_fp16")]; tensor var_11063_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1117_cast_fp16)[name = tensor("op_11063_cast_fp16")]; tensor var_11064_cast_fp16 = softmax(axis = var_9783, x = aw_chunk_1119_cast_fp16)[name = tensor("op_11064_cast_fp16")]; tensor var_11066_equation_0 = const()[name = tensor("op_11066_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11066_cast_fp16 = einsum(equation = var_11066_equation_0, values = (var_10586_cast_fp16, var_10985_cast_fp16))[name = tensor("op_11066_cast_fp16")]; tensor var_11068_equation_0 = const()[name = tensor("op_11068_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11068_cast_fp16 = einsum(equation = var_11068_equation_0, values = (var_10586_cast_fp16, var_10986_cast_fp16))[name = tensor("op_11068_cast_fp16")]; tensor var_11070_equation_0 = const()[name = tensor("op_11070_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11070_cast_fp16 = einsum(equation = var_11070_equation_0, values = (var_10586_cast_fp16, var_10987_cast_fp16))[name = tensor("op_11070_cast_fp16")]; tensor var_11072_equation_0 = const()[name = tensor("op_11072_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11072_cast_fp16 = einsum(equation = var_11072_equation_0, values = (var_10586_cast_fp16, var_10988_cast_fp16))[name = tensor("op_11072_cast_fp16")]; tensor var_11074_equation_0 = const()[name = tensor("op_11074_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11074_cast_fp16 = einsum(equation = var_11074_equation_0, values = (var_10590_cast_fp16, var_10989_cast_fp16))[name = tensor("op_11074_cast_fp16")]; tensor var_11076_equation_0 = const()[name = tensor("op_11076_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11076_cast_fp16 = einsum(equation = var_11076_equation_0, values = (var_10590_cast_fp16, var_10990_cast_fp16))[name = tensor("op_11076_cast_fp16")]; tensor var_11078_equation_0 = const()[name = tensor("op_11078_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11078_cast_fp16 = einsum(equation = var_11078_equation_0, values = (var_10590_cast_fp16, var_10991_cast_fp16))[name = tensor("op_11078_cast_fp16")]; tensor var_11080_equation_0 = const()[name = tensor("op_11080_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11080_cast_fp16 = einsum(equation = var_11080_equation_0, values = (var_10590_cast_fp16, var_10992_cast_fp16))[name = tensor("op_11080_cast_fp16")]; tensor var_11082_equation_0 = const()[name = tensor("op_11082_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11082_cast_fp16 = einsum(equation = var_11082_equation_0, values = (var_10594_cast_fp16, var_10993_cast_fp16))[name = tensor("op_11082_cast_fp16")]; tensor var_11084_equation_0 = const()[name = tensor("op_11084_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11084_cast_fp16 = einsum(equation = var_11084_equation_0, values = (var_10594_cast_fp16, var_10994_cast_fp16))[name = tensor("op_11084_cast_fp16")]; tensor var_11086_equation_0 = const()[name = tensor("op_11086_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11086_cast_fp16 = einsum(equation = var_11086_equation_0, values = (var_10594_cast_fp16, var_10995_cast_fp16))[name = tensor("op_11086_cast_fp16")]; tensor var_11088_equation_0 = const()[name = tensor("op_11088_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11088_cast_fp16 = einsum(equation = var_11088_equation_0, values = (var_10594_cast_fp16, var_10996_cast_fp16))[name = tensor("op_11088_cast_fp16")]; tensor var_11090_equation_0 = const()[name = tensor("op_11090_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11090_cast_fp16 = einsum(equation = var_11090_equation_0, values = (var_10598_cast_fp16, var_10997_cast_fp16))[name = tensor("op_11090_cast_fp16")]; tensor var_11092_equation_0 = const()[name = tensor("op_11092_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11092_cast_fp16 = einsum(equation = var_11092_equation_0, values = (var_10598_cast_fp16, var_10998_cast_fp16))[name = tensor("op_11092_cast_fp16")]; tensor var_11094_equation_0 = const()[name = tensor("op_11094_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11094_cast_fp16 = einsum(equation = var_11094_equation_0, values = (var_10598_cast_fp16, var_10999_cast_fp16))[name = tensor("op_11094_cast_fp16")]; tensor var_11096_equation_0 = const()[name = tensor("op_11096_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11096_cast_fp16 = einsum(equation = var_11096_equation_0, values = (var_10598_cast_fp16, var_11000_cast_fp16))[name = tensor("op_11096_cast_fp16")]; tensor var_11098_equation_0 = const()[name = tensor("op_11098_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11098_cast_fp16 = einsum(equation = var_11098_equation_0, values = (var_10602_cast_fp16, var_11001_cast_fp16))[name = tensor("op_11098_cast_fp16")]; tensor var_11100_equation_0 = const()[name = tensor("op_11100_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11100_cast_fp16 = einsum(equation = var_11100_equation_0, values = (var_10602_cast_fp16, var_11002_cast_fp16))[name = tensor("op_11100_cast_fp16")]; tensor var_11102_equation_0 = const()[name = tensor("op_11102_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11102_cast_fp16 = einsum(equation = var_11102_equation_0, values = (var_10602_cast_fp16, var_11003_cast_fp16))[name = tensor("op_11102_cast_fp16")]; tensor var_11104_equation_0 = const()[name = tensor("op_11104_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11104_cast_fp16 = einsum(equation = var_11104_equation_0, values = (var_10602_cast_fp16, var_11004_cast_fp16))[name = tensor("op_11104_cast_fp16")]; tensor var_11106_equation_0 = const()[name = tensor("op_11106_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11106_cast_fp16 = einsum(equation = var_11106_equation_0, values = (var_10606_cast_fp16, var_11005_cast_fp16))[name = tensor("op_11106_cast_fp16")]; tensor var_11108_equation_0 = const()[name = tensor("op_11108_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11108_cast_fp16 = einsum(equation = var_11108_equation_0, values = (var_10606_cast_fp16, var_11006_cast_fp16))[name = tensor("op_11108_cast_fp16")]; tensor var_11110_equation_0 = const()[name = tensor("op_11110_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11110_cast_fp16 = einsum(equation = var_11110_equation_0, values = (var_10606_cast_fp16, var_11007_cast_fp16))[name = tensor("op_11110_cast_fp16")]; tensor var_11112_equation_0 = const()[name = tensor("op_11112_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11112_cast_fp16 = einsum(equation = var_11112_equation_0, values = (var_10606_cast_fp16, var_11008_cast_fp16))[name = tensor("op_11112_cast_fp16")]; tensor var_11114_equation_0 = const()[name = tensor("op_11114_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11114_cast_fp16 = einsum(equation = var_11114_equation_0, values = (var_10610_cast_fp16, var_11009_cast_fp16))[name = tensor("op_11114_cast_fp16")]; tensor var_11116_equation_0 = const()[name = tensor("op_11116_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11116_cast_fp16 = einsum(equation = var_11116_equation_0, values = (var_10610_cast_fp16, var_11010_cast_fp16))[name = tensor("op_11116_cast_fp16")]; tensor var_11118_equation_0 = const()[name = tensor("op_11118_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11118_cast_fp16 = einsum(equation = var_11118_equation_0, values = (var_10610_cast_fp16, var_11011_cast_fp16))[name = tensor("op_11118_cast_fp16")]; tensor var_11120_equation_0 = const()[name = tensor("op_11120_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11120_cast_fp16 = einsum(equation = var_11120_equation_0, values = (var_10610_cast_fp16, var_11012_cast_fp16))[name = tensor("op_11120_cast_fp16")]; tensor var_11122_equation_0 = const()[name = tensor("op_11122_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11122_cast_fp16 = einsum(equation = var_11122_equation_0, values = (var_10614_cast_fp16, var_11013_cast_fp16))[name = tensor("op_11122_cast_fp16")]; tensor var_11124_equation_0 = const()[name = tensor("op_11124_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11124_cast_fp16 = einsum(equation = var_11124_equation_0, values = (var_10614_cast_fp16, var_11014_cast_fp16))[name = tensor("op_11124_cast_fp16")]; tensor var_11126_equation_0 = const()[name = tensor("op_11126_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11126_cast_fp16 = einsum(equation = var_11126_equation_0, values = (var_10614_cast_fp16, var_11015_cast_fp16))[name = tensor("op_11126_cast_fp16")]; tensor var_11128_equation_0 = const()[name = tensor("op_11128_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11128_cast_fp16 = einsum(equation = var_11128_equation_0, values = (var_10614_cast_fp16, var_11016_cast_fp16))[name = tensor("op_11128_cast_fp16")]; tensor var_11130_equation_0 = const()[name = tensor("op_11130_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11130_cast_fp16 = einsum(equation = var_11130_equation_0, values = (var_10618_cast_fp16, var_11017_cast_fp16))[name = tensor("op_11130_cast_fp16")]; tensor var_11132_equation_0 = const()[name = tensor("op_11132_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11132_cast_fp16 = einsum(equation = var_11132_equation_0, values = (var_10618_cast_fp16, var_11018_cast_fp16))[name = tensor("op_11132_cast_fp16")]; tensor var_11134_equation_0 = const()[name = tensor("op_11134_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11134_cast_fp16 = einsum(equation = var_11134_equation_0, values = (var_10618_cast_fp16, var_11019_cast_fp16))[name = tensor("op_11134_cast_fp16")]; tensor var_11136_equation_0 = const()[name = tensor("op_11136_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11136_cast_fp16 = einsum(equation = var_11136_equation_0, values = (var_10618_cast_fp16, var_11020_cast_fp16))[name = tensor("op_11136_cast_fp16")]; tensor var_11138_equation_0 = const()[name = tensor("op_11138_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11138_cast_fp16 = einsum(equation = var_11138_equation_0, values = (var_10622_cast_fp16, var_11021_cast_fp16))[name = tensor("op_11138_cast_fp16")]; tensor var_11140_equation_0 = const()[name = tensor("op_11140_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11140_cast_fp16 = einsum(equation = var_11140_equation_0, values = (var_10622_cast_fp16, var_11022_cast_fp16))[name = tensor("op_11140_cast_fp16")]; tensor var_11142_equation_0 = const()[name = tensor("op_11142_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11142_cast_fp16 = einsum(equation = var_11142_equation_0, values = (var_10622_cast_fp16, var_11023_cast_fp16))[name = tensor("op_11142_cast_fp16")]; tensor var_11144_equation_0 = const()[name = tensor("op_11144_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11144_cast_fp16 = einsum(equation = var_11144_equation_0, values = (var_10622_cast_fp16, var_11024_cast_fp16))[name = tensor("op_11144_cast_fp16")]; tensor var_11146_equation_0 = const()[name = tensor("op_11146_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11146_cast_fp16 = einsum(equation = var_11146_equation_0, values = (var_10626_cast_fp16, var_11025_cast_fp16))[name = tensor("op_11146_cast_fp16")]; tensor var_11148_equation_0 = const()[name = tensor("op_11148_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11148_cast_fp16 = einsum(equation = var_11148_equation_0, values = (var_10626_cast_fp16, var_11026_cast_fp16))[name = tensor("op_11148_cast_fp16")]; tensor var_11150_equation_0 = const()[name = tensor("op_11150_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11150_cast_fp16 = einsum(equation = var_11150_equation_0, values = (var_10626_cast_fp16, var_11027_cast_fp16))[name = tensor("op_11150_cast_fp16")]; tensor var_11152_equation_0 = const()[name = tensor("op_11152_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11152_cast_fp16 = einsum(equation = var_11152_equation_0, values = (var_10626_cast_fp16, var_11028_cast_fp16))[name = tensor("op_11152_cast_fp16")]; tensor var_11154_equation_0 = const()[name = tensor("op_11154_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11154_cast_fp16 = einsum(equation = var_11154_equation_0, values = (var_10630_cast_fp16, var_11029_cast_fp16))[name = tensor("op_11154_cast_fp16")]; tensor var_11156_equation_0 = const()[name = tensor("op_11156_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11156_cast_fp16 = einsum(equation = var_11156_equation_0, values = (var_10630_cast_fp16, var_11030_cast_fp16))[name = tensor("op_11156_cast_fp16")]; tensor var_11158_equation_0 = const()[name = tensor("op_11158_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11158_cast_fp16 = einsum(equation = var_11158_equation_0, values = (var_10630_cast_fp16, var_11031_cast_fp16))[name = tensor("op_11158_cast_fp16")]; tensor var_11160_equation_0 = const()[name = tensor("op_11160_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11160_cast_fp16 = einsum(equation = var_11160_equation_0, values = (var_10630_cast_fp16, var_11032_cast_fp16))[name = tensor("op_11160_cast_fp16")]; tensor var_11162_equation_0 = const()[name = tensor("op_11162_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11162_cast_fp16 = einsum(equation = var_11162_equation_0, values = (var_10634_cast_fp16, var_11033_cast_fp16))[name = tensor("op_11162_cast_fp16")]; tensor var_11164_equation_0 = const()[name = tensor("op_11164_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11164_cast_fp16 = einsum(equation = var_11164_equation_0, values = (var_10634_cast_fp16, var_11034_cast_fp16))[name = tensor("op_11164_cast_fp16")]; tensor var_11166_equation_0 = const()[name = tensor("op_11166_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11166_cast_fp16 = einsum(equation = var_11166_equation_0, values = (var_10634_cast_fp16, var_11035_cast_fp16))[name = tensor("op_11166_cast_fp16")]; tensor var_11168_equation_0 = const()[name = tensor("op_11168_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11168_cast_fp16 = einsum(equation = var_11168_equation_0, values = (var_10634_cast_fp16, var_11036_cast_fp16))[name = tensor("op_11168_cast_fp16")]; tensor var_11170_equation_0 = const()[name = tensor("op_11170_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11170_cast_fp16 = einsum(equation = var_11170_equation_0, values = (var_10638_cast_fp16, var_11037_cast_fp16))[name = tensor("op_11170_cast_fp16")]; tensor var_11172_equation_0 = const()[name = tensor("op_11172_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11172_cast_fp16 = einsum(equation = var_11172_equation_0, values = (var_10638_cast_fp16, var_11038_cast_fp16))[name = tensor("op_11172_cast_fp16")]; tensor var_11174_equation_0 = const()[name = tensor("op_11174_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11174_cast_fp16 = einsum(equation = var_11174_equation_0, values = (var_10638_cast_fp16, var_11039_cast_fp16))[name = tensor("op_11174_cast_fp16")]; tensor var_11176_equation_0 = const()[name = tensor("op_11176_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11176_cast_fp16 = einsum(equation = var_11176_equation_0, values = (var_10638_cast_fp16, var_11040_cast_fp16))[name = tensor("op_11176_cast_fp16")]; tensor var_11178_equation_0 = const()[name = tensor("op_11178_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11178_cast_fp16 = einsum(equation = var_11178_equation_0, values = (var_10642_cast_fp16, var_11041_cast_fp16))[name = tensor("op_11178_cast_fp16")]; tensor var_11180_equation_0 = const()[name = tensor("op_11180_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11180_cast_fp16 = einsum(equation = var_11180_equation_0, values = (var_10642_cast_fp16, var_11042_cast_fp16))[name = tensor("op_11180_cast_fp16")]; tensor var_11182_equation_0 = const()[name = tensor("op_11182_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11182_cast_fp16 = einsum(equation = var_11182_equation_0, values = (var_10642_cast_fp16, var_11043_cast_fp16))[name = tensor("op_11182_cast_fp16")]; tensor var_11184_equation_0 = const()[name = tensor("op_11184_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11184_cast_fp16 = einsum(equation = var_11184_equation_0, values = (var_10642_cast_fp16, var_11044_cast_fp16))[name = tensor("op_11184_cast_fp16")]; tensor var_11186_equation_0 = const()[name = tensor("op_11186_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11186_cast_fp16 = einsum(equation = var_11186_equation_0, values = (var_10646_cast_fp16, var_11045_cast_fp16))[name = tensor("op_11186_cast_fp16")]; tensor var_11188_equation_0 = const()[name = tensor("op_11188_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11188_cast_fp16 = einsum(equation = var_11188_equation_0, values = (var_10646_cast_fp16, var_11046_cast_fp16))[name = tensor("op_11188_cast_fp16")]; tensor var_11190_equation_0 = const()[name = tensor("op_11190_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11190_cast_fp16 = einsum(equation = var_11190_equation_0, values = (var_10646_cast_fp16, var_11047_cast_fp16))[name = tensor("op_11190_cast_fp16")]; tensor var_11192_equation_0 = const()[name = tensor("op_11192_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11192_cast_fp16 = einsum(equation = var_11192_equation_0, values = (var_10646_cast_fp16, var_11048_cast_fp16))[name = tensor("op_11192_cast_fp16")]; tensor var_11194_equation_0 = const()[name = tensor("op_11194_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11194_cast_fp16 = einsum(equation = var_11194_equation_0, values = (var_10650_cast_fp16, var_11049_cast_fp16))[name = tensor("op_11194_cast_fp16")]; tensor var_11196_equation_0 = const()[name = tensor("op_11196_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11196_cast_fp16 = einsum(equation = var_11196_equation_0, values = (var_10650_cast_fp16, var_11050_cast_fp16))[name = tensor("op_11196_cast_fp16")]; tensor var_11198_equation_0 = const()[name = tensor("op_11198_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11198_cast_fp16 = einsum(equation = var_11198_equation_0, values = (var_10650_cast_fp16, var_11051_cast_fp16))[name = tensor("op_11198_cast_fp16")]; tensor var_11200_equation_0 = const()[name = tensor("op_11200_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11200_cast_fp16 = einsum(equation = var_11200_equation_0, values = (var_10650_cast_fp16, var_11052_cast_fp16))[name = tensor("op_11200_cast_fp16")]; tensor var_11202_equation_0 = const()[name = tensor("op_11202_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11202_cast_fp16 = einsum(equation = var_11202_equation_0, values = (var_10654_cast_fp16, var_11053_cast_fp16))[name = tensor("op_11202_cast_fp16")]; tensor var_11204_equation_0 = const()[name = tensor("op_11204_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11204_cast_fp16 = einsum(equation = var_11204_equation_0, values = (var_10654_cast_fp16, var_11054_cast_fp16))[name = tensor("op_11204_cast_fp16")]; tensor var_11206_equation_0 = const()[name = tensor("op_11206_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11206_cast_fp16 = einsum(equation = var_11206_equation_0, values = (var_10654_cast_fp16, var_11055_cast_fp16))[name = tensor("op_11206_cast_fp16")]; tensor var_11208_equation_0 = const()[name = tensor("op_11208_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11208_cast_fp16 = einsum(equation = var_11208_equation_0, values = (var_10654_cast_fp16, var_11056_cast_fp16))[name = tensor("op_11208_cast_fp16")]; tensor var_11210_equation_0 = const()[name = tensor("op_11210_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11210_cast_fp16 = einsum(equation = var_11210_equation_0, values = (var_10658_cast_fp16, var_11057_cast_fp16))[name = tensor("op_11210_cast_fp16")]; tensor var_11212_equation_0 = const()[name = tensor("op_11212_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11212_cast_fp16 = einsum(equation = var_11212_equation_0, values = (var_10658_cast_fp16, var_11058_cast_fp16))[name = tensor("op_11212_cast_fp16")]; tensor var_11214_equation_0 = const()[name = tensor("op_11214_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11214_cast_fp16 = einsum(equation = var_11214_equation_0, values = (var_10658_cast_fp16, var_11059_cast_fp16))[name = tensor("op_11214_cast_fp16")]; tensor var_11216_equation_0 = const()[name = tensor("op_11216_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11216_cast_fp16 = einsum(equation = var_11216_equation_0, values = (var_10658_cast_fp16, var_11060_cast_fp16))[name = tensor("op_11216_cast_fp16")]; tensor var_11218_equation_0 = const()[name = tensor("op_11218_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11218_cast_fp16 = einsum(equation = var_11218_equation_0, values = (var_10662_cast_fp16, var_11061_cast_fp16))[name = tensor("op_11218_cast_fp16")]; tensor var_11220_equation_0 = const()[name = tensor("op_11220_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11220_cast_fp16 = einsum(equation = var_11220_equation_0, values = (var_10662_cast_fp16, var_11062_cast_fp16))[name = tensor("op_11220_cast_fp16")]; tensor var_11222_equation_0 = const()[name = tensor("op_11222_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11222_cast_fp16 = einsum(equation = var_11222_equation_0, values = (var_10662_cast_fp16, var_11063_cast_fp16))[name = tensor("op_11222_cast_fp16")]; tensor var_11224_equation_0 = const()[name = tensor("op_11224_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_11224_cast_fp16 = einsum(equation = var_11224_equation_0, values = (var_10662_cast_fp16, var_11064_cast_fp16))[name = tensor("op_11224_cast_fp16")]; tensor var_11226_interleave_0 = const()[name = tensor("op_11226_interleave_0"), val = tensor(false)]; tensor var_11226_cast_fp16 = concat(axis = var_9758, interleave = var_11226_interleave_0, values = (var_11066_cast_fp16, var_11068_cast_fp16, var_11070_cast_fp16, var_11072_cast_fp16))[name = tensor("op_11226_cast_fp16")]; tensor var_11228_interleave_0 = const()[name = tensor("op_11228_interleave_0"), val = tensor(false)]; tensor var_11228_cast_fp16 = concat(axis = var_9758, interleave = var_11228_interleave_0, values = (var_11074_cast_fp16, var_11076_cast_fp16, var_11078_cast_fp16, var_11080_cast_fp16))[name = tensor("op_11228_cast_fp16")]; tensor var_11230_interleave_0 = const()[name = tensor("op_11230_interleave_0"), val = tensor(false)]; tensor var_11230_cast_fp16 = concat(axis = var_9758, interleave = var_11230_interleave_0, values = (var_11082_cast_fp16, var_11084_cast_fp16, var_11086_cast_fp16, var_11088_cast_fp16))[name = tensor("op_11230_cast_fp16")]; tensor var_11232_interleave_0 = const()[name = tensor("op_11232_interleave_0"), val = tensor(false)]; tensor var_11232_cast_fp16 = concat(axis = var_9758, interleave = var_11232_interleave_0, values = (var_11090_cast_fp16, var_11092_cast_fp16, var_11094_cast_fp16, var_11096_cast_fp16))[name = tensor("op_11232_cast_fp16")]; tensor var_11234_interleave_0 = const()[name = tensor("op_11234_interleave_0"), val = tensor(false)]; tensor var_11234_cast_fp16 = concat(axis = var_9758, interleave = var_11234_interleave_0, values = (var_11098_cast_fp16, var_11100_cast_fp16, var_11102_cast_fp16, var_11104_cast_fp16))[name = tensor("op_11234_cast_fp16")]; tensor var_11236_interleave_0 = const()[name = tensor("op_11236_interleave_0"), val = tensor(false)]; tensor var_11236_cast_fp16 = concat(axis = var_9758, interleave = var_11236_interleave_0, values = (var_11106_cast_fp16, var_11108_cast_fp16, var_11110_cast_fp16, var_11112_cast_fp16))[name = tensor("op_11236_cast_fp16")]; tensor var_11238_interleave_0 = const()[name = tensor("op_11238_interleave_0"), val = tensor(false)]; tensor var_11238_cast_fp16 = concat(axis = var_9758, interleave = var_11238_interleave_0, values = (var_11114_cast_fp16, var_11116_cast_fp16, var_11118_cast_fp16, var_11120_cast_fp16))[name = tensor("op_11238_cast_fp16")]; tensor var_11240_interleave_0 = const()[name = tensor("op_11240_interleave_0"), val = tensor(false)]; tensor var_11240_cast_fp16 = concat(axis = var_9758, interleave = var_11240_interleave_0, values = (var_11122_cast_fp16, var_11124_cast_fp16, var_11126_cast_fp16, var_11128_cast_fp16))[name = tensor("op_11240_cast_fp16")]; tensor var_11242_interleave_0 = const()[name = tensor("op_11242_interleave_0"), val = tensor(false)]; tensor var_11242_cast_fp16 = concat(axis = var_9758, interleave = var_11242_interleave_0, values = (var_11130_cast_fp16, var_11132_cast_fp16, var_11134_cast_fp16, var_11136_cast_fp16))[name = tensor("op_11242_cast_fp16")]; tensor var_11244_interleave_0 = const()[name = tensor("op_11244_interleave_0"), val = tensor(false)]; tensor var_11244_cast_fp16 = concat(axis = var_9758, interleave = var_11244_interleave_0, values = (var_11138_cast_fp16, var_11140_cast_fp16, var_11142_cast_fp16, var_11144_cast_fp16))[name = tensor("op_11244_cast_fp16")]; tensor var_11246_interleave_0 = const()[name = tensor("op_11246_interleave_0"), val = tensor(false)]; tensor var_11246_cast_fp16 = concat(axis = var_9758, interleave = var_11246_interleave_0, values = (var_11146_cast_fp16, var_11148_cast_fp16, var_11150_cast_fp16, var_11152_cast_fp16))[name = tensor("op_11246_cast_fp16")]; tensor var_11248_interleave_0 = const()[name = tensor("op_11248_interleave_0"), val = tensor(false)]; tensor var_11248_cast_fp16 = concat(axis = var_9758, interleave = var_11248_interleave_0, values = (var_11154_cast_fp16, var_11156_cast_fp16, var_11158_cast_fp16, var_11160_cast_fp16))[name = tensor("op_11248_cast_fp16")]; tensor var_11250_interleave_0 = const()[name = tensor("op_11250_interleave_0"), val = tensor(false)]; tensor var_11250_cast_fp16 = concat(axis = var_9758, interleave = var_11250_interleave_0, values = (var_11162_cast_fp16, var_11164_cast_fp16, var_11166_cast_fp16, var_11168_cast_fp16))[name = tensor("op_11250_cast_fp16")]; tensor var_11252_interleave_0 = const()[name = tensor("op_11252_interleave_0"), val = tensor(false)]; tensor var_11252_cast_fp16 = concat(axis = var_9758, interleave = var_11252_interleave_0, values = (var_11170_cast_fp16, var_11172_cast_fp16, var_11174_cast_fp16, var_11176_cast_fp16))[name = tensor("op_11252_cast_fp16")]; tensor var_11254_interleave_0 = const()[name = tensor("op_11254_interleave_0"), val = tensor(false)]; tensor var_11254_cast_fp16 = concat(axis = var_9758, interleave = var_11254_interleave_0, values = (var_11178_cast_fp16, var_11180_cast_fp16, var_11182_cast_fp16, var_11184_cast_fp16))[name = tensor("op_11254_cast_fp16")]; tensor var_11256_interleave_0 = const()[name = tensor("op_11256_interleave_0"), val = tensor(false)]; tensor var_11256_cast_fp16 = concat(axis = var_9758, interleave = var_11256_interleave_0, values = (var_11186_cast_fp16, var_11188_cast_fp16, var_11190_cast_fp16, var_11192_cast_fp16))[name = tensor("op_11256_cast_fp16")]; tensor var_11258_interleave_0 = const()[name = tensor("op_11258_interleave_0"), val = tensor(false)]; tensor var_11258_cast_fp16 = concat(axis = var_9758, interleave = var_11258_interleave_0, values = (var_11194_cast_fp16, var_11196_cast_fp16, var_11198_cast_fp16, var_11200_cast_fp16))[name = tensor("op_11258_cast_fp16")]; tensor var_11260_interleave_0 = const()[name = tensor("op_11260_interleave_0"), val = tensor(false)]; tensor var_11260_cast_fp16 = concat(axis = var_9758, interleave = var_11260_interleave_0, values = (var_11202_cast_fp16, var_11204_cast_fp16, var_11206_cast_fp16, var_11208_cast_fp16))[name = tensor("op_11260_cast_fp16")]; tensor var_11262_interleave_0 = const()[name = tensor("op_11262_interleave_0"), val = tensor(false)]; tensor var_11262_cast_fp16 = concat(axis = var_9758, interleave = var_11262_interleave_0, values = (var_11210_cast_fp16, var_11212_cast_fp16, var_11214_cast_fp16, var_11216_cast_fp16))[name = tensor("op_11262_cast_fp16")]; tensor var_11264_interleave_0 = const()[name = tensor("op_11264_interleave_0"), val = tensor(false)]; tensor var_11264_cast_fp16 = concat(axis = var_9758, interleave = var_11264_interleave_0, values = (var_11218_cast_fp16, var_11220_cast_fp16, var_11222_cast_fp16, var_11224_cast_fp16))[name = tensor("op_11264_cast_fp16")]; tensor input_49_interleave_0 = const()[name = tensor("input_49_interleave_0"), val = tensor(false)]; tensor input_49_cast_fp16 = concat(axis = var_9783, interleave = input_49_interleave_0, values = (var_11226_cast_fp16, var_11228_cast_fp16, var_11230_cast_fp16, var_11232_cast_fp16, var_11234_cast_fp16, var_11236_cast_fp16, var_11238_cast_fp16, var_11240_cast_fp16, var_11242_cast_fp16, var_11244_cast_fp16, var_11246_cast_fp16, var_11248_cast_fp16, var_11250_cast_fp16, var_11252_cast_fp16, var_11254_cast_fp16, var_11256_cast_fp16, var_11258_cast_fp16, var_11260_cast_fp16, var_11262_cast_fp16, var_11264_cast_fp16))[name = tensor("input_49_cast_fp16")]; tensor var_11275_pad_type_0 = const()[name = tensor("op_11275_pad_type_0"), val = tensor("valid")]; tensor var_11275_strides_0 = const()[name = tensor("op_11275_strides_0"), val = tensor([1, 1])]; tensor var_11275_pad_0 = const()[name = tensor("op_11275_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11275_dilations_0 = const()[name = tensor("op_11275_dilations_0"), val = tensor([1, 1])]; tensor var_11275_groups_0 = const()[name = tensor("op_11275_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(97518784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98338048))), name = tensor("layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_6_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98338176)))]; tensor var_11275_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_11275_dilations_0, groups = var_11275_groups_0, pad = var_11275_pad_0, pad_type = var_11275_pad_type_0, strides = var_11275_strides_0, weight = layers_6_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_49_cast_fp16)[name = tensor("op_11275_cast_fp16")]; tensor var_11281_pad_type_0 = const()[name = tensor("op_11281_pad_type_0"), val = tensor("valid")]; tensor var_11281_strides_0 = const()[name = tensor("op_11281_strides_0"), val = tensor([1, 1])]; tensor var_11281_pad_0 = const()[name = tensor("op_11281_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11281_dilations_0 = const()[name = tensor("op_11281_dilations_0"), val = tensor([1, 1])]; tensor var_11281_groups_0 = const()[name = tensor("op_11281_groups_0"), val = tensor(1)]; tensor layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98362688))), name = tensor("layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98340800))), shape = tensor([1280, 1280, 1, 1])]; tensor var_11281_cast_fp16 = conv(dilations = var_11281_dilations_0, groups = var_11281_groups_0, pad = var_11281_pad_0, pad_type = var_11281_pad_type_0, strides = var_11281_strides_0, weight = layers_6_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_49_cast_fp16)[name = tensor("op_11281_cast_fp16")]; tensor obj_27_cast_fp16 = add(x = var_11275_cast_fp16, y = var_11281_cast_fp16)[name = tensor("obj_27_cast_fp16")]; tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; tensor out_27_axes_0 = const()[name = tensor("out_27_axes_0"), val = tensor([1])]; tensor var_11292_to_fp16 = const()[name = tensor("op_11292_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_27_cast_fp16 = layer_norm(axes = out_27_axes_0, epsilon = var_11292_to_fp16, x = inputs_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98567552)))]; tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98570176)))]; tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; tensor var_11310_pad_type_0 = const()[name = tensor("op_11310_pad_type_0"), val = tensor("valid")]; tensor var_11310_strides_0 = const()[name = tensor("op_11310_strides_0"), val = tensor([1, 1])]; tensor var_11310_pad_0 = const()[name = tensor("op_11310_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11310_dilations_0 = const()[name = tensor("op_11310_dilations_0"), val = tensor([1, 1])]; tensor var_11310_groups_0 = const()[name = tensor("op_11310_groups_0"), val = tensor(1)]; tensor layers_6_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(98572800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101849664))), name = tensor("layers_6_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_6_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101849792)))]; tensor var_11310_cast_fp16 = conv(bias = layers_6_fc1_inlier_module_bias_to_fp16, dilations = var_11310_dilations_0, groups = var_11310_groups_0, pad = var_11310_pad_0, pad_type = var_11310_pad_type_0, strides = var_11310_strides_0, weight = layers_6_fc1_inlier_module_weight_to_fp16_palettized, x = input_51_cast_fp16)[name = tensor("op_11310_cast_fp16")]; tensor var_11316_pad_type_0 = const()[name = tensor("op_11316_pad_type_0"), val = tensor("valid")]; tensor var_11316_strides_0 = const()[name = tensor("op_11316_strides_0"), val = tensor([1, 1])]; tensor var_11316_pad_0 = const()[name = tensor("op_11316_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11316_dilations_0 = const()[name = tensor("op_11316_dilations_0"), val = tensor([1, 1])]; tensor var_11316_groups_0 = const()[name = tensor("op_11316_groups_0"), val = tensor(1)]; tensor layers_6_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101881792))), name = tensor("layers_6_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(101860096))), shape = tensor([5120, 1280, 1, 1])]; tensor var_11316_cast_fp16 = conv(dilations = var_11316_dilations_0, groups = var_11316_groups_0, pad = var_11316_pad_0, pad_type = var_11316_pad_type_0, strides = var_11316_strides_0, weight = layers_6_fc1_outlier_module_weight_to_fp16_sparsified, x = input_51_cast_fp16)[name = tensor("op_11316_cast_fp16")]; tensor input_53_cast_fp16 = add(x = var_11310_cast_fp16, y = var_11316_cast_fp16)[name = tensor("input_53_cast_fp16")]; tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; tensor var_11327_pad_type_0 = const()[name = tensor("op_11327_pad_type_0"), val = tensor("valid")]; tensor var_11327_strides_0 = const()[name = tensor("op_11327_strides_0"), val = tensor([1, 1])]; tensor var_11327_pad_0 = const()[name = tensor("op_11327_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11327_dilations_0 = const()[name = tensor("op_11327_dilations_0"), val = tensor([1, 1])]; tensor var_11327_groups_0 = const()[name = tensor("op_11327_groups_0"), val = tensor(1)]; tensor layers_6_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102701056))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105977920))), name = tensor("layers_6_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_6_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_6_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105978048)))]; tensor var_11327_cast_fp16 = conv(bias = layers_6_fc2_inlier_module_bias_to_fp16, dilations = var_11327_dilations_0, groups = var_11327_groups_0, pad = var_11327_pad_0, pad_type = var_11327_pad_type_0, strides = var_11327_strides_0, weight = layers_6_fc2_inlier_module_weight_to_fp16_palettized, x = input_55_cast_fp16)[name = tensor("op_11327_cast_fp16")]; tensor var_11333_pad_type_0 = const()[name = tensor("op_11333_pad_type_0"), val = tensor("valid")]; tensor var_11333_strides_0 = const()[name = tensor("op_11333_strides_0"), val = tensor([1, 1])]; tensor var_11333_pad_0 = const()[name = tensor("op_11333_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11333_dilations_0 = const()[name = tensor("op_11333_dilations_0"), val = tensor([1, 1])]; tensor var_11333_groups_0 = const()[name = tensor("op_11333_groups_0"), val = tensor(1)]; tensor layers_6_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106227584))), name = tensor("layers_6_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105980672))), shape = tensor([1280, 5120, 1, 1])]; tensor var_11333_cast_fp16 = conv(dilations = var_11333_dilations_0, groups = var_11333_groups_0, pad = var_11333_pad_0, pad_type = var_11333_pad_type_0, strides = var_11333_strides_0, weight = layers_6_fc2_outlier_module_weight_to_fp16_sparsified, x = input_55_cast_fp16)[name = tensor("op_11333_cast_fp16")]; tensor hidden_states_17_cast_fp16 = add(x = var_11327_cast_fp16, y = var_11333_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; tensor var_11339 = const()[name = tensor("op_11339"), val = tensor(3)]; tensor var_11364 = const()[name = tensor("op_11364"), val = tensor(1)]; tensor out_29_axes_0 = const()[name = tensor("out_29_axes_0"), val = tensor([1])]; tensor var_11381_to_fp16 = const()[name = tensor("op_11381_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_29_cast_fp16 = layer_norm(axes = out_29_axes_0, epsilon = var_11381_to_fp16, x = inputs_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107046848)))]; tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107049472)))]; tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; tensor var_11403_pad_type_0 = const()[name = tensor("op_11403_pad_type_0"), val = tensor("valid")]; tensor var_11403_strides_0 = const()[name = tensor("op_11403_strides_0"), val = tensor([1, 1])]; tensor var_11403_pad_0 = const()[name = tensor("op_11403_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11403_dilations_0 = const()[name = tensor("op_11403_dilations_0"), val = tensor([1, 1])]; tensor var_11403_groups_0 = const()[name = tensor("op_11403_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107052096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107871360))), name = tensor("layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_7_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107871488)))]; tensor var_11403_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_11403_dilations_0, groups = var_11403_groups_0, pad = var_11403_pad_0, pad_type = var_11403_pad_type_0, strides = var_11403_strides_0, weight = layers_7_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_11403_cast_fp16")]; tensor var_11409_pad_type_0 = const()[name = tensor("op_11409_pad_type_0"), val = tensor("valid")]; tensor var_11409_strides_0 = const()[name = tensor("op_11409_strides_0"), val = tensor([1, 1])]; tensor var_11409_pad_0 = const()[name = tensor("op_11409_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11409_dilations_0 = const()[name = tensor("op_11409_dilations_0"), val = tensor([1, 1])]; tensor var_11409_groups_0 = const()[name = tensor("op_11409_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107923264))), name = tensor("layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107874112))), shape = tensor([1280, 1280, 1, 1])]; tensor var_11409_cast_fp16 = conv(dilations = var_11409_dilations_0, groups = var_11409_groups_0, pad = var_11409_pad_0, pad_type = var_11409_pad_type_0, strides = var_11409_strides_0, weight = layers_7_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_11409_cast_fp16")]; tensor query_15_cast_fp16 = add(x = var_11403_cast_fp16, y = var_11409_cast_fp16)[name = tensor("query_15_cast_fp16")]; tensor var_11418_pad_type_0 = const()[name = tensor("op_11418_pad_type_0"), val = tensor("valid")]; tensor var_11418_strides_0 = const()[name = tensor("op_11418_strides_0"), val = tensor([1, 1])]; tensor var_11418_pad_0 = const()[name = tensor("op_11418_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11418_dilations_0 = const()[name = tensor("op_11418_dilations_0"), val = tensor([1, 1])]; tensor var_11418_groups_0 = const()[name = tensor("op_11418_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108128128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108947392))), name = tensor("layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_11418_cast_fp16 = conv(dilations = var_11418_dilations_0, groups = var_11418_groups_0, pad = var_11418_pad_0, pad_type = var_11418_pad_type_0, strides = var_11418_strides_0, weight = layers_7_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_11418_cast_fp16")]; tensor var_11424_pad_type_0 = const()[name = tensor("op_11424_pad_type_0"), val = tensor("valid")]; tensor var_11424_strides_0 = const()[name = tensor("op_11424_strides_0"), val = tensor([1, 1])]; tensor var_11424_pad_0 = const()[name = tensor("op_11424_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11424_dilations_0 = const()[name = tensor("op_11424_dilations_0"), val = tensor([1, 1])]; tensor var_11424_groups_0 = const()[name = tensor("op_11424_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108983872))), name = tensor("layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108947520))), shape = tensor([1280, 1280, 1, 1])]; tensor var_11424_cast_fp16 = conv(dilations = var_11424_dilations_0, groups = var_11424_groups_0, pad = var_11424_pad_0, pad_type = var_11424_pad_type_0, strides = var_11424_strides_0, weight = layers_7_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_11424_cast_fp16")]; tensor key_15_cast_fp16 = add(x = var_11418_cast_fp16, y = var_11424_cast_fp16)[name = tensor("key_15_cast_fp16")]; tensor var_11434_pad_type_0 = const()[name = tensor("op_11434_pad_type_0"), val = tensor("valid")]; tensor var_11434_strides_0 = const()[name = tensor("op_11434_strides_0"), val = tensor([1, 1])]; tensor var_11434_pad_0 = const()[name = tensor("op_11434_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11434_dilations_0 = const()[name = tensor("op_11434_dilations_0"), val = tensor([1, 1])]; tensor var_11434_groups_0 = const()[name = tensor("op_11434_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(109188736))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110008000))), name = tensor("layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_7_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110008128)))]; tensor var_11434_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_11434_dilations_0, groups = var_11434_groups_0, pad = var_11434_pad_0, pad_type = var_11434_pad_type_0, strides = var_11434_strides_0, weight = layers_7_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_29_cast_fp16)[name = tensor("op_11434_cast_fp16")]; tensor var_11440_pad_type_0 = const()[name = tensor("op_11440_pad_type_0"), val = tensor("valid")]; tensor var_11440_strides_0 = const()[name = tensor("op_11440_strides_0"), val = tensor([1, 1])]; tensor var_11440_pad_0 = const()[name = tensor("op_11440_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_11440_dilations_0 = const()[name = tensor("op_11440_dilations_0"), val = tensor([1, 1])]; tensor var_11440_groups_0 = const()[name = tensor("op_11440_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110034304))), name = tensor("layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110010752))), shape = tensor([1280, 1280, 1, 1])]; tensor var_11440_cast_fp16 = conv(dilations = var_11440_dilations_0, groups = var_11440_groups_0, pad = var_11440_pad_0, pad_type = var_11440_pad_type_0, strides = var_11440_strides_0, weight = layers_7_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_29_cast_fp16)[name = tensor("op_11440_cast_fp16")]; tensor value_15_cast_fp16 = add(x = var_11434_cast_fp16, y = var_11440_cast_fp16)[name = tensor("value_15_cast_fp16")]; tensor var_11446_begin_0 = const()[name = tensor("op_11446_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11446_end_0 = const()[name = tensor("op_11446_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11446_end_mask_0 = const()[name = tensor("op_11446_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11446_cast_fp16 = slice_by_index(begin = var_11446_begin_0, end = var_11446_end_0, end_mask = var_11446_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11446_cast_fp16")]; tensor var_11450_begin_0 = const()[name = tensor("op_11450_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_11450_end_0 = const()[name = tensor("op_11450_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_11450_end_mask_0 = const()[name = tensor("op_11450_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11450_cast_fp16 = slice_by_index(begin = var_11450_begin_0, end = var_11450_end_0, end_mask = var_11450_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11450_cast_fp16")]; tensor var_11454_begin_0 = const()[name = tensor("op_11454_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_11454_end_0 = const()[name = tensor("op_11454_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_11454_end_mask_0 = const()[name = tensor("op_11454_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11454_cast_fp16 = slice_by_index(begin = var_11454_begin_0, end = var_11454_end_0, end_mask = var_11454_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11454_cast_fp16")]; tensor var_11458_begin_0 = const()[name = tensor("op_11458_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_11458_end_0 = const()[name = tensor("op_11458_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_11458_end_mask_0 = const()[name = tensor("op_11458_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11458_cast_fp16 = slice_by_index(begin = var_11458_begin_0, end = var_11458_end_0, end_mask = var_11458_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11458_cast_fp16")]; tensor var_11462_begin_0 = const()[name = tensor("op_11462_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_11462_end_0 = const()[name = tensor("op_11462_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_11462_end_mask_0 = const()[name = tensor("op_11462_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11462_cast_fp16 = slice_by_index(begin = var_11462_begin_0, end = var_11462_end_0, end_mask = var_11462_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11462_cast_fp16")]; tensor var_11466_begin_0 = const()[name = tensor("op_11466_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_11466_end_0 = const()[name = tensor("op_11466_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_11466_end_mask_0 = const()[name = tensor("op_11466_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11466_cast_fp16 = slice_by_index(begin = var_11466_begin_0, end = var_11466_end_0, end_mask = var_11466_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11466_cast_fp16")]; tensor var_11470_begin_0 = const()[name = tensor("op_11470_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_11470_end_0 = const()[name = tensor("op_11470_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_11470_end_mask_0 = const()[name = tensor("op_11470_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11470_cast_fp16 = slice_by_index(begin = var_11470_begin_0, end = var_11470_end_0, end_mask = var_11470_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11470_cast_fp16")]; tensor var_11474_begin_0 = const()[name = tensor("op_11474_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_11474_end_0 = const()[name = tensor("op_11474_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_11474_end_mask_0 = const()[name = tensor("op_11474_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11474_cast_fp16 = slice_by_index(begin = var_11474_begin_0, end = var_11474_end_0, end_mask = var_11474_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11474_cast_fp16")]; tensor var_11478_begin_0 = const()[name = tensor("op_11478_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_11478_end_0 = const()[name = tensor("op_11478_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_11478_end_mask_0 = const()[name = tensor("op_11478_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11478_cast_fp16 = slice_by_index(begin = var_11478_begin_0, end = var_11478_end_0, end_mask = var_11478_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11478_cast_fp16")]; tensor var_11482_begin_0 = const()[name = tensor("op_11482_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_11482_end_0 = const()[name = tensor("op_11482_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_11482_end_mask_0 = const()[name = tensor("op_11482_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11482_cast_fp16 = slice_by_index(begin = var_11482_begin_0, end = var_11482_end_0, end_mask = var_11482_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11482_cast_fp16")]; tensor var_11486_begin_0 = const()[name = tensor("op_11486_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_11486_end_0 = const()[name = tensor("op_11486_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_11486_end_mask_0 = const()[name = tensor("op_11486_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11486_cast_fp16 = slice_by_index(begin = var_11486_begin_0, end = var_11486_end_0, end_mask = var_11486_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11486_cast_fp16")]; tensor var_11490_begin_0 = const()[name = tensor("op_11490_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_11490_end_0 = const()[name = tensor("op_11490_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_11490_end_mask_0 = const()[name = tensor("op_11490_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11490_cast_fp16 = slice_by_index(begin = var_11490_begin_0, end = var_11490_end_0, end_mask = var_11490_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11490_cast_fp16")]; tensor var_11494_begin_0 = const()[name = tensor("op_11494_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_11494_end_0 = const()[name = tensor("op_11494_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_11494_end_mask_0 = const()[name = tensor("op_11494_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11494_cast_fp16 = slice_by_index(begin = var_11494_begin_0, end = var_11494_end_0, end_mask = var_11494_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11494_cast_fp16")]; tensor var_11498_begin_0 = const()[name = tensor("op_11498_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_11498_end_0 = const()[name = tensor("op_11498_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_11498_end_mask_0 = const()[name = tensor("op_11498_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11498_cast_fp16 = slice_by_index(begin = var_11498_begin_0, end = var_11498_end_0, end_mask = var_11498_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11498_cast_fp16")]; tensor var_11502_begin_0 = const()[name = tensor("op_11502_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_11502_end_0 = const()[name = tensor("op_11502_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_11502_end_mask_0 = const()[name = tensor("op_11502_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11502_cast_fp16 = slice_by_index(begin = var_11502_begin_0, end = var_11502_end_0, end_mask = var_11502_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11502_cast_fp16")]; tensor var_11506_begin_0 = const()[name = tensor("op_11506_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_11506_end_0 = const()[name = tensor("op_11506_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_11506_end_mask_0 = const()[name = tensor("op_11506_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11506_cast_fp16 = slice_by_index(begin = var_11506_begin_0, end = var_11506_end_0, end_mask = var_11506_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11506_cast_fp16")]; tensor var_11510_begin_0 = const()[name = tensor("op_11510_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_11510_end_0 = const()[name = tensor("op_11510_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_11510_end_mask_0 = const()[name = tensor("op_11510_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11510_cast_fp16 = slice_by_index(begin = var_11510_begin_0, end = var_11510_end_0, end_mask = var_11510_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11510_cast_fp16")]; tensor var_11514_begin_0 = const()[name = tensor("op_11514_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_11514_end_0 = const()[name = tensor("op_11514_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_11514_end_mask_0 = const()[name = tensor("op_11514_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11514_cast_fp16 = slice_by_index(begin = var_11514_begin_0, end = var_11514_end_0, end_mask = var_11514_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11514_cast_fp16")]; tensor var_11518_begin_0 = const()[name = tensor("op_11518_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_11518_end_0 = const()[name = tensor("op_11518_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_11518_end_mask_0 = const()[name = tensor("op_11518_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11518_cast_fp16 = slice_by_index(begin = var_11518_begin_0, end = var_11518_end_0, end_mask = var_11518_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11518_cast_fp16")]; tensor var_11522_begin_0 = const()[name = tensor("op_11522_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_11522_end_0 = const()[name = tensor("op_11522_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_11522_end_mask_0 = const()[name = tensor("op_11522_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_11522_cast_fp16 = slice_by_index(begin = var_11522_begin_0, end = var_11522_end_0, end_mask = var_11522_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11522_cast_fp16")]; tensor var_11531_begin_0 = const()[name = tensor("op_11531_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11531_end_0 = const()[name = tensor("op_11531_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11531_end_mask_0 = const()[name = tensor("op_11531_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11531_cast_fp16 = slice_by_index(begin = var_11531_begin_0, end = var_11531_end_0, end_mask = var_11531_end_mask_0, x = var_11446_cast_fp16)[name = tensor("op_11531_cast_fp16")]; tensor var_11538_begin_0 = const()[name = tensor("op_11538_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11538_end_0 = const()[name = tensor("op_11538_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11538_end_mask_0 = const()[name = tensor("op_11538_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11538_cast_fp16 = slice_by_index(begin = var_11538_begin_0, end = var_11538_end_0, end_mask = var_11538_end_mask_0, x = var_11446_cast_fp16)[name = tensor("op_11538_cast_fp16")]; tensor var_11545_begin_0 = const()[name = tensor("op_11545_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11545_end_0 = const()[name = tensor("op_11545_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11545_end_mask_0 = const()[name = tensor("op_11545_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11545_cast_fp16 = slice_by_index(begin = var_11545_begin_0, end = var_11545_end_0, end_mask = var_11545_end_mask_0, x = var_11446_cast_fp16)[name = tensor("op_11545_cast_fp16")]; tensor var_11552_begin_0 = const()[name = tensor("op_11552_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11552_end_0 = const()[name = tensor("op_11552_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11552_end_mask_0 = const()[name = tensor("op_11552_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11552_cast_fp16 = slice_by_index(begin = var_11552_begin_0, end = var_11552_end_0, end_mask = var_11552_end_mask_0, x = var_11446_cast_fp16)[name = tensor("op_11552_cast_fp16")]; tensor var_11559_begin_0 = const()[name = tensor("op_11559_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11559_end_0 = const()[name = tensor("op_11559_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11559_end_mask_0 = const()[name = tensor("op_11559_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11559_cast_fp16 = slice_by_index(begin = var_11559_begin_0, end = var_11559_end_0, end_mask = var_11559_end_mask_0, x = var_11450_cast_fp16)[name = tensor("op_11559_cast_fp16")]; tensor var_11566_begin_0 = const()[name = tensor("op_11566_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11566_end_0 = const()[name = tensor("op_11566_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11566_end_mask_0 = const()[name = tensor("op_11566_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11566_cast_fp16 = slice_by_index(begin = var_11566_begin_0, end = var_11566_end_0, end_mask = var_11566_end_mask_0, x = var_11450_cast_fp16)[name = tensor("op_11566_cast_fp16")]; tensor var_11573_begin_0 = const()[name = tensor("op_11573_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11573_end_0 = const()[name = tensor("op_11573_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11573_end_mask_0 = const()[name = tensor("op_11573_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11573_cast_fp16 = slice_by_index(begin = var_11573_begin_0, end = var_11573_end_0, end_mask = var_11573_end_mask_0, x = var_11450_cast_fp16)[name = tensor("op_11573_cast_fp16")]; tensor var_11580_begin_0 = const()[name = tensor("op_11580_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11580_end_0 = const()[name = tensor("op_11580_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11580_end_mask_0 = const()[name = tensor("op_11580_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11580_cast_fp16 = slice_by_index(begin = var_11580_begin_0, end = var_11580_end_0, end_mask = var_11580_end_mask_0, x = var_11450_cast_fp16)[name = tensor("op_11580_cast_fp16")]; tensor var_11587_begin_0 = const()[name = tensor("op_11587_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11587_end_0 = const()[name = tensor("op_11587_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11587_end_mask_0 = const()[name = tensor("op_11587_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11587_cast_fp16 = slice_by_index(begin = var_11587_begin_0, end = var_11587_end_0, end_mask = var_11587_end_mask_0, x = var_11454_cast_fp16)[name = tensor("op_11587_cast_fp16")]; tensor var_11594_begin_0 = const()[name = tensor("op_11594_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11594_end_0 = const()[name = tensor("op_11594_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11594_end_mask_0 = const()[name = tensor("op_11594_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11594_cast_fp16 = slice_by_index(begin = var_11594_begin_0, end = var_11594_end_0, end_mask = var_11594_end_mask_0, x = var_11454_cast_fp16)[name = tensor("op_11594_cast_fp16")]; tensor var_11601_begin_0 = const()[name = tensor("op_11601_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11601_end_0 = const()[name = tensor("op_11601_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11601_end_mask_0 = const()[name = tensor("op_11601_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11601_cast_fp16 = slice_by_index(begin = var_11601_begin_0, end = var_11601_end_0, end_mask = var_11601_end_mask_0, x = var_11454_cast_fp16)[name = tensor("op_11601_cast_fp16")]; tensor var_11608_begin_0 = const()[name = tensor("op_11608_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11608_end_0 = const()[name = tensor("op_11608_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11608_end_mask_0 = const()[name = tensor("op_11608_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11608_cast_fp16 = slice_by_index(begin = var_11608_begin_0, end = var_11608_end_0, end_mask = var_11608_end_mask_0, x = var_11454_cast_fp16)[name = tensor("op_11608_cast_fp16")]; tensor var_11615_begin_0 = const()[name = tensor("op_11615_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11615_end_0 = const()[name = tensor("op_11615_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11615_end_mask_0 = const()[name = tensor("op_11615_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11615_cast_fp16 = slice_by_index(begin = var_11615_begin_0, end = var_11615_end_0, end_mask = var_11615_end_mask_0, x = var_11458_cast_fp16)[name = tensor("op_11615_cast_fp16")]; tensor var_11622_begin_0 = const()[name = tensor("op_11622_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11622_end_0 = const()[name = tensor("op_11622_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11622_end_mask_0 = const()[name = tensor("op_11622_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11622_cast_fp16 = slice_by_index(begin = var_11622_begin_0, end = var_11622_end_0, end_mask = var_11622_end_mask_0, x = var_11458_cast_fp16)[name = tensor("op_11622_cast_fp16")]; tensor var_11629_begin_0 = const()[name = tensor("op_11629_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11629_end_0 = const()[name = tensor("op_11629_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11629_end_mask_0 = const()[name = tensor("op_11629_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11629_cast_fp16 = slice_by_index(begin = var_11629_begin_0, end = var_11629_end_0, end_mask = var_11629_end_mask_0, x = var_11458_cast_fp16)[name = tensor("op_11629_cast_fp16")]; tensor var_11636_begin_0 = const()[name = tensor("op_11636_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11636_end_0 = const()[name = tensor("op_11636_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11636_end_mask_0 = const()[name = tensor("op_11636_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11636_cast_fp16 = slice_by_index(begin = var_11636_begin_0, end = var_11636_end_0, end_mask = var_11636_end_mask_0, x = var_11458_cast_fp16)[name = tensor("op_11636_cast_fp16")]; tensor var_11643_begin_0 = const()[name = tensor("op_11643_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11643_end_0 = const()[name = tensor("op_11643_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11643_end_mask_0 = const()[name = tensor("op_11643_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11643_cast_fp16 = slice_by_index(begin = var_11643_begin_0, end = var_11643_end_0, end_mask = var_11643_end_mask_0, x = var_11462_cast_fp16)[name = tensor("op_11643_cast_fp16")]; tensor var_11650_begin_0 = const()[name = tensor("op_11650_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11650_end_0 = const()[name = tensor("op_11650_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11650_end_mask_0 = const()[name = tensor("op_11650_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11650_cast_fp16 = slice_by_index(begin = var_11650_begin_0, end = var_11650_end_0, end_mask = var_11650_end_mask_0, x = var_11462_cast_fp16)[name = tensor("op_11650_cast_fp16")]; tensor var_11657_begin_0 = const()[name = tensor("op_11657_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11657_end_0 = const()[name = tensor("op_11657_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11657_end_mask_0 = const()[name = tensor("op_11657_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11657_cast_fp16 = slice_by_index(begin = var_11657_begin_0, end = var_11657_end_0, end_mask = var_11657_end_mask_0, x = var_11462_cast_fp16)[name = tensor("op_11657_cast_fp16")]; tensor var_11664_begin_0 = const()[name = tensor("op_11664_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11664_end_0 = const()[name = tensor("op_11664_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11664_end_mask_0 = const()[name = tensor("op_11664_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11664_cast_fp16 = slice_by_index(begin = var_11664_begin_0, end = var_11664_end_0, end_mask = var_11664_end_mask_0, x = var_11462_cast_fp16)[name = tensor("op_11664_cast_fp16")]; tensor var_11671_begin_0 = const()[name = tensor("op_11671_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11671_end_0 = const()[name = tensor("op_11671_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11671_end_mask_0 = const()[name = tensor("op_11671_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11671_cast_fp16 = slice_by_index(begin = var_11671_begin_0, end = var_11671_end_0, end_mask = var_11671_end_mask_0, x = var_11466_cast_fp16)[name = tensor("op_11671_cast_fp16")]; tensor var_11678_begin_0 = const()[name = tensor("op_11678_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11678_end_0 = const()[name = tensor("op_11678_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11678_end_mask_0 = const()[name = tensor("op_11678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11678_cast_fp16 = slice_by_index(begin = var_11678_begin_0, end = var_11678_end_0, end_mask = var_11678_end_mask_0, x = var_11466_cast_fp16)[name = tensor("op_11678_cast_fp16")]; tensor var_11685_begin_0 = const()[name = tensor("op_11685_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11685_end_0 = const()[name = tensor("op_11685_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11685_end_mask_0 = const()[name = tensor("op_11685_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11685_cast_fp16 = slice_by_index(begin = var_11685_begin_0, end = var_11685_end_0, end_mask = var_11685_end_mask_0, x = var_11466_cast_fp16)[name = tensor("op_11685_cast_fp16")]; tensor var_11692_begin_0 = const()[name = tensor("op_11692_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11692_end_0 = const()[name = tensor("op_11692_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11692_end_mask_0 = const()[name = tensor("op_11692_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11692_cast_fp16 = slice_by_index(begin = var_11692_begin_0, end = var_11692_end_0, end_mask = var_11692_end_mask_0, x = var_11466_cast_fp16)[name = tensor("op_11692_cast_fp16")]; tensor var_11699_begin_0 = const()[name = tensor("op_11699_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11699_end_0 = const()[name = tensor("op_11699_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11699_end_mask_0 = const()[name = tensor("op_11699_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11699_cast_fp16 = slice_by_index(begin = var_11699_begin_0, end = var_11699_end_0, end_mask = var_11699_end_mask_0, x = var_11470_cast_fp16)[name = tensor("op_11699_cast_fp16")]; tensor var_11706_begin_0 = const()[name = tensor("op_11706_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11706_end_0 = const()[name = tensor("op_11706_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11706_end_mask_0 = const()[name = tensor("op_11706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11706_cast_fp16 = slice_by_index(begin = var_11706_begin_0, end = var_11706_end_0, end_mask = var_11706_end_mask_0, x = var_11470_cast_fp16)[name = tensor("op_11706_cast_fp16")]; tensor var_11713_begin_0 = const()[name = tensor("op_11713_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11713_end_0 = const()[name = tensor("op_11713_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11713_end_mask_0 = const()[name = tensor("op_11713_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11713_cast_fp16 = slice_by_index(begin = var_11713_begin_0, end = var_11713_end_0, end_mask = var_11713_end_mask_0, x = var_11470_cast_fp16)[name = tensor("op_11713_cast_fp16")]; tensor var_11720_begin_0 = const()[name = tensor("op_11720_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11720_end_0 = const()[name = tensor("op_11720_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11720_end_mask_0 = const()[name = tensor("op_11720_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11720_cast_fp16 = slice_by_index(begin = var_11720_begin_0, end = var_11720_end_0, end_mask = var_11720_end_mask_0, x = var_11470_cast_fp16)[name = tensor("op_11720_cast_fp16")]; tensor var_11727_begin_0 = const()[name = tensor("op_11727_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11727_end_0 = const()[name = tensor("op_11727_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11727_end_mask_0 = const()[name = tensor("op_11727_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11727_cast_fp16 = slice_by_index(begin = var_11727_begin_0, end = var_11727_end_0, end_mask = var_11727_end_mask_0, x = var_11474_cast_fp16)[name = tensor("op_11727_cast_fp16")]; tensor var_11734_begin_0 = const()[name = tensor("op_11734_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11734_end_0 = const()[name = tensor("op_11734_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11734_end_mask_0 = const()[name = tensor("op_11734_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11734_cast_fp16 = slice_by_index(begin = var_11734_begin_0, end = var_11734_end_0, end_mask = var_11734_end_mask_0, x = var_11474_cast_fp16)[name = tensor("op_11734_cast_fp16")]; tensor var_11741_begin_0 = const()[name = tensor("op_11741_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11741_end_0 = const()[name = tensor("op_11741_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11741_end_mask_0 = const()[name = tensor("op_11741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11741_cast_fp16 = slice_by_index(begin = var_11741_begin_0, end = var_11741_end_0, end_mask = var_11741_end_mask_0, x = var_11474_cast_fp16)[name = tensor("op_11741_cast_fp16")]; tensor var_11748_begin_0 = const()[name = tensor("op_11748_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11748_end_0 = const()[name = tensor("op_11748_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11748_end_mask_0 = const()[name = tensor("op_11748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11748_cast_fp16 = slice_by_index(begin = var_11748_begin_0, end = var_11748_end_0, end_mask = var_11748_end_mask_0, x = var_11474_cast_fp16)[name = tensor("op_11748_cast_fp16")]; tensor var_11755_begin_0 = const()[name = tensor("op_11755_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11755_end_0 = const()[name = tensor("op_11755_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11755_end_mask_0 = const()[name = tensor("op_11755_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11755_cast_fp16 = slice_by_index(begin = var_11755_begin_0, end = var_11755_end_0, end_mask = var_11755_end_mask_0, x = var_11478_cast_fp16)[name = tensor("op_11755_cast_fp16")]; tensor var_11762_begin_0 = const()[name = tensor("op_11762_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11762_end_0 = const()[name = tensor("op_11762_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11762_end_mask_0 = const()[name = tensor("op_11762_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11762_cast_fp16 = slice_by_index(begin = var_11762_begin_0, end = var_11762_end_0, end_mask = var_11762_end_mask_0, x = var_11478_cast_fp16)[name = tensor("op_11762_cast_fp16")]; tensor var_11769_begin_0 = const()[name = tensor("op_11769_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11769_end_0 = const()[name = tensor("op_11769_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11769_end_mask_0 = const()[name = tensor("op_11769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11769_cast_fp16 = slice_by_index(begin = var_11769_begin_0, end = var_11769_end_0, end_mask = var_11769_end_mask_0, x = var_11478_cast_fp16)[name = tensor("op_11769_cast_fp16")]; tensor var_11776_begin_0 = const()[name = tensor("op_11776_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11776_end_0 = const()[name = tensor("op_11776_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11776_end_mask_0 = const()[name = tensor("op_11776_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11776_cast_fp16 = slice_by_index(begin = var_11776_begin_0, end = var_11776_end_0, end_mask = var_11776_end_mask_0, x = var_11478_cast_fp16)[name = tensor("op_11776_cast_fp16")]; tensor var_11783_begin_0 = const()[name = tensor("op_11783_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11783_end_0 = const()[name = tensor("op_11783_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11783_end_mask_0 = const()[name = tensor("op_11783_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11783_cast_fp16 = slice_by_index(begin = var_11783_begin_0, end = var_11783_end_0, end_mask = var_11783_end_mask_0, x = var_11482_cast_fp16)[name = tensor("op_11783_cast_fp16")]; tensor var_11790_begin_0 = const()[name = tensor("op_11790_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11790_end_0 = const()[name = tensor("op_11790_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11790_end_mask_0 = const()[name = tensor("op_11790_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11790_cast_fp16 = slice_by_index(begin = var_11790_begin_0, end = var_11790_end_0, end_mask = var_11790_end_mask_0, x = var_11482_cast_fp16)[name = tensor("op_11790_cast_fp16")]; tensor var_11797_begin_0 = const()[name = tensor("op_11797_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11797_end_0 = const()[name = tensor("op_11797_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11797_end_mask_0 = const()[name = tensor("op_11797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11797_cast_fp16 = slice_by_index(begin = var_11797_begin_0, end = var_11797_end_0, end_mask = var_11797_end_mask_0, x = var_11482_cast_fp16)[name = tensor("op_11797_cast_fp16")]; tensor var_11804_begin_0 = const()[name = tensor("op_11804_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11804_end_0 = const()[name = tensor("op_11804_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11804_end_mask_0 = const()[name = tensor("op_11804_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11804_cast_fp16 = slice_by_index(begin = var_11804_begin_0, end = var_11804_end_0, end_mask = var_11804_end_mask_0, x = var_11482_cast_fp16)[name = tensor("op_11804_cast_fp16")]; tensor var_11811_begin_0 = const()[name = tensor("op_11811_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11811_end_0 = const()[name = tensor("op_11811_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11811_end_mask_0 = const()[name = tensor("op_11811_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11811_cast_fp16 = slice_by_index(begin = var_11811_begin_0, end = var_11811_end_0, end_mask = var_11811_end_mask_0, x = var_11486_cast_fp16)[name = tensor("op_11811_cast_fp16")]; tensor var_11818_begin_0 = const()[name = tensor("op_11818_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11818_end_0 = const()[name = tensor("op_11818_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11818_end_mask_0 = const()[name = tensor("op_11818_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11818_cast_fp16 = slice_by_index(begin = var_11818_begin_0, end = var_11818_end_0, end_mask = var_11818_end_mask_0, x = var_11486_cast_fp16)[name = tensor("op_11818_cast_fp16")]; tensor var_11825_begin_0 = const()[name = tensor("op_11825_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11825_end_0 = const()[name = tensor("op_11825_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11825_end_mask_0 = const()[name = tensor("op_11825_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11825_cast_fp16 = slice_by_index(begin = var_11825_begin_0, end = var_11825_end_0, end_mask = var_11825_end_mask_0, x = var_11486_cast_fp16)[name = tensor("op_11825_cast_fp16")]; tensor var_11832_begin_0 = const()[name = tensor("op_11832_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11832_end_0 = const()[name = tensor("op_11832_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11832_end_mask_0 = const()[name = tensor("op_11832_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11832_cast_fp16 = slice_by_index(begin = var_11832_begin_0, end = var_11832_end_0, end_mask = var_11832_end_mask_0, x = var_11486_cast_fp16)[name = tensor("op_11832_cast_fp16")]; tensor var_11839_begin_0 = const()[name = tensor("op_11839_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11839_end_0 = const()[name = tensor("op_11839_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11839_end_mask_0 = const()[name = tensor("op_11839_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11839_cast_fp16 = slice_by_index(begin = var_11839_begin_0, end = var_11839_end_0, end_mask = var_11839_end_mask_0, x = var_11490_cast_fp16)[name = tensor("op_11839_cast_fp16")]; tensor var_11846_begin_0 = const()[name = tensor("op_11846_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11846_end_0 = const()[name = tensor("op_11846_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11846_end_mask_0 = const()[name = tensor("op_11846_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11846_cast_fp16 = slice_by_index(begin = var_11846_begin_0, end = var_11846_end_0, end_mask = var_11846_end_mask_0, x = var_11490_cast_fp16)[name = tensor("op_11846_cast_fp16")]; tensor var_11853_begin_0 = const()[name = tensor("op_11853_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11853_end_0 = const()[name = tensor("op_11853_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11853_end_mask_0 = const()[name = tensor("op_11853_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11853_cast_fp16 = slice_by_index(begin = var_11853_begin_0, end = var_11853_end_0, end_mask = var_11853_end_mask_0, x = var_11490_cast_fp16)[name = tensor("op_11853_cast_fp16")]; tensor var_11860_begin_0 = const()[name = tensor("op_11860_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11860_end_0 = const()[name = tensor("op_11860_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11860_end_mask_0 = const()[name = tensor("op_11860_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11860_cast_fp16 = slice_by_index(begin = var_11860_begin_0, end = var_11860_end_0, end_mask = var_11860_end_mask_0, x = var_11490_cast_fp16)[name = tensor("op_11860_cast_fp16")]; tensor var_11867_begin_0 = const()[name = tensor("op_11867_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11867_end_0 = const()[name = tensor("op_11867_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11867_end_mask_0 = const()[name = tensor("op_11867_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11867_cast_fp16 = slice_by_index(begin = var_11867_begin_0, end = var_11867_end_0, end_mask = var_11867_end_mask_0, x = var_11494_cast_fp16)[name = tensor("op_11867_cast_fp16")]; tensor var_11874_begin_0 = const()[name = tensor("op_11874_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11874_end_0 = const()[name = tensor("op_11874_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11874_end_mask_0 = const()[name = tensor("op_11874_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11874_cast_fp16 = slice_by_index(begin = var_11874_begin_0, end = var_11874_end_0, end_mask = var_11874_end_mask_0, x = var_11494_cast_fp16)[name = tensor("op_11874_cast_fp16")]; tensor var_11881_begin_0 = const()[name = tensor("op_11881_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11881_end_0 = const()[name = tensor("op_11881_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11881_end_mask_0 = const()[name = tensor("op_11881_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11881_cast_fp16 = slice_by_index(begin = var_11881_begin_0, end = var_11881_end_0, end_mask = var_11881_end_mask_0, x = var_11494_cast_fp16)[name = tensor("op_11881_cast_fp16")]; tensor var_11888_begin_0 = const()[name = tensor("op_11888_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11888_end_0 = const()[name = tensor("op_11888_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11888_end_mask_0 = const()[name = tensor("op_11888_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11888_cast_fp16 = slice_by_index(begin = var_11888_begin_0, end = var_11888_end_0, end_mask = var_11888_end_mask_0, x = var_11494_cast_fp16)[name = tensor("op_11888_cast_fp16")]; tensor var_11895_begin_0 = const()[name = tensor("op_11895_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11895_end_0 = const()[name = tensor("op_11895_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11895_end_mask_0 = const()[name = tensor("op_11895_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11895_cast_fp16 = slice_by_index(begin = var_11895_begin_0, end = var_11895_end_0, end_mask = var_11895_end_mask_0, x = var_11498_cast_fp16)[name = tensor("op_11895_cast_fp16")]; tensor var_11902_begin_0 = const()[name = tensor("op_11902_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11902_end_0 = const()[name = tensor("op_11902_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11902_end_mask_0 = const()[name = tensor("op_11902_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11902_cast_fp16 = slice_by_index(begin = var_11902_begin_0, end = var_11902_end_0, end_mask = var_11902_end_mask_0, x = var_11498_cast_fp16)[name = tensor("op_11902_cast_fp16")]; tensor var_11909_begin_0 = const()[name = tensor("op_11909_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11909_end_0 = const()[name = tensor("op_11909_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11909_end_mask_0 = const()[name = tensor("op_11909_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11909_cast_fp16 = slice_by_index(begin = var_11909_begin_0, end = var_11909_end_0, end_mask = var_11909_end_mask_0, x = var_11498_cast_fp16)[name = tensor("op_11909_cast_fp16")]; tensor var_11916_begin_0 = const()[name = tensor("op_11916_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11916_end_0 = const()[name = tensor("op_11916_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11916_end_mask_0 = const()[name = tensor("op_11916_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11916_cast_fp16 = slice_by_index(begin = var_11916_begin_0, end = var_11916_end_0, end_mask = var_11916_end_mask_0, x = var_11498_cast_fp16)[name = tensor("op_11916_cast_fp16")]; tensor var_11923_begin_0 = const()[name = tensor("op_11923_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11923_end_0 = const()[name = tensor("op_11923_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11923_end_mask_0 = const()[name = tensor("op_11923_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11923_cast_fp16 = slice_by_index(begin = var_11923_begin_0, end = var_11923_end_0, end_mask = var_11923_end_mask_0, x = var_11502_cast_fp16)[name = tensor("op_11923_cast_fp16")]; tensor var_11930_begin_0 = const()[name = tensor("op_11930_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11930_end_0 = const()[name = tensor("op_11930_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11930_end_mask_0 = const()[name = tensor("op_11930_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11930_cast_fp16 = slice_by_index(begin = var_11930_begin_0, end = var_11930_end_0, end_mask = var_11930_end_mask_0, x = var_11502_cast_fp16)[name = tensor("op_11930_cast_fp16")]; tensor var_11937_begin_0 = const()[name = tensor("op_11937_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11937_end_0 = const()[name = tensor("op_11937_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11937_end_mask_0 = const()[name = tensor("op_11937_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11937_cast_fp16 = slice_by_index(begin = var_11937_begin_0, end = var_11937_end_0, end_mask = var_11937_end_mask_0, x = var_11502_cast_fp16)[name = tensor("op_11937_cast_fp16")]; tensor var_11944_begin_0 = const()[name = tensor("op_11944_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11944_end_0 = const()[name = tensor("op_11944_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11944_end_mask_0 = const()[name = tensor("op_11944_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11944_cast_fp16 = slice_by_index(begin = var_11944_begin_0, end = var_11944_end_0, end_mask = var_11944_end_mask_0, x = var_11502_cast_fp16)[name = tensor("op_11944_cast_fp16")]; tensor var_11951_begin_0 = const()[name = tensor("op_11951_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11951_end_0 = const()[name = tensor("op_11951_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11951_end_mask_0 = const()[name = tensor("op_11951_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11951_cast_fp16 = slice_by_index(begin = var_11951_begin_0, end = var_11951_end_0, end_mask = var_11951_end_mask_0, x = var_11506_cast_fp16)[name = tensor("op_11951_cast_fp16")]; tensor var_11958_begin_0 = const()[name = tensor("op_11958_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11958_end_0 = const()[name = tensor("op_11958_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11958_end_mask_0 = const()[name = tensor("op_11958_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11958_cast_fp16 = slice_by_index(begin = var_11958_begin_0, end = var_11958_end_0, end_mask = var_11958_end_mask_0, x = var_11506_cast_fp16)[name = tensor("op_11958_cast_fp16")]; tensor var_11965_begin_0 = const()[name = tensor("op_11965_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11965_end_0 = const()[name = tensor("op_11965_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11965_end_mask_0 = const()[name = tensor("op_11965_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11965_cast_fp16 = slice_by_index(begin = var_11965_begin_0, end = var_11965_end_0, end_mask = var_11965_end_mask_0, x = var_11506_cast_fp16)[name = tensor("op_11965_cast_fp16")]; tensor var_11972_begin_0 = const()[name = tensor("op_11972_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_11972_end_0 = const()[name = tensor("op_11972_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_11972_end_mask_0 = const()[name = tensor("op_11972_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11972_cast_fp16 = slice_by_index(begin = var_11972_begin_0, end = var_11972_end_0, end_mask = var_11972_end_mask_0, x = var_11506_cast_fp16)[name = tensor("op_11972_cast_fp16")]; tensor var_11979_begin_0 = const()[name = tensor("op_11979_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_11979_end_0 = const()[name = tensor("op_11979_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_11979_end_mask_0 = const()[name = tensor("op_11979_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11979_cast_fp16 = slice_by_index(begin = var_11979_begin_0, end = var_11979_end_0, end_mask = var_11979_end_mask_0, x = var_11510_cast_fp16)[name = tensor("op_11979_cast_fp16")]; tensor var_11986_begin_0 = const()[name = tensor("op_11986_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_11986_end_0 = const()[name = tensor("op_11986_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_11986_end_mask_0 = const()[name = tensor("op_11986_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11986_cast_fp16 = slice_by_index(begin = var_11986_begin_0, end = var_11986_end_0, end_mask = var_11986_end_mask_0, x = var_11510_cast_fp16)[name = tensor("op_11986_cast_fp16")]; tensor var_11993_begin_0 = const()[name = tensor("op_11993_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_11993_end_0 = const()[name = tensor("op_11993_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_11993_end_mask_0 = const()[name = tensor("op_11993_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_11993_cast_fp16 = slice_by_index(begin = var_11993_begin_0, end = var_11993_end_0, end_mask = var_11993_end_mask_0, x = var_11510_cast_fp16)[name = tensor("op_11993_cast_fp16")]; tensor var_12000_begin_0 = const()[name = tensor("op_12000_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_12000_end_0 = const()[name = tensor("op_12000_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_12000_end_mask_0 = const()[name = tensor("op_12000_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12000_cast_fp16 = slice_by_index(begin = var_12000_begin_0, end = var_12000_end_0, end_mask = var_12000_end_mask_0, x = var_11510_cast_fp16)[name = tensor("op_12000_cast_fp16")]; tensor var_12007_begin_0 = const()[name = tensor("op_12007_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12007_end_0 = const()[name = tensor("op_12007_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_12007_end_mask_0 = const()[name = tensor("op_12007_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12007_cast_fp16 = slice_by_index(begin = var_12007_begin_0, end = var_12007_end_0, end_mask = var_12007_end_mask_0, x = var_11514_cast_fp16)[name = tensor("op_12007_cast_fp16")]; tensor var_12014_begin_0 = const()[name = tensor("op_12014_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_12014_end_0 = const()[name = tensor("op_12014_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_12014_end_mask_0 = const()[name = tensor("op_12014_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12014_cast_fp16 = slice_by_index(begin = var_12014_begin_0, end = var_12014_end_0, end_mask = var_12014_end_mask_0, x = var_11514_cast_fp16)[name = tensor("op_12014_cast_fp16")]; tensor var_12021_begin_0 = const()[name = tensor("op_12021_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_12021_end_0 = const()[name = tensor("op_12021_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_12021_end_mask_0 = const()[name = tensor("op_12021_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12021_cast_fp16 = slice_by_index(begin = var_12021_begin_0, end = var_12021_end_0, end_mask = var_12021_end_mask_0, x = var_11514_cast_fp16)[name = tensor("op_12021_cast_fp16")]; tensor var_12028_begin_0 = const()[name = tensor("op_12028_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_12028_end_0 = const()[name = tensor("op_12028_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_12028_end_mask_0 = const()[name = tensor("op_12028_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12028_cast_fp16 = slice_by_index(begin = var_12028_begin_0, end = var_12028_end_0, end_mask = var_12028_end_mask_0, x = var_11514_cast_fp16)[name = tensor("op_12028_cast_fp16")]; tensor var_12035_begin_0 = const()[name = tensor("op_12035_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12035_end_0 = const()[name = tensor("op_12035_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_12035_end_mask_0 = const()[name = tensor("op_12035_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12035_cast_fp16 = slice_by_index(begin = var_12035_begin_0, end = var_12035_end_0, end_mask = var_12035_end_mask_0, x = var_11518_cast_fp16)[name = tensor("op_12035_cast_fp16")]; tensor var_12042_begin_0 = const()[name = tensor("op_12042_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_12042_end_0 = const()[name = tensor("op_12042_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_12042_end_mask_0 = const()[name = tensor("op_12042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12042_cast_fp16 = slice_by_index(begin = var_12042_begin_0, end = var_12042_end_0, end_mask = var_12042_end_mask_0, x = var_11518_cast_fp16)[name = tensor("op_12042_cast_fp16")]; tensor var_12049_begin_0 = const()[name = tensor("op_12049_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_12049_end_0 = const()[name = tensor("op_12049_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_12049_end_mask_0 = const()[name = tensor("op_12049_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12049_cast_fp16 = slice_by_index(begin = var_12049_begin_0, end = var_12049_end_0, end_mask = var_12049_end_mask_0, x = var_11518_cast_fp16)[name = tensor("op_12049_cast_fp16")]; tensor var_12056_begin_0 = const()[name = tensor("op_12056_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_12056_end_0 = const()[name = tensor("op_12056_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_12056_end_mask_0 = const()[name = tensor("op_12056_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12056_cast_fp16 = slice_by_index(begin = var_12056_begin_0, end = var_12056_end_0, end_mask = var_12056_end_mask_0, x = var_11518_cast_fp16)[name = tensor("op_12056_cast_fp16")]; tensor var_12063_begin_0 = const()[name = tensor("op_12063_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12063_end_0 = const()[name = tensor("op_12063_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_12063_end_mask_0 = const()[name = tensor("op_12063_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12063_cast_fp16 = slice_by_index(begin = var_12063_begin_0, end = var_12063_end_0, end_mask = var_12063_end_mask_0, x = var_11522_cast_fp16)[name = tensor("op_12063_cast_fp16")]; tensor var_12070_begin_0 = const()[name = tensor("op_12070_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_12070_end_0 = const()[name = tensor("op_12070_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_12070_end_mask_0 = const()[name = tensor("op_12070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12070_cast_fp16 = slice_by_index(begin = var_12070_begin_0, end = var_12070_end_0, end_mask = var_12070_end_mask_0, x = var_11522_cast_fp16)[name = tensor("op_12070_cast_fp16")]; tensor var_12077_begin_0 = const()[name = tensor("op_12077_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_12077_end_0 = const()[name = tensor("op_12077_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_12077_end_mask_0 = const()[name = tensor("op_12077_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12077_cast_fp16 = slice_by_index(begin = var_12077_begin_0, end = var_12077_end_0, end_mask = var_12077_end_mask_0, x = var_11522_cast_fp16)[name = tensor("op_12077_cast_fp16")]; tensor var_12084_begin_0 = const()[name = tensor("op_12084_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_12084_end_0 = const()[name = tensor("op_12084_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_12084_end_mask_0 = const()[name = tensor("op_12084_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12084_cast_fp16 = slice_by_index(begin = var_12084_begin_0, end = var_12084_end_0, end_mask = var_12084_end_mask_0, x = var_11522_cast_fp16)[name = tensor("op_12084_cast_fp16")]; tensor k_15_perm_0 = const()[name = tensor("k_15_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_12089_begin_0 = const()[name = tensor("op_12089_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12089_end_0 = const()[name = tensor("op_12089_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_12089_end_mask_0 = const()[name = tensor("op_12089_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = key_15_cast_fp16)[name = tensor("transpose_24")]; tensor var_12089_cast_fp16 = slice_by_index(begin = var_12089_begin_0, end = var_12089_end_0, end_mask = var_12089_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12089_cast_fp16")]; tensor var_12093_begin_0 = const()[name = tensor("op_12093_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_12093_end_0 = const()[name = tensor("op_12093_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_12093_end_mask_0 = const()[name = tensor("op_12093_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12093_cast_fp16 = slice_by_index(begin = var_12093_begin_0, end = var_12093_end_0, end_mask = var_12093_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12093_cast_fp16")]; tensor var_12097_begin_0 = const()[name = tensor("op_12097_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_12097_end_0 = const()[name = tensor("op_12097_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_12097_end_mask_0 = const()[name = tensor("op_12097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12097_cast_fp16 = slice_by_index(begin = var_12097_begin_0, end = var_12097_end_0, end_mask = var_12097_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12097_cast_fp16")]; tensor var_12101_begin_0 = const()[name = tensor("op_12101_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_12101_end_0 = const()[name = tensor("op_12101_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_12101_end_mask_0 = const()[name = tensor("op_12101_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12101_cast_fp16 = slice_by_index(begin = var_12101_begin_0, end = var_12101_end_0, end_mask = var_12101_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12101_cast_fp16")]; tensor var_12105_begin_0 = const()[name = tensor("op_12105_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_12105_end_0 = const()[name = tensor("op_12105_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_12105_end_mask_0 = const()[name = tensor("op_12105_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12105_cast_fp16 = slice_by_index(begin = var_12105_begin_0, end = var_12105_end_0, end_mask = var_12105_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12105_cast_fp16")]; tensor var_12109_begin_0 = const()[name = tensor("op_12109_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_12109_end_0 = const()[name = tensor("op_12109_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_12109_end_mask_0 = const()[name = tensor("op_12109_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12109_cast_fp16 = slice_by_index(begin = var_12109_begin_0, end = var_12109_end_0, end_mask = var_12109_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12109_cast_fp16")]; tensor var_12113_begin_0 = const()[name = tensor("op_12113_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_12113_end_0 = const()[name = tensor("op_12113_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_12113_end_mask_0 = const()[name = tensor("op_12113_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12113_cast_fp16 = slice_by_index(begin = var_12113_begin_0, end = var_12113_end_0, end_mask = var_12113_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12113_cast_fp16")]; tensor var_12117_begin_0 = const()[name = tensor("op_12117_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_12117_end_0 = const()[name = tensor("op_12117_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_12117_end_mask_0 = const()[name = tensor("op_12117_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12117_cast_fp16 = slice_by_index(begin = var_12117_begin_0, end = var_12117_end_0, end_mask = var_12117_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12117_cast_fp16")]; tensor var_12121_begin_0 = const()[name = tensor("op_12121_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_12121_end_0 = const()[name = tensor("op_12121_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_12121_end_mask_0 = const()[name = tensor("op_12121_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12121_cast_fp16 = slice_by_index(begin = var_12121_begin_0, end = var_12121_end_0, end_mask = var_12121_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12121_cast_fp16")]; tensor var_12125_begin_0 = const()[name = tensor("op_12125_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_12125_end_0 = const()[name = tensor("op_12125_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_12125_end_mask_0 = const()[name = tensor("op_12125_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12125_cast_fp16 = slice_by_index(begin = var_12125_begin_0, end = var_12125_end_0, end_mask = var_12125_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12125_cast_fp16")]; tensor var_12129_begin_0 = const()[name = tensor("op_12129_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_12129_end_0 = const()[name = tensor("op_12129_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_12129_end_mask_0 = const()[name = tensor("op_12129_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12129_cast_fp16 = slice_by_index(begin = var_12129_begin_0, end = var_12129_end_0, end_mask = var_12129_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12129_cast_fp16")]; tensor var_12133_begin_0 = const()[name = tensor("op_12133_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_12133_end_0 = const()[name = tensor("op_12133_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_12133_end_mask_0 = const()[name = tensor("op_12133_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12133_cast_fp16 = slice_by_index(begin = var_12133_begin_0, end = var_12133_end_0, end_mask = var_12133_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12133_cast_fp16")]; tensor var_12137_begin_0 = const()[name = tensor("op_12137_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_12137_end_0 = const()[name = tensor("op_12137_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_12137_end_mask_0 = const()[name = tensor("op_12137_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12137_cast_fp16 = slice_by_index(begin = var_12137_begin_0, end = var_12137_end_0, end_mask = var_12137_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12137_cast_fp16")]; tensor var_12141_begin_0 = const()[name = tensor("op_12141_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_12141_end_0 = const()[name = tensor("op_12141_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_12141_end_mask_0 = const()[name = tensor("op_12141_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12141_cast_fp16 = slice_by_index(begin = var_12141_begin_0, end = var_12141_end_0, end_mask = var_12141_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12141_cast_fp16")]; tensor var_12145_begin_0 = const()[name = tensor("op_12145_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_12145_end_0 = const()[name = tensor("op_12145_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_12145_end_mask_0 = const()[name = tensor("op_12145_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12145_cast_fp16 = slice_by_index(begin = var_12145_begin_0, end = var_12145_end_0, end_mask = var_12145_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12145_cast_fp16")]; tensor var_12149_begin_0 = const()[name = tensor("op_12149_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_12149_end_0 = const()[name = tensor("op_12149_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_12149_end_mask_0 = const()[name = tensor("op_12149_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12149_cast_fp16 = slice_by_index(begin = var_12149_begin_0, end = var_12149_end_0, end_mask = var_12149_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12149_cast_fp16")]; tensor var_12153_begin_0 = const()[name = tensor("op_12153_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_12153_end_0 = const()[name = tensor("op_12153_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_12153_end_mask_0 = const()[name = tensor("op_12153_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12153_cast_fp16 = slice_by_index(begin = var_12153_begin_0, end = var_12153_end_0, end_mask = var_12153_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12153_cast_fp16")]; tensor var_12157_begin_0 = const()[name = tensor("op_12157_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_12157_end_0 = const()[name = tensor("op_12157_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_12157_end_mask_0 = const()[name = tensor("op_12157_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12157_cast_fp16 = slice_by_index(begin = var_12157_begin_0, end = var_12157_end_0, end_mask = var_12157_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12157_cast_fp16")]; tensor var_12161_begin_0 = const()[name = tensor("op_12161_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_12161_end_0 = const()[name = tensor("op_12161_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_12161_end_mask_0 = const()[name = tensor("op_12161_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12161_cast_fp16 = slice_by_index(begin = var_12161_begin_0, end = var_12161_end_0, end_mask = var_12161_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12161_cast_fp16")]; tensor var_12165_begin_0 = const()[name = tensor("op_12165_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_12165_end_0 = const()[name = tensor("op_12165_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_12165_end_mask_0 = const()[name = tensor("op_12165_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_12165_cast_fp16 = slice_by_index(begin = var_12165_begin_0, end = var_12165_end_0, end_mask = var_12165_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_12165_cast_fp16")]; tensor var_12167_begin_0 = const()[name = tensor("op_12167_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_12167_end_0 = const()[name = tensor("op_12167_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_12167_end_mask_0 = const()[name = tensor("op_12167_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12167_cast_fp16 = slice_by_index(begin = var_12167_begin_0, end = var_12167_end_0, end_mask = var_12167_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12167_cast_fp16")]; tensor var_12171_begin_0 = const()[name = tensor("op_12171_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_12171_end_0 = const()[name = tensor("op_12171_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_12171_end_mask_0 = const()[name = tensor("op_12171_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12171_cast_fp16 = slice_by_index(begin = var_12171_begin_0, end = var_12171_end_0, end_mask = var_12171_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12171_cast_fp16")]; tensor var_12175_begin_0 = const()[name = tensor("op_12175_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_12175_end_0 = const()[name = tensor("op_12175_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_12175_end_mask_0 = const()[name = tensor("op_12175_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12175_cast_fp16 = slice_by_index(begin = var_12175_begin_0, end = var_12175_end_0, end_mask = var_12175_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12175_cast_fp16")]; tensor var_12179_begin_0 = const()[name = tensor("op_12179_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_12179_end_0 = const()[name = tensor("op_12179_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_12179_end_mask_0 = const()[name = tensor("op_12179_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12179_cast_fp16 = slice_by_index(begin = var_12179_begin_0, end = var_12179_end_0, end_mask = var_12179_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12179_cast_fp16")]; tensor var_12183_begin_0 = const()[name = tensor("op_12183_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_12183_end_0 = const()[name = tensor("op_12183_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_12183_end_mask_0 = const()[name = tensor("op_12183_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12183_cast_fp16 = slice_by_index(begin = var_12183_begin_0, end = var_12183_end_0, end_mask = var_12183_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12183_cast_fp16")]; tensor var_12187_begin_0 = const()[name = tensor("op_12187_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_12187_end_0 = const()[name = tensor("op_12187_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_12187_end_mask_0 = const()[name = tensor("op_12187_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12187_cast_fp16 = slice_by_index(begin = var_12187_begin_0, end = var_12187_end_0, end_mask = var_12187_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12187_cast_fp16")]; tensor var_12191_begin_0 = const()[name = tensor("op_12191_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_12191_end_0 = const()[name = tensor("op_12191_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_12191_end_mask_0 = const()[name = tensor("op_12191_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12191_cast_fp16 = slice_by_index(begin = var_12191_begin_0, end = var_12191_end_0, end_mask = var_12191_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12191_cast_fp16")]; tensor var_12195_begin_0 = const()[name = tensor("op_12195_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_12195_end_0 = const()[name = tensor("op_12195_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_12195_end_mask_0 = const()[name = tensor("op_12195_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12195_cast_fp16 = slice_by_index(begin = var_12195_begin_0, end = var_12195_end_0, end_mask = var_12195_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12195_cast_fp16")]; tensor var_12199_begin_0 = const()[name = tensor("op_12199_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_12199_end_0 = const()[name = tensor("op_12199_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_12199_end_mask_0 = const()[name = tensor("op_12199_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12199_cast_fp16 = slice_by_index(begin = var_12199_begin_0, end = var_12199_end_0, end_mask = var_12199_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12199_cast_fp16")]; tensor var_12203_begin_0 = const()[name = tensor("op_12203_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_12203_end_0 = const()[name = tensor("op_12203_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_12203_end_mask_0 = const()[name = tensor("op_12203_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12203_cast_fp16 = slice_by_index(begin = var_12203_begin_0, end = var_12203_end_0, end_mask = var_12203_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12203_cast_fp16")]; tensor var_12207_begin_0 = const()[name = tensor("op_12207_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_12207_end_0 = const()[name = tensor("op_12207_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_12207_end_mask_0 = const()[name = tensor("op_12207_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12207_cast_fp16 = slice_by_index(begin = var_12207_begin_0, end = var_12207_end_0, end_mask = var_12207_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12207_cast_fp16")]; tensor var_12211_begin_0 = const()[name = tensor("op_12211_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_12211_end_0 = const()[name = tensor("op_12211_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_12211_end_mask_0 = const()[name = tensor("op_12211_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12211_cast_fp16 = slice_by_index(begin = var_12211_begin_0, end = var_12211_end_0, end_mask = var_12211_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12211_cast_fp16")]; tensor var_12215_begin_0 = const()[name = tensor("op_12215_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_12215_end_0 = const()[name = tensor("op_12215_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_12215_end_mask_0 = const()[name = tensor("op_12215_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12215_cast_fp16 = slice_by_index(begin = var_12215_begin_0, end = var_12215_end_0, end_mask = var_12215_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12215_cast_fp16")]; tensor var_12219_begin_0 = const()[name = tensor("op_12219_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_12219_end_0 = const()[name = tensor("op_12219_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_12219_end_mask_0 = const()[name = tensor("op_12219_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12219_cast_fp16 = slice_by_index(begin = var_12219_begin_0, end = var_12219_end_0, end_mask = var_12219_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12219_cast_fp16")]; tensor var_12223_begin_0 = const()[name = tensor("op_12223_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_12223_end_0 = const()[name = tensor("op_12223_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_12223_end_mask_0 = const()[name = tensor("op_12223_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12223_cast_fp16 = slice_by_index(begin = var_12223_begin_0, end = var_12223_end_0, end_mask = var_12223_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12223_cast_fp16")]; tensor var_12227_begin_0 = const()[name = tensor("op_12227_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_12227_end_0 = const()[name = tensor("op_12227_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_12227_end_mask_0 = const()[name = tensor("op_12227_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12227_cast_fp16 = slice_by_index(begin = var_12227_begin_0, end = var_12227_end_0, end_mask = var_12227_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12227_cast_fp16")]; tensor var_12231_begin_0 = const()[name = tensor("op_12231_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_12231_end_0 = const()[name = tensor("op_12231_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_12231_end_mask_0 = const()[name = tensor("op_12231_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12231_cast_fp16 = slice_by_index(begin = var_12231_begin_0, end = var_12231_end_0, end_mask = var_12231_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12231_cast_fp16")]; tensor var_12235_begin_0 = const()[name = tensor("op_12235_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_12235_end_0 = const()[name = tensor("op_12235_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_12235_end_mask_0 = const()[name = tensor("op_12235_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12235_cast_fp16 = slice_by_index(begin = var_12235_begin_0, end = var_12235_end_0, end_mask = var_12235_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12235_cast_fp16")]; tensor var_12239_begin_0 = const()[name = tensor("op_12239_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_12239_end_0 = const()[name = tensor("op_12239_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_12239_end_mask_0 = const()[name = tensor("op_12239_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12239_cast_fp16 = slice_by_index(begin = var_12239_begin_0, end = var_12239_end_0, end_mask = var_12239_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12239_cast_fp16")]; tensor var_12243_begin_0 = const()[name = tensor("op_12243_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_12243_end_0 = const()[name = tensor("op_12243_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_12243_end_mask_0 = const()[name = tensor("op_12243_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_12243_cast_fp16 = slice_by_index(begin = var_12243_begin_0, end = var_12243_end_0, end_mask = var_12243_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_12243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1121_equation_0, values = (var_12089_cast_fp16, var_11531_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1123_equation_0, values = (var_12089_cast_fp16, var_11538_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1125_equation_0, values = (var_12089_cast_fp16, var_11545_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1127_equation_0, values = (var_12089_cast_fp16, var_11552_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1129_equation_0, values = (var_12093_cast_fp16, var_11559_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1131_equation_0, values = (var_12093_cast_fp16, var_11566_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1133_equation_0, values = (var_12093_cast_fp16, var_11573_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1135_equation_0, values = (var_12093_cast_fp16, var_11580_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1137_equation_0, values = (var_12097_cast_fp16, var_11587_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1139_equation_0, values = (var_12097_cast_fp16, var_11594_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1141_equation_0, values = (var_12097_cast_fp16, var_11601_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1143_equation_0, values = (var_12097_cast_fp16, var_11608_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1143_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1145_equation_0, values = (var_12101_cast_fp16, var_11615_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1147_equation_0, values = (var_12101_cast_fp16, var_11622_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1149_equation_0, values = (var_12101_cast_fp16, var_11629_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1151_equation_0, values = (var_12101_cast_fp16, var_11636_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1153_equation_0, values = (var_12105_cast_fp16, var_11643_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1155_equation_0, values = (var_12105_cast_fp16, var_11650_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1157_equation_0, values = (var_12105_cast_fp16, var_11657_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1159_equation_0, values = (var_12105_cast_fp16, var_11664_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1159_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1161_equation_0, values = (var_12109_cast_fp16, var_11671_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1163_equation_0, values = (var_12109_cast_fp16, var_11678_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1165_equation_0, values = (var_12109_cast_fp16, var_11685_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1167_equation_0, values = (var_12109_cast_fp16, var_11692_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1169_equation_0, values = (var_12113_cast_fp16, var_11699_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1171_equation_0, values = (var_12113_cast_fp16, var_11706_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1173_equation_0, values = (var_12113_cast_fp16, var_11713_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1175_equation_0, values = (var_12113_cast_fp16, var_11720_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1177_equation_0, values = (var_12117_cast_fp16, var_11727_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1179_equation_0, values = (var_12117_cast_fp16, var_11734_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1181_equation_0, values = (var_12117_cast_fp16, var_11741_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1183_equation_0, values = (var_12117_cast_fp16, var_11748_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1185_equation_0, values = (var_12121_cast_fp16, var_11755_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1187_equation_0, values = (var_12121_cast_fp16, var_11762_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1189_equation_0, values = (var_12121_cast_fp16, var_11769_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1191_equation_0, values = (var_12121_cast_fp16, var_11776_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1193_equation_0, values = (var_12125_cast_fp16, var_11783_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1195_equation_0, values = (var_12125_cast_fp16, var_11790_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1197_equation_0, values = (var_12125_cast_fp16, var_11797_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1199_equation_0, values = (var_12125_cast_fp16, var_11804_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1201_equation_0, values = (var_12129_cast_fp16, var_11811_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1203_equation_0, values = (var_12129_cast_fp16, var_11818_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1205_equation_0, values = (var_12129_cast_fp16, var_11825_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1207_equation_0, values = (var_12129_cast_fp16, var_11832_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1209_equation_0, values = (var_12133_cast_fp16, var_11839_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1211_equation_0, values = (var_12133_cast_fp16, var_11846_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1213_equation_0, values = (var_12133_cast_fp16, var_11853_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1215_equation_0, values = (var_12133_cast_fp16, var_11860_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1217_equation_0, values = (var_12137_cast_fp16, var_11867_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1219_equation_0, values = (var_12137_cast_fp16, var_11874_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1221_equation_0, values = (var_12137_cast_fp16, var_11881_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1223_equation_0, values = (var_12137_cast_fp16, var_11888_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1225_equation_0, values = (var_12141_cast_fp16, var_11895_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1227_equation_0, values = (var_12141_cast_fp16, var_11902_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1229_equation_0, values = (var_12141_cast_fp16, var_11909_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1231_equation_0, values = (var_12141_cast_fp16, var_11916_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1233_equation_0, values = (var_12145_cast_fp16, var_11923_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1235_equation_0, values = (var_12145_cast_fp16, var_11930_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1237_equation_0, values = (var_12145_cast_fp16, var_11937_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1239_equation_0, values = (var_12145_cast_fp16, var_11944_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1241_equation_0, values = (var_12149_cast_fp16, var_11951_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1243_equation_0, values = (var_12149_cast_fp16, var_11958_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1245_equation_0, values = (var_12149_cast_fp16, var_11965_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1247_equation_0, values = (var_12149_cast_fp16, var_11972_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1249_equation_0, values = (var_12153_cast_fp16, var_11979_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1251_equation_0, values = (var_12153_cast_fp16, var_11986_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1253_equation_0, values = (var_12153_cast_fp16, var_11993_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1255_equation_0, values = (var_12153_cast_fp16, var_12000_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1257_equation_0, values = (var_12157_cast_fp16, var_12007_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1259_equation_0, values = (var_12157_cast_fp16, var_12014_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1261_equation_0, values = (var_12157_cast_fp16, var_12021_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1263_equation_0, values = (var_12157_cast_fp16, var_12028_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1265_equation_0, values = (var_12161_cast_fp16, var_12035_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1267_equation_0, values = (var_12161_cast_fp16, var_12042_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1269_equation_0, values = (var_12161_cast_fp16, var_12049_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1271_equation_0, values = (var_12161_cast_fp16, var_12056_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1273_equation_0, values = (var_12165_cast_fp16, var_12063_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1275_equation_0, values = (var_12165_cast_fp16, var_12070_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1277_equation_0, values = (var_12165_cast_fp16, var_12077_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1279_equation_0, values = (var_12165_cast_fp16, var_12084_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1279_cast_fp16")]; tensor var_12406_to_fp16 = const()[name = tensor("op_12406_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1121_cast_fp16, y = var_12406_to_fp16)[name = tensor("aw_chunk_1121_cast_fp16")]; tensor var_12408_to_fp16 = const()[name = tensor("op_12408_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1123_cast_fp16, y = var_12408_to_fp16)[name = tensor("aw_chunk_1123_cast_fp16")]; tensor var_12410_to_fp16 = const()[name = tensor("op_12410_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1125_cast_fp16, y = var_12410_to_fp16)[name = tensor("aw_chunk_1125_cast_fp16")]; tensor var_12412_to_fp16 = const()[name = tensor("op_12412_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1127_cast_fp16, y = var_12412_to_fp16)[name = tensor("aw_chunk_1127_cast_fp16")]; tensor var_12414_to_fp16 = const()[name = tensor("op_12414_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1129_cast_fp16, y = var_12414_to_fp16)[name = tensor("aw_chunk_1129_cast_fp16")]; tensor var_12416_to_fp16 = const()[name = tensor("op_12416_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1131_cast_fp16, y = var_12416_to_fp16)[name = tensor("aw_chunk_1131_cast_fp16")]; tensor var_12418_to_fp16 = const()[name = tensor("op_12418_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1133_cast_fp16, y = var_12418_to_fp16)[name = tensor("aw_chunk_1133_cast_fp16")]; tensor var_12420_to_fp16 = const()[name = tensor("op_12420_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1135_cast_fp16, y = var_12420_to_fp16)[name = tensor("aw_chunk_1135_cast_fp16")]; tensor var_12422_to_fp16 = const()[name = tensor("op_12422_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1137_cast_fp16, y = var_12422_to_fp16)[name = tensor("aw_chunk_1137_cast_fp16")]; tensor var_12424_to_fp16 = const()[name = tensor("op_12424_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1139_cast_fp16, y = var_12424_to_fp16)[name = tensor("aw_chunk_1139_cast_fp16")]; tensor var_12426_to_fp16 = const()[name = tensor("op_12426_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1141_cast_fp16, y = var_12426_to_fp16)[name = tensor("aw_chunk_1141_cast_fp16")]; tensor var_12428_to_fp16 = const()[name = tensor("op_12428_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1143_cast_fp16, y = var_12428_to_fp16)[name = tensor("aw_chunk_1143_cast_fp16")]; tensor var_12430_to_fp16 = const()[name = tensor("op_12430_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1145_cast_fp16, y = var_12430_to_fp16)[name = tensor("aw_chunk_1145_cast_fp16")]; tensor var_12432_to_fp16 = const()[name = tensor("op_12432_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1147_cast_fp16, y = var_12432_to_fp16)[name = tensor("aw_chunk_1147_cast_fp16")]; tensor var_12434_to_fp16 = const()[name = tensor("op_12434_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1149_cast_fp16, y = var_12434_to_fp16)[name = tensor("aw_chunk_1149_cast_fp16")]; tensor var_12436_to_fp16 = const()[name = tensor("op_12436_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1151_cast_fp16, y = var_12436_to_fp16)[name = tensor("aw_chunk_1151_cast_fp16")]; tensor var_12438_to_fp16 = const()[name = tensor("op_12438_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1153_cast_fp16, y = var_12438_to_fp16)[name = tensor("aw_chunk_1153_cast_fp16")]; tensor var_12440_to_fp16 = const()[name = tensor("op_12440_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1155_cast_fp16, y = var_12440_to_fp16)[name = tensor("aw_chunk_1155_cast_fp16")]; tensor var_12442_to_fp16 = const()[name = tensor("op_12442_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1157_cast_fp16, y = var_12442_to_fp16)[name = tensor("aw_chunk_1157_cast_fp16")]; tensor var_12444_to_fp16 = const()[name = tensor("op_12444_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1159_cast_fp16, y = var_12444_to_fp16)[name = tensor("aw_chunk_1159_cast_fp16")]; tensor var_12446_to_fp16 = const()[name = tensor("op_12446_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1161_cast_fp16, y = var_12446_to_fp16)[name = tensor("aw_chunk_1161_cast_fp16")]; tensor var_12448_to_fp16 = const()[name = tensor("op_12448_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1163_cast_fp16, y = var_12448_to_fp16)[name = tensor("aw_chunk_1163_cast_fp16")]; tensor var_12450_to_fp16 = const()[name = tensor("op_12450_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1165_cast_fp16, y = var_12450_to_fp16)[name = tensor("aw_chunk_1165_cast_fp16")]; tensor var_12452_to_fp16 = const()[name = tensor("op_12452_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1167_cast_fp16, y = var_12452_to_fp16)[name = tensor("aw_chunk_1167_cast_fp16")]; tensor var_12454_to_fp16 = const()[name = tensor("op_12454_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1169_cast_fp16, y = var_12454_to_fp16)[name = tensor("aw_chunk_1169_cast_fp16")]; tensor var_12456_to_fp16 = const()[name = tensor("op_12456_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1171_cast_fp16, y = var_12456_to_fp16)[name = tensor("aw_chunk_1171_cast_fp16")]; tensor var_12458_to_fp16 = const()[name = tensor("op_12458_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1173_cast_fp16, y = var_12458_to_fp16)[name = tensor("aw_chunk_1173_cast_fp16")]; tensor var_12460_to_fp16 = const()[name = tensor("op_12460_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1175_cast_fp16, y = var_12460_to_fp16)[name = tensor("aw_chunk_1175_cast_fp16")]; tensor var_12462_to_fp16 = const()[name = tensor("op_12462_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1177_cast_fp16, y = var_12462_to_fp16)[name = tensor("aw_chunk_1177_cast_fp16")]; tensor var_12464_to_fp16 = const()[name = tensor("op_12464_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1179_cast_fp16, y = var_12464_to_fp16)[name = tensor("aw_chunk_1179_cast_fp16")]; tensor var_12466_to_fp16 = const()[name = tensor("op_12466_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1181_cast_fp16, y = var_12466_to_fp16)[name = tensor("aw_chunk_1181_cast_fp16")]; tensor var_12468_to_fp16 = const()[name = tensor("op_12468_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1183_cast_fp16, y = var_12468_to_fp16)[name = tensor("aw_chunk_1183_cast_fp16")]; tensor var_12470_to_fp16 = const()[name = tensor("op_12470_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1185_cast_fp16, y = var_12470_to_fp16)[name = tensor("aw_chunk_1185_cast_fp16")]; tensor var_12472_to_fp16 = const()[name = tensor("op_12472_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1187_cast_fp16, y = var_12472_to_fp16)[name = tensor("aw_chunk_1187_cast_fp16")]; tensor var_12474_to_fp16 = const()[name = tensor("op_12474_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1189_cast_fp16, y = var_12474_to_fp16)[name = tensor("aw_chunk_1189_cast_fp16")]; tensor var_12476_to_fp16 = const()[name = tensor("op_12476_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1191_cast_fp16, y = var_12476_to_fp16)[name = tensor("aw_chunk_1191_cast_fp16")]; tensor var_12478_to_fp16 = const()[name = tensor("op_12478_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1193_cast_fp16, y = var_12478_to_fp16)[name = tensor("aw_chunk_1193_cast_fp16")]; tensor var_12480_to_fp16 = const()[name = tensor("op_12480_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1195_cast_fp16, y = var_12480_to_fp16)[name = tensor("aw_chunk_1195_cast_fp16")]; tensor var_12482_to_fp16 = const()[name = tensor("op_12482_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1197_cast_fp16, y = var_12482_to_fp16)[name = tensor("aw_chunk_1197_cast_fp16")]; tensor var_12484_to_fp16 = const()[name = tensor("op_12484_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1199_cast_fp16, y = var_12484_to_fp16)[name = tensor("aw_chunk_1199_cast_fp16")]; tensor var_12486_to_fp16 = const()[name = tensor("op_12486_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1201_cast_fp16, y = var_12486_to_fp16)[name = tensor("aw_chunk_1201_cast_fp16")]; tensor var_12488_to_fp16 = const()[name = tensor("op_12488_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1203_cast_fp16, y = var_12488_to_fp16)[name = tensor("aw_chunk_1203_cast_fp16")]; tensor var_12490_to_fp16 = const()[name = tensor("op_12490_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1205_cast_fp16, y = var_12490_to_fp16)[name = tensor("aw_chunk_1205_cast_fp16")]; tensor var_12492_to_fp16 = const()[name = tensor("op_12492_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1207_cast_fp16, y = var_12492_to_fp16)[name = tensor("aw_chunk_1207_cast_fp16")]; tensor var_12494_to_fp16 = const()[name = tensor("op_12494_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1209_cast_fp16, y = var_12494_to_fp16)[name = tensor("aw_chunk_1209_cast_fp16")]; tensor var_12496_to_fp16 = const()[name = tensor("op_12496_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1211_cast_fp16, y = var_12496_to_fp16)[name = tensor("aw_chunk_1211_cast_fp16")]; tensor var_12498_to_fp16 = const()[name = tensor("op_12498_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1213_cast_fp16, y = var_12498_to_fp16)[name = tensor("aw_chunk_1213_cast_fp16")]; tensor var_12500_to_fp16 = const()[name = tensor("op_12500_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1215_cast_fp16, y = var_12500_to_fp16)[name = tensor("aw_chunk_1215_cast_fp16")]; tensor var_12502_to_fp16 = const()[name = tensor("op_12502_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1217_cast_fp16, y = var_12502_to_fp16)[name = tensor("aw_chunk_1217_cast_fp16")]; tensor var_12504_to_fp16 = const()[name = tensor("op_12504_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1219_cast_fp16, y = var_12504_to_fp16)[name = tensor("aw_chunk_1219_cast_fp16")]; tensor var_12506_to_fp16 = const()[name = tensor("op_12506_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1221_cast_fp16, y = var_12506_to_fp16)[name = tensor("aw_chunk_1221_cast_fp16")]; tensor var_12508_to_fp16 = const()[name = tensor("op_12508_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1223_cast_fp16, y = var_12508_to_fp16)[name = tensor("aw_chunk_1223_cast_fp16")]; tensor var_12510_to_fp16 = const()[name = tensor("op_12510_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1225_cast_fp16, y = var_12510_to_fp16)[name = tensor("aw_chunk_1225_cast_fp16")]; tensor var_12512_to_fp16 = const()[name = tensor("op_12512_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1227_cast_fp16, y = var_12512_to_fp16)[name = tensor("aw_chunk_1227_cast_fp16")]; tensor var_12514_to_fp16 = const()[name = tensor("op_12514_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1229_cast_fp16, y = var_12514_to_fp16)[name = tensor("aw_chunk_1229_cast_fp16")]; tensor var_12516_to_fp16 = const()[name = tensor("op_12516_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1231_cast_fp16, y = var_12516_to_fp16)[name = tensor("aw_chunk_1231_cast_fp16")]; tensor var_12518_to_fp16 = const()[name = tensor("op_12518_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1233_cast_fp16, y = var_12518_to_fp16)[name = tensor("aw_chunk_1233_cast_fp16")]; tensor var_12520_to_fp16 = const()[name = tensor("op_12520_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1235_cast_fp16, y = var_12520_to_fp16)[name = tensor("aw_chunk_1235_cast_fp16")]; tensor var_12522_to_fp16 = const()[name = tensor("op_12522_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1237_cast_fp16, y = var_12522_to_fp16)[name = tensor("aw_chunk_1237_cast_fp16")]; tensor var_12524_to_fp16 = const()[name = tensor("op_12524_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1239_cast_fp16, y = var_12524_to_fp16)[name = tensor("aw_chunk_1239_cast_fp16")]; tensor var_12526_to_fp16 = const()[name = tensor("op_12526_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1241_cast_fp16, y = var_12526_to_fp16)[name = tensor("aw_chunk_1241_cast_fp16")]; tensor var_12528_to_fp16 = const()[name = tensor("op_12528_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1243_cast_fp16, y = var_12528_to_fp16)[name = tensor("aw_chunk_1243_cast_fp16")]; tensor var_12530_to_fp16 = const()[name = tensor("op_12530_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1245_cast_fp16, y = var_12530_to_fp16)[name = tensor("aw_chunk_1245_cast_fp16")]; tensor var_12532_to_fp16 = const()[name = tensor("op_12532_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1247_cast_fp16, y = var_12532_to_fp16)[name = tensor("aw_chunk_1247_cast_fp16")]; tensor var_12534_to_fp16 = const()[name = tensor("op_12534_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1249_cast_fp16, y = var_12534_to_fp16)[name = tensor("aw_chunk_1249_cast_fp16")]; tensor var_12536_to_fp16 = const()[name = tensor("op_12536_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1251_cast_fp16, y = var_12536_to_fp16)[name = tensor("aw_chunk_1251_cast_fp16")]; tensor var_12538_to_fp16 = const()[name = tensor("op_12538_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1253_cast_fp16, y = var_12538_to_fp16)[name = tensor("aw_chunk_1253_cast_fp16")]; tensor var_12540_to_fp16 = const()[name = tensor("op_12540_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1255_cast_fp16, y = var_12540_to_fp16)[name = tensor("aw_chunk_1255_cast_fp16")]; tensor var_12542_to_fp16 = const()[name = tensor("op_12542_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1257_cast_fp16, y = var_12542_to_fp16)[name = tensor("aw_chunk_1257_cast_fp16")]; tensor var_12544_to_fp16 = const()[name = tensor("op_12544_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1259_cast_fp16, y = var_12544_to_fp16)[name = tensor("aw_chunk_1259_cast_fp16")]; tensor var_12546_to_fp16 = const()[name = tensor("op_12546_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1261_cast_fp16, y = var_12546_to_fp16)[name = tensor("aw_chunk_1261_cast_fp16")]; tensor var_12548_to_fp16 = const()[name = tensor("op_12548_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1263_cast_fp16, y = var_12548_to_fp16)[name = tensor("aw_chunk_1263_cast_fp16")]; tensor var_12550_to_fp16 = const()[name = tensor("op_12550_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1265_cast_fp16, y = var_12550_to_fp16)[name = tensor("aw_chunk_1265_cast_fp16")]; tensor var_12552_to_fp16 = const()[name = tensor("op_12552_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1267_cast_fp16, y = var_12552_to_fp16)[name = tensor("aw_chunk_1267_cast_fp16")]; tensor var_12554_to_fp16 = const()[name = tensor("op_12554_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1269_cast_fp16, y = var_12554_to_fp16)[name = tensor("aw_chunk_1269_cast_fp16")]; tensor var_12556_to_fp16 = const()[name = tensor("op_12556_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1271_cast_fp16, y = var_12556_to_fp16)[name = tensor("aw_chunk_1271_cast_fp16")]; tensor var_12558_to_fp16 = const()[name = tensor("op_12558_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1273_cast_fp16, y = var_12558_to_fp16)[name = tensor("aw_chunk_1273_cast_fp16")]; tensor var_12560_to_fp16 = const()[name = tensor("op_12560_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1275_cast_fp16, y = var_12560_to_fp16)[name = tensor("aw_chunk_1275_cast_fp16")]; tensor var_12562_to_fp16 = const()[name = tensor("op_12562_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1277_cast_fp16, y = var_12562_to_fp16)[name = tensor("aw_chunk_1277_cast_fp16")]; tensor var_12564_to_fp16 = const()[name = tensor("op_12564_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1279_cast_fp16, y = var_12564_to_fp16)[name = tensor("aw_chunk_1279_cast_fp16")]; tensor var_12566_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1121_cast_fp16)[name = tensor("op_12566_cast_fp16")]; tensor var_12567_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1123_cast_fp16)[name = tensor("op_12567_cast_fp16")]; tensor var_12568_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1125_cast_fp16)[name = tensor("op_12568_cast_fp16")]; tensor var_12569_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1127_cast_fp16)[name = tensor("op_12569_cast_fp16")]; tensor var_12570_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1129_cast_fp16)[name = tensor("op_12570_cast_fp16")]; tensor var_12571_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1131_cast_fp16)[name = tensor("op_12571_cast_fp16")]; tensor var_12572_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1133_cast_fp16)[name = tensor("op_12572_cast_fp16")]; tensor var_12573_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1135_cast_fp16)[name = tensor("op_12573_cast_fp16")]; tensor var_12574_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1137_cast_fp16)[name = tensor("op_12574_cast_fp16")]; tensor var_12575_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1139_cast_fp16)[name = tensor("op_12575_cast_fp16")]; tensor var_12576_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1141_cast_fp16)[name = tensor("op_12576_cast_fp16")]; tensor var_12577_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1143_cast_fp16)[name = tensor("op_12577_cast_fp16")]; tensor var_12578_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1145_cast_fp16)[name = tensor("op_12578_cast_fp16")]; tensor var_12579_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1147_cast_fp16)[name = tensor("op_12579_cast_fp16")]; tensor var_12580_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1149_cast_fp16)[name = tensor("op_12580_cast_fp16")]; tensor var_12581_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1151_cast_fp16)[name = tensor("op_12581_cast_fp16")]; tensor var_12582_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1153_cast_fp16)[name = tensor("op_12582_cast_fp16")]; tensor var_12583_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1155_cast_fp16)[name = tensor("op_12583_cast_fp16")]; tensor var_12584_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1157_cast_fp16)[name = tensor("op_12584_cast_fp16")]; tensor var_12585_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1159_cast_fp16)[name = tensor("op_12585_cast_fp16")]; tensor var_12586_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1161_cast_fp16)[name = tensor("op_12586_cast_fp16")]; tensor var_12587_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1163_cast_fp16)[name = tensor("op_12587_cast_fp16")]; tensor var_12588_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1165_cast_fp16)[name = tensor("op_12588_cast_fp16")]; tensor var_12589_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1167_cast_fp16)[name = tensor("op_12589_cast_fp16")]; tensor var_12590_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1169_cast_fp16)[name = tensor("op_12590_cast_fp16")]; tensor var_12591_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1171_cast_fp16)[name = tensor("op_12591_cast_fp16")]; tensor var_12592_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1173_cast_fp16)[name = tensor("op_12592_cast_fp16")]; tensor var_12593_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1175_cast_fp16)[name = tensor("op_12593_cast_fp16")]; tensor var_12594_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1177_cast_fp16)[name = tensor("op_12594_cast_fp16")]; tensor var_12595_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1179_cast_fp16)[name = tensor("op_12595_cast_fp16")]; tensor var_12596_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1181_cast_fp16)[name = tensor("op_12596_cast_fp16")]; tensor var_12597_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1183_cast_fp16)[name = tensor("op_12597_cast_fp16")]; tensor var_12598_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1185_cast_fp16)[name = tensor("op_12598_cast_fp16")]; tensor var_12599_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1187_cast_fp16)[name = tensor("op_12599_cast_fp16")]; tensor var_12600_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1189_cast_fp16)[name = tensor("op_12600_cast_fp16")]; tensor var_12601_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1191_cast_fp16)[name = tensor("op_12601_cast_fp16")]; tensor var_12602_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1193_cast_fp16)[name = tensor("op_12602_cast_fp16")]; tensor var_12603_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1195_cast_fp16)[name = tensor("op_12603_cast_fp16")]; tensor var_12604_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1197_cast_fp16)[name = tensor("op_12604_cast_fp16")]; tensor var_12605_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1199_cast_fp16)[name = tensor("op_12605_cast_fp16")]; tensor var_12606_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1201_cast_fp16)[name = tensor("op_12606_cast_fp16")]; tensor var_12607_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1203_cast_fp16)[name = tensor("op_12607_cast_fp16")]; tensor var_12608_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1205_cast_fp16)[name = tensor("op_12608_cast_fp16")]; tensor var_12609_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1207_cast_fp16)[name = tensor("op_12609_cast_fp16")]; tensor var_12610_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1209_cast_fp16)[name = tensor("op_12610_cast_fp16")]; tensor var_12611_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1211_cast_fp16)[name = tensor("op_12611_cast_fp16")]; tensor var_12612_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1213_cast_fp16)[name = tensor("op_12612_cast_fp16")]; tensor var_12613_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1215_cast_fp16)[name = tensor("op_12613_cast_fp16")]; tensor var_12614_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1217_cast_fp16)[name = tensor("op_12614_cast_fp16")]; tensor var_12615_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1219_cast_fp16)[name = tensor("op_12615_cast_fp16")]; tensor var_12616_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1221_cast_fp16)[name = tensor("op_12616_cast_fp16")]; tensor var_12617_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1223_cast_fp16)[name = tensor("op_12617_cast_fp16")]; tensor var_12618_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1225_cast_fp16)[name = tensor("op_12618_cast_fp16")]; tensor var_12619_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1227_cast_fp16)[name = tensor("op_12619_cast_fp16")]; tensor var_12620_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1229_cast_fp16)[name = tensor("op_12620_cast_fp16")]; tensor var_12621_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1231_cast_fp16)[name = tensor("op_12621_cast_fp16")]; tensor var_12622_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1233_cast_fp16)[name = tensor("op_12622_cast_fp16")]; tensor var_12623_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1235_cast_fp16)[name = tensor("op_12623_cast_fp16")]; tensor var_12624_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1237_cast_fp16)[name = tensor("op_12624_cast_fp16")]; tensor var_12625_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1239_cast_fp16)[name = tensor("op_12625_cast_fp16")]; tensor var_12626_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1241_cast_fp16)[name = tensor("op_12626_cast_fp16")]; tensor var_12627_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1243_cast_fp16)[name = tensor("op_12627_cast_fp16")]; tensor var_12628_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1245_cast_fp16)[name = tensor("op_12628_cast_fp16")]; tensor var_12629_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1247_cast_fp16)[name = tensor("op_12629_cast_fp16")]; tensor var_12630_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1249_cast_fp16)[name = tensor("op_12630_cast_fp16")]; tensor var_12631_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1251_cast_fp16)[name = tensor("op_12631_cast_fp16")]; tensor var_12632_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1253_cast_fp16)[name = tensor("op_12632_cast_fp16")]; tensor var_12633_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1255_cast_fp16)[name = tensor("op_12633_cast_fp16")]; tensor var_12634_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1257_cast_fp16)[name = tensor("op_12634_cast_fp16")]; tensor var_12635_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1259_cast_fp16)[name = tensor("op_12635_cast_fp16")]; tensor var_12636_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1261_cast_fp16)[name = tensor("op_12636_cast_fp16")]; tensor var_12637_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1263_cast_fp16)[name = tensor("op_12637_cast_fp16")]; tensor var_12638_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1265_cast_fp16)[name = tensor("op_12638_cast_fp16")]; tensor var_12639_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1267_cast_fp16)[name = tensor("op_12639_cast_fp16")]; tensor var_12640_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1269_cast_fp16)[name = tensor("op_12640_cast_fp16")]; tensor var_12641_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1271_cast_fp16)[name = tensor("op_12641_cast_fp16")]; tensor var_12642_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1273_cast_fp16)[name = tensor("op_12642_cast_fp16")]; tensor var_12643_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1275_cast_fp16)[name = tensor("op_12643_cast_fp16")]; tensor var_12644_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1277_cast_fp16)[name = tensor("op_12644_cast_fp16")]; tensor var_12645_cast_fp16 = softmax(axis = var_11364, x = aw_chunk_1279_cast_fp16)[name = tensor("op_12645_cast_fp16")]; tensor var_12647_equation_0 = const()[name = tensor("op_12647_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12647_cast_fp16 = einsum(equation = var_12647_equation_0, values = (var_12167_cast_fp16, var_12566_cast_fp16))[name = tensor("op_12647_cast_fp16")]; tensor var_12649_equation_0 = const()[name = tensor("op_12649_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12649_cast_fp16 = einsum(equation = var_12649_equation_0, values = (var_12167_cast_fp16, var_12567_cast_fp16))[name = tensor("op_12649_cast_fp16")]; tensor var_12651_equation_0 = const()[name = tensor("op_12651_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12651_cast_fp16 = einsum(equation = var_12651_equation_0, values = (var_12167_cast_fp16, var_12568_cast_fp16))[name = tensor("op_12651_cast_fp16")]; tensor var_12653_equation_0 = const()[name = tensor("op_12653_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12653_cast_fp16 = einsum(equation = var_12653_equation_0, values = (var_12167_cast_fp16, var_12569_cast_fp16))[name = tensor("op_12653_cast_fp16")]; tensor var_12655_equation_0 = const()[name = tensor("op_12655_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12655_cast_fp16 = einsum(equation = var_12655_equation_0, values = (var_12171_cast_fp16, var_12570_cast_fp16))[name = tensor("op_12655_cast_fp16")]; tensor var_12657_equation_0 = const()[name = tensor("op_12657_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12657_cast_fp16 = einsum(equation = var_12657_equation_0, values = (var_12171_cast_fp16, var_12571_cast_fp16))[name = tensor("op_12657_cast_fp16")]; tensor var_12659_equation_0 = const()[name = tensor("op_12659_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12659_cast_fp16 = einsum(equation = var_12659_equation_0, values = (var_12171_cast_fp16, var_12572_cast_fp16))[name = tensor("op_12659_cast_fp16")]; tensor var_12661_equation_0 = const()[name = tensor("op_12661_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12661_cast_fp16 = einsum(equation = var_12661_equation_0, values = (var_12171_cast_fp16, var_12573_cast_fp16))[name = tensor("op_12661_cast_fp16")]; tensor var_12663_equation_0 = const()[name = tensor("op_12663_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12663_cast_fp16 = einsum(equation = var_12663_equation_0, values = (var_12175_cast_fp16, var_12574_cast_fp16))[name = tensor("op_12663_cast_fp16")]; tensor var_12665_equation_0 = const()[name = tensor("op_12665_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12665_cast_fp16 = einsum(equation = var_12665_equation_0, values = (var_12175_cast_fp16, var_12575_cast_fp16))[name = tensor("op_12665_cast_fp16")]; tensor var_12667_equation_0 = const()[name = tensor("op_12667_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12667_cast_fp16 = einsum(equation = var_12667_equation_0, values = (var_12175_cast_fp16, var_12576_cast_fp16))[name = tensor("op_12667_cast_fp16")]; tensor var_12669_equation_0 = const()[name = tensor("op_12669_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12669_cast_fp16 = einsum(equation = var_12669_equation_0, values = (var_12175_cast_fp16, var_12577_cast_fp16))[name = tensor("op_12669_cast_fp16")]; tensor var_12671_equation_0 = const()[name = tensor("op_12671_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12671_cast_fp16 = einsum(equation = var_12671_equation_0, values = (var_12179_cast_fp16, var_12578_cast_fp16))[name = tensor("op_12671_cast_fp16")]; tensor var_12673_equation_0 = const()[name = tensor("op_12673_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12673_cast_fp16 = einsum(equation = var_12673_equation_0, values = (var_12179_cast_fp16, var_12579_cast_fp16))[name = tensor("op_12673_cast_fp16")]; tensor var_12675_equation_0 = const()[name = tensor("op_12675_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12675_cast_fp16 = einsum(equation = var_12675_equation_0, values = (var_12179_cast_fp16, var_12580_cast_fp16))[name = tensor("op_12675_cast_fp16")]; tensor var_12677_equation_0 = const()[name = tensor("op_12677_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12677_cast_fp16 = einsum(equation = var_12677_equation_0, values = (var_12179_cast_fp16, var_12581_cast_fp16))[name = tensor("op_12677_cast_fp16")]; tensor var_12679_equation_0 = const()[name = tensor("op_12679_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12679_cast_fp16 = einsum(equation = var_12679_equation_0, values = (var_12183_cast_fp16, var_12582_cast_fp16))[name = tensor("op_12679_cast_fp16")]; tensor var_12681_equation_0 = const()[name = tensor("op_12681_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12681_cast_fp16 = einsum(equation = var_12681_equation_0, values = (var_12183_cast_fp16, var_12583_cast_fp16))[name = tensor("op_12681_cast_fp16")]; tensor var_12683_equation_0 = const()[name = tensor("op_12683_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12683_cast_fp16 = einsum(equation = var_12683_equation_0, values = (var_12183_cast_fp16, var_12584_cast_fp16))[name = tensor("op_12683_cast_fp16")]; tensor var_12685_equation_0 = const()[name = tensor("op_12685_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12685_cast_fp16 = einsum(equation = var_12685_equation_0, values = (var_12183_cast_fp16, var_12585_cast_fp16))[name = tensor("op_12685_cast_fp16")]; tensor var_12687_equation_0 = const()[name = tensor("op_12687_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12687_cast_fp16 = einsum(equation = var_12687_equation_0, values = (var_12187_cast_fp16, var_12586_cast_fp16))[name = tensor("op_12687_cast_fp16")]; tensor var_12689_equation_0 = const()[name = tensor("op_12689_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12689_cast_fp16 = einsum(equation = var_12689_equation_0, values = (var_12187_cast_fp16, var_12587_cast_fp16))[name = tensor("op_12689_cast_fp16")]; tensor var_12691_equation_0 = const()[name = tensor("op_12691_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12691_cast_fp16 = einsum(equation = var_12691_equation_0, values = (var_12187_cast_fp16, var_12588_cast_fp16))[name = tensor("op_12691_cast_fp16")]; tensor var_12693_equation_0 = const()[name = tensor("op_12693_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12693_cast_fp16 = einsum(equation = var_12693_equation_0, values = (var_12187_cast_fp16, var_12589_cast_fp16))[name = tensor("op_12693_cast_fp16")]; tensor var_12695_equation_0 = const()[name = tensor("op_12695_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12695_cast_fp16 = einsum(equation = var_12695_equation_0, values = (var_12191_cast_fp16, var_12590_cast_fp16))[name = tensor("op_12695_cast_fp16")]; tensor var_12697_equation_0 = const()[name = tensor("op_12697_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12697_cast_fp16 = einsum(equation = var_12697_equation_0, values = (var_12191_cast_fp16, var_12591_cast_fp16))[name = tensor("op_12697_cast_fp16")]; tensor var_12699_equation_0 = const()[name = tensor("op_12699_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12699_cast_fp16 = einsum(equation = var_12699_equation_0, values = (var_12191_cast_fp16, var_12592_cast_fp16))[name = tensor("op_12699_cast_fp16")]; tensor var_12701_equation_0 = const()[name = tensor("op_12701_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12701_cast_fp16 = einsum(equation = var_12701_equation_0, values = (var_12191_cast_fp16, var_12593_cast_fp16))[name = tensor("op_12701_cast_fp16")]; tensor var_12703_equation_0 = const()[name = tensor("op_12703_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12703_cast_fp16 = einsum(equation = var_12703_equation_0, values = (var_12195_cast_fp16, var_12594_cast_fp16))[name = tensor("op_12703_cast_fp16")]; tensor var_12705_equation_0 = const()[name = tensor("op_12705_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12705_cast_fp16 = einsum(equation = var_12705_equation_0, values = (var_12195_cast_fp16, var_12595_cast_fp16))[name = tensor("op_12705_cast_fp16")]; tensor var_12707_equation_0 = const()[name = tensor("op_12707_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12707_cast_fp16 = einsum(equation = var_12707_equation_0, values = (var_12195_cast_fp16, var_12596_cast_fp16))[name = tensor("op_12707_cast_fp16")]; tensor var_12709_equation_0 = const()[name = tensor("op_12709_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12709_cast_fp16 = einsum(equation = var_12709_equation_0, values = (var_12195_cast_fp16, var_12597_cast_fp16))[name = tensor("op_12709_cast_fp16")]; tensor var_12711_equation_0 = const()[name = tensor("op_12711_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12711_cast_fp16 = einsum(equation = var_12711_equation_0, values = (var_12199_cast_fp16, var_12598_cast_fp16))[name = tensor("op_12711_cast_fp16")]; tensor var_12713_equation_0 = const()[name = tensor("op_12713_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12713_cast_fp16 = einsum(equation = var_12713_equation_0, values = (var_12199_cast_fp16, var_12599_cast_fp16))[name = tensor("op_12713_cast_fp16")]; tensor var_12715_equation_0 = const()[name = tensor("op_12715_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12715_cast_fp16 = einsum(equation = var_12715_equation_0, values = (var_12199_cast_fp16, var_12600_cast_fp16))[name = tensor("op_12715_cast_fp16")]; tensor var_12717_equation_0 = const()[name = tensor("op_12717_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12717_cast_fp16 = einsum(equation = var_12717_equation_0, values = (var_12199_cast_fp16, var_12601_cast_fp16))[name = tensor("op_12717_cast_fp16")]; tensor var_12719_equation_0 = const()[name = tensor("op_12719_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12719_cast_fp16 = einsum(equation = var_12719_equation_0, values = (var_12203_cast_fp16, var_12602_cast_fp16))[name = tensor("op_12719_cast_fp16")]; tensor var_12721_equation_0 = const()[name = tensor("op_12721_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12721_cast_fp16 = einsum(equation = var_12721_equation_0, values = (var_12203_cast_fp16, var_12603_cast_fp16))[name = tensor("op_12721_cast_fp16")]; tensor var_12723_equation_0 = const()[name = tensor("op_12723_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12723_cast_fp16 = einsum(equation = var_12723_equation_0, values = (var_12203_cast_fp16, var_12604_cast_fp16))[name = tensor("op_12723_cast_fp16")]; tensor var_12725_equation_0 = const()[name = tensor("op_12725_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12725_cast_fp16 = einsum(equation = var_12725_equation_0, values = (var_12203_cast_fp16, var_12605_cast_fp16))[name = tensor("op_12725_cast_fp16")]; tensor var_12727_equation_0 = const()[name = tensor("op_12727_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12727_cast_fp16 = einsum(equation = var_12727_equation_0, values = (var_12207_cast_fp16, var_12606_cast_fp16))[name = tensor("op_12727_cast_fp16")]; tensor var_12729_equation_0 = const()[name = tensor("op_12729_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12729_cast_fp16 = einsum(equation = var_12729_equation_0, values = (var_12207_cast_fp16, var_12607_cast_fp16))[name = tensor("op_12729_cast_fp16")]; tensor var_12731_equation_0 = const()[name = tensor("op_12731_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12731_cast_fp16 = einsum(equation = var_12731_equation_0, values = (var_12207_cast_fp16, var_12608_cast_fp16))[name = tensor("op_12731_cast_fp16")]; tensor var_12733_equation_0 = const()[name = tensor("op_12733_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12733_cast_fp16 = einsum(equation = var_12733_equation_0, values = (var_12207_cast_fp16, var_12609_cast_fp16))[name = tensor("op_12733_cast_fp16")]; tensor var_12735_equation_0 = const()[name = tensor("op_12735_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12735_cast_fp16 = einsum(equation = var_12735_equation_0, values = (var_12211_cast_fp16, var_12610_cast_fp16))[name = tensor("op_12735_cast_fp16")]; tensor var_12737_equation_0 = const()[name = tensor("op_12737_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12737_cast_fp16 = einsum(equation = var_12737_equation_0, values = (var_12211_cast_fp16, var_12611_cast_fp16))[name = tensor("op_12737_cast_fp16")]; tensor var_12739_equation_0 = const()[name = tensor("op_12739_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12739_cast_fp16 = einsum(equation = var_12739_equation_0, values = (var_12211_cast_fp16, var_12612_cast_fp16))[name = tensor("op_12739_cast_fp16")]; tensor var_12741_equation_0 = const()[name = tensor("op_12741_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12741_cast_fp16 = einsum(equation = var_12741_equation_0, values = (var_12211_cast_fp16, var_12613_cast_fp16))[name = tensor("op_12741_cast_fp16")]; tensor var_12743_equation_0 = const()[name = tensor("op_12743_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12743_cast_fp16 = einsum(equation = var_12743_equation_0, values = (var_12215_cast_fp16, var_12614_cast_fp16))[name = tensor("op_12743_cast_fp16")]; tensor var_12745_equation_0 = const()[name = tensor("op_12745_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12745_cast_fp16 = einsum(equation = var_12745_equation_0, values = (var_12215_cast_fp16, var_12615_cast_fp16))[name = tensor("op_12745_cast_fp16")]; tensor var_12747_equation_0 = const()[name = tensor("op_12747_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12747_cast_fp16 = einsum(equation = var_12747_equation_0, values = (var_12215_cast_fp16, var_12616_cast_fp16))[name = tensor("op_12747_cast_fp16")]; tensor var_12749_equation_0 = const()[name = tensor("op_12749_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12749_cast_fp16 = einsum(equation = var_12749_equation_0, values = (var_12215_cast_fp16, var_12617_cast_fp16))[name = tensor("op_12749_cast_fp16")]; tensor var_12751_equation_0 = const()[name = tensor("op_12751_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12751_cast_fp16 = einsum(equation = var_12751_equation_0, values = (var_12219_cast_fp16, var_12618_cast_fp16))[name = tensor("op_12751_cast_fp16")]; tensor var_12753_equation_0 = const()[name = tensor("op_12753_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12753_cast_fp16 = einsum(equation = var_12753_equation_0, values = (var_12219_cast_fp16, var_12619_cast_fp16))[name = tensor("op_12753_cast_fp16")]; tensor var_12755_equation_0 = const()[name = tensor("op_12755_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12755_cast_fp16 = einsum(equation = var_12755_equation_0, values = (var_12219_cast_fp16, var_12620_cast_fp16))[name = tensor("op_12755_cast_fp16")]; tensor var_12757_equation_0 = const()[name = tensor("op_12757_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12757_cast_fp16 = einsum(equation = var_12757_equation_0, values = (var_12219_cast_fp16, var_12621_cast_fp16))[name = tensor("op_12757_cast_fp16")]; tensor var_12759_equation_0 = const()[name = tensor("op_12759_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12759_cast_fp16 = einsum(equation = var_12759_equation_0, values = (var_12223_cast_fp16, var_12622_cast_fp16))[name = tensor("op_12759_cast_fp16")]; tensor var_12761_equation_0 = const()[name = tensor("op_12761_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12761_cast_fp16 = einsum(equation = var_12761_equation_0, values = (var_12223_cast_fp16, var_12623_cast_fp16))[name = tensor("op_12761_cast_fp16")]; tensor var_12763_equation_0 = const()[name = tensor("op_12763_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12763_cast_fp16 = einsum(equation = var_12763_equation_0, values = (var_12223_cast_fp16, var_12624_cast_fp16))[name = tensor("op_12763_cast_fp16")]; tensor var_12765_equation_0 = const()[name = tensor("op_12765_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12765_cast_fp16 = einsum(equation = var_12765_equation_0, values = (var_12223_cast_fp16, var_12625_cast_fp16))[name = tensor("op_12765_cast_fp16")]; tensor var_12767_equation_0 = const()[name = tensor("op_12767_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12767_cast_fp16 = einsum(equation = var_12767_equation_0, values = (var_12227_cast_fp16, var_12626_cast_fp16))[name = tensor("op_12767_cast_fp16")]; tensor var_12769_equation_0 = const()[name = tensor("op_12769_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12769_cast_fp16 = einsum(equation = var_12769_equation_0, values = (var_12227_cast_fp16, var_12627_cast_fp16))[name = tensor("op_12769_cast_fp16")]; tensor var_12771_equation_0 = const()[name = tensor("op_12771_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12771_cast_fp16 = einsum(equation = var_12771_equation_0, values = (var_12227_cast_fp16, var_12628_cast_fp16))[name = tensor("op_12771_cast_fp16")]; tensor var_12773_equation_0 = const()[name = tensor("op_12773_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12773_cast_fp16 = einsum(equation = var_12773_equation_0, values = (var_12227_cast_fp16, var_12629_cast_fp16))[name = tensor("op_12773_cast_fp16")]; tensor var_12775_equation_0 = const()[name = tensor("op_12775_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12775_cast_fp16 = einsum(equation = var_12775_equation_0, values = (var_12231_cast_fp16, var_12630_cast_fp16))[name = tensor("op_12775_cast_fp16")]; tensor var_12777_equation_0 = const()[name = tensor("op_12777_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12777_cast_fp16 = einsum(equation = var_12777_equation_0, values = (var_12231_cast_fp16, var_12631_cast_fp16))[name = tensor("op_12777_cast_fp16")]; tensor var_12779_equation_0 = const()[name = tensor("op_12779_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12779_cast_fp16 = einsum(equation = var_12779_equation_0, values = (var_12231_cast_fp16, var_12632_cast_fp16))[name = tensor("op_12779_cast_fp16")]; tensor var_12781_equation_0 = const()[name = tensor("op_12781_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12781_cast_fp16 = einsum(equation = var_12781_equation_0, values = (var_12231_cast_fp16, var_12633_cast_fp16))[name = tensor("op_12781_cast_fp16")]; tensor var_12783_equation_0 = const()[name = tensor("op_12783_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12783_cast_fp16 = einsum(equation = var_12783_equation_0, values = (var_12235_cast_fp16, var_12634_cast_fp16))[name = tensor("op_12783_cast_fp16")]; tensor var_12785_equation_0 = const()[name = tensor("op_12785_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12785_cast_fp16 = einsum(equation = var_12785_equation_0, values = (var_12235_cast_fp16, var_12635_cast_fp16))[name = tensor("op_12785_cast_fp16")]; tensor var_12787_equation_0 = const()[name = tensor("op_12787_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12787_cast_fp16 = einsum(equation = var_12787_equation_0, values = (var_12235_cast_fp16, var_12636_cast_fp16))[name = tensor("op_12787_cast_fp16")]; tensor var_12789_equation_0 = const()[name = tensor("op_12789_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12789_cast_fp16 = einsum(equation = var_12789_equation_0, values = (var_12235_cast_fp16, var_12637_cast_fp16))[name = tensor("op_12789_cast_fp16")]; tensor var_12791_equation_0 = const()[name = tensor("op_12791_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12791_cast_fp16 = einsum(equation = var_12791_equation_0, values = (var_12239_cast_fp16, var_12638_cast_fp16))[name = tensor("op_12791_cast_fp16")]; tensor var_12793_equation_0 = const()[name = tensor("op_12793_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12793_cast_fp16 = einsum(equation = var_12793_equation_0, values = (var_12239_cast_fp16, var_12639_cast_fp16))[name = tensor("op_12793_cast_fp16")]; tensor var_12795_equation_0 = const()[name = tensor("op_12795_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12795_cast_fp16 = einsum(equation = var_12795_equation_0, values = (var_12239_cast_fp16, var_12640_cast_fp16))[name = tensor("op_12795_cast_fp16")]; tensor var_12797_equation_0 = const()[name = tensor("op_12797_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12797_cast_fp16 = einsum(equation = var_12797_equation_0, values = (var_12239_cast_fp16, var_12641_cast_fp16))[name = tensor("op_12797_cast_fp16")]; tensor var_12799_equation_0 = const()[name = tensor("op_12799_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12799_cast_fp16 = einsum(equation = var_12799_equation_0, values = (var_12243_cast_fp16, var_12642_cast_fp16))[name = tensor("op_12799_cast_fp16")]; tensor var_12801_equation_0 = const()[name = tensor("op_12801_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12801_cast_fp16 = einsum(equation = var_12801_equation_0, values = (var_12243_cast_fp16, var_12643_cast_fp16))[name = tensor("op_12801_cast_fp16")]; tensor var_12803_equation_0 = const()[name = tensor("op_12803_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12803_cast_fp16 = einsum(equation = var_12803_equation_0, values = (var_12243_cast_fp16, var_12644_cast_fp16))[name = tensor("op_12803_cast_fp16")]; tensor var_12805_equation_0 = const()[name = tensor("op_12805_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_12805_cast_fp16 = einsum(equation = var_12805_equation_0, values = (var_12243_cast_fp16, var_12645_cast_fp16))[name = tensor("op_12805_cast_fp16")]; tensor var_12807_interleave_0 = const()[name = tensor("op_12807_interleave_0"), val = tensor(false)]; tensor var_12807_cast_fp16 = concat(axis = var_11339, interleave = var_12807_interleave_0, values = (var_12647_cast_fp16, var_12649_cast_fp16, var_12651_cast_fp16, var_12653_cast_fp16))[name = tensor("op_12807_cast_fp16")]; tensor var_12809_interleave_0 = const()[name = tensor("op_12809_interleave_0"), val = tensor(false)]; tensor var_12809_cast_fp16 = concat(axis = var_11339, interleave = var_12809_interleave_0, values = (var_12655_cast_fp16, var_12657_cast_fp16, var_12659_cast_fp16, var_12661_cast_fp16))[name = tensor("op_12809_cast_fp16")]; tensor var_12811_interleave_0 = const()[name = tensor("op_12811_interleave_0"), val = tensor(false)]; tensor var_12811_cast_fp16 = concat(axis = var_11339, interleave = var_12811_interleave_0, values = (var_12663_cast_fp16, var_12665_cast_fp16, var_12667_cast_fp16, var_12669_cast_fp16))[name = tensor("op_12811_cast_fp16")]; tensor var_12813_interleave_0 = const()[name = tensor("op_12813_interleave_0"), val = tensor(false)]; tensor var_12813_cast_fp16 = concat(axis = var_11339, interleave = var_12813_interleave_0, values = (var_12671_cast_fp16, var_12673_cast_fp16, var_12675_cast_fp16, var_12677_cast_fp16))[name = tensor("op_12813_cast_fp16")]; tensor var_12815_interleave_0 = const()[name = tensor("op_12815_interleave_0"), val = tensor(false)]; tensor var_12815_cast_fp16 = concat(axis = var_11339, interleave = var_12815_interleave_0, values = (var_12679_cast_fp16, var_12681_cast_fp16, var_12683_cast_fp16, var_12685_cast_fp16))[name = tensor("op_12815_cast_fp16")]; tensor var_12817_interleave_0 = const()[name = tensor("op_12817_interleave_0"), val = tensor(false)]; tensor var_12817_cast_fp16 = concat(axis = var_11339, interleave = var_12817_interleave_0, values = (var_12687_cast_fp16, var_12689_cast_fp16, var_12691_cast_fp16, var_12693_cast_fp16))[name = tensor("op_12817_cast_fp16")]; tensor var_12819_interleave_0 = const()[name = tensor("op_12819_interleave_0"), val = tensor(false)]; tensor var_12819_cast_fp16 = concat(axis = var_11339, interleave = var_12819_interleave_0, values = (var_12695_cast_fp16, var_12697_cast_fp16, var_12699_cast_fp16, var_12701_cast_fp16))[name = tensor("op_12819_cast_fp16")]; tensor var_12821_interleave_0 = const()[name = tensor("op_12821_interleave_0"), val = tensor(false)]; tensor var_12821_cast_fp16 = concat(axis = var_11339, interleave = var_12821_interleave_0, values = (var_12703_cast_fp16, var_12705_cast_fp16, var_12707_cast_fp16, var_12709_cast_fp16))[name = tensor("op_12821_cast_fp16")]; tensor var_12823_interleave_0 = const()[name = tensor("op_12823_interleave_0"), val = tensor(false)]; tensor var_12823_cast_fp16 = concat(axis = var_11339, interleave = var_12823_interleave_0, values = (var_12711_cast_fp16, var_12713_cast_fp16, var_12715_cast_fp16, var_12717_cast_fp16))[name = tensor("op_12823_cast_fp16")]; tensor var_12825_interleave_0 = const()[name = tensor("op_12825_interleave_0"), val = tensor(false)]; tensor var_12825_cast_fp16 = concat(axis = var_11339, interleave = var_12825_interleave_0, values = (var_12719_cast_fp16, var_12721_cast_fp16, var_12723_cast_fp16, var_12725_cast_fp16))[name = tensor("op_12825_cast_fp16")]; tensor var_12827_interleave_0 = const()[name = tensor("op_12827_interleave_0"), val = tensor(false)]; tensor var_12827_cast_fp16 = concat(axis = var_11339, interleave = var_12827_interleave_0, values = (var_12727_cast_fp16, var_12729_cast_fp16, var_12731_cast_fp16, var_12733_cast_fp16))[name = tensor("op_12827_cast_fp16")]; tensor var_12829_interleave_0 = const()[name = tensor("op_12829_interleave_0"), val = tensor(false)]; tensor var_12829_cast_fp16 = concat(axis = var_11339, interleave = var_12829_interleave_0, values = (var_12735_cast_fp16, var_12737_cast_fp16, var_12739_cast_fp16, var_12741_cast_fp16))[name = tensor("op_12829_cast_fp16")]; tensor var_12831_interleave_0 = const()[name = tensor("op_12831_interleave_0"), val = tensor(false)]; tensor var_12831_cast_fp16 = concat(axis = var_11339, interleave = var_12831_interleave_0, values = (var_12743_cast_fp16, var_12745_cast_fp16, var_12747_cast_fp16, var_12749_cast_fp16))[name = tensor("op_12831_cast_fp16")]; tensor var_12833_interleave_0 = const()[name = tensor("op_12833_interleave_0"), val = tensor(false)]; tensor var_12833_cast_fp16 = concat(axis = var_11339, interleave = var_12833_interleave_0, values = (var_12751_cast_fp16, var_12753_cast_fp16, var_12755_cast_fp16, var_12757_cast_fp16))[name = tensor("op_12833_cast_fp16")]; tensor var_12835_interleave_0 = const()[name = tensor("op_12835_interleave_0"), val = tensor(false)]; tensor var_12835_cast_fp16 = concat(axis = var_11339, interleave = var_12835_interleave_0, values = (var_12759_cast_fp16, var_12761_cast_fp16, var_12763_cast_fp16, var_12765_cast_fp16))[name = tensor("op_12835_cast_fp16")]; tensor var_12837_interleave_0 = const()[name = tensor("op_12837_interleave_0"), val = tensor(false)]; tensor var_12837_cast_fp16 = concat(axis = var_11339, interleave = var_12837_interleave_0, values = (var_12767_cast_fp16, var_12769_cast_fp16, var_12771_cast_fp16, var_12773_cast_fp16))[name = tensor("op_12837_cast_fp16")]; tensor var_12839_interleave_0 = const()[name = tensor("op_12839_interleave_0"), val = tensor(false)]; tensor var_12839_cast_fp16 = concat(axis = var_11339, interleave = var_12839_interleave_0, values = (var_12775_cast_fp16, var_12777_cast_fp16, var_12779_cast_fp16, var_12781_cast_fp16))[name = tensor("op_12839_cast_fp16")]; tensor var_12841_interleave_0 = const()[name = tensor("op_12841_interleave_0"), val = tensor(false)]; tensor var_12841_cast_fp16 = concat(axis = var_11339, interleave = var_12841_interleave_0, values = (var_12783_cast_fp16, var_12785_cast_fp16, var_12787_cast_fp16, var_12789_cast_fp16))[name = tensor("op_12841_cast_fp16")]; tensor var_12843_interleave_0 = const()[name = tensor("op_12843_interleave_0"), val = tensor(false)]; tensor var_12843_cast_fp16 = concat(axis = var_11339, interleave = var_12843_interleave_0, values = (var_12791_cast_fp16, var_12793_cast_fp16, var_12795_cast_fp16, var_12797_cast_fp16))[name = tensor("op_12843_cast_fp16")]; tensor var_12845_interleave_0 = const()[name = tensor("op_12845_interleave_0"), val = tensor(false)]; tensor var_12845_cast_fp16 = concat(axis = var_11339, interleave = var_12845_interleave_0, values = (var_12799_cast_fp16, var_12801_cast_fp16, var_12803_cast_fp16, var_12805_cast_fp16))[name = tensor("op_12845_cast_fp16")]; tensor input_57_interleave_0 = const()[name = tensor("input_57_interleave_0"), val = tensor(false)]; tensor input_57_cast_fp16 = concat(axis = var_11364, interleave = input_57_interleave_0, values = (var_12807_cast_fp16, var_12809_cast_fp16, var_12811_cast_fp16, var_12813_cast_fp16, var_12815_cast_fp16, var_12817_cast_fp16, var_12819_cast_fp16, var_12821_cast_fp16, var_12823_cast_fp16, var_12825_cast_fp16, var_12827_cast_fp16, var_12829_cast_fp16, var_12831_cast_fp16, var_12833_cast_fp16, var_12835_cast_fp16, var_12837_cast_fp16, var_12839_cast_fp16, var_12841_cast_fp16, var_12843_cast_fp16, var_12845_cast_fp16))[name = tensor("input_57_cast_fp16")]; tensor var_12856_pad_type_0 = const()[name = tensor("op_12856_pad_type_0"), val = tensor("valid")]; tensor var_12856_strides_0 = const()[name = tensor("op_12856_strides_0"), val = tensor([1, 1])]; tensor var_12856_pad_0 = const()[name = tensor("op_12856_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12856_dilations_0 = const()[name = tensor("op_12856_dilations_0"), val = tensor([1, 1])]; tensor var_12856_groups_0 = const()[name = tensor("op_12856_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110239168))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111058432))), name = tensor("layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_7_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111058560)))]; tensor var_12856_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_12856_dilations_0, groups = var_12856_groups_0, pad = var_12856_pad_0, pad_type = var_12856_pad_type_0, strides = var_12856_strides_0, weight = layers_7_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_57_cast_fp16)[name = tensor("op_12856_cast_fp16")]; tensor var_12862_pad_type_0 = const()[name = tensor("op_12862_pad_type_0"), val = tensor("valid")]; tensor var_12862_strides_0 = const()[name = tensor("op_12862_strides_0"), val = tensor([1, 1])]; tensor var_12862_pad_0 = const()[name = tensor("op_12862_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12862_dilations_0 = const()[name = tensor("op_12862_dilations_0"), val = tensor([1, 1])]; tensor var_12862_groups_0 = const()[name = tensor("op_12862_groups_0"), val = tensor(1)]; tensor layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111080064))), name = tensor("layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111061184))), shape = tensor([1280, 1280, 1, 1])]; tensor var_12862_cast_fp16 = conv(dilations = var_12862_dilations_0, groups = var_12862_groups_0, pad = var_12862_pad_0, pad_type = var_12862_pad_type_0, strides = var_12862_strides_0, weight = layers_7_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_57_cast_fp16)[name = tensor("op_12862_cast_fp16")]; tensor obj_31_cast_fp16 = add(x = var_12856_cast_fp16, y = var_12862_cast_fp16)[name = tensor("obj_31_cast_fp16")]; tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; tensor out_31_axes_0 = const()[name = tensor("out_31_axes_0"), val = tensor([1])]; tensor var_12873_to_fp16 = const()[name = tensor("op_12873_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_31_cast_fp16 = layer_norm(axes = out_31_axes_0, epsilon = var_12873_to_fp16, x = inputs_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111284928)))]; tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111287552)))]; tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; tensor var_12891_pad_type_0 = const()[name = tensor("op_12891_pad_type_0"), val = tensor("valid")]; tensor var_12891_strides_0 = const()[name = tensor("op_12891_strides_0"), val = tensor([1, 1])]; tensor var_12891_pad_0 = const()[name = tensor("op_12891_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12891_dilations_0 = const()[name = tensor("op_12891_dilations_0"), val = tensor([1, 1])]; tensor var_12891_groups_0 = const()[name = tensor("op_12891_groups_0"), val = tensor(1)]; tensor layers_7_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(111290176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114567040))), name = tensor("layers_7_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_7_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114567168)))]; tensor var_12891_cast_fp16 = conv(bias = layers_7_fc1_inlier_module_bias_to_fp16, dilations = var_12891_dilations_0, groups = var_12891_groups_0, pad = var_12891_pad_0, pad_type = var_12891_pad_type_0, strides = var_12891_strides_0, weight = layers_7_fc1_inlier_module_weight_to_fp16_palettized, x = input_59_cast_fp16)[name = tensor("op_12891_cast_fp16")]; tensor var_12897_pad_type_0 = const()[name = tensor("op_12897_pad_type_0"), val = tensor("valid")]; tensor var_12897_strides_0 = const()[name = tensor("op_12897_strides_0"), val = tensor([1, 1])]; tensor var_12897_pad_0 = const()[name = tensor("op_12897_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12897_dilations_0 = const()[name = tensor("op_12897_dilations_0"), val = tensor([1, 1])]; tensor var_12897_groups_0 = const()[name = tensor("op_12897_groups_0"), val = tensor(1)]; tensor layers_7_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114597440))), name = tensor("layers_7_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114577472))), shape = tensor([5120, 1280, 1, 1])]; tensor var_12897_cast_fp16 = conv(dilations = var_12897_dilations_0, groups = var_12897_groups_0, pad = var_12897_pad_0, pad_type = var_12897_pad_type_0, strides = var_12897_strides_0, weight = layers_7_fc1_outlier_module_weight_to_fp16_sparsified, x = input_59_cast_fp16)[name = tensor("op_12897_cast_fp16")]; tensor input_61_cast_fp16 = add(x = var_12891_cast_fp16, y = var_12897_cast_fp16)[name = tensor("input_61_cast_fp16")]; tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; tensor var_12908_pad_type_0 = const()[name = tensor("op_12908_pad_type_0"), val = tensor("valid")]; tensor var_12908_strides_0 = const()[name = tensor("op_12908_strides_0"), val = tensor([1, 1])]; tensor var_12908_pad_0 = const()[name = tensor("op_12908_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12908_dilations_0 = const()[name = tensor("op_12908_dilations_0"), val = tensor([1, 1])]; tensor var_12908_groups_0 = const()[name = tensor("op_12908_groups_0"), val = tensor(1)]; tensor layers_7_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(115416704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118693568))), name = tensor("layers_7_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_7_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_7_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118693696)))]; tensor var_12908_cast_fp16 = conv(bias = layers_7_fc2_inlier_module_bias_to_fp16, dilations = var_12908_dilations_0, groups = var_12908_groups_0, pad = var_12908_pad_0, pad_type = var_12908_pad_type_0, strides = var_12908_strides_0, weight = layers_7_fc2_inlier_module_weight_to_fp16_palettized, x = input_63_cast_fp16)[name = tensor("op_12908_cast_fp16")]; tensor var_12914_pad_type_0 = const()[name = tensor("op_12914_pad_type_0"), val = tensor("valid")]; tensor var_12914_strides_0 = const()[name = tensor("op_12914_strides_0"), val = tensor([1, 1])]; tensor var_12914_pad_0 = const()[name = tensor("op_12914_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12914_dilations_0 = const()[name = tensor("op_12914_dilations_0"), val = tensor([1, 1])]; tensor var_12914_groups_0 = const()[name = tensor("op_12914_groups_0"), val = tensor(1)]; tensor layers_7_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118968000))), name = tensor("layers_7_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(118696320))), shape = tensor([1280, 5120, 1, 1])]; tensor var_12914_cast_fp16 = conv(dilations = var_12914_dilations_0, groups = var_12914_groups_0, pad = var_12914_pad_0, pad_type = var_12914_pad_type_0, strides = var_12914_strides_0, weight = layers_7_fc2_outlier_module_weight_to_fp16_sparsified, x = input_63_cast_fp16)[name = tensor("op_12914_cast_fp16")]; tensor hidden_states_19_cast_fp16 = add(x = var_12908_cast_fp16, y = var_12914_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; tensor var_12920 = const()[name = tensor("op_12920"), val = tensor(3)]; tensor var_12945 = const()[name = tensor("op_12945"), val = tensor(1)]; tensor out_33_axes_0 = const()[name = tensor("out_33_axes_0"), val = tensor([1])]; tensor var_12962_to_fp16 = const()[name = tensor("op_12962_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_33_cast_fp16 = layer_norm(axes = out_33_axes_0, epsilon = var_12962_to_fp16, x = inputs_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119787264)))]; tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119789888)))]; tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; tensor var_12984_pad_type_0 = const()[name = tensor("op_12984_pad_type_0"), val = tensor("valid")]; tensor var_12984_strides_0 = const()[name = tensor("op_12984_strides_0"), val = tensor([1, 1])]; tensor var_12984_pad_0 = const()[name = tensor("op_12984_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12984_dilations_0 = const()[name = tensor("op_12984_dilations_0"), val = tensor([1, 1])]; tensor var_12984_groups_0 = const()[name = tensor("op_12984_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119792512))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120611776))), name = tensor("layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_8_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120611904)))]; tensor var_12984_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_12984_dilations_0, groups = var_12984_groups_0, pad = var_12984_pad_0, pad_type = var_12984_pad_type_0, strides = var_12984_strides_0, weight = layers_8_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_12984_cast_fp16")]; tensor var_12990_pad_type_0 = const()[name = tensor("op_12990_pad_type_0"), val = tensor("valid")]; tensor var_12990_strides_0 = const()[name = tensor("op_12990_strides_0"), val = tensor([1, 1])]; tensor var_12990_pad_0 = const()[name = tensor("op_12990_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12990_dilations_0 = const()[name = tensor("op_12990_dilations_0"), val = tensor([1, 1])]; tensor var_12990_groups_0 = const()[name = tensor("op_12990_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120662848))), name = tensor("layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120614528))), shape = tensor([1280, 1280, 1, 1])]; tensor var_12990_cast_fp16 = conv(dilations = var_12990_dilations_0, groups = var_12990_groups_0, pad = var_12990_pad_0, pad_type = var_12990_pad_type_0, strides = var_12990_strides_0, weight = layers_8_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_12990_cast_fp16")]; tensor query_17_cast_fp16 = add(x = var_12984_cast_fp16, y = var_12990_cast_fp16)[name = tensor("query_17_cast_fp16")]; tensor var_12999_pad_type_0 = const()[name = tensor("op_12999_pad_type_0"), val = tensor("valid")]; tensor var_12999_strides_0 = const()[name = tensor("op_12999_strides_0"), val = tensor([1, 1])]; tensor var_12999_pad_0 = const()[name = tensor("op_12999_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_12999_dilations_0 = const()[name = tensor("op_12999_dilations_0"), val = tensor([1, 1])]; tensor var_12999_groups_0 = const()[name = tensor("op_12999_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120867712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121686976))), name = tensor("layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_12999_cast_fp16 = conv(dilations = var_12999_dilations_0, groups = var_12999_groups_0, pad = var_12999_pad_0, pad_type = var_12999_pad_type_0, strides = var_12999_strides_0, weight = layers_8_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_12999_cast_fp16")]; tensor var_13005_pad_type_0 = const()[name = tensor("op_13005_pad_type_0"), val = tensor("valid")]; tensor var_13005_strides_0 = const()[name = tensor("op_13005_strides_0"), val = tensor([1, 1])]; tensor var_13005_pad_0 = const()[name = tensor("op_13005_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13005_dilations_0 = const()[name = tensor("op_13005_dilations_0"), val = tensor([1, 1])]; tensor var_13005_groups_0 = const()[name = tensor("op_13005_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121721856))), name = tensor("layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121687104))), shape = tensor([1280, 1280, 1, 1])]; tensor var_13005_cast_fp16 = conv(dilations = var_13005_dilations_0, groups = var_13005_groups_0, pad = var_13005_pad_0, pad_type = var_13005_pad_type_0, strides = var_13005_strides_0, weight = layers_8_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_13005_cast_fp16")]; tensor key_17_cast_fp16 = add(x = var_12999_cast_fp16, y = var_13005_cast_fp16)[name = tensor("key_17_cast_fp16")]; tensor var_13015_pad_type_0 = const()[name = tensor("op_13015_pad_type_0"), val = tensor("valid")]; tensor var_13015_strides_0 = const()[name = tensor("op_13015_strides_0"), val = tensor([1, 1])]; tensor var_13015_pad_0 = const()[name = tensor("op_13015_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13015_dilations_0 = const()[name = tensor("op_13015_dilations_0"), val = tensor([1, 1])]; tensor var_13015_groups_0 = const()[name = tensor("op_13015_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121926720))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122745984))), name = tensor("layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_8_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122746112)))]; tensor var_13015_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_13015_dilations_0, groups = var_13015_groups_0, pad = var_13015_pad_0, pad_type = var_13015_pad_type_0, strides = var_13015_strides_0, weight = layers_8_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_33_cast_fp16)[name = tensor("op_13015_cast_fp16")]; tensor var_13021_pad_type_0 = const()[name = tensor("op_13021_pad_type_0"), val = tensor("valid")]; tensor var_13021_strides_0 = const()[name = tensor("op_13021_strides_0"), val = tensor([1, 1])]; tensor var_13021_pad_0 = const()[name = tensor("op_13021_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_13021_dilations_0 = const()[name = tensor("op_13021_dilations_0"), val = tensor([1, 1])]; tensor var_13021_groups_0 = const()[name = tensor("op_13021_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122771968))), name = tensor("layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122748736))), shape = tensor([1280, 1280, 1, 1])]; tensor var_13021_cast_fp16 = conv(dilations = var_13021_dilations_0, groups = var_13021_groups_0, pad = var_13021_pad_0, pad_type = var_13021_pad_type_0, strides = var_13021_strides_0, weight = layers_8_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_33_cast_fp16)[name = tensor("op_13021_cast_fp16")]; tensor value_17_cast_fp16 = add(x = var_13015_cast_fp16, y = var_13021_cast_fp16)[name = tensor("value_17_cast_fp16")]; tensor var_13027_begin_0 = const()[name = tensor("op_13027_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13027_end_0 = const()[name = tensor("op_13027_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13027_end_mask_0 = const()[name = tensor("op_13027_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13027_cast_fp16 = slice_by_index(begin = var_13027_begin_0, end = var_13027_end_0, end_mask = var_13027_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13027_cast_fp16")]; tensor var_13031_begin_0 = const()[name = tensor("op_13031_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_13031_end_0 = const()[name = tensor("op_13031_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_13031_end_mask_0 = const()[name = tensor("op_13031_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13031_cast_fp16 = slice_by_index(begin = var_13031_begin_0, end = var_13031_end_0, end_mask = var_13031_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13031_cast_fp16")]; tensor var_13035_begin_0 = const()[name = tensor("op_13035_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_13035_end_0 = const()[name = tensor("op_13035_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_13035_end_mask_0 = const()[name = tensor("op_13035_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13035_cast_fp16 = slice_by_index(begin = var_13035_begin_0, end = var_13035_end_0, end_mask = var_13035_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13035_cast_fp16")]; tensor var_13039_begin_0 = const()[name = tensor("op_13039_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_13039_end_0 = const()[name = tensor("op_13039_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_13039_end_mask_0 = const()[name = tensor("op_13039_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13039_cast_fp16 = slice_by_index(begin = var_13039_begin_0, end = var_13039_end_0, end_mask = var_13039_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13039_cast_fp16")]; tensor var_13043_begin_0 = const()[name = tensor("op_13043_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_13043_end_0 = const()[name = tensor("op_13043_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_13043_end_mask_0 = const()[name = tensor("op_13043_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13043_cast_fp16 = slice_by_index(begin = var_13043_begin_0, end = var_13043_end_0, end_mask = var_13043_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13043_cast_fp16")]; tensor var_13047_begin_0 = const()[name = tensor("op_13047_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_13047_end_0 = const()[name = tensor("op_13047_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_13047_end_mask_0 = const()[name = tensor("op_13047_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13047_cast_fp16 = slice_by_index(begin = var_13047_begin_0, end = var_13047_end_0, end_mask = var_13047_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13047_cast_fp16")]; tensor var_13051_begin_0 = const()[name = tensor("op_13051_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_13051_end_0 = const()[name = tensor("op_13051_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_13051_end_mask_0 = const()[name = tensor("op_13051_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13051_cast_fp16 = slice_by_index(begin = var_13051_begin_0, end = var_13051_end_0, end_mask = var_13051_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13051_cast_fp16")]; tensor var_13055_begin_0 = const()[name = tensor("op_13055_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_13055_end_0 = const()[name = tensor("op_13055_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_13055_end_mask_0 = const()[name = tensor("op_13055_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13055_cast_fp16 = slice_by_index(begin = var_13055_begin_0, end = var_13055_end_0, end_mask = var_13055_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13055_cast_fp16")]; tensor var_13059_begin_0 = const()[name = tensor("op_13059_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_13059_end_0 = const()[name = tensor("op_13059_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_13059_end_mask_0 = const()[name = tensor("op_13059_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13059_cast_fp16 = slice_by_index(begin = var_13059_begin_0, end = var_13059_end_0, end_mask = var_13059_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13059_cast_fp16")]; tensor var_13063_begin_0 = const()[name = tensor("op_13063_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_13063_end_0 = const()[name = tensor("op_13063_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_13063_end_mask_0 = const()[name = tensor("op_13063_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13063_cast_fp16 = slice_by_index(begin = var_13063_begin_0, end = var_13063_end_0, end_mask = var_13063_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13063_cast_fp16")]; tensor var_13067_begin_0 = const()[name = tensor("op_13067_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_13067_end_0 = const()[name = tensor("op_13067_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_13067_end_mask_0 = const()[name = tensor("op_13067_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13067_cast_fp16 = slice_by_index(begin = var_13067_begin_0, end = var_13067_end_0, end_mask = var_13067_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13067_cast_fp16")]; tensor var_13071_begin_0 = const()[name = tensor("op_13071_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_13071_end_0 = const()[name = tensor("op_13071_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_13071_end_mask_0 = const()[name = tensor("op_13071_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13071_cast_fp16 = slice_by_index(begin = var_13071_begin_0, end = var_13071_end_0, end_mask = var_13071_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13071_cast_fp16")]; tensor var_13075_begin_0 = const()[name = tensor("op_13075_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_13075_end_0 = const()[name = tensor("op_13075_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_13075_end_mask_0 = const()[name = tensor("op_13075_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13075_cast_fp16 = slice_by_index(begin = var_13075_begin_0, end = var_13075_end_0, end_mask = var_13075_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13075_cast_fp16")]; tensor var_13079_begin_0 = const()[name = tensor("op_13079_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_13079_end_0 = const()[name = tensor("op_13079_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_13079_end_mask_0 = const()[name = tensor("op_13079_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13079_cast_fp16 = slice_by_index(begin = var_13079_begin_0, end = var_13079_end_0, end_mask = var_13079_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13079_cast_fp16")]; tensor var_13083_begin_0 = const()[name = tensor("op_13083_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_13083_end_0 = const()[name = tensor("op_13083_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_13083_end_mask_0 = const()[name = tensor("op_13083_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13083_cast_fp16 = slice_by_index(begin = var_13083_begin_0, end = var_13083_end_0, end_mask = var_13083_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13083_cast_fp16")]; tensor var_13087_begin_0 = const()[name = tensor("op_13087_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_13087_end_0 = const()[name = tensor("op_13087_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_13087_end_mask_0 = const()[name = tensor("op_13087_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13087_cast_fp16 = slice_by_index(begin = var_13087_begin_0, end = var_13087_end_0, end_mask = var_13087_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13087_cast_fp16")]; tensor var_13091_begin_0 = const()[name = tensor("op_13091_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_13091_end_0 = const()[name = tensor("op_13091_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_13091_end_mask_0 = const()[name = tensor("op_13091_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13091_cast_fp16 = slice_by_index(begin = var_13091_begin_0, end = var_13091_end_0, end_mask = var_13091_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13091_cast_fp16")]; tensor var_13095_begin_0 = const()[name = tensor("op_13095_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_13095_end_0 = const()[name = tensor("op_13095_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_13095_end_mask_0 = const()[name = tensor("op_13095_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13095_cast_fp16 = slice_by_index(begin = var_13095_begin_0, end = var_13095_end_0, end_mask = var_13095_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13095_cast_fp16")]; tensor var_13099_begin_0 = const()[name = tensor("op_13099_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_13099_end_0 = const()[name = tensor("op_13099_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_13099_end_mask_0 = const()[name = tensor("op_13099_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13099_cast_fp16 = slice_by_index(begin = var_13099_begin_0, end = var_13099_end_0, end_mask = var_13099_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13099_cast_fp16")]; tensor var_13103_begin_0 = const()[name = tensor("op_13103_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_13103_end_0 = const()[name = tensor("op_13103_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_13103_end_mask_0 = const()[name = tensor("op_13103_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13103_cast_fp16 = slice_by_index(begin = var_13103_begin_0, end = var_13103_end_0, end_mask = var_13103_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_13103_cast_fp16")]; tensor var_13112_begin_0 = const()[name = tensor("op_13112_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13112_end_0 = const()[name = tensor("op_13112_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13112_end_mask_0 = const()[name = tensor("op_13112_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13112_cast_fp16 = slice_by_index(begin = var_13112_begin_0, end = var_13112_end_0, end_mask = var_13112_end_mask_0, x = var_13027_cast_fp16)[name = tensor("op_13112_cast_fp16")]; tensor var_13119_begin_0 = const()[name = tensor("op_13119_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13119_end_0 = const()[name = tensor("op_13119_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13119_end_mask_0 = const()[name = tensor("op_13119_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13119_cast_fp16 = slice_by_index(begin = var_13119_begin_0, end = var_13119_end_0, end_mask = var_13119_end_mask_0, x = var_13027_cast_fp16)[name = tensor("op_13119_cast_fp16")]; tensor var_13126_begin_0 = const()[name = tensor("op_13126_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13126_end_0 = const()[name = tensor("op_13126_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13126_end_mask_0 = const()[name = tensor("op_13126_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13126_cast_fp16 = slice_by_index(begin = var_13126_begin_0, end = var_13126_end_0, end_mask = var_13126_end_mask_0, x = var_13027_cast_fp16)[name = tensor("op_13126_cast_fp16")]; tensor var_13133_begin_0 = const()[name = tensor("op_13133_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13133_end_0 = const()[name = tensor("op_13133_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13133_end_mask_0 = const()[name = tensor("op_13133_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13133_cast_fp16 = slice_by_index(begin = var_13133_begin_0, end = var_13133_end_0, end_mask = var_13133_end_mask_0, x = var_13027_cast_fp16)[name = tensor("op_13133_cast_fp16")]; tensor var_13140_begin_0 = const()[name = tensor("op_13140_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13140_end_0 = const()[name = tensor("op_13140_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13140_end_mask_0 = const()[name = tensor("op_13140_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13140_cast_fp16 = slice_by_index(begin = var_13140_begin_0, end = var_13140_end_0, end_mask = var_13140_end_mask_0, x = var_13031_cast_fp16)[name = tensor("op_13140_cast_fp16")]; tensor var_13147_begin_0 = const()[name = tensor("op_13147_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13147_end_0 = const()[name = tensor("op_13147_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13147_end_mask_0 = const()[name = tensor("op_13147_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13147_cast_fp16 = slice_by_index(begin = var_13147_begin_0, end = var_13147_end_0, end_mask = var_13147_end_mask_0, x = var_13031_cast_fp16)[name = tensor("op_13147_cast_fp16")]; tensor var_13154_begin_0 = const()[name = tensor("op_13154_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13154_end_0 = const()[name = tensor("op_13154_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13154_end_mask_0 = const()[name = tensor("op_13154_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13154_cast_fp16 = slice_by_index(begin = var_13154_begin_0, end = var_13154_end_0, end_mask = var_13154_end_mask_0, x = var_13031_cast_fp16)[name = tensor("op_13154_cast_fp16")]; tensor var_13161_begin_0 = const()[name = tensor("op_13161_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13161_end_0 = const()[name = tensor("op_13161_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13161_end_mask_0 = const()[name = tensor("op_13161_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13161_cast_fp16 = slice_by_index(begin = var_13161_begin_0, end = var_13161_end_0, end_mask = var_13161_end_mask_0, x = var_13031_cast_fp16)[name = tensor("op_13161_cast_fp16")]; tensor var_13168_begin_0 = const()[name = tensor("op_13168_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13168_end_0 = const()[name = tensor("op_13168_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13168_end_mask_0 = const()[name = tensor("op_13168_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13168_cast_fp16 = slice_by_index(begin = var_13168_begin_0, end = var_13168_end_0, end_mask = var_13168_end_mask_0, x = var_13035_cast_fp16)[name = tensor("op_13168_cast_fp16")]; tensor var_13175_begin_0 = const()[name = tensor("op_13175_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13175_end_0 = const()[name = tensor("op_13175_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13175_end_mask_0 = const()[name = tensor("op_13175_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13175_cast_fp16 = slice_by_index(begin = var_13175_begin_0, end = var_13175_end_0, end_mask = var_13175_end_mask_0, x = var_13035_cast_fp16)[name = tensor("op_13175_cast_fp16")]; tensor var_13182_begin_0 = const()[name = tensor("op_13182_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13182_end_0 = const()[name = tensor("op_13182_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13182_end_mask_0 = const()[name = tensor("op_13182_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13182_cast_fp16 = slice_by_index(begin = var_13182_begin_0, end = var_13182_end_0, end_mask = var_13182_end_mask_0, x = var_13035_cast_fp16)[name = tensor("op_13182_cast_fp16")]; tensor var_13189_begin_0 = const()[name = tensor("op_13189_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13189_end_0 = const()[name = tensor("op_13189_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13189_end_mask_0 = const()[name = tensor("op_13189_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13189_cast_fp16 = slice_by_index(begin = var_13189_begin_0, end = var_13189_end_0, end_mask = var_13189_end_mask_0, x = var_13035_cast_fp16)[name = tensor("op_13189_cast_fp16")]; tensor var_13196_begin_0 = const()[name = tensor("op_13196_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13196_end_0 = const()[name = tensor("op_13196_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13196_end_mask_0 = const()[name = tensor("op_13196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13196_cast_fp16 = slice_by_index(begin = var_13196_begin_0, end = var_13196_end_0, end_mask = var_13196_end_mask_0, x = var_13039_cast_fp16)[name = tensor("op_13196_cast_fp16")]; tensor var_13203_begin_0 = const()[name = tensor("op_13203_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13203_end_0 = const()[name = tensor("op_13203_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13203_end_mask_0 = const()[name = tensor("op_13203_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13203_cast_fp16 = slice_by_index(begin = var_13203_begin_0, end = var_13203_end_0, end_mask = var_13203_end_mask_0, x = var_13039_cast_fp16)[name = tensor("op_13203_cast_fp16")]; tensor var_13210_begin_0 = const()[name = tensor("op_13210_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13210_end_0 = const()[name = tensor("op_13210_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13210_end_mask_0 = const()[name = tensor("op_13210_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13210_cast_fp16 = slice_by_index(begin = var_13210_begin_0, end = var_13210_end_0, end_mask = var_13210_end_mask_0, x = var_13039_cast_fp16)[name = tensor("op_13210_cast_fp16")]; tensor var_13217_begin_0 = const()[name = tensor("op_13217_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13217_end_0 = const()[name = tensor("op_13217_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13217_end_mask_0 = const()[name = tensor("op_13217_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13217_cast_fp16 = slice_by_index(begin = var_13217_begin_0, end = var_13217_end_0, end_mask = var_13217_end_mask_0, x = var_13039_cast_fp16)[name = tensor("op_13217_cast_fp16")]; tensor var_13224_begin_0 = const()[name = tensor("op_13224_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13224_end_0 = const()[name = tensor("op_13224_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13224_end_mask_0 = const()[name = tensor("op_13224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13224_cast_fp16 = slice_by_index(begin = var_13224_begin_0, end = var_13224_end_0, end_mask = var_13224_end_mask_0, x = var_13043_cast_fp16)[name = tensor("op_13224_cast_fp16")]; tensor var_13231_begin_0 = const()[name = tensor("op_13231_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13231_end_0 = const()[name = tensor("op_13231_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13231_end_mask_0 = const()[name = tensor("op_13231_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13231_cast_fp16 = slice_by_index(begin = var_13231_begin_0, end = var_13231_end_0, end_mask = var_13231_end_mask_0, x = var_13043_cast_fp16)[name = tensor("op_13231_cast_fp16")]; tensor var_13238_begin_0 = const()[name = tensor("op_13238_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13238_end_0 = const()[name = tensor("op_13238_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13238_end_mask_0 = const()[name = tensor("op_13238_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13238_cast_fp16 = slice_by_index(begin = var_13238_begin_0, end = var_13238_end_0, end_mask = var_13238_end_mask_0, x = var_13043_cast_fp16)[name = tensor("op_13238_cast_fp16")]; tensor var_13245_begin_0 = const()[name = tensor("op_13245_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13245_end_0 = const()[name = tensor("op_13245_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13245_end_mask_0 = const()[name = tensor("op_13245_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13245_cast_fp16 = slice_by_index(begin = var_13245_begin_0, end = var_13245_end_0, end_mask = var_13245_end_mask_0, x = var_13043_cast_fp16)[name = tensor("op_13245_cast_fp16")]; tensor var_13252_begin_0 = const()[name = tensor("op_13252_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13252_end_0 = const()[name = tensor("op_13252_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13252_end_mask_0 = const()[name = tensor("op_13252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13252_cast_fp16 = slice_by_index(begin = var_13252_begin_0, end = var_13252_end_0, end_mask = var_13252_end_mask_0, x = var_13047_cast_fp16)[name = tensor("op_13252_cast_fp16")]; tensor var_13259_begin_0 = const()[name = tensor("op_13259_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13259_end_0 = const()[name = tensor("op_13259_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13259_end_mask_0 = const()[name = tensor("op_13259_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13259_cast_fp16 = slice_by_index(begin = var_13259_begin_0, end = var_13259_end_0, end_mask = var_13259_end_mask_0, x = var_13047_cast_fp16)[name = tensor("op_13259_cast_fp16")]; tensor var_13266_begin_0 = const()[name = tensor("op_13266_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13266_end_0 = const()[name = tensor("op_13266_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13266_end_mask_0 = const()[name = tensor("op_13266_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13266_cast_fp16 = slice_by_index(begin = var_13266_begin_0, end = var_13266_end_0, end_mask = var_13266_end_mask_0, x = var_13047_cast_fp16)[name = tensor("op_13266_cast_fp16")]; tensor var_13273_begin_0 = const()[name = tensor("op_13273_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13273_end_0 = const()[name = tensor("op_13273_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13273_end_mask_0 = const()[name = tensor("op_13273_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13273_cast_fp16 = slice_by_index(begin = var_13273_begin_0, end = var_13273_end_0, end_mask = var_13273_end_mask_0, x = var_13047_cast_fp16)[name = tensor("op_13273_cast_fp16")]; tensor var_13280_begin_0 = const()[name = tensor("op_13280_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13280_end_0 = const()[name = tensor("op_13280_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13280_end_mask_0 = const()[name = tensor("op_13280_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13280_cast_fp16 = slice_by_index(begin = var_13280_begin_0, end = var_13280_end_0, end_mask = var_13280_end_mask_0, x = var_13051_cast_fp16)[name = tensor("op_13280_cast_fp16")]; tensor var_13287_begin_0 = const()[name = tensor("op_13287_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13287_end_0 = const()[name = tensor("op_13287_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13287_end_mask_0 = const()[name = tensor("op_13287_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13287_cast_fp16 = slice_by_index(begin = var_13287_begin_0, end = var_13287_end_0, end_mask = var_13287_end_mask_0, x = var_13051_cast_fp16)[name = tensor("op_13287_cast_fp16")]; tensor var_13294_begin_0 = const()[name = tensor("op_13294_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13294_end_0 = const()[name = tensor("op_13294_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13294_end_mask_0 = const()[name = tensor("op_13294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13294_cast_fp16 = slice_by_index(begin = var_13294_begin_0, end = var_13294_end_0, end_mask = var_13294_end_mask_0, x = var_13051_cast_fp16)[name = tensor("op_13294_cast_fp16")]; tensor var_13301_begin_0 = const()[name = tensor("op_13301_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13301_end_0 = const()[name = tensor("op_13301_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13301_end_mask_0 = const()[name = tensor("op_13301_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13301_cast_fp16 = slice_by_index(begin = var_13301_begin_0, end = var_13301_end_0, end_mask = var_13301_end_mask_0, x = var_13051_cast_fp16)[name = tensor("op_13301_cast_fp16")]; tensor var_13308_begin_0 = const()[name = tensor("op_13308_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13308_end_0 = const()[name = tensor("op_13308_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13308_end_mask_0 = const()[name = tensor("op_13308_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13308_cast_fp16 = slice_by_index(begin = var_13308_begin_0, end = var_13308_end_0, end_mask = var_13308_end_mask_0, x = var_13055_cast_fp16)[name = tensor("op_13308_cast_fp16")]; tensor var_13315_begin_0 = const()[name = tensor("op_13315_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13315_end_0 = const()[name = tensor("op_13315_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13315_end_mask_0 = const()[name = tensor("op_13315_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13315_cast_fp16 = slice_by_index(begin = var_13315_begin_0, end = var_13315_end_0, end_mask = var_13315_end_mask_0, x = var_13055_cast_fp16)[name = tensor("op_13315_cast_fp16")]; tensor var_13322_begin_0 = const()[name = tensor("op_13322_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13322_end_0 = const()[name = tensor("op_13322_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13322_end_mask_0 = const()[name = tensor("op_13322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13322_cast_fp16 = slice_by_index(begin = var_13322_begin_0, end = var_13322_end_0, end_mask = var_13322_end_mask_0, x = var_13055_cast_fp16)[name = tensor("op_13322_cast_fp16")]; tensor var_13329_begin_0 = const()[name = tensor("op_13329_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13329_end_0 = const()[name = tensor("op_13329_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13329_end_mask_0 = const()[name = tensor("op_13329_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13329_cast_fp16 = slice_by_index(begin = var_13329_begin_0, end = var_13329_end_0, end_mask = var_13329_end_mask_0, x = var_13055_cast_fp16)[name = tensor("op_13329_cast_fp16")]; tensor var_13336_begin_0 = const()[name = tensor("op_13336_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13336_end_0 = const()[name = tensor("op_13336_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13336_end_mask_0 = const()[name = tensor("op_13336_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13336_cast_fp16 = slice_by_index(begin = var_13336_begin_0, end = var_13336_end_0, end_mask = var_13336_end_mask_0, x = var_13059_cast_fp16)[name = tensor("op_13336_cast_fp16")]; tensor var_13343_begin_0 = const()[name = tensor("op_13343_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13343_end_0 = const()[name = tensor("op_13343_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13343_end_mask_0 = const()[name = tensor("op_13343_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13343_cast_fp16 = slice_by_index(begin = var_13343_begin_0, end = var_13343_end_0, end_mask = var_13343_end_mask_0, x = var_13059_cast_fp16)[name = tensor("op_13343_cast_fp16")]; tensor var_13350_begin_0 = const()[name = tensor("op_13350_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13350_end_0 = const()[name = tensor("op_13350_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13350_end_mask_0 = const()[name = tensor("op_13350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13350_cast_fp16 = slice_by_index(begin = var_13350_begin_0, end = var_13350_end_0, end_mask = var_13350_end_mask_0, x = var_13059_cast_fp16)[name = tensor("op_13350_cast_fp16")]; tensor var_13357_begin_0 = const()[name = tensor("op_13357_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13357_end_0 = const()[name = tensor("op_13357_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13357_end_mask_0 = const()[name = tensor("op_13357_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13357_cast_fp16 = slice_by_index(begin = var_13357_begin_0, end = var_13357_end_0, end_mask = var_13357_end_mask_0, x = var_13059_cast_fp16)[name = tensor("op_13357_cast_fp16")]; tensor var_13364_begin_0 = const()[name = tensor("op_13364_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13364_end_0 = const()[name = tensor("op_13364_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13364_end_mask_0 = const()[name = tensor("op_13364_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13364_cast_fp16 = slice_by_index(begin = var_13364_begin_0, end = var_13364_end_0, end_mask = var_13364_end_mask_0, x = var_13063_cast_fp16)[name = tensor("op_13364_cast_fp16")]; tensor var_13371_begin_0 = const()[name = tensor("op_13371_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13371_end_0 = const()[name = tensor("op_13371_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13371_end_mask_0 = const()[name = tensor("op_13371_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13371_cast_fp16 = slice_by_index(begin = var_13371_begin_0, end = var_13371_end_0, end_mask = var_13371_end_mask_0, x = var_13063_cast_fp16)[name = tensor("op_13371_cast_fp16")]; tensor var_13378_begin_0 = const()[name = tensor("op_13378_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13378_end_0 = const()[name = tensor("op_13378_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13378_end_mask_0 = const()[name = tensor("op_13378_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13378_cast_fp16 = slice_by_index(begin = var_13378_begin_0, end = var_13378_end_0, end_mask = var_13378_end_mask_0, x = var_13063_cast_fp16)[name = tensor("op_13378_cast_fp16")]; tensor var_13385_begin_0 = const()[name = tensor("op_13385_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13385_end_0 = const()[name = tensor("op_13385_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13385_end_mask_0 = const()[name = tensor("op_13385_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13385_cast_fp16 = slice_by_index(begin = var_13385_begin_0, end = var_13385_end_0, end_mask = var_13385_end_mask_0, x = var_13063_cast_fp16)[name = tensor("op_13385_cast_fp16")]; tensor var_13392_begin_0 = const()[name = tensor("op_13392_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13392_end_0 = const()[name = tensor("op_13392_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13392_end_mask_0 = const()[name = tensor("op_13392_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13392_cast_fp16 = slice_by_index(begin = var_13392_begin_0, end = var_13392_end_0, end_mask = var_13392_end_mask_0, x = var_13067_cast_fp16)[name = tensor("op_13392_cast_fp16")]; tensor var_13399_begin_0 = const()[name = tensor("op_13399_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13399_end_0 = const()[name = tensor("op_13399_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13399_end_mask_0 = const()[name = tensor("op_13399_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13399_cast_fp16 = slice_by_index(begin = var_13399_begin_0, end = var_13399_end_0, end_mask = var_13399_end_mask_0, x = var_13067_cast_fp16)[name = tensor("op_13399_cast_fp16")]; tensor var_13406_begin_0 = const()[name = tensor("op_13406_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13406_end_0 = const()[name = tensor("op_13406_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13406_end_mask_0 = const()[name = tensor("op_13406_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13406_cast_fp16 = slice_by_index(begin = var_13406_begin_0, end = var_13406_end_0, end_mask = var_13406_end_mask_0, x = var_13067_cast_fp16)[name = tensor("op_13406_cast_fp16")]; tensor var_13413_begin_0 = const()[name = tensor("op_13413_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13413_end_0 = const()[name = tensor("op_13413_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13413_end_mask_0 = const()[name = tensor("op_13413_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13413_cast_fp16 = slice_by_index(begin = var_13413_begin_0, end = var_13413_end_0, end_mask = var_13413_end_mask_0, x = var_13067_cast_fp16)[name = tensor("op_13413_cast_fp16")]; tensor var_13420_begin_0 = const()[name = tensor("op_13420_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13420_end_0 = const()[name = tensor("op_13420_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13420_end_mask_0 = const()[name = tensor("op_13420_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13420_cast_fp16 = slice_by_index(begin = var_13420_begin_0, end = var_13420_end_0, end_mask = var_13420_end_mask_0, x = var_13071_cast_fp16)[name = tensor("op_13420_cast_fp16")]; tensor var_13427_begin_0 = const()[name = tensor("op_13427_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13427_end_0 = const()[name = tensor("op_13427_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13427_end_mask_0 = const()[name = tensor("op_13427_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13427_cast_fp16 = slice_by_index(begin = var_13427_begin_0, end = var_13427_end_0, end_mask = var_13427_end_mask_0, x = var_13071_cast_fp16)[name = tensor("op_13427_cast_fp16")]; tensor var_13434_begin_0 = const()[name = tensor("op_13434_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13434_end_0 = const()[name = tensor("op_13434_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13434_end_mask_0 = const()[name = tensor("op_13434_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13434_cast_fp16 = slice_by_index(begin = var_13434_begin_0, end = var_13434_end_0, end_mask = var_13434_end_mask_0, x = var_13071_cast_fp16)[name = tensor("op_13434_cast_fp16")]; tensor var_13441_begin_0 = const()[name = tensor("op_13441_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13441_end_0 = const()[name = tensor("op_13441_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13441_end_mask_0 = const()[name = tensor("op_13441_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13441_cast_fp16 = slice_by_index(begin = var_13441_begin_0, end = var_13441_end_0, end_mask = var_13441_end_mask_0, x = var_13071_cast_fp16)[name = tensor("op_13441_cast_fp16")]; tensor var_13448_begin_0 = const()[name = tensor("op_13448_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13448_end_0 = const()[name = tensor("op_13448_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13448_end_mask_0 = const()[name = tensor("op_13448_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13448_cast_fp16 = slice_by_index(begin = var_13448_begin_0, end = var_13448_end_0, end_mask = var_13448_end_mask_0, x = var_13075_cast_fp16)[name = tensor("op_13448_cast_fp16")]; tensor var_13455_begin_0 = const()[name = tensor("op_13455_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13455_end_0 = const()[name = tensor("op_13455_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13455_end_mask_0 = const()[name = tensor("op_13455_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13455_cast_fp16 = slice_by_index(begin = var_13455_begin_0, end = var_13455_end_0, end_mask = var_13455_end_mask_0, x = var_13075_cast_fp16)[name = tensor("op_13455_cast_fp16")]; tensor var_13462_begin_0 = const()[name = tensor("op_13462_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13462_end_0 = const()[name = tensor("op_13462_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13462_end_mask_0 = const()[name = tensor("op_13462_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13462_cast_fp16 = slice_by_index(begin = var_13462_begin_0, end = var_13462_end_0, end_mask = var_13462_end_mask_0, x = var_13075_cast_fp16)[name = tensor("op_13462_cast_fp16")]; tensor var_13469_begin_0 = const()[name = tensor("op_13469_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13469_end_0 = const()[name = tensor("op_13469_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13469_end_mask_0 = const()[name = tensor("op_13469_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13469_cast_fp16 = slice_by_index(begin = var_13469_begin_0, end = var_13469_end_0, end_mask = var_13469_end_mask_0, x = var_13075_cast_fp16)[name = tensor("op_13469_cast_fp16")]; tensor var_13476_begin_0 = const()[name = tensor("op_13476_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13476_end_0 = const()[name = tensor("op_13476_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13476_end_mask_0 = const()[name = tensor("op_13476_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13476_cast_fp16 = slice_by_index(begin = var_13476_begin_0, end = var_13476_end_0, end_mask = var_13476_end_mask_0, x = var_13079_cast_fp16)[name = tensor("op_13476_cast_fp16")]; tensor var_13483_begin_0 = const()[name = tensor("op_13483_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13483_end_0 = const()[name = tensor("op_13483_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13483_end_mask_0 = const()[name = tensor("op_13483_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13483_cast_fp16 = slice_by_index(begin = var_13483_begin_0, end = var_13483_end_0, end_mask = var_13483_end_mask_0, x = var_13079_cast_fp16)[name = tensor("op_13483_cast_fp16")]; tensor var_13490_begin_0 = const()[name = tensor("op_13490_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13490_end_0 = const()[name = tensor("op_13490_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13490_end_mask_0 = const()[name = tensor("op_13490_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13490_cast_fp16 = slice_by_index(begin = var_13490_begin_0, end = var_13490_end_0, end_mask = var_13490_end_mask_0, x = var_13079_cast_fp16)[name = tensor("op_13490_cast_fp16")]; tensor var_13497_begin_0 = const()[name = tensor("op_13497_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13497_end_0 = const()[name = tensor("op_13497_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13497_end_mask_0 = const()[name = tensor("op_13497_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13497_cast_fp16 = slice_by_index(begin = var_13497_begin_0, end = var_13497_end_0, end_mask = var_13497_end_mask_0, x = var_13079_cast_fp16)[name = tensor("op_13497_cast_fp16")]; tensor var_13504_begin_0 = const()[name = tensor("op_13504_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13504_end_0 = const()[name = tensor("op_13504_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13504_end_mask_0 = const()[name = tensor("op_13504_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13504_cast_fp16 = slice_by_index(begin = var_13504_begin_0, end = var_13504_end_0, end_mask = var_13504_end_mask_0, x = var_13083_cast_fp16)[name = tensor("op_13504_cast_fp16")]; tensor var_13511_begin_0 = const()[name = tensor("op_13511_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13511_end_0 = const()[name = tensor("op_13511_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13511_end_mask_0 = const()[name = tensor("op_13511_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13511_cast_fp16 = slice_by_index(begin = var_13511_begin_0, end = var_13511_end_0, end_mask = var_13511_end_mask_0, x = var_13083_cast_fp16)[name = tensor("op_13511_cast_fp16")]; tensor var_13518_begin_0 = const()[name = tensor("op_13518_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13518_end_0 = const()[name = tensor("op_13518_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13518_end_mask_0 = const()[name = tensor("op_13518_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13518_cast_fp16 = slice_by_index(begin = var_13518_begin_0, end = var_13518_end_0, end_mask = var_13518_end_mask_0, x = var_13083_cast_fp16)[name = tensor("op_13518_cast_fp16")]; tensor var_13525_begin_0 = const()[name = tensor("op_13525_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13525_end_0 = const()[name = tensor("op_13525_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13525_end_mask_0 = const()[name = tensor("op_13525_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13525_cast_fp16 = slice_by_index(begin = var_13525_begin_0, end = var_13525_end_0, end_mask = var_13525_end_mask_0, x = var_13083_cast_fp16)[name = tensor("op_13525_cast_fp16")]; tensor var_13532_begin_0 = const()[name = tensor("op_13532_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13532_end_0 = const()[name = tensor("op_13532_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13532_end_mask_0 = const()[name = tensor("op_13532_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13532_cast_fp16 = slice_by_index(begin = var_13532_begin_0, end = var_13532_end_0, end_mask = var_13532_end_mask_0, x = var_13087_cast_fp16)[name = tensor("op_13532_cast_fp16")]; tensor var_13539_begin_0 = const()[name = tensor("op_13539_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13539_end_0 = const()[name = tensor("op_13539_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13539_end_mask_0 = const()[name = tensor("op_13539_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13539_cast_fp16 = slice_by_index(begin = var_13539_begin_0, end = var_13539_end_0, end_mask = var_13539_end_mask_0, x = var_13087_cast_fp16)[name = tensor("op_13539_cast_fp16")]; tensor var_13546_begin_0 = const()[name = tensor("op_13546_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13546_end_0 = const()[name = tensor("op_13546_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13546_end_mask_0 = const()[name = tensor("op_13546_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13546_cast_fp16 = slice_by_index(begin = var_13546_begin_0, end = var_13546_end_0, end_mask = var_13546_end_mask_0, x = var_13087_cast_fp16)[name = tensor("op_13546_cast_fp16")]; tensor var_13553_begin_0 = const()[name = tensor("op_13553_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13553_end_0 = const()[name = tensor("op_13553_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13553_end_mask_0 = const()[name = tensor("op_13553_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13553_cast_fp16 = slice_by_index(begin = var_13553_begin_0, end = var_13553_end_0, end_mask = var_13553_end_mask_0, x = var_13087_cast_fp16)[name = tensor("op_13553_cast_fp16")]; tensor var_13560_begin_0 = const()[name = tensor("op_13560_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13560_end_0 = const()[name = tensor("op_13560_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13560_end_mask_0 = const()[name = tensor("op_13560_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13560_cast_fp16 = slice_by_index(begin = var_13560_begin_0, end = var_13560_end_0, end_mask = var_13560_end_mask_0, x = var_13091_cast_fp16)[name = tensor("op_13560_cast_fp16")]; tensor var_13567_begin_0 = const()[name = tensor("op_13567_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13567_end_0 = const()[name = tensor("op_13567_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13567_end_mask_0 = const()[name = tensor("op_13567_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13567_cast_fp16 = slice_by_index(begin = var_13567_begin_0, end = var_13567_end_0, end_mask = var_13567_end_mask_0, x = var_13091_cast_fp16)[name = tensor("op_13567_cast_fp16")]; tensor var_13574_begin_0 = const()[name = tensor("op_13574_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13574_end_0 = const()[name = tensor("op_13574_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13574_end_mask_0 = const()[name = tensor("op_13574_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13574_cast_fp16 = slice_by_index(begin = var_13574_begin_0, end = var_13574_end_0, end_mask = var_13574_end_mask_0, x = var_13091_cast_fp16)[name = tensor("op_13574_cast_fp16")]; tensor var_13581_begin_0 = const()[name = tensor("op_13581_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13581_end_0 = const()[name = tensor("op_13581_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13581_end_mask_0 = const()[name = tensor("op_13581_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13581_cast_fp16 = slice_by_index(begin = var_13581_begin_0, end = var_13581_end_0, end_mask = var_13581_end_mask_0, x = var_13091_cast_fp16)[name = tensor("op_13581_cast_fp16")]; tensor var_13588_begin_0 = const()[name = tensor("op_13588_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13588_end_0 = const()[name = tensor("op_13588_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13588_end_mask_0 = const()[name = tensor("op_13588_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13588_cast_fp16 = slice_by_index(begin = var_13588_begin_0, end = var_13588_end_0, end_mask = var_13588_end_mask_0, x = var_13095_cast_fp16)[name = tensor("op_13588_cast_fp16")]; tensor var_13595_begin_0 = const()[name = tensor("op_13595_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13595_end_0 = const()[name = tensor("op_13595_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13595_end_mask_0 = const()[name = tensor("op_13595_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13595_cast_fp16 = slice_by_index(begin = var_13595_begin_0, end = var_13595_end_0, end_mask = var_13595_end_mask_0, x = var_13095_cast_fp16)[name = tensor("op_13595_cast_fp16")]; tensor var_13602_begin_0 = const()[name = tensor("op_13602_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13602_end_0 = const()[name = tensor("op_13602_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13602_end_mask_0 = const()[name = tensor("op_13602_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13602_cast_fp16 = slice_by_index(begin = var_13602_begin_0, end = var_13602_end_0, end_mask = var_13602_end_mask_0, x = var_13095_cast_fp16)[name = tensor("op_13602_cast_fp16")]; tensor var_13609_begin_0 = const()[name = tensor("op_13609_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13609_end_0 = const()[name = tensor("op_13609_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13609_end_mask_0 = const()[name = tensor("op_13609_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13609_cast_fp16 = slice_by_index(begin = var_13609_begin_0, end = var_13609_end_0, end_mask = var_13609_end_mask_0, x = var_13095_cast_fp16)[name = tensor("op_13609_cast_fp16")]; tensor var_13616_begin_0 = const()[name = tensor("op_13616_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13616_end_0 = const()[name = tensor("op_13616_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13616_end_mask_0 = const()[name = tensor("op_13616_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13616_cast_fp16 = slice_by_index(begin = var_13616_begin_0, end = var_13616_end_0, end_mask = var_13616_end_mask_0, x = var_13099_cast_fp16)[name = tensor("op_13616_cast_fp16")]; tensor var_13623_begin_0 = const()[name = tensor("op_13623_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13623_end_0 = const()[name = tensor("op_13623_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13623_end_mask_0 = const()[name = tensor("op_13623_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13623_cast_fp16 = slice_by_index(begin = var_13623_begin_0, end = var_13623_end_0, end_mask = var_13623_end_mask_0, x = var_13099_cast_fp16)[name = tensor("op_13623_cast_fp16")]; tensor var_13630_begin_0 = const()[name = tensor("op_13630_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13630_end_0 = const()[name = tensor("op_13630_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13630_end_mask_0 = const()[name = tensor("op_13630_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13630_cast_fp16 = slice_by_index(begin = var_13630_begin_0, end = var_13630_end_0, end_mask = var_13630_end_mask_0, x = var_13099_cast_fp16)[name = tensor("op_13630_cast_fp16")]; tensor var_13637_begin_0 = const()[name = tensor("op_13637_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13637_end_0 = const()[name = tensor("op_13637_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13637_end_mask_0 = const()[name = tensor("op_13637_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13637_cast_fp16 = slice_by_index(begin = var_13637_begin_0, end = var_13637_end_0, end_mask = var_13637_end_mask_0, x = var_13099_cast_fp16)[name = tensor("op_13637_cast_fp16")]; tensor var_13644_begin_0 = const()[name = tensor("op_13644_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13644_end_0 = const()[name = tensor("op_13644_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_13644_end_mask_0 = const()[name = tensor("op_13644_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13644_cast_fp16 = slice_by_index(begin = var_13644_begin_0, end = var_13644_end_0, end_mask = var_13644_end_mask_0, x = var_13103_cast_fp16)[name = tensor("op_13644_cast_fp16")]; tensor var_13651_begin_0 = const()[name = tensor("op_13651_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_13651_end_0 = const()[name = tensor("op_13651_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_13651_end_mask_0 = const()[name = tensor("op_13651_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13651_cast_fp16 = slice_by_index(begin = var_13651_begin_0, end = var_13651_end_0, end_mask = var_13651_end_mask_0, x = var_13103_cast_fp16)[name = tensor("op_13651_cast_fp16")]; tensor var_13658_begin_0 = const()[name = tensor("op_13658_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_13658_end_0 = const()[name = tensor("op_13658_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_13658_end_mask_0 = const()[name = tensor("op_13658_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13658_cast_fp16 = slice_by_index(begin = var_13658_begin_0, end = var_13658_end_0, end_mask = var_13658_end_mask_0, x = var_13103_cast_fp16)[name = tensor("op_13658_cast_fp16")]; tensor var_13665_begin_0 = const()[name = tensor("op_13665_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_13665_end_0 = const()[name = tensor("op_13665_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13665_end_mask_0 = const()[name = tensor("op_13665_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13665_cast_fp16 = slice_by_index(begin = var_13665_begin_0, end = var_13665_end_0, end_mask = var_13665_end_mask_0, x = var_13103_cast_fp16)[name = tensor("op_13665_cast_fp16")]; tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_13670_begin_0 = const()[name = tensor("op_13670_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13670_end_0 = const()[name = tensor("op_13670_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_13670_end_mask_0 = const()[name = tensor("op_13670_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = key_17_cast_fp16)[name = tensor("transpose_23")]; tensor var_13670_cast_fp16 = slice_by_index(begin = var_13670_begin_0, end = var_13670_end_0, end_mask = var_13670_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13670_cast_fp16")]; tensor var_13674_begin_0 = const()[name = tensor("op_13674_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_13674_end_0 = const()[name = tensor("op_13674_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_13674_end_mask_0 = const()[name = tensor("op_13674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13674_cast_fp16 = slice_by_index(begin = var_13674_begin_0, end = var_13674_end_0, end_mask = var_13674_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13674_cast_fp16")]; tensor var_13678_begin_0 = const()[name = tensor("op_13678_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_13678_end_0 = const()[name = tensor("op_13678_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_13678_end_mask_0 = const()[name = tensor("op_13678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13678_cast_fp16 = slice_by_index(begin = var_13678_begin_0, end = var_13678_end_0, end_mask = var_13678_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13678_cast_fp16")]; tensor var_13682_begin_0 = const()[name = tensor("op_13682_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_13682_end_0 = const()[name = tensor("op_13682_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_13682_end_mask_0 = const()[name = tensor("op_13682_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13682_cast_fp16 = slice_by_index(begin = var_13682_begin_0, end = var_13682_end_0, end_mask = var_13682_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13682_cast_fp16")]; tensor var_13686_begin_0 = const()[name = tensor("op_13686_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_13686_end_0 = const()[name = tensor("op_13686_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_13686_end_mask_0 = const()[name = tensor("op_13686_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13686_cast_fp16 = slice_by_index(begin = var_13686_begin_0, end = var_13686_end_0, end_mask = var_13686_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13686_cast_fp16")]; tensor var_13690_begin_0 = const()[name = tensor("op_13690_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_13690_end_0 = const()[name = tensor("op_13690_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_13690_end_mask_0 = const()[name = tensor("op_13690_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13690_cast_fp16 = slice_by_index(begin = var_13690_begin_0, end = var_13690_end_0, end_mask = var_13690_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13690_cast_fp16")]; tensor var_13694_begin_0 = const()[name = tensor("op_13694_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_13694_end_0 = const()[name = tensor("op_13694_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_13694_end_mask_0 = const()[name = tensor("op_13694_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13694_cast_fp16 = slice_by_index(begin = var_13694_begin_0, end = var_13694_end_0, end_mask = var_13694_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13694_cast_fp16")]; tensor var_13698_begin_0 = const()[name = tensor("op_13698_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_13698_end_0 = const()[name = tensor("op_13698_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_13698_end_mask_0 = const()[name = tensor("op_13698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13698_cast_fp16 = slice_by_index(begin = var_13698_begin_0, end = var_13698_end_0, end_mask = var_13698_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13698_cast_fp16")]; tensor var_13702_begin_0 = const()[name = tensor("op_13702_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_13702_end_0 = const()[name = tensor("op_13702_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_13702_end_mask_0 = const()[name = tensor("op_13702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13702_cast_fp16 = slice_by_index(begin = var_13702_begin_0, end = var_13702_end_0, end_mask = var_13702_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13702_cast_fp16")]; tensor var_13706_begin_0 = const()[name = tensor("op_13706_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_13706_end_0 = const()[name = tensor("op_13706_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_13706_end_mask_0 = const()[name = tensor("op_13706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13706_cast_fp16 = slice_by_index(begin = var_13706_begin_0, end = var_13706_end_0, end_mask = var_13706_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13706_cast_fp16")]; tensor var_13710_begin_0 = const()[name = tensor("op_13710_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_13710_end_0 = const()[name = tensor("op_13710_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_13710_end_mask_0 = const()[name = tensor("op_13710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13710_cast_fp16 = slice_by_index(begin = var_13710_begin_0, end = var_13710_end_0, end_mask = var_13710_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13710_cast_fp16")]; tensor var_13714_begin_0 = const()[name = tensor("op_13714_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_13714_end_0 = const()[name = tensor("op_13714_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_13714_end_mask_0 = const()[name = tensor("op_13714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13714_cast_fp16 = slice_by_index(begin = var_13714_begin_0, end = var_13714_end_0, end_mask = var_13714_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13714_cast_fp16")]; tensor var_13718_begin_0 = const()[name = tensor("op_13718_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_13718_end_0 = const()[name = tensor("op_13718_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_13718_end_mask_0 = const()[name = tensor("op_13718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13718_cast_fp16 = slice_by_index(begin = var_13718_begin_0, end = var_13718_end_0, end_mask = var_13718_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13718_cast_fp16")]; tensor var_13722_begin_0 = const()[name = tensor("op_13722_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_13722_end_0 = const()[name = tensor("op_13722_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_13722_end_mask_0 = const()[name = tensor("op_13722_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13722_cast_fp16 = slice_by_index(begin = var_13722_begin_0, end = var_13722_end_0, end_mask = var_13722_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13722_cast_fp16")]; tensor var_13726_begin_0 = const()[name = tensor("op_13726_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_13726_end_0 = const()[name = tensor("op_13726_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_13726_end_mask_0 = const()[name = tensor("op_13726_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13726_cast_fp16 = slice_by_index(begin = var_13726_begin_0, end = var_13726_end_0, end_mask = var_13726_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13726_cast_fp16")]; tensor var_13730_begin_0 = const()[name = tensor("op_13730_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_13730_end_0 = const()[name = tensor("op_13730_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_13730_end_mask_0 = const()[name = tensor("op_13730_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13730_cast_fp16 = slice_by_index(begin = var_13730_begin_0, end = var_13730_end_0, end_mask = var_13730_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13730_cast_fp16")]; tensor var_13734_begin_0 = const()[name = tensor("op_13734_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_13734_end_0 = const()[name = tensor("op_13734_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_13734_end_mask_0 = const()[name = tensor("op_13734_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13734_cast_fp16 = slice_by_index(begin = var_13734_begin_0, end = var_13734_end_0, end_mask = var_13734_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13734_cast_fp16")]; tensor var_13738_begin_0 = const()[name = tensor("op_13738_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_13738_end_0 = const()[name = tensor("op_13738_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_13738_end_mask_0 = const()[name = tensor("op_13738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13738_cast_fp16 = slice_by_index(begin = var_13738_begin_0, end = var_13738_end_0, end_mask = var_13738_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13738_cast_fp16")]; tensor var_13742_begin_0 = const()[name = tensor("op_13742_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_13742_end_0 = const()[name = tensor("op_13742_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_13742_end_mask_0 = const()[name = tensor("op_13742_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13742_cast_fp16 = slice_by_index(begin = var_13742_begin_0, end = var_13742_end_0, end_mask = var_13742_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13742_cast_fp16")]; tensor var_13746_begin_0 = const()[name = tensor("op_13746_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_13746_end_0 = const()[name = tensor("op_13746_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_13746_end_mask_0 = const()[name = tensor("op_13746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_13746_cast_fp16 = slice_by_index(begin = var_13746_begin_0, end = var_13746_end_0, end_mask = var_13746_end_mask_0, x = k_17_cast_fp16)[name = tensor("op_13746_cast_fp16")]; tensor var_13748_begin_0 = const()[name = tensor("op_13748_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_13748_end_0 = const()[name = tensor("op_13748_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_13748_end_mask_0 = const()[name = tensor("op_13748_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13748_cast_fp16 = slice_by_index(begin = var_13748_begin_0, end = var_13748_end_0, end_mask = var_13748_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13748_cast_fp16")]; tensor var_13752_begin_0 = const()[name = tensor("op_13752_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_13752_end_0 = const()[name = tensor("op_13752_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_13752_end_mask_0 = const()[name = tensor("op_13752_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13752_cast_fp16 = slice_by_index(begin = var_13752_begin_0, end = var_13752_end_0, end_mask = var_13752_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13752_cast_fp16")]; tensor var_13756_begin_0 = const()[name = tensor("op_13756_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_13756_end_0 = const()[name = tensor("op_13756_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_13756_end_mask_0 = const()[name = tensor("op_13756_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13756_cast_fp16 = slice_by_index(begin = var_13756_begin_0, end = var_13756_end_0, end_mask = var_13756_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13756_cast_fp16")]; tensor var_13760_begin_0 = const()[name = tensor("op_13760_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_13760_end_0 = const()[name = tensor("op_13760_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_13760_end_mask_0 = const()[name = tensor("op_13760_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13760_cast_fp16 = slice_by_index(begin = var_13760_begin_0, end = var_13760_end_0, end_mask = var_13760_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13760_cast_fp16")]; tensor var_13764_begin_0 = const()[name = tensor("op_13764_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_13764_end_0 = const()[name = tensor("op_13764_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_13764_end_mask_0 = const()[name = tensor("op_13764_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13764_cast_fp16 = slice_by_index(begin = var_13764_begin_0, end = var_13764_end_0, end_mask = var_13764_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13764_cast_fp16")]; tensor var_13768_begin_0 = const()[name = tensor("op_13768_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_13768_end_0 = const()[name = tensor("op_13768_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_13768_end_mask_0 = const()[name = tensor("op_13768_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13768_cast_fp16 = slice_by_index(begin = var_13768_begin_0, end = var_13768_end_0, end_mask = var_13768_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13768_cast_fp16")]; tensor var_13772_begin_0 = const()[name = tensor("op_13772_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_13772_end_0 = const()[name = tensor("op_13772_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_13772_end_mask_0 = const()[name = tensor("op_13772_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13772_cast_fp16 = slice_by_index(begin = var_13772_begin_0, end = var_13772_end_0, end_mask = var_13772_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13772_cast_fp16")]; tensor var_13776_begin_0 = const()[name = tensor("op_13776_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_13776_end_0 = const()[name = tensor("op_13776_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_13776_end_mask_0 = const()[name = tensor("op_13776_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13776_cast_fp16 = slice_by_index(begin = var_13776_begin_0, end = var_13776_end_0, end_mask = var_13776_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13776_cast_fp16")]; tensor var_13780_begin_0 = const()[name = tensor("op_13780_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_13780_end_0 = const()[name = tensor("op_13780_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_13780_end_mask_0 = const()[name = tensor("op_13780_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13780_cast_fp16 = slice_by_index(begin = var_13780_begin_0, end = var_13780_end_0, end_mask = var_13780_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13780_cast_fp16")]; tensor var_13784_begin_0 = const()[name = tensor("op_13784_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_13784_end_0 = const()[name = tensor("op_13784_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_13784_end_mask_0 = const()[name = tensor("op_13784_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13784_cast_fp16 = slice_by_index(begin = var_13784_begin_0, end = var_13784_end_0, end_mask = var_13784_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13784_cast_fp16")]; tensor var_13788_begin_0 = const()[name = tensor("op_13788_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_13788_end_0 = const()[name = tensor("op_13788_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_13788_end_mask_0 = const()[name = tensor("op_13788_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13788_cast_fp16 = slice_by_index(begin = var_13788_begin_0, end = var_13788_end_0, end_mask = var_13788_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13788_cast_fp16")]; tensor var_13792_begin_0 = const()[name = tensor("op_13792_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_13792_end_0 = const()[name = tensor("op_13792_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_13792_end_mask_0 = const()[name = tensor("op_13792_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13792_cast_fp16 = slice_by_index(begin = var_13792_begin_0, end = var_13792_end_0, end_mask = var_13792_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13792_cast_fp16")]; tensor var_13796_begin_0 = const()[name = tensor("op_13796_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_13796_end_0 = const()[name = tensor("op_13796_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_13796_end_mask_0 = const()[name = tensor("op_13796_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13796_cast_fp16 = slice_by_index(begin = var_13796_begin_0, end = var_13796_end_0, end_mask = var_13796_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13796_cast_fp16")]; tensor var_13800_begin_0 = const()[name = tensor("op_13800_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_13800_end_0 = const()[name = tensor("op_13800_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_13800_end_mask_0 = const()[name = tensor("op_13800_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13800_cast_fp16 = slice_by_index(begin = var_13800_begin_0, end = var_13800_end_0, end_mask = var_13800_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13800_cast_fp16")]; tensor var_13804_begin_0 = const()[name = tensor("op_13804_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_13804_end_0 = const()[name = tensor("op_13804_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_13804_end_mask_0 = const()[name = tensor("op_13804_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13804_cast_fp16 = slice_by_index(begin = var_13804_begin_0, end = var_13804_end_0, end_mask = var_13804_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13804_cast_fp16")]; tensor var_13808_begin_0 = const()[name = tensor("op_13808_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_13808_end_0 = const()[name = tensor("op_13808_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_13808_end_mask_0 = const()[name = tensor("op_13808_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13808_cast_fp16 = slice_by_index(begin = var_13808_begin_0, end = var_13808_end_0, end_mask = var_13808_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13808_cast_fp16")]; tensor var_13812_begin_0 = const()[name = tensor("op_13812_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_13812_end_0 = const()[name = tensor("op_13812_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_13812_end_mask_0 = const()[name = tensor("op_13812_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13812_cast_fp16 = slice_by_index(begin = var_13812_begin_0, end = var_13812_end_0, end_mask = var_13812_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13812_cast_fp16")]; tensor var_13816_begin_0 = const()[name = tensor("op_13816_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_13816_end_0 = const()[name = tensor("op_13816_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_13816_end_mask_0 = const()[name = tensor("op_13816_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13816_cast_fp16 = slice_by_index(begin = var_13816_begin_0, end = var_13816_end_0, end_mask = var_13816_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13816_cast_fp16")]; tensor var_13820_begin_0 = const()[name = tensor("op_13820_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_13820_end_0 = const()[name = tensor("op_13820_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_13820_end_mask_0 = const()[name = tensor("op_13820_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13820_cast_fp16 = slice_by_index(begin = var_13820_begin_0, end = var_13820_end_0, end_mask = var_13820_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13820_cast_fp16")]; tensor var_13824_begin_0 = const()[name = tensor("op_13824_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_13824_end_0 = const()[name = tensor("op_13824_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_13824_end_mask_0 = const()[name = tensor("op_13824_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_13824_cast_fp16 = slice_by_index(begin = var_13824_begin_0, end = var_13824_end_0, end_mask = var_13824_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13824_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1281_equation_0, values = (var_13670_cast_fp16, var_13112_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1283_equation_0, values = (var_13670_cast_fp16, var_13119_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1285_equation_0, values = (var_13670_cast_fp16, var_13126_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1287_equation_0, values = (var_13670_cast_fp16, var_13133_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1287_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1289_equation_0, values = (var_13674_cast_fp16, var_13140_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1291_equation_0, values = (var_13674_cast_fp16, var_13147_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1293_equation_0, values = (var_13674_cast_fp16, var_13154_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1295_equation_0, values = (var_13674_cast_fp16, var_13161_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1295_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1297_equation_0, values = (var_13678_cast_fp16, var_13168_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1299_equation_0, values = (var_13678_cast_fp16, var_13175_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1301_equation_0, values = (var_13678_cast_fp16, var_13182_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1303_equation_0, values = (var_13678_cast_fp16, var_13189_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1305_equation_0, values = (var_13682_cast_fp16, var_13196_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1307_equation_0, values = (var_13682_cast_fp16, var_13203_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1309_equation_0, values = (var_13682_cast_fp16, var_13210_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1311_equation_0, values = (var_13682_cast_fp16, var_13217_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1313_equation_0, values = (var_13686_cast_fp16, var_13224_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1315_equation_0, values = (var_13686_cast_fp16, var_13231_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1317_equation_0, values = (var_13686_cast_fp16, var_13238_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1319_equation_0, values = (var_13686_cast_fp16, var_13245_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1319_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1321_equation_0, values = (var_13690_cast_fp16, var_13252_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1323_equation_0, values = (var_13690_cast_fp16, var_13259_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1325_equation_0, values = (var_13690_cast_fp16, var_13266_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1327_equation_0, values = (var_13690_cast_fp16, var_13273_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1329_equation_0, values = (var_13694_cast_fp16, var_13280_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1331_equation_0, values = (var_13694_cast_fp16, var_13287_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1333_equation_0, values = (var_13694_cast_fp16, var_13294_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1335_equation_0, values = (var_13694_cast_fp16, var_13301_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1337_equation_0, values = (var_13698_cast_fp16, var_13308_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1339_equation_0, values = (var_13698_cast_fp16, var_13315_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1341_equation_0, values = (var_13698_cast_fp16, var_13322_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1343_equation_0, values = (var_13698_cast_fp16, var_13329_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1345_equation_0, values = (var_13702_cast_fp16, var_13336_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1347_equation_0, values = (var_13702_cast_fp16, var_13343_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1349_equation_0, values = (var_13702_cast_fp16, var_13350_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1351_equation_0, values = (var_13702_cast_fp16, var_13357_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1353_equation_0, values = (var_13706_cast_fp16, var_13364_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1355_equation_0, values = (var_13706_cast_fp16, var_13371_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1357_equation_0, values = (var_13706_cast_fp16, var_13378_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1359_equation_0, values = (var_13706_cast_fp16, var_13385_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1359_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1361_equation_0, values = (var_13710_cast_fp16, var_13392_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1363_equation_0, values = (var_13710_cast_fp16, var_13399_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1365_equation_0, values = (var_13710_cast_fp16, var_13406_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1367_equation_0, values = (var_13710_cast_fp16, var_13413_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1369_equation_0, values = (var_13714_cast_fp16, var_13420_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1371_equation_0, values = (var_13714_cast_fp16, var_13427_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1373_equation_0, values = (var_13714_cast_fp16, var_13434_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1375_equation_0, values = (var_13714_cast_fp16, var_13441_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1377_equation_0, values = (var_13718_cast_fp16, var_13448_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1379_equation_0, values = (var_13718_cast_fp16, var_13455_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1381_equation_0, values = (var_13718_cast_fp16, var_13462_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1383_equation_0, values = (var_13718_cast_fp16, var_13469_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1383_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1385_equation_0, values = (var_13722_cast_fp16, var_13476_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1387_equation_0, values = (var_13722_cast_fp16, var_13483_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1389_equation_0, values = (var_13722_cast_fp16, var_13490_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1391_equation_0, values = (var_13722_cast_fp16, var_13497_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1393_equation_0, values = (var_13726_cast_fp16, var_13504_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1395_equation_0, values = (var_13726_cast_fp16, var_13511_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1397_equation_0, values = (var_13726_cast_fp16, var_13518_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1399_equation_0, values = (var_13726_cast_fp16, var_13525_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1399_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1401_equation_0, values = (var_13730_cast_fp16, var_13532_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1403_equation_0, values = (var_13730_cast_fp16, var_13539_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1405_equation_0, values = (var_13730_cast_fp16, var_13546_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1407_equation_0, values = (var_13730_cast_fp16, var_13553_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1409_equation_0, values = (var_13734_cast_fp16, var_13560_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1411_equation_0, values = (var_13734_cast_fp16, var_13567_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1413_equation_0, values = (var_13734_cast_fp16, var_13574_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1415_equation_0, values = (var_13734_cast_fp16, var_13581_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1417_equation_0, values = (var_13738_cast_fp16, var_13588_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1419_equation_0, values = (var_13738_cast_fp16, var_13595_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1421_equation_0, values = (var_13738_cast_fp16, var_13602_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1423_equation_0, values = (var_13738_cast_fp16, var_13609_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1425_equation_0, values = (var_13742_cast_fp16, var_13616_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1427_equation_0, values = (var_13742_cast_fp16, var_13623_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1429_equation_0, values = (var_13742_cast_fp16, var_13630_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1431_equation_0, values = (var_13742_cast_fp16, var_13637_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1431_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1433_equation_0, values = (var_13746_cast_fp16, var_13644_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1435_equation_0, values = (var_13746_cast_fp16, var_13651_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1437_equation_0, values = (var_13746_cast_fp16, var_13658_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1439_equation_0, values = (var_13746_cast_fp16, var_13665_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1439_cast_fp16")]; tensor var_13987_to_fp16 = const()[name = tensor("op_13987_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1281_cast_fp16, y = var_13987_to_fp16)[name = tensor("aw_chunk_1281_cast_fp16")]; tensor var_13989_to_fp16 = const()[name = tensor("op_13989_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1283_cast_fp16, y = var_13989_to_fp16)[name = tensor("aw_chunk_1283_cast_fp16")]; tensor var_13991_to_fp16 = const()[name = tensor("op_13991_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1285_cast_fp16, y = var_13991_to_fp16)[name = tensor("aw_chunk_1285_cast_fp16")]; tensor var_13993_to_fp16 = const()[name = tensor("op_13993_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1287_cast_fp16, y = var_13993_to_fp16)[name = tensor("aw_chunk_1287_cast_fp16")]; tensor var_13995_to_fp16 = const()[name = tensor("op_13995_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1289_cast_fp16, y = var_13995_to_fp16)[name = tensor("aw_chunk_1289_cast_fp16")]; tensor var_13997_to_fp16 = const()[name = tensor("op_13997_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1291_cast_fp16, y = var_13997_to_fp16)[name = tensor("aw_chunk_1291_cast_fp16")]; tensor var_13999_to_fp16 = const()[name = tensor("op_13999_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1293_cast_fp16, y = var_13999_to_fp16)[name = tensor("aw_chunk_1293_cast_fp16")]; tensor var_14001_to_fp16 = const()[name = tensor("op_14001_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1295_cast_fp16, y = var_14001_to_fp16)[name = tensor("aw_chunk_1295_cast_fp16")]; tensor var_14003_to_fp16 = const()[name = tensor("op_14003_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1297_cast_fp16, y = var_14003_to_fp16)[name = tensor("aw_chunk_1297_cast_fp16")]; tensor var_14005_to_fp16 = const()[name = tensor("op_14005_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1299_cast_fp16, y = var_14005_to_fp16)[name = tensor("aw_chunk_1299_cast_fp16")]; tensor var_14007_to_fp16 = const()[name = tensor("op_14007_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1301_cast_fp16, y = var_14007_to_fp16)[name = tensor("aw_chunk_1301_cast_fp16")]; tensor var_14009_to_fp16 = const()[name = tensor("op_14009_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1303_cast_fp16, y = var_14009_to_fp16)[name = tensor("aw_chunk_1303_cast_fp16")]; tensor var_14011_to_fp16 = const()[name = tensor("op_14011_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1305_cast_fp16, y = var_14011_to_fp16)[name = tensor("aw_chunk_1305_cast_fp16")]; tensor var_14013_to_fp16 = const()[name = tensor("op_14013_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1307_cast_fp16, y = var_14013_to_fp16)[name = tensor("aw_chunk_1307_cast_fp16")]; tensor var_14015_to_fp16 = const()[name = tensor("op_14015_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1309_cast_fp16, y = var_14015_to_fp16)[name = tensor("aw_chunk_1309_cast_fp16")]; tensor var_14017_to_fp16 = const()[name = tensor("op_14017_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1311_cast_fp16, y = var_14017_to_fp16)[name = tensor("aw_chunk_1311_cast_fp16")]; tensor var_14019_to_fp16 = const()[name = tensor("op_14019_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1313_cast_fp16, y = var_14019_to_fp16)[name = tensor("aw_chunk_1313_cast_fp16")]; tensor var_14021_to_fp16 = const()[name = tensor("op_14021_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1315_cast_fp16, y = var_14021_to_fp16)[name = tensor("aw_chunk_1315_cast_fp16")]; tensor var_14023_to_fp16 = const()[name = tensor("op_14023_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1317_cast_fp16, y = var_14023_to_fp16)[name = tensor("aw_chunk_1317_cast_fp16")]; tensor var_14025_to_fp16 = const()[name = tensor("op_14025_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1319_cast_fp16, y = var_14025_to_fp16)[name = tensor("aw_chunk_1319_cast_fp16")]; tensor var_14027_to_fp16 = const()[name = tensor("op_14027_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1321_cast_fp16, y = var_14027_to_fp16)[name = tensor("aw_chunk_1321_cast_fp16")]; tensor var_14029_to_fp16 = const()[name = tensor("op_14029_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1323_cast_fp16, y = var_14029_to_fp16)[name = tensor("aw_chunk_1323_cast_fp16")]; tensor var_14031_to_fp16 = const()[name = tensor("op_14031_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1325_cast_fp16, y = var_14031_to_fp16)[name = tensor("aw_chunk_1325_cast_fp16")]; tensor var_14033_to_fp16 = const()[name = tensor("op_14033_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1327_cast_fp16, y = var_14033_to_fp16)[name = tensor("aw_chunk_1327_cast_fp16")]; tensor var_14035_to_fp16 = const()[name = tensor("op_14035_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1329_cast_fp16, y = var_14035_to_fp16)[name = tensor("aw_chunk_1329_cast_fp16")]; tensor var_14037_to_fp16 = const()[name = tensor("op_14037_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1331_cast_fp16, y = var_14037_to_fp16)[name = tensor("aw_chunk_1331_cast_fp16")]; tensor var_14039_to_fp16 = const()[name = tensor("op_14039_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1333_cast_fp16, y = var_14039_to_fp16)[name = tensor("aw_chunk_1333_cast_fp16")]; tensor var_14041_to_fp16 = const()[name = tensor("op_14041_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1335_cast_fp16, y = var_14041_to_fp16)[name = tensor("aw_chunk_1335_cast_fp16")]; tensor var_14043_to_fp16 = const()[name = tensor("op_14043_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1337_cast_fp16, y = var_14043_to_fp16)[name = tensor("aw_chunk_1337_cast_fp16")]; tensor var_14045_to_fp16 = const()[name = tensor("op_14045_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1339_cast_fp16, y = var_14045_to_fp16)[name = tensor("aw_chunk_1339_cast_fp16")]; tensor var_14047_to_fp16 = const()[name = tensor("op_14047_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1341_cast_fp16, y = var_14047_to_fp16)[name = tensor("aw_chunk_1341_cast_fp16")]; tensor var_14049_to_fp16 = const()[name = tensor("op_14049_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1343_cast_fp16, y = var_14049_to_fp16)[name = tensor("aw_chunk_1343_cast_fp16")]; tensor var_14051_to_fp16 = const()[name = tensor("op_14051_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1345_cast_fp16, y = var_14051_to_fp16)[name = tensor("aw_chunk_1345_cast_fp16")]; tensor var_14053_to_fp16 = const()[name = tensor("op_14053_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1347_cast_fp16, y = var_14053_to_fp16)[name = tensor("aw_chunk_1347_cast_fp16")]; tensor var_14055_to_fp16 = const()[name = tensor("op_14055_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1349_cast_fp16, y = var_14055_to_fp16)[name = tensor("aw_chunk_1349_cast_fp16")]; tensor var_14057_to_fp16 = const()[name = tensor("op_14057_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1351_cast_fp16, y = var_14057_to_fp16)[name = tensor("aw_chunk_1351_cast_fp16")]; tensor var_14059_to_fp16 = const()[name = tensor("op_14059_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1353_cast_fp16, y = var_14059_to_fp16)[name = tensor("aw_chunk_1353_cast_fp16")]; tensor var_14061_to_fp16 = const()[name = tensor("op_14061_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1355_cast_fp16, y = var_14061_to_fp16)[name = tensor("aw_chunk_1355_cast_fp16")]; tensor var_14063_to_fp16 = const()[name = tensor("op_14063_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1357_cast_fp16, y = var_14063_to_fp16)[name = tensor("aw_chunk_1357_cast_fp16")]; tensor var_14065_to_fp16 = const()[name = tensor("op_14065_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1359_cast_fp16, y = var_14065_to_fp16)[name = tensor("aw_chunk_1359_cast_fp16")]; tensor var_14067_to_fp16 = const()[name = tensor("op_14067_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1361_cast_fp16, y = var_14067_to_fp16)[name = tensor("aw_chunk_1361_cast_fp16")]; tensor var_14069_to_fp16 = const()[name = tensor("op_14069_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1363_cast_fp16, y = var_14069_to_fp16)[name = tensor("aw_chunk_1363_cast_fp16")]; tensor var_14071_to_fp16 = const()[name = tensor("op_14071_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1365_cast_fp16, y = var_14071_to_fp16)[name = tensor("aw_chunk_1365_cast_fp16")]; tensor var_14073_to_fp16 = const()[name = tensor("op_14073_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1367_cast_fp16, y = var_14073_to_fp16)[name = tensor("aw_chunk_1367_cast_fp16")]; tensor var_14075_to_fp16 = const()[name = tensor("op_14075_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1369_cast_fp16, y = var_14075_to_fp16)[name = tensor("aw_chunk_1369_cast_fp16")]; tensor var_14077_to_fp16 = const()[name = tensor("op_14077_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1371_cast_fp16, y = var_14077_to_fp16)[name = tensor("aw_chunk_1371_cast_fp16")]; tensor var_14079_to_fp16 = const()[name = tensor("op_14079_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1373_cast_fp16, y = var_14079_to_fp16)[name = tensor("aw_chunk_1373_cast_fp16")]; tensor var_14081_to_fp16 = const()[name = tensor("op_14081_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1375_cast_fp16, y = var_14081_to_fp16)[name = tensor("aw_chunk_1375_cast_fp16")]; tensor var_14083_to_fp16 = const()[name = tensor("op_14083_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1377_cast_fp16, y = var_14083_to_fp16)[name = tensor("aw_chunk_1377_cast_fp16")]; tensor var_14085_to_fp16 = const()[name = tensor("op_14085_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1379_cast_fp16, y = var_14085_to_fp16)[name = tensor("aw_chunk_1379_cast_fp16")]; tensor var_14087_to_fp16 = const()[name = tensor("op_14087_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1381_cast_fp16, y = var_14087_to_fp16)[name = tensor("aw_chunk_1381_cast_fp16")]; tensor var_14089_to_fp16 = const()[name = tensor("op_14089_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1383_cast_fp16, y = var_14089_to_fp16)[name = tensor("aw_chunk_1383_cast_fp16")]; tensor var_14091_to_fp16 = const()[name = tensor("op_14091_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1385_cast_fp16, y = var_14091_to_fp16)[name = tensor("aw_chunk_1385_cast_fp16")]; tensor var_14093_to_fp16 = const()[name = tensor("op_14093_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1387_cast_fp16, y = var_14093_to_fp16)[name = tensor("aw_chunk_1387_cast_fp16")]; tensor var_14095_to_fp16 = const()[name = tensor("op_14095_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1389_cast_fp16, y = var_14095_to_fp16)[name = tensor("aw_chunk_1389_cast_fp16")]; tensor var_14097_to_fp16 = const()[name = tensor("op_14097_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1391_cast_fp16, y = var_14097_to_fp16)[name = tensor("aw_chunk_1391_cast_fp16")]; tensor var_14099_to_fp16 = const()[name = tensor("op_14099_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1393_cast_fp16, y = var_14099_to_fp16)[name = tensor("aw_chunk_1393_cast_fp16")]; tensor var_14101_to_fp16 = const()[name = tensor("op_14101_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1395_cast_fp16, y = var_14101_to_fp16)[name = tensor("aw_chunk_1395_cast_fp16")]; tensor var_14103_to_fp16 = const()[name = tensor("op_14103_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1397_cast_fp16, y = var_14103_to_fp16)[name = tensor("aw_chunk_1397_cast_fp16")]; tensor var_14105_to_fp16 = const()[name = tensor("op_14105_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1399_cast_fp16, y = var_14105_to_fp16)[name = tensor("aw_chunk_1399_cast_fp16")]; tensor var_14107_to_fp16 = const()[name = tensor("op_14107_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1401_cast_fp16, y = var_14107_to_fp16)[name = tensor("aw_chunk_1401_cast_fp16")]; tensor var_14109_to_fp16 = const()[name = tensor("op_14109_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1403_cast_fp16, y = var_14109_to_fp16)[name = tensor("aw_chunk_1403_cast_fp16")]; tensor var_14111_to_fp16 = const()[name = tensor("op_14111_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1405_cast_fp16, y = var_14111_to_fp16)[name = tensor("aw_chunk_1405_cast_fp16")]; tensor var_14113_to_fp16 = const()[name = tensor("op_14113_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1407_cast_fp16, y = var_14113_to_fp16)[name = tensor("aw_chunk_1407_cast_fp16")]; tensor var_14115_to_fp16 = const()[name = tensor("op_14115_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1409_cast_fp16, y = var_14115_to_fp16)[name = tensor("aw_chunk_1409_cast_fp16")]; tensor var_14117_to_fp16 = const()[name = tensor("op_14117_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1411_cast_fp16, y = var_14117_to_fp16)[name = tensor("aw_chunk_1411_cast_fp16")]; tensor var_14119_to_fp16 = const()[name = tensor("op_14119_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1413_cast_fp16, y = var_14119_to_fp16)[name = tensor("aw_chunk_1413_cast_fp16")]; tensor var_14121_to_fp16 = const()[name = tensor("op_14121_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1415_cast_fp16, y = var_14121_to_fp16)[name = tensor("aw_chunk_1415_cast_fp16")]; tensor var_14123_to_fp16 = const()[name = tensor("op_14123_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1417_cast_fp16, y = var_14123_to_fp16)[name = tensor("aw_chunk_1417_cast_fp16")]; tensor var_14125_to_fp16 = const()[name = tensor("op_14125_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1419_cast_fp16, y = var_14125_to_fp16)[name = tensor("aw_chunk_1419_cast_fp16")]; tensor var_14127_to_fp16 = const()[name = tensor("op_14127_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1421_cast_fp16, y = var_14127_to_fp16)[name = tensor("aw_chunk_1421_cast_fp16")]; tensor var_14129_to_fp16 = const()[name = tensor("op_14129_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1423_cast_fp16, y = var_14129_to_fp16)[name = tensor("aw_chunk_1423_cast_fp16")]; tensor var_14131_to_fp16 = const()[name = tensor("op_14131_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1425_cast_fp16, y = var_14131_to_fp16)[name = tensor("aw_chunk_1425_cast_fp16")]; tensor var_14133_to_fp16 = const()[name = tensor("op_14133_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1427_cast_fp16, y = var_14133_to_fp16)[name = tensor("aw_chunk_1427_cast_fp16")]; tensor var_14135_to_fp16 = const()[name = tensor("op_14135_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1429_cast_fp16, y = var_14135_to_fp16)[name = tensor("aw_chunk_1429_cast_fp16")]; tensor var_14137_to_fp16 = const()[name = tensor("op_14137_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1431_cast_fp16, y = var_14137_to_fp16)[name = tensor("aw_chunk_1431_cast_fp16")]; tensor var_14139_to_fp16 = const()[name = tensor("op_14139_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1433_cast_fp16, y = var_14139_to_fp16)[name = tensor("aw_chunk_1433_cast_fp16")]; tensor var_14141_to_fp16 = const()[name = tensor("op_14141_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1435_cast_fp16, y = var_14141_to_fp16)[name = tensor("aw_chunk_1435_cast_fp16")]; tensor var_14143_to_fp16 = const()[name = tensor("op_14143_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1437_cast_fp16, y = var_14143_to_fp16)[name = tensor("aw_chunk_1437_cast_fp16")]; tensor var_14145_to_fp16 = const()[name = tensor("op_14145_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1439_cast_fp16, y = var_14145_to_fp16)[name = tensor("aw_chunk_1439_cast_fp16")]; tensor var_14147_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1281_cast_fp16)[name = tensor("op_14147_cast_fp16")]; tensor var_14148_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1283_cast_fp16)[name = tensor("op_14148_cast_fp16")]; tensor var_14149_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1285_cast_fp16)[name = tensor("op_14149_cast_fp16")]; tensor var_14150_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1287_cast_fp16)[name = tensor("op_14150_cast_fp16")]; tensor var_14151_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1289_cast_fp16)[name = tensor("op_14151_cast_fp16")]; tensor var_14152_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1291_cast_fp16)[name = tensor("op_14152_cast_fp16")]; tensor var_14153_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1293_cast_fp16)[name = tensor("op_14153_cast_fp16")]; tensor var_14154_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1295_cast_fp16)[name = tensor("op_14154_cast_fp16")]; tensor var_14155_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1297_cast_fp16)[name = tensor("op_14155_cast_fp16")]; tensor var_14156_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1299_cast_fp16)[name = tensor("op_14156_cast_fp16")]; tensor var_14157_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1301_cast_fp16)[name = tensor("op_14157_cast_fp16")]; tensor var_14158_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1303_cast_fp16)[name = tensor("op_14158_cast_fp16")]; tensor var_14159_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1305_cast_fp16)[name = tensor("op_14159_cast_fp16")]; tensor var_14160_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1307_cast_fp16)[name = tensor("op_14160_cast_fp16")]; tensor var_14161_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1309_cast_fp16)[name = tensor("op_14161_cast_fp16")]; tensor var_14162_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1311_cast_fp16)[name = tensor("op_14162_cast_fp16")]; tensor var_14163_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1313_cast_fp16)[name = tensor("op_14163_cast_fp16")]; tensor var_14164_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1315_cast_fp16)[name = tensor("op_14164_cast_fp16")]; tensor var_14165_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1317_cast_fp16)[name = tensor("op_14165_cast_fp16")]; tensor var_14166_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1319_cast_fp16)[name = tensor("op_14166_cast_fp16")]; tensor var_14167_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1321_cast_fp16)[name = tensor("op_14167_cast_fp16")]; tensor var_14168_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1323_cast_fp16)[name = tensor("op_14168_cast_fp16")]; tensor var_14169_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1325_cast_fp16)[name = tensor("op_14169_cast_fp16")]; tensor var_14170_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1327_cast_fp16)[name = tensor("op_14170_cast_fp16")]; tensor var_14171_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1329_cast_fp16)[name = tensor("op_14171_cast_fp16")]; tensor var_14172_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1331_cast_fp16)[name = tensor("op_14172_cast_fp16")]; tensor var_14173_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1333_cast_fp16)[name = tensor("op_14173_cast_fp16")]; tensor var_14174_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1335_cast_fp16)[name = tensor("op_14174_cast_fp16")]; tensor var_14175_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1337_cast_fp16)[name = tensor("op_14175_cast_fp16")]; tensor var_14176_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1339_cast_fp16)[name = tensor("op_14176_cast_fp16")]; tensor var_14177_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1341_cast_fp16)[name = tensor("op_14177_cast_fp16")]; tensor var_14178_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1343_cast_fp16)[name = tensor("op_14178_cast_fp16")]; tensor var_14179_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1345_cast_fp16)[name = tensor("op_14179_cast_fp16")]; tensor var_14180_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1347_cast_fp16)[name = tensor("op_14180_cast_fp16")]; tensor var_14181_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1349_cast_fp16)[name = tensor("op_14181_cast_fp16")]; tensor var_14182_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1351_cast_fp16)[name = tensor("op_14182_cast_fp16")]; tensor var_14183_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1353_cast_fp16)[name = tensor("op_14183_cast_fp16")]; tensor var_14184_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1355_cast_fp16)[name = tensor("op_14184_cast_fp16")]; tensor var_14185_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1357_cast_fp16)[name = tensor("op_14185_cast_fp16")]; tensor var_14186_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1359_cast_fp16)[name = tensor("op_14186_cast_fp16")]; tensor var_14187_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1361_cast_fp16)[name = tensor("op_14187_cast_fp16")]; tensor var_14188_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1363_cast_fp16)[name = tensor("op_14188_cast_fp16")]; tensor var_14189_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1365_cast_fp16)[name = tensor("op_14189_cast_fp16")]; tensor var_14190_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1367_cast_fp16)[name = tensor("op_14190_cast_fp16")]; tensor var_14191_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1369_cast_fp16)[name = tensor("op_14191_cast_fp16")]; tensor var_14192_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1371_cast_fp16)[name = tensor("op_14192_cast_fp16")]; tensor var_14193_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1373_cast_fp16)[name = tensor("op_14193_cast_fp16")]; tensor var_14194_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1375_cast_fp16)[name = tensor("op_14194_cast_fp16")]; tensor var_14195_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1377_cast_fp16)[name = tensor("op_14195_cast_fp16")]; tensor var_14196_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1379_cast_fp16)[name = tensor("op_14196_cast_fp16")]; tensor var_14197_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1381_cast_fp16)[name = tensor("op_14197_cast_fp16")]; tensor var_14198_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1383_cast_fp16)[name = tensor("op_14198_cast_fp16")]; tensor var_14199_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1385_cast_fp16)[name = tensor("op_14199_cast_fp16")]; tensor var_14200_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1387_cast_fp16)[name = tensor("op_14200_cast_fp16")]; tensor var_14201_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1389_cast_fp16)[name = tensor("op_14201_cast_fp16")]; tensor var_14202_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1391_cast_fp16)[name = tensor("op_14202_cast_fp16")]; tensor var_14203_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1393_cast_fp16)[name = tensor("op_14203_cast_fp16")]; tensor var_14204_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1395_cast_fp16)[name = tensor("op_14204_cast_fp16")]; tensor var_14205_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1397_cast_fp16)[name = tensor("op_14205_cast_fp16")]; tensor var_14206_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1399_cast_fp16)[name = tensor("op_14206_cast_fp16")]; tensor var_14207_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1401_cast_fp16)[name = tensor("op_14207_cast_fp16")]; tensor var_14208_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1403_cast_fp16)[name = tensor("op_14208_cast_fp16")]; tensor var_14209_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1405_cast_fp16)[name = tensor("op_14209_cast_fp16")]; tensor var_14210_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1407_cast_fp16)[name = tensor("op_14210_cast_fp16")]; tensor var_14211_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1409_cast_fp16)[name = tensor("op_14211_cast_fp16")]; tensor var_14212_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1411_cast_fp16)[name = tensor("op_14212_cast_fp16")]; tensor var_14213_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1413_cast_fp16)[name = tensor("op_14213_cast_fp16")]; tensor var_14214_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1415_cast_fp16)[name = tensor("op_14214_cast_fp16")]; tensor var_14215_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1417_cast_fp16)[name = tensor("op_14215_cast_fp16")]; tensor var_14216_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1419_cast_fp16)[name = tensor("op_14216_cast_fp16")]; tensor var_14217_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1421_cast_fp16)[name = tensor("op_14217_cast_fp16")]; tensor var_14218_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1423_cast_fp16)[name = tensor("op_14218_cast_fp16")]; tensor var_14219_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1425_cast_fp16)[name = tensor("op_14219_cast_fp16")]; tensor var_14220_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1427_cast_fp16)[name = tensor("op_14220_cast_fp16")]; tensor var_14221_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1429_cast_fp16)[name = tensor("op_14221_cast_fp16")]; tensor var_14222_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1431_cast_fp16)[name = tensor("op_14222_cast_fp16")]; tensor var_14223_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1433_cast_fp16)[name = tensor("op_14223_cast_fp16")]; tensor var_14224_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1435_cast_fp16)[name = tensor("op_14224_cast_fp16")]; tensor var_14225_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1437_cast_fp16)[name = tensor("op_14225_cast_fp16")]; tensor var_14226_cast_fp16 = softmax(axis = var_12945, x = aw_chunk_1439_cast_fp16)[name = tensor("op_14226_cast_fp16")]; tensor var_14228_equation_0 = const()[name = tensor("op_14228_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14228_cast_fp16 = einsum(equation = var_14228_equation_0, values = (var_13748_cast_fp16, var_14147_cast_fp16))[name = tensor("op_14228_cast_fp16")]; tensor var_14230_equation_0 = const()[name = tensor("op_14230_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14230_cast_fp16 = einsum(equation = var_14230_equation_0, values = (var_13748_cast_fp16, var_14148_cast_fp16))[name = tensor("op_14230_cast_fp16")]; tensor var_14232_equation_0 = const()[name = tensor("op_14232_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14232_cast_fp16 = einsum(equation = var_14232_equation_0, values = (var_13748_cast_fp16, var_14149_cast_fp16))[name = tensor("op_14232_cast_fp16")]; tensor var_14234_equation_0 = const()[name = tensor("op_14234_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14234_cast_fp16 = einsum(equation = var_14234_equation_0, values = (var_13748_cast_fp16, var_14150_cast_fp16))[name = tensor("op_14234_cast_fp16")]; tensor var_14236_equation_0 = const()[name = tensor("op_14236_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14236_cast_fp16 = einsum(equation = var_14236_equation_0, values = (var_13752_cast_fp16, var_14151_cast_fp16))[name = tensor("op_14236_cast_fp16")]; tensor var_14238_equation_0 = const()[name = tensor("op_14238_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14238_cast_fp16 = einsum(equation = var_14238_equation_0, values = (var_13752_cast_fp16, var_14152_cast_fp16))[name = tensor("op_14238_cast_fp16")]; tensor var_14240_equation_0 = const()[name = tensor("op_14240_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14240_cast_fp16 = einsum(equation = var_14240_equation_0, values = (var_13752_cast_fp16, var_14153_cast_fp16))[name = tensor("op_14240_cast_fp16")]; tensor var_14242_equation_0 = const()[name = tensor("op_14242_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14242_cast_fp16 = einsum(equation = var_14242_equation_0, values = (var_13752_cast_fp16, var_14154_cast_fp16))[name = tensor("op_14242_cast_fp16")]; tensor var_14244_equation_0 = const()[name = tensor("op_14244_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14244_cast_fp16 = einsum(equation = var_14244_equation_0, values = (var_13756_cast_fp16, var_14155_cast_fp16))[name = tensor("op_14244_cast_fp16")]; tensor var_14246_equation_0 = const()[name = tensor("op_14246_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14246_cast_fp16 = einsum(equation = var_14246_equation_0, values = (var_13756_cast_fp16, var_14156_cast_fp16))[name = tensor("op_14246_cast_fp16")]; tensor var_14248_equation_0 = const()[name = tensor("op_14248_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14248_cast_fp16 = einsum(equation = var_14248_equation_0, values = (var_13756_cast_fp16, var_14157_cast_fp16))[name = tensor("op_14248_cast_fp16")]; tensor var_14250_equation_0 = const()[name = tensor("op_14250_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14250_cast_fp16 = einsum(equation = var_14250_equation_0, values = (var_13756_cast_fp16, var_14158_cast_fp16))[name = tensor("op_14250_cast_fp16")]; tensor var_14252_equation_0 = const()[name = tensor("op_14252_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14252_cast_fp16 = einsum(equation = var_14252_equation_0, values = (var_13760_cast_fp16, var_14159_cast_fp16))[name = tensor("op_14252_cast_fp16")]; tensor var_14254_equation_0 = const()[name = tensor("op_14254_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14254_cast_fp16 = einsum(equation = var_14254_equation_0, values = (var_13760_cast_fp16, var_14160_cast_fp16))[name = tensor("op_14254_cast_fp16")]; tensor var_14256_equation_0 = const()[name = tensor("op_14256_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14256_cast_fp16 = einsum(equation = var_14256_equation_0, values = (var_13760_cast_fp16, var_14161_cast_fp16))[name = tensor("op_14256_cast_fp16")]; tensor var_14258_equation_0 = const()[name = tensor("op_14258_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14258_cast_fp16 = einsum(equation = var_14258_equation_0, values = (var_13760_cast_fp16, var_14162_cast_fp16))[name = tensor("op_14258_cast_fp16")]; tensor var_14260_equation_0 = const()[name = tensor("op_14260_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14260_cast_fp16 = einsum(equation = var_14260_equation_0, values = (var_13764_cast_fp16, var_14163_cast_fp16))[name = tensor("op_14260_cast_fp16")]; tensor var_14262_equation_0 = const()[name = tensor("op_14262_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14262_cast_fp16 = einsum(equation = var_14262_equation_0, values = (var_13764_cast_fp16, var_14164_cast_fp16))[name = tensor("op_14262_cast_fp16")]; tensor var_14264_equation_0 = const()[name = tensor("op_14264_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14264_cast_fp16 = einsum(equation = var_14264_equation_0, values = (var_13764_cast_fp16, var_14165_cast_fp16))[name = tensor("op_14264_cast_fp16")]; tensor var_14266_equation_0 = const()[name = tensor("op_14266_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14266_cast_fp16 = einsum(equation = var_14266_equation_0, values = (var_13764_cast_fp16, var_14166_cast_fp16))[name = tensor("op_14266_cast_fp16")]; tensor var_14268_equation_0 = const()[name = tensor("op_14268_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14268_cast_fp16 = einsum(equation = var_14268_equation_0, values = (var_13768_cast_fp16, var_14167_cast_fp16))[name = tensor("op_14268_cast_fp16")]; tensor var_14270_equation_0 = const()[name = tensor("op_14270_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14270_cast_fp16 = einsum(equation = var_14270_equation_0, values = (var_13768_cast_fp16, var_14168_cast_fp16))[name = tensor("op_14270_cast_fp16")]; tensor var_14272_equation_0 = const()[name = tensor("op_14272_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14272_cast_fp16 = einsum(equation = var_14272_equation_0, values = (var_13768_cast_fp16, var_14169_cast_fp16))[name = tensor("op_14272_cast_fp16")]; tensor var_14274_equation_0 = const()[name = tensor("op_14274_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14274_cast_fp16 = einsum(equation = var_14274_equation_0, values = (var_13768_cast_fp16, var_14170_cast_fp16))[name = tensor("op_14274_cast_fp16")]; tensor var_14276_equation_0 = const()[name = tensor("op_14276_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14276_cast_fp16 = einsum(equation = var_14276_equation_0, values = (var_13772_cast_fp16, var_14171_cast_fp16))[name = tensor("op_14276_cast_fp16")]; tensor var_14278_equation_0 = const()[name = tensor("op_14278_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14278_cast_fp16 = einsum(equation = var_14278_equation_0, values = (var_13772_cast_fp16, var_14172_cast_fp16))[name = tensor("op_14278_cast_fp16")]; tensor var_14280_equation_0 = const()[name = tensor("op_14280_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14280_cast_fp16 = einsum(equation = var_14280_equation_0, values = (var_13772_cast_fp16, var_14173_cast_fp16))[name = tensor("op_14280_cast_fp16")]; tensor var_14282_equation_0 = const()[name = tensor("op_14282_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14282_cast_fp16 = einsum(equation = var_14282_equation_0, values = (var_13772_cast_fp16, var_14174_cast_fp16))[name = tensor("op_14282_cast_fp16")]; tensor var_14284_equation_0 = const()[name = tensor("op_14284_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14284_cast_fp16 = einsum(equation = var_14284_equation_0, values = (var_13776_cast_fp16, var_14175_cast_fp16))[name = tensor("op_14284_cast_fp16")]; tensor var_14286_equation_0 = const()[name = tensor("op_14286_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14286_cast_fp16 = einsum(equation = var_14286_equation_0, values = (var_13776_cast_fp16, var_14176_cast_fp16))[name = tensor("op_14286_cast_fp16")]; tensor var_14288_equation_0 = const()[name = tensor("op_14288_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14288_cast_fp16 = einsum(equation = var_14288_equation_0, values = (var_13776_cast_fp16, var_14177_cast_fp16))[name = tensor("op_14288_cast_fp16")]; tensor var_14290_equation_0 = const()[name = tensor("op_14290_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14290_cast_fp16 = einsum(equation = var_14290_equation_0, values = (var_13776_cast_fp16, var_14178_cast_fp16))[name = tensor("op_14290_cast_fp16")]; tensor var_14292_equation_0 = const()[name = tensor("op_14292_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14292_cast_fp16 = einsum(equation = var_14292_equation_0, values = (var_13780_cast_fp16, var_14179_cast_fp16))[name = tensor("op_14292_cast_fp16")]; tensor var_14294_equation_0 = const()[name = tensor("op_14294_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14294_cast_fp16 = einsum(equation = var_14294_equation_0, values = (var_13780_cast_fp16, var_14180_cast_fp16))[name = tensor("op_14294_cast_fp16")]; tensor var_14296_equation_0 = const()[name = tensor("op_14296_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14296_cast_fp16 = einsum(equation = var_14296_equation_0, values = (var_13780_cast_fp16, var_14181_cast_fp16))[name = tensor("op_14296_cast_fp16")]; tensor var_14298_equation_0 = const()[name = tensor("op_14298_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14298_cast_fp16 = einsum(equation = var_14298_equation_0, values = (var_13780_cast_fp16, var_14182_cast_fp16))[name = tensor("op_14298_cast_fp16")]; tensor var_14300_equation_0 = const()[name = tensor("op_14300_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14300_cast_fp16 = einsum(equation = var_14300_equation_0, values = (var_13784_cast_fp16, var_14183_cast_fp16))[name = tensor("op_14300_cast_fp16")]; tensor var_14302_equation_0 = const()[name = tensor("op_14302_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14302_cast_fp16 = einsum(equation = var_14302_equation_0, values = (var_13784_cast_fp16, var_14184_cast_fp16))[name = tensor("op_14302_cast_fp16")]; tensor var_14304_equation_0 = const()[name = tensor("op_14304_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14304_cast_fp16 = einsum(equation = var_14304_equation_0, values = (var_13784_cast_fp16, var_14185_cast_fp16))[name = tensor("op_14304_cast_fp16")]; tensor var_14306_equation_0 = const()[name = tensor("op_14306_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14306_cast_fp16 = einsum(equation = var_14306_equation_0, values = (var_13784_cast_fp16, var_14186_cast_fp16))[name = tensor("op_14306_cast_fp16")]; tensor var_14308_equation_0 = const()[name = tensor("op_14308_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14308_cast_fp16 = einsum(equation = var_14308_equation_0, values = (var_13788_cast_fp16, var_14187_cast_fp16))[name = tensor("op_14308_cast_fp16")]; tensor var_14310_equation_0 = const()[name = tensor("op_14310_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14310_cast_fp16 = einsum(equation = var_14310_equation_0, values = (var_13788_cast_fp16, var_14188_cast_fp16))[name = tensor("op_14310_cast_fp16")]; tensor var_14312_equation_0 = const()[name = tensor("op_14312_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14312_cast_fp16 = einsum(equation = var_14312_equation_0, values = (var_13788_cast_fp16, var_14189_cast_fp16))[name = tensor("op_14312_cast_fp16")]; tensor var_14314_equation_0 = const()[name = tensor("op_14314_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14314_cast_fp16 = einsum(equation = var_14314_equation_0, values = (var_13788_cast_fp16, var_14190_cast_fp16))[name = tensor("op_14314_cast_fp16")]; tensor var_14316_equation_0 = const()[name = tensor("op_14316_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14316_cast_fp16 = einsum(equation = var_14316_equation_0, values = (var_13792_cast_fp16, var_14191_cast_fp16))[name = tensor("op_14316_cast_fp16")]; tensor var_14318_equation_0 = const()[name = tensor("op_14318_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14318_cast_fp16 = einsum(equation = var_14318_equation_0, values = (var_13792_cast_fp16, var_14192_cast_fp16))[name = tensor("op_14318_cast_fp16")]; tensor var_14320_equation_0 = const()[name = tensor("op_14320_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14320_cast_fp16 = einsum(equation = var_14320_equation_0, values = (var_13792_cast_fp16, var_14193_cast_fp16))[name = tensor("op_14320_cast_fp16")]; tensor var_14322_equation_0 = const()[name = tensor("op_14322_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14322_cast_fp16 = einsum(equation = var_14322_equation_0, values = (var_13792_cast_fp16, var_14194_cast_fp16))[name = tensor("op_14322_cast_fp16")]; tensor var_14324_equation_0 = const()[name = tensor("op_14324_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14324_cast_fp16 = einsum(equation = var_14324_equation_0, values = (var_13796_cast_fp16, var_14195_cast_fp16))[name = tensor("op_14324_cast_fp16")]; tensor var_14326_equation_0 = const()[name = tensor("op_14326_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14326_cast_fp16 = einsum(equation = var_14326_equation_0, values = (var_13796_cast_fp16, var_14196_cast_fp16))[name = tensor("op_14326_cast_fp16")]; tensor var_14328_equation_0 = const()[name = tensor("op_14328_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14328_cast_fp16 = einsum(equation = var_14328_equation_0, values = (var_13796_cast_fp16, var_14197_cast_fp16))[name = tensor("op_14328_cast_fp16")]; tensor var_14330_equation_0 = const()[name = tensor("op_14330_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14330_cast_fp16 = einsum(equation = var_14330_equation_0, values = (var_13796_cast_fp16, var_14198_cast_fp16))[name = tensor("op_14330_cast_fp16")]; tensor var_14332_equation_0 = const()[name = tensor("op_14332_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14332_cast_fp16 = einsum(equation = var_14332_equation_0, values = (var_13800_cast_fp16, var_14199_cast_fp16))[name = tensor("op_14332_cast_fp16")]; tensor var_14334_equation_0 = const()[name = tensor("op_14334_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14334_cast_fp16 = einsum(equation = var_14334_equation_0, values = (var_13800_cast_fp16, var_14200_cast_fp16))[name = tensor("op_14334_cast_fp16")]; tensor var_14336_equation_0 = const()[name = tensor("op_14336_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14336_cast_fp16 = einsum(equation = var_14336_equation_0, values = (var_13800_cast_fp16, var_14201_cast_fp16))[name = tensor("op_14336_cast_fp16")]; tensor var_14338_equation_0 = const()[name = tensor("op_14338_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14338_cast_fp16 = einsum(equation = var_14338_equation_0, values = (var_13800_cast_fp16, var_14202_cast_fp16))[name = tensor("op_14338_cast_fp16")]; tensor var_14340_equation_0 = const()[name = tensor("op_14340_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14340_cast_fp16 = einsum(equation = var_14340_equation_0, values = (var_13804_cast_fp16, var_14203_cast_fp16))[name = tensor("op_14340_cast_fp16")]; tensor var_14342_equation_0 = const()[name = tensor("op_14342_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14342_cast_fp16 = einsum(equation = var_14342_equation_0, values = (var_13804_cast_fp16, var_14204_cast_fp16))[name = tensor("op_14342_cast_fp16")]; tensor var_14344_equation_0 = const()[name = tensor("op_14344_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14344_cast_fp16 = einsum(equation = var_14344_equation_0, values = (var_13804_cast_fp16, var_14205_cast_fp16))[name = tensor("op_14344_cast_fp16")]; tensor var_14346_equation_0 = const()[name = tensor("op_14346_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14346_cast_fp16 = einsum(equation = var_14346_equation_0, values = (var_13804_cast_fp16, var_14206_cast_fp16))[name = tensor("op_14346_cast_fp16")]; tensor var_14348_equation_0 = const()[name = tensor("op_14348_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14348_cast_fp16 = einsum(equation = var_14348_equation_0, values = (var_13808_cast_fp16, var_14207_cast_fp16))[name = tensor("op_14348_cast_fp16")]; tensor var_14350_equation_0 = const()[name = tensor("op_14350_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14350_cast_fp16 = einsum(equation = var_14350_equation_0, values = (var_13808_cast_fp16, var_14208_cast_fp16))[name = tensor("op_14350_cast_fp16")]; tensor var_14352_equation_0 = const()[name = tensor("op_14352_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14352_cast_fp16 = einsum(equation = var_14352_equation_0, values = (var_13808_cast_fp16, var_14209_cast_fp16))[name = tensor("op_14352_cast_fp16")]; tensor var_14354_equation_0 = const()[name = tensor("op_14354_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14354_cast_fp16 = einsum(equation = var_14354_equation_0, values = (var_13808_cast_fp16, var_14210_cast_fp16))[name = tensor("op_14354_cast_fp16")]; tensor var_14356_equation_0 = const()[name = tensor("op_14356_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14356_cast_fp16 = einsum(equation = var_14356_equation_0, values = (var_13812_cast_fp16, var_14211_cast_fp16))[name = tensor("op_14356_cast_fp16")]; tensor var_14358_equation_0 = const()[name = tensor("op_14358_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14358_cast_fp16 = einsum(equation = var_14358_equation_0, values = (var_13812_cast_fp16, var_14212_cast_fp16))[name = tensor("op_14358_cast_fp16")]; tensor var_14360_equation_0 = const()[name = tensor("op_14360_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14360_cast_fp16 = einsum(equation = var_14360_equation_0, values = (var_13812_cast_fp16, var_14213_cast_fp16))[name = tensor("op_14360_cast_fp16")]; tensor var_14362_equation_0 = const()[name = tensor("op_14362_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14362_cast_fp16 = einsum(equation = var_14362_equation_0, values = (var_13812_cast_fp16, var_14214_cast_fp16))[name = tensor("op_14362_cast_fp16")]; tensor var_14364_equation_0 = const()[name = tensor("op_14364_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14364_cast_fp16 = einsum(equation = var_14364_equation_0, values = (var_13816_cast_fp16, var_14215_cast_fp16))[name = tensor("op_14364_cast_fp16")]; tensor var_14366_equation_0 = const()[name = tensor("op_14366_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14366_cast_fp16 = einsum(equation = var_14366_equation_0, values = (var_13816_cast_fp16, var_14216_cast_fp16))[name = tensor("op_14366_cast_fp16")]; tensor var_14368_equation_0 = const()[name = tensor("op_14368_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14368_cast_fp16 = einsum(equation = var_14368_equation_0, values = (var_13816_cast_fp16, var_14217_cast_fp16))[name = tensor("op_14368_cast_fp16")]; tensor var_14370_equation_0 = const()[name = tensor("op_14370_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14370_cast_fp16 = einsum(equation = var_14370_equation_0, values = (var_13816_cast_fp16, var_14218_cast_fp16))[name = tensor("op_14370_cast_fp16")]; tensor var_14372_equation_0 = const()[name = tensor("op_14372_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14372_cast_fp16 = einsum(equation = var_14372_equation_0, values = (var_13820_cast_fp16, var_14219_cast_fp16))[name = tensor("op_14372_cast_fp16")]; tensor var_14374_equation_0 = const()[name = tensor("op_14374_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14374_cast_fp16 = einsum(equation = var_14374_equation_0, values = (var_13820_cast_fp16, var_14220_cast_fp16))[name = tensor("op_14374_cast_fp16")]; tensor var_14376_equation_0 = const()[name = tensor("op_14376_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14376_cast_fp16 = einsum(equation = var_14376_equation_0, values = (var_13820_cast_fp16, var_14221_cast_fp16))[name = tensor("op_14376_cast_fp16")]; tensor var_14378_equation_0 = const()[name = tensor("op_14378_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14378_cast_fp16 = einsum(equation = var_14378_equation_0, values = (var_13820_cast_fp16, var_14222_cast_fp16))[name = tensor("op_14378_cast_fp16")]; tensor var_14380_equation_0 = const()[name = tensor("op_14380_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14380_cast_fp16 = einsum(equation = var_14380_equation_0, values = (var_13824_cast_fp16, var_14223_cast_fp16))[name = tensor("op_14380_cast_fp16")]; tensor var_14382_equation_0 = const()[name = tensor("op_14382_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14382_cast_fp16 = einsum(equation = var_14382_equation_0, values = (var_13824_cast_fp16, var_14224_cast_fp16))[name = tensor("op_14382_cast_fp16")]; tensor var_14384_equation_0 = const()[name = tensor("op_14384_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14384_cast_fp16 = einsum(equation = var_14384_equation_0, values = (var_13824_cast_fp16, var_14225_cast_fp16))[name = tensor("op_14384_cast_fp16")]; tensor var_14386_equation_0 = const()[name = tensor("op_14386_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_14386_cast_fp16 = einsum(equation = var_14386_equation_0, values = (var_13824_cast_fp16, var_14226_cast_fp16))[name = tensor("op_14386_cast_fp16")]; tensor var_14388_interleave_0 = const()[name = tensor("op_14388_interleave_0"), val = tensor(false)]; tensor var_14388_cast_fp16 = concat(axis = var_12920, interleave = var_14388_interleave_0, values = (var_14228_cast_fp16, var_14230_cast_fp16, var_14232_cast_fp16, var_14234_cast_fp16))[name = tensor("op_14388_cast_fp16")]; tensor var_14390_interleave_0 = const()[name = tensor("op_14390_interleave_0"), val = tensor(false)]; tensor var_14390_cast_fp16 = concat(axis = var_12920, interleave = var_14390_interleave_0, values = (var_14236_cast_fp16, var_14238_cast_fp16, var_14240_cast_fp16, var_14242_cast_fp16))[name = tensor("op_14390_cast_fp16")]; tensor var_14392_interleave_0 = const()[name = tensor("op_14392_interleave_0"), val = tensor(false)]; tensor var_14392_cast_fp16 = concat(axis = var_12920, interleave = var_14392_interleave_0, values = (var_14244_cast_fp16, var_14246_cast_fp16, var_14248_cast_fp16, var_14250_cast_fp16))[name = tensor("op_14392_cast_fp16")]; tensor var_14394_interleave_0 = const()[name = tensor("op_14394_interleave_0"), val = tensor(false)]; tensor var_14394_cast_fp16 = concat(axis = var_12920, interleave = var_14394_interleave_0, values = (var_14252_cast_fp16, var_14254_cast_fp16, var_14256_cast_fp16, var_14258_cast_fp16))[name = tensor("op_14394_cast_fp16")]; tensor var_14396_interleave_0 = const()[name = tensor("op_14396_interleave_0"), val = tensor(false)]; tensor var_14396_cast_fp16 = concat(axis = var_12920, interleave = var_14396_interleave_0, values = (var_14260_cast_fp16, var_14262_cast_fp16, var_14264_cast_fp16, var_14266_cast_fp16))[name = tensor("op_14396_cast_fp16")]; tensor var_14398_interleave_0 = const()[name = tensor("op_14398_interleave_0"), val = tensor(false)]; tensor var_14398_cast_fp16 = concat(axis = var_12920, interleave = var_14398_interleave_0, values = (var_14268_cast_fp16, var_14270_cast_fp16, var_14272_cast_fp16, var_14274_cast_fp16))[name = tensor("op_14398_cast_fp16")]; tensor var_14400_interleave_0 = const()[name = tensor("op_14400_interleave_0"), val = tensor(false)]; tensor var_14400_cast_fp16 = concat(axis = var_12920, interleave = var_14400_interleave_0, values = (var_14276_cast_fp16, var_14278_cast_fp16, var_14280_cast_fp16, var_14282_cast_fp16))[name = tensor("op_14400_cast_fp16")]; tensor var_14402_interleave_0 = const()[name = tensor("op_14402_interleave_0"), val = tensor(false)]; tensor var_14402_cast_fp16 = concat(axis = var_12920, interleave = var_14402_interleave_0, values = (var_14284_cast_fp16, var_14286_cast_fp16, var_14288_cast_fp16, var_14290_cast_fp16))[name = tensor("op_14402_cast_fp16")]; tensor var_14404_interleave_0 = const()[name = tensor("op_14404_interleave_0"), val = tensor(false)]; tensor var_14404_cast_fp16 = concat(axis = var_12920, interleave = var_14404_interleave_0, values = (var_14292_cast_fp16, var_14294_cast_fp16, var_14296_cast_fp16, var_14298_cast_fp16))[name = tensor("op_14404_cast_fp16")]; tensor var_14406_interleave_0 = const()[name = tensor("op_14406_interleave_0"), val = tensor(false)]; tensor var_14406_cast_fp16 = concat(axis = var_12920, interleave = var_14406_interleave_0, values = (var_14300_cast_fp16, var_14302_cast_fp16, var_14304_cast_fp16, var_14306_cast_fp16))[name = tensor("op_14406_cast_fp16")]; tensor var_14408_interleave_0 = const()[name = tensor("op_14408_interleave_0"), val = tensor(false)]; tensor var_14408_cast_fp16 = concat(axis = var_12920, interleave = var_14408_interleave_0, values = (var_14308_cast_fp16, var_14310_cast_fp16, var_14312_cast_fp16, var_14314_cast_fp16))[name = tensor("op_14408_cast_fp16")]; tensor var_14410_interleave_0 = const()[name = tensor("op_14410_interleave_0"), val = tensor(false)]; tensor var_14410_cast_fp16 = concat(axis = var_12920, interleave = var_14410_interleave_0, values = (var_14316_cast_fp16, var_14318_cast_fp16, var_14320_cast_fp16, var_14322_cast_fp16))[name = tensor("op_14410_cast_fp16")]; tensor var_14412_interleave_0 = const()[name = tensor("op_14412_interleave_0"), val = tensor(false)]; tensor var_14412_cast_fp16 = concat(axis = var_12920, interleave = var_14412_interleave_0, values = (var_14324_cast_fp16, var_14326_cast_fp16, var_14328_cast_fp16, var_14330_cast_fp16))[name = tensor("op_14412_cast_fp16")]; tensor var_14414_interleave_0 = const()[name = tensor("op_14414_interleave_0"), val = tensor(false)]; tensor var_14414_cast_fp16 = concat(axis = var_12920, interleave = var_14414_interleave_0, values = (var_14332_cast_fp16, var_14334_cast_fp16, var_14336_cast_fp16, var_14338_cast_fp16))[name = tensor("op_14414_cast_fp16")]; tensor var_14416_interleave_0 = const()[name = tensor("op_14416_interleave_0"), val = tensor(false)]; tensor var_14416_cast_fp16 = concat(axis = var_12920, interleave = var_14416_interleave_0, values = (var_14340_cast_fp16, var_14342_cast_fp16, var_14344_cast_fp16, var_14346_cast_fp16))[name = tensor("op_14416_cast_fp16")]; tensor var_14418_interleave_0 = const()[name = tensor("op_14418_interleave_0"), val = tensor(false)]; tensor var_14418_cast_fp16 = concat(axis = var_12920, interleave = var_14418_interleave_0, values = (var_14348_cast_fp16, var_14350_cast_fp16, var_14352_cast_fp16, var_14354_cast_fp16))[name = tensor("op_14418_cast_fp16")]; tensor var_14420_interleave_0 = const()[name = tensor("op_14420_interleave_0"), val = tensor(false)]; tensor var_14420_cast_fp16 = concat(axis = var_12920, interleave = var_14420_interleave_0, values = (var_14356_cast_fp16, var_14358_cast_fp16, var_14360_cast_fp16, var_14362_cast_fp16))[name = tensor("op_14420_cast_fp16")]; tensor var_14422_interleave_0 = const()[name = tensor("op_14422_interleave_0"), val = tensor(false)]; tensor var_14422_cast_fp16 = concat(axis = var_12920, interleave = var_14422_interleave_0, values = (var_14364_cast_fp16, var_14366_cast_fp16, var_14368_cast_fp16, var_14370_cast_fp16))[name = tensor("op_14422_cast_fp16")]; tensor var_14424_interleave_0 = const()[name = tensor("op_14424_interleave_0"), val = tensor(false)]; tensor var_14424_cast_fp16 = concat(axis = var_12920, interleave = var_14424_interleave_0, values = (var_14372_cast_fp16, var_14374_cast_fp16, var_14376_cast_fp16, var_14378_cast_fp16))[name = tensor("op_14424_cast_fp16")]; tensor var_14426_interleave_0 = const()[name = tensor("op_14426_interleave_0"), val = tensor(false)]; tensor var_14426_cast_fp16 = concat(axis = var_12920, interleave = var_14426_interleave_0, values = (var_14380_cast_fp16, var_14382_cast_fp16, var_14384_cast_fp16, var_14386_cast_fp16))[name = tensor("op_14426_cast_fp16")]; tensor input_65_interleave_0 = const()[name = tensor("input_65_interleave_0"), val = tensor(false)]; tensor input_65_cast_fp16 = concat(axis = var_12945, interleave = input_65_interleave_0, values = (var_14388_cast_fp16, var_14390_cast_fp16, var_14392_cast_fp16, var_14394_cast_fp16, var_14396_cast_fp16, var_14398_cast_fp16, var_14400_cast_fp16, var_14402_cast_fp16, var_14404_cast_fp16, var_14406_cast_fp16, var_14408_cast_fp16, var_14410_cast_fp16, var_14412_cast_fp16, var_14414_cast_fp16, var_14416_cast_fp16, var_14418_cast_fp16, var_14420_cast_fp16, var_14422_cast_fp16, var_14424_cast_fp16, var_14426_cast_fp16))[name = tensor("input_65_cast_fp16")]; tensor var_14437_pad_type_0 = const()[name = tensor("op_14437_pad_type_0"), val = tensor("valid")]; tensor var_14437_strides_0 = const()[name = tensor("op_14437_strides_0"), val = tensor([1, 1])]; tensor var_14437_pad_0 = const()[name = tensor("op_14437_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14437_dilations_0 = const()[name = tensor("op_14437_dilations_0"), val = tensor([1, 1])]; tensor var_14437_groups_0 = const()[name = tensor("op_14437_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(122976832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123796096))), name = tensor("layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_8_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123796224)))]; tensor var_14437_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_14437_dilations_0, groups = var_14437_groups_0, pad = var_14437_pad_0, pad_type = var_14437_pad_type_0, strides = var_14437_strides_0, weight = layers_8_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_65_cast_fp16)[name = tensor("op_14437_cast_fp16")]; tensor var_14443_pad_type_0 = const()[name = tensor("op_14443_pad_type_0"), val = tensor("valid")]; tensor var_14443_strides_0 = const()[name = tensor("op_14443_strides_0"), val = tensor([1, 1])]; tensor var_14443_pad_0 = const()[name = tensor("op_14443_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14443_dilations_0 = const()[name = tensor("op_14443_dilations_0"), val = tensor([1, 1])]; tensor var_14443_groups_0 = const()[name = tensor("op_14443_groups_0"), val = tensor(1)]; tensor layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123819136))), name = tensor("layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123798848))), shape = tensor([1280, 1280, 1, 1])]; tensor var_14443_cast_fp16 = conv(dilations = var_14443_dilations_0, groups = var_14443_groups_0, pad = var_14443_pad_0, pad_type = var_14443_pad_type_0, strides = var_14443_strides_0, weight = layers_8_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_65_cast_fp16)[name = tensor("op_14443_cast_fp16")]; tensor obj_35_cast_fp16 = add(x = var_14437_cast_fp16, y = var_14443_cast_fp16)[name = tensor("obj_35_cast_fp16")]; tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; tensor out_35_axes_0 = const()[name = tensor("out_35_axes_0"), val = tensor([1])]; tensor var_14454_to_fp16 = const()[name = tensor("op_14454_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_35_cast_fp16 = layer_norm(axes = out_35_axes_0, epsilon = var_14454_to_fp16, x = inputs_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124024000)))]; tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124026624)))]; tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; tensor var_14472_pad_type_0 = const()[name = tensor("op_14472_pad_type_0"), val = tensor("valid")]; tensor var_14472_strides_0 = const()[name = tensor("op_14472_strides_0"), val = tensor([1, 1])]; tensor var_14472_pad_0 = const()[name = tensor("op_14472_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14472_dilations_0 = const()[name = tensor("op_14472_dilations_0"), val = tensor([1, 1])]; tensor var_14472_groups_0 = const()[name = tensor("op_14472_groups_0"), val = tensor(1)]; tensor layers_8_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124029248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127306112))), name = tensor("layers_8_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_8_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127306240)))]; tensor var_14472_cast_fp16 = conv(bias = layers_8_fc1_inlier_module_bias_to_fp16, dilations = var_14472_dilations_0, groups = var_14472_groups_0, pad = var_14472_pad_0, pad_type = var_14472_pad_type_0, strides = var_14472_strides_0, weight = layers_8_fc1_inlier_module_weight_to_fp16_palettized, x = input_67_cast_fp16)[name = tensor("op_14472_cast_fp16")]; tensor var_14478_pad_type_0 = const()[name = tensor("op_14478_pad_type_0"), val = tensor("valid")]; tensor var_14478_strides_0 = const()[name = tensor("op_14478_strides_0"), val = tensor([1, 1])]; tensor var_14478_pad_0 = const()[name = tensor("op_14478_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14478_dilations_0 = const()[name = tensor("op_14478_dilations_0"), val = tensor([1, 1])]; tensor var_14478_groups_0 = const()[name = tensor("op_14478_groups_0"), val = tensor(1)]; tensor layers_8_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127346368))), name = tensor("layers_8_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(127316544))), shape = tensor([5120, 1280, 1, 1])]; tensor var_14478_cast_fp16 = conv(dilations = var_14478_dilations_0, groups = var_14478_groups_0, pad = var_14478_pad_0, pad_type = var_14478_pad_type_0, strides = var_14478_strides_0, weight = layers_8_fc1_outlier_module_weight_to_fp16_sparsified, x = input_67_cast_fp16)[name = tensor("op_14478_cast_fp16")]; tensor input_69_cast_fp16 = add(x = var_14472_cast_fp16, y = var_14478_cast_fp16)[name = tensor("input_69_cast_fp16")]; tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; tensor var_14489_pad_type_0 = const()[name = tensor("op_14489_pad_type_0"), val = tensor("valid")]; tensor var_14489_strides_0 = const()[name = tensor("op_14489_strides_0"), val = tensor([1, 1])]; tensor var_14489_pad_0 = const()[name = tensor("op_14489_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14489_dilations_0 = const()[name = tensor("op_14489_dilations_0"), val = tensor([1, 1])]; tensor var_14489_groups_0 = const()[name = tensor("op_14489_groups_0"), val = tensor(1)]; tensor layers_8_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(128165632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131442496))), name = tensor("layers_8_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_8_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_8_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131442624)))]; tensor var_14489_cast_fp16 = conv(bias = layers_8_fc2_inlier_module_bias_to_fp16, dilations = var_14489_dilations_0, groups = var_14489_groups_0, pad = var_14489_pad_0, pad_type = var_14489_pad_type_0, strides = var_14489_strides_0, weight = layers_8_fc2_inlier_module_weight_to_fp16_palettized, x = input_71_cast_fp16)[name = tensor("op_14489_cast_fp16")]; tensor var_14495_pad_type_0 = const()[name = tensor("op_14495_pad_type_0"), val = tensor("valid")]; tensor var_14495_strides_0 = const()[name = tensor("op_14495_strides_0"), val = tensor([1, 1])]; tensor var_14495_pad_0 = const()[name = tensor("op_14495_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14495_dilations_0 = const()[name = tensor("op_14495_dilations_0"), val = tensor([1, 1])]; tensor var_14495_groups_0 = const()[name = tensor("op_14495_groups_0"), val = tensor(1)]; tensor layers_8_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131699584))), name = tensor("layers_8_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(131445248))), shape = tensor([1280, 5120, 1, 1])]; tensor var_14495_cast_fp16 = conv(dilations = var_14495_dilations_0, groups = var_14495_groups_0, pad = var_14495_pad_0, pad_type = var_14495_pad_type_0, strides = var_14495_strides_0, weight = layers_8_fc2_outlier_module_weight_to_fp16_sparsified, x = input_71_cast_fp16)[name = tensor("op_14495_cast_fp16")]; tensor hidden_states_21_cast_fp16 = add(x = var_14489_cast_fp16, y = var_14495_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; tensor var_14501 = const()[name = tensor("op_14501"), val = tensor(3)]; tensor var_14526 = const()[name = tensor("op_14526"), val = tensor(1)]; tensor out_37_axes_0 = const()[name = tensor("out_37_axes_0"), val = tensor([1])]; tensor var_14543_to_fp16 = const()[name = tensor("op_14543_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_37_cast_fp16 = layer_norm(axes = out_37_axes_0, epsilon = var_14543_to_fp16, x = inputs_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132518848)))]; tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132521472)))]; tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; tensor var_14565_pad_type_0 = const()[name = tensor("op_14565_pad_type_0"), val = tensor("valid")]; tensor var_14565_strides_0 = const()[name = tensor("op_14565_strides_0"), val = tensor([1, 1])]; tensor var_14565_pad_0 = const()[name = tensor("op_14565_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14565_dilations_0 = const()[name = tensor("op_14565_dilations_0"), val = tensor([1, 1])]; tensor var_14565_groups_0 = const()[name = tensor("op_14565_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132524096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133343360))), name = tensor("layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_9_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133343488)))]; tensor var_14565_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_14565_dilations_0, groups = var_14565_groups_0, pad = var_14565_pad_0, pad_type = var_14565_pad_type_0, strides = var_14565_strides_0, weight = layers_9_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_14565_cast_fp16")]; tensor var_14571_pad_type_0 = const()[name = tensor("op_14571_pad_type_0"), val = tensor("valid")]; tensor var_14571_strides_0 = const()[name = tensor("op_14571_strides_0"), val = tensor([1, 1])]; tensor var_14571_pad_0 = const()[name = tensor("op_14571_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14571_dilations_0 = const()[name = tensor("op_14571_dilations_0"), val = tensor([1, 1])]; tensor var_14571_groups_0 = const()[name = tensor("op_14571_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133392896))), name = tensor("layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133346112))), shape = tensor([1280, 1280, 1, 1])]; tensor var_14571_cast_fp16 = conv(dilations = var_14571_dilations_0, groups = var_14571_groups_0, pad = var_14571_pad_0, pad_type = var_14571_pad_type_0, strides = var_14571_strides_0, weight = layers_9_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_14571_cast_fp16")]; tensor query_19_cast_fp16 = add(x = var_14565_cast_fp16, y = var_14571_cast_fp16)[name = tensor("query_19_cast_fp16")]; tensor var_14580_pad_type_0 = const()[name = tensor("op_14580_pad_type_0"), val = tensor("valid")]; tensor var_14580_strides_0 = const()[name = tensor("op_14580_strides_0"), val = tensor([1, 1])]; tensor var_14580_pad_0 = const()[name = tensor("op_14580_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14580_dilations_0 = const()[name = tensor("op_14580_dilations_0"), val = tensor([1, 1])]; tensor var_14580_groups_0 = const()[name = tensor("op_14580_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133597760))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134417024))), name = tensor("layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_14580_cast_fp16 = conv(dilations = var_14580_dilations_0, groups = var_14580_groups_0, pad = var_14580_pad_0, pad_type = var_14580_pad_type_0, strides = var_14580_strides_0, weight = layers_9_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_14580_cast_fp16")]; tensor var_14586_pad_type_0 = const()[name = tensor("op_14586_pad_type_0"), val = tensor("valid")]; tensor var_14586_strides_0 = const()[name = tensor("op_14586_strides_0"), val = tensor([1, 1])]; tensor var_14586_pad_0 = const()[name = tensor("op_14586_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14586_dilations_0 = const()[name = tensor("op_14586_dilations_0"), val = tensor([1, 1])]; tensor var_14586_groups_0 = const()[name = tensor("op_14586_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134451584))), name = tensor("layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134417152))), shape = tensor([1280, 1280, 1, 1])]; tensor var_14586_cast_fp16 = conv(dilations = var_14586_dilations_0, groups = var_14586_groups_0, pad = var_14586_pad_0, pad_type = var_14586_pad_type_0, strides = var_14586_strides_0, weight = layers_9_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_14586_cast_fp16")]; tensor key_19_cast_fp16 = add(x = var_14580_cast_fp16, y = var_14586_cast_fp16)[name = tensor("key_19_cast_fp16")]; tensor var_14596_pad_type_0 = const()[name = tensor("op_14596_pad_type_0"), val = tensor("valid")]; tensor var_14596_strides_0 = const()[name = tensor("op_14596_strides_0"), val = tensor([1, 1])]; tensor var_14596_pad_0 = const()[name = tensor("op_14596_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14596_dilations_0 = const()[name = tensor("op_14596_dilations_0"), val = tensor([1, 1])]; tensor var_14596_groups_0 = const()[name = tensor("op_14596_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134656448))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135475712))), name = tensor("layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_9_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135475840)))]; tensor var_14596_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_14596_dilations_0, groups = var_14596_groups_0, pad = var_14596_pad_0, pad_type = var_14596_pad_type_0, strides = var_14596_strides_0, weight = layers_9_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_37_cast_fp16)[name = tensor("op_14596_cast_fp16")]; tensor var_14602_pad_type_0 = const()[name = tensor("op_14602_pad_type_0"), val = tensor("valid")]; tensor var_14602_strides_0 = const()[name = tensor("op_14602_strides_0"), val = tensor([1, 1])]; tensor var_14602_pad_0 = const()[name = tensor("op_14602_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_14602_dilations_0 = const()[name = tensor("op_14602_dilations_0"), val = tensor([1, 1])]; tensor var_14602_groups_0 = const()[name = tensor("op_14602_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135500160))), name = tensor("layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135478464))), shape = tensor([1280, 1280, 1, 1])]; tensor var_14602_cast_fp16 = conv(dilations = var_14602_dilations_0, groups = var_14602_groups_0, pad = var_14602_pad_0, pad_type = var_14602_pad_type_0, strides = var_14602_strides_0, weight = layers_9_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_37_cast_fp16)[name = tensor("op_14602_cast_fp16")]; tensor value_19_cast_fp16 = add(x = var_14596_cast_fp16, y = var_14602_cast_fp16)[name = tensor("value_19_cast_fp16")]; tensor var_14608_begin_0 = const()[name = tensor("op_14608_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14608_end_0 = const()[name = tensor("op_14608_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14608_end_mask_0 = const()[name = tensor("op_14608_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14608_cast_fp16 = slice_by_index(begin = var_14608_begin_0, end = var_14608_end_0, end_mask = var_14608_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14608_cast_fp16")]; tensor var_14612_begin_0 = const()[name = tensor("op_14612_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_14612_end_0 = const()[name = tensor("op_14612_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_14612_end_mask_0 = const()[name = tensor("op_14612_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14612_cast_fp16 = slice_by_index(begin = var_14612_begin_0, end = var_14612_end_0, end_mask = var_14612_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14612_cast_fp16")]; tensor var_14616_begin_0 = const()[name = tensor("op_14616_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_14616_end_0 = const()[name = tensor("op_14616_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_14616_end_mask_0 = const()[name = tensor("op_14616_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14616_cast_fp16 = slice_by_index(begin = var_14616_begin_0, end = var_14616_end_0, end_mask = var_14616_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14616_cast_fp16")]; tensor var_14620_begin_0 = const()[name = tensor("op_14620_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_14620_end_0 = const()[name = tensor("op_14620_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_14620_end_mask_0 = const()[name = tensor("op_14620_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14620_cast_fp16 = slice_by_index(begin = var_14620_begin_0, end = var_14620_end_0, end_mask = var_14620_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14620_cast_fp16")]; tensor var_14624_begin_0 = const()[name = tensor("op_14624_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_14624_end_0 = const()[name = tensor("op_14624_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_14624_end_mask_0 = const()[name = tensor("op_14624_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14624_cast_fp16 = slice_by_index(begin = var_14624_begin_0, end = var_14624_end_0, end_mask = var_14624_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14624_cast_fp16")]; tensor var_14628_begin_0 = const()[name = tensor("op_14628_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_14628_end_0 = const()[name = tensor("op_14628_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_14628_end_mask_0 = const()[name = tensor("op_14628_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14628_cast_fp16 = slice_by_index(begin = var_14628_begin_0, end = var_14628_end_0, end_mask = var_14628_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14628_cast_fp16")]; tensor var_14632_begin_0 = const()[name = tensor("op_14632_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_14632_end_0 = const()[name = tensor("op_14632_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_14632_end_mask_0 = const()[name = tensor("op_14632_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14632_cast_fp16 = slice_by_index(begin = var_14632_begin_0, end = var_14632_end_0, end_mask = var_14632_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14632_cast_fp16")]; tensor var_14636_begin_0 = const()[name = tensor("op_14636_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_14636_end_0 = const()[name = tensor("op_14636_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_14636_end_mask_0 = const()[name = tensor("op_14636_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14636_cast_fp16 = slice_by_index(begin = var_14636_begin_0, end = var_14636_end_0, end_mask = var_14636_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14636_cast_fp16")]; tensor var_14640_begin_0 = const()[name = tensor("op_14640_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_14640_end_0 = const()[name = tensor("op_14640_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_14640_end_mask_0 = const()[name = tensor("op_14640_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14640_cast_fp16 = slice_by_index(begin = var_14640_begin_0, end = var_14640_end_0, end_mask = var_14640_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14640_cast_fp16")]; tensor var_14644_begin_0 = const()[name = tensor("op_14644_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_14644_end_0 = const()[name = tensor("op_14644_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_14644_end_mask_0 = const()[name = tensor("op_14644_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14644_cast_fp16 = slice_by_index(begin = var_14644_begin_0, end = var_14644_end_0, end_mask = var_14644_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14644_cast_fp16")]; tensor var_14648_begin_0 = const()[name = tensor("op_14648_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_14648_end_0 = const()[name = tensor("op_14648_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_14648_end_mask_0 = const()[name = tensor("op_14648_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14648_cast_fp16 = slice_by_index(begin = var_14648_begin_0, end = var_14648_end_0, end_mask = var_14648_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14648_cast_fp16")]; tensor var_14652_begin_0 = const()[name = tensor("op_14652_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_14652_end_0 = const()[name = tensor("op_14652_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_14652_end_mask_0 = const()[name = tensor("op_14652_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14652_cast_fp16 = slice_by_index(begin = var_14652_begin_0, end = var_14652_end_0, end_mask = var_14652_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14652_cast_fp16")]; tensor var_14656_begin_0 = const()[name = tensor("op_14656_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_14656_end_0 = const()[name = tensor("op_14656_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_14656_end_mask_0 = const()[name = tensor("op_14656_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14656_cast_fp16 = slice_by_index(begin = var_14656_begin_0, end = var_14656_end_0, end_mask = var_14656_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14656_cast_fp16")]; tensor var_14660_begin_0 = const()[name = tensor("op_14660_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_14660_end_0 = const()[name = tensor("op_14660_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_14660_end_mask_0 = const()[name = tensor("op_14660_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14660_cast_fp16 = slice_by_index(begin = var_14660_begin_0, end = var_14660_end_0, end_mask = var_14660_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14660_cast_fp16")]; tensor var_14664_begin_0 = const()[name = tensor("op_14664_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_14664_end_0 = const()[name = tensor("op_14664_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_14664_end_mask_0 = const()[name = tensor("op_14664_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14664_cast_fp16 = slice_by_index(begin = var_14664_begin_0, end = var_14664_end_0, end_mask = var_14664_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14664_cast_fp16")]; tensor var_14668_begin_0 = const()[name = tensor("op_14668_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_14668_end_0 = const()[name = tensor("op_14668_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_14668_end_mask_0 = const()[name = tensor("op_14668_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14668_cast_fp16 = slice_by_index(begin = var_14668_begin_0, end = var_14668_end_0, end_mask = var_14668_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14668_cast_fp16")]; tensor var_14672_begin_0 = const()[name = tensor("op_14672_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_14672_end_0 = const()[name = tensor("op_14672_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_14672_end_mask_0 = const()[name = tensor("op_14672_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14672_cast_fp16 = slice_by_index(begin = var_14672_begin_0, end = var_14672_end_0, end_mask = var_14672_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14672_cast_fp16")]; tensor var_14676_begin_0 = const()[name = tensor("op_14676_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_14676_end_0 = const()[name = tensor("op_14676_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_14676_end_mask_0 = const()[name = tensor("op_14676_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14676_cast_fp16 = slice_by_index(begin = var_14676_begin_0, end = var_14676_end_0, end_mask = var_14676_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14676_cast_fp16")]; tensor var_14680_begin_0 = const()[name = tensor("op_14680_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_14680_end_0 = const()[name = tensor("op_14680_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_14680_end_mask_0 = const()[name = tensor("op_14680_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14680_cast_fp16 = slice_by_index(begin = var_14680_begin_0, end = var_14680_end_0, end_mask = var_14680_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14680_cast_fp16")]; tensor var_14684_begin_0 = const()[name = tensor("op_14684_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_14684_end_0 = const()[name = tensor("op_14684_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_14684_end_mask_0 = const()[name = tensor("op_14684_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_14684_cast_fp16 = slice_by_index(begin = var_14684_begin_0, end = var_14684_end_0, end_mask = var_14684_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14684_cast_fp16")]; tensor var_14693_begin_0 = const()[name = tensor("op_14693_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14693_end_0 = const()[name = tensor("op_14693_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14693_end_mask_0 = const()[name = tensor("op_14693_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14693_cast_fp16 = slice_by_index(begin = var_14693_begin_0, end = var_14693_end_0, end_mask = var_14693_end_mask_0, x = var_14608_cast_fp16)[name = tensor("op_14693_cast_fp16")]; tensor var_14700_begin_0 = const()[name = tensor("op_14700_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14700_end_0 = const()[name = tensor("op_14700_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14700_end_mask_0 = const()[name = tensor("op_14700_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14700_cast_fp16 = slice_by_index(begin = var_14700_begin_0, end = var_14700_end_0, end_mask = var_14700_end_mask_0, x = var_14608_cast_fp16)[name = tensor("op_14700_cast_fp16")]; tensor var_14707_begin_0 = const()[name = tensor("op_14707_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14707_end_0 = const()[name = tensor("op_14707_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14707_end_mask_0 = const()[name = tensor("op_14707_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14707_cast_fp16 = slice_by_index(begin = var_14707_begin_0, end = var_14707_end_0, end_mask = var_14707_end_mask_0, x = var_14608_cast_fp16)[name = tensor("op_14707_cast_fp16")]; tensor var_14714_begin_0 = const()[name = tensor("op_14714_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14714_end_0 = const()[name = tensor("op_14714_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14714_end_mask_0 = const()[name = tensor("op_14714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14714_cast_fp16 = slice_by_index(begin = var_14714_begin_0, end = var_14714_end_0, end_mask = var_14714_end_mask_0, x = var_14608_cast_fp16)[name = tensor("op_14714_cast_fp16")]; tensor var_14721_begin_0 = const()[name = tensor("op_14721_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14721_end_0 = const()[name = tensor("op_14721_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14721_end_mask_0 = const()[name = tensor("op_14721_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14721_cast_fp16 = slice_by_index(begin = var_14721_begin_0, end = var_14721_end_0, end_mask = var_14721_end_mask_0, x = var_14612_cast_fp16)[name = tensor("op_14721_cast_fp16")]; tensor var_14728_begin_0 = const()[name = tensor("op_14728_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14728_end_0 = const()[name = tensor("op_14728_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14728_end_mask_0 = const()[name = tensor("op_14728_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14728_cast_fp16 = slice_by_index(begin = var_14728_begin_0, end = var_14728_end_0, end_mask = var_14728_end_mask_0, x = var_14612_cast_fp16)[name = tensor("op_14728_cast_fp16")]; tensor var_14735_begin_0 = const()[name = tensor("op_14735_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14735_end_0 = const()[name = tensor("op_14735_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14735_end_mask_0 = const()[name = tensor("op_14735_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14735_cast_fp16 = slice_by_index(begin = var_14735_begin_0, end = var_14735_end_0, end_mask = var_14735_end_mask_0, x = var_14612_cast_fp16)[name = tensor("op_14735_cast_fp16")]; tensor var_14742_begin_0 = const()[name = tensor("op_14742_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14742_end_0 = const()[name = tensor("op_14742_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14742_end_mask_0 = const()[name = tensor("op_14742_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14742_cast_fp16 = slice_by_index(begin = var_14742_begin_0, end = var_14742_end_0, end_mask = var_14742_end_mask_0, x = var_14612_cast_fp16)[name = tensor("op_14742_cast_fp16")]; tensor var_14749_begin_0 = const()[name = tensor("op_14749_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14749_end_0 = const()[name = tensor("op_14749_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14749_end_mask_0 = const()[name = tensor("op_14749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14749_cast_fp16 = slice_by_index(begin = var_14749_begin_0, end = var_14749_end_0, end_mask = var_14749_end_mask_0, x = var_14616_cast_fp16)[name = tensor("op_14749_cast_fp16")]; tensor var_14756_begin_0 = const()[name = tensor("op_14756_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14756_end_0 = const()[name = tensor("op_14756_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14756_end_mask_0 = const()[name = tensor("op_14756_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14756_cast_fp16 = slice_by_index(begin = var_14756_begin_0, end = var_14756_end_0, end_mask = var_14756_end_mask_0, x = var_14616_cast_fp16)[name = tensor("op_14756_cast_fp16")]; tensor var_14763_begin_0 = const()[name = tensor("op_14763_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14763_end_0 = const()[name = tensor("op_14763_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14763_end_mask_0 = const()[name = tensor("op_14763_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14763_cast_fp16 = slice_by_index(begin = var_14763_begin_0, end = var_14763_end_0, end_mask = var_14763_end_mask_0, x = var_14616_cast_fp16)[name = tensor("op_14763_cast_fp16")]; tensor var_14770_begin_0 = const()[name = tensor("op_14770_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14770_end_0 = const()[name = tensor("op_14770_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14770_end_mask_0 = const()[name = tensor("op_14770_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14770_cast_fp16 = slice_by_index(begin = var_14770_begin_0, end = var_14770_end_0, end_mask = var_14770_end_mask_0, x = var_14616_cast_fp16)[name = tensor("op_14770_cast_fp16")]; tensor var_14777_begin_0 = const()[name = tensor("op_14777_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14777_end_0 = const()[name = tensor("op_14777_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14777_end_mask_0 = const()[name = tensor("op_14777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14777_cast_fp16 = slice_by_index(begin = var_14777_begin_0, end = var_14777_end_0, end_mask = var_14777_end_mask_0, x = var_14620_cast_fp16)[name = tensor("op_14777_cast_fp16")]; tensor var_14784_begin_0 = const()[name = tensor("op_14784_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14784_end_0 = const()[name = tensor("op_14784_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14784_end_mask_0 = const()[name = tensor("op_14784_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14784_cast_fp16 = slice_by_index(begin = var_14784_begin_0, end = var_14784_end_0, end_mask = var_14784_end_mask_0, x = var_14620_cast_fp16)[name = tensor("op_14784_cast_fp16")]; tensor var_14791_begin_0 = const()[name = tensor("op_14791_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14791_end_0 = const()[name = tensor("op_14791_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14791_end_mask_0 = const()[name = tensor("op_14791_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14791_cast_fp16 = slice_by_index(begin = var_14791_begin_0, end = var_14791_end_0, end_mask = var_14791_end_mask_0, x = var_14620_cast_fp16)[name = tensor("op_14791_cast_fp16")]; tensor var_14798_begin_0 = const()[name = tensor("op_14798_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14798_end_0 = const()[name = tensor("op_14798_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14798_end_mask_0 = const()[name = tensor("op_14798_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14798_cast_fp16 = slice_by_index(begin = var_14798_begin_0, end = var_14798_end_0, end_mask = var_14798_end_mask_0, x = var_14620_cast_fp16)[name = tensor("op_14798_cast_fp16")]; tensor var_14805_begin_0 = const()[name = tensor("op_14805_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14805_end_0 = const()[name = tensor("op_14805_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14805_end_mask_0 = const()[name = tensor("op_14805_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14805_cast_fp16 = slice_by_index(begin = var_14805_begin_0, end = var_14805_end_0, end_mask = var_14805_end_mask_0, x = var_14624_cast_fp16)[name = tensor("op_14805_cast_fp16")]; tensor var_14812_begin_0 = const()[name = tensor("op_14812_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14812_end_0 = const()[name = tensor("op_14812_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14812_end_mask_0 = const()[name = tensor("op_14812_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14812_cast_fp16 = slice_by_index(begin = var_14812_begin_0, end = var_14812_end_0, end_mask = var_14812_end_mask_0, x = var_14624_cast_fp16)[name = tensor("op_14812_cast_fp16")]; tensor var_14819_begin_0 = const()[name = tensor("op_14819_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14819_end_0 = const()[name = tensor("op_14819_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14819_end_mask_0 = const()[name = tensor("op_14819_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14819_cast_fp16 = slice_by_index(begin = var_14819_begin_0, end = var_14819_end_0, end_mask = var_14819_end_mask_0, x = var_14624_cast_fp16)[name = tensor("op_14819_cast_fp16")]; tensor var_14826_begin_0 = const()[name = tensor("op_14826_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14826_end_0 = const()[name = tensor("op_14826_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14826_end_mask_0 = const()[name = tensor("op_14826_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14826_cast_fp16 = slice_by_index(begin = var_14826_begin_0, end = var_14826_end_0, end_mask = var_14826_end_mask_0, x = var_14624_cast_fp16)[name = tensor("op_14826_cast_fp16")]; tensor var_14833_begin_0 = const()[name = tensor("op_14833_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14833_end_0 = const()[name = tensor("op_14833_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14833_end_mask_0 = const()[name = tensor("op_14833_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14833_cast_fp16 = slice_by_index(begin = var_14833_begin_0, end = var_14833_end_0, end_mask = var_14833_end_mask_0, x = var_14628_cast_fp16)[name = tensor("op_14833_cast_fp16")]; tensor var_14840_begin_0 = const()[name = tensor("op_14840_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14840_end_0 = const()[name = tensor("op_14840_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14840_end_mask_0 = const()[name = tensor("op_14840_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14840_cast_fp16 = slice_by_index(begin = var_14840_begin_0, end = var_14840_end_0, end_mask = var_14840_end_mask_0, x = var_14628_cast_fp16)[name = tensor("op_14840_cast_fp16")]; tensor var_14847_begin_0 = const()[name = tensor("op_14847_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14847_end_0 = const()[name = tensor("op_14847_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14847_end_mask_0 = const()[name = tensor("op_14847_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14847_cast_fp16 = slice_by_index(begin = var_14847_begin_0, end = var_14847_end_0, end_mask = var_14847_end_mask_0, x = var_14628_cast_fp16)[name = tensor("op_14847_cast_fp16")]; tensor var_14854_begin_0 = const()[name = tensor("op_14854_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14854_end_0 = const()[name = tensor("op_14854_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14854_end_mask_0 = const()[name = tensor("op_14854_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14854_cast_fp16 = slice_by_index(begin = var_14854_begin_0, end = var_14854_end_0, end_mask = var_14854_end_mask_0, x = var_14628_cast_fp16)[name = tensor("op_14854_cast_fp16")]; tensor var_14861_begin_0 = const()[name = tensor("op_14861_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14861_end_0 = const()[name = tensor("op_14861_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14861_end_mask_0 = const()[name = tensor("op_14861_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14861_cast_fp16 = slice_by_index(begin = var_14861_begin_0, end = var_14861_end_0, end_mask = var_14861_end_mask_0, x = var_14632_cast_fp16)[name = tensor("op_14861_cast_fp16")]; tensor var_14868_begin_0 = const()[name = tensor("op_14868_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14868_end_0 = const()[name = tensor("op_14868_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14868_end_mask_0 = const()[name = tensor("op_14868_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14868_cast_fp16 = slice_by_index(begin = var_14868_begin_0, end = var_14868_end_0, end_mask = var_14868_end_mask_0, x = var_14632_cast_fp16)[name = tensor("op_14868_cast_fp16")]; tensor var_14875_begin_0 = const()[name = tensor("op_14875_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14875_end_0 = const()[name = tensor("op_14875_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14875_end_mask_0 = const()[name = tensor("op_14875_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14875_cast_fp16 = slice_by_index(begin = var_14875_begin_0, end = var_14875_end_0, end_mask = var_14875_end_mask_0, x = var_14632_cast_fp16)[name = tensor("op_14875_cast_fp16")]; tensor var_14882_begin_0 = const()[name = tensor("op_14882_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14882_end_0 = const()[name = tensor("op_14882_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14882_end_mask_0 = const()[name = tensor("op_14882_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14882_cast_fp16 = slice_by_index(begin = var_14882_begin_0, end = var_14882_end_0, end_mask = var_14882_end_mask_0, x = var_14632_cast_fp16)[name = tensor("op_14882_cast_fp16")]; tensor var_14889_begin_0 = const()[name = tensor("op_14889_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14889_end_0 = const()[name = tensor("op_14889_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14889_end_mask_0 = const()[name = tensor("op_14889_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14889_cast_fp16 = slice_by_index(begin = var_14889_begin_0, end = var_14889_end_0, end_mask = var_14889_end_mask_0, x = var_14636_cast_fp16)[name = tensor("op_14889_cast_fp16")]; tensor var_14896_begin_0 = const()[name = tensor("op_14896_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14896_end_0 = const()[name = tensor("op_14896_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14896_end_mask_0 = const()[name = tensor("op_14896_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14896_cast_fp16 = slice_by_index(begin = var_14896_begin_0, end = var_14896_end_0, end_mask = var_14896_end_mask_0, x = var_14636_cast_fp16)[name = tensor("op_14896_cast_fp16")]; tensor var_14903_begin_0 = const()[name = tensor("op_14903_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14903_end_0 = const()[name = tensor("op_14903_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14903_end_mask_0 = const()[name = tensor("op_14903_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14903_cast_fp16 = slice_by_index(begin = var_14903_begin_0, end = var_14903_end_0, end_mask = var_14903_end_mask_0, x = var_14636_cast_fp16)[name = tensor("op_14903_cast_fp16")]; tensor var_14910_begin_0 = const()[name = tensor("op_14910_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14910_end_0 = const()[name = tensor("op_14910_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14910_end_mask_0 = const()[name = tensor("op_14910_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14910_cast_fp16 = slice_by_index(begin = var_14910_begin_0, end = var_14910_end_0, end_mask = var_14910_end_mask_0, x = var_14636_cast_fp16)[name = tensor("op_14910_cast_fp16")]; tensor var_14917_begin_0 = const()[name = tensor("op_14917_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14917_end_0 = const()[name = tensor("op_14917_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14917_end_mask_0 = const()[name = tensor("op_14917_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14917_cast_fp16 = slice_by_index(begin = var_14917_begin_0, end = var_14917_end_0, end_mask = var_14917_end_mask_0, x = var_14640_cast_fp16)[name = tensor("op_14917_cast_fp16")]; tensor var_14924_begin_0 = const()[name = tensor("op_14924_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14924_end_0 = const()[name = tensor("op_14924_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14924_end_mask_0 = const()[name = tensor("op_14924_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14924_cast_fp16 = slice_by_index(begin = var_14924_begin_0, end = var_14924_end_0, end_mask = var_14924_end_mask_0, x = var_14640_cast_fp16)[name = tensor("op_14924_cast_fp16")]; tensor var_14931_begin_0 = const()[name = tensor("op_14931_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14931_end_0 = const()[name = tensor("op_14931_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14931_end_mask_0 = const()[name = tensor("op_14931_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14931_cast_fp16 = slice_by_index(begin = var_14931_begin_0, end = var_14931_end_0, end_mask = var_14931_end_mask_0, x = var_14640_cast_fp16)[name = tensor("op_14931_cast_fp16")]; tensor var_14938_begin_0 = const()[name = tensor("op_14938_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14938_end_0 = const()[name = tensor("op_14938_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14938_end_mask_0 = const()[name = tensor("op_14938_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14938_cast_fp16 = slice_by_index(begin = var_14938_begin_0, end = var_14938_end_0, end_mask = var_14938_end_mask_0, x = var_14640_cast_fp16)[name = tensor("op_14938_cast_fp16")]; tensor var_14945_begin_0 = const()[name = tensor("op_14945_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14945_end_0 = const()[name = tensor("op_14945_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14945_end_mask_0 = const()[name = tensor("op_14945_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14945_cast_fp16 = slice_by_index(begin = var_14945_begin_0, end = var_14945_end_0, end_mask = var_14945_end_mask_0, x = var_14644_cast_fp16)[name = tensor("op_14945_cast_fp16")]; tensor var_14952_begin_0 = const()[name = tensor("op_14952_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14952_end_0 = const()[name = tensor("op_14952_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14952_end_mask_0 = const()[name = tensor("op_14952_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14952_cast_fp16 = slice_by_index(begin = var_14952_begin_0, end = var_14952_end_0, end_mask = var_14952_end_mask_0, x = var_14644_cast_fp16)[name = tensor("op_14952_cast_fp16")]; tensor var_14959_begin_0 = const()[name = tensor("op_14959_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14959_end_0 = const()[name = tensor("op_14959_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14959_end_mask_0 = const()[name = tensor("op_14959_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14959_cast_fp16 = slice_by_index(begin = var_14959_begin_0, end = var_14959_end_0, end_mask = var_14959_end_mask_0, x = var_14644_cast_fp16)[name = tensor("op_14959_cast_fp16")]; tensor var_14966_begin_0 = const()[name = tensor("op_14966_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14966_end_0 = const()[name = tensor("op_14966_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14966_end_mask_0 = const()[name = tensor("op_14966_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14966_cast_fp16 = slice_by_index(begin = var_14966_begin_0, end = var_14966_end_0, end_mask = var_14966_end_mask_0, x = var_14644_cast_fp16)[name = tensor("op_14966_cast_fp16")]; tensor var_14973_begin_0 = const()[name = tensor("op_14973_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_14973_end_0 = const()[name = tensor("op_14973_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_14973_end_mask_0 = const()[name = tensor("op_14973_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14973_cast_fp16 = slice_by_index(begin = var_14973_begin_0, end = var_14973_end_0, end_mask = var_14973_end_mask_0, x = var_14648_cast_fp16)[name = tensor("op_14973_cast_fp16")]; tensor var_14980_begin_0 = const()[name = tensor("op_14980_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_14980_end_0 = const()[name = tensor("op_14980_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_14980_end_mask_0 = const()[name = tensor("op_14980_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14980_cast_fp16 = slice_by_index(begin = var_14980_begin_0, end = var_14980_end_0, end_mask = var_14980_end_mask_0, x = var_14648_cast_fp16)[name = tensor("op_14980_cast_fp16")]; tensor var_14987_begin_0 = const()[name = tensor("op_14987_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_14987_end_0 = const()[name = tensor("op_14987_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_14987_end_mask_0 = const()[name = tensor("op_14987_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14987_cast_fp16 = slice_by_index(begin = var_14987_begin_0, end = var_14987_end_0, end_mask = var_14987_end_mask_0, x = var_14648_cast_fp16)[name = tensor("op_14987_cast_fp16")]; tensor var_14994_begin_0 = const()[name = tensor("op_14994_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_14994_end_0 = const()[name = tensor("op_14994_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_14994_end_mask_0 = const()[name = tensor("op_14994_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_14994_cast_fp16 = slice_by_index(begin = var_14994_begin_0, end = var_14994_end_0, end_mask = var_14994_end_mask_0, x = var_14648_cast_fp16)[name = tensor("op_14994_cast_fp16")]; tensor var_15001_begin_0 = const()[name = tensor("op_15001_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15001_end_0 = const()[name = tensor("op_15001_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15001_end_mask_0 = const()[name = tensor("op_15001_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15001_cast_fp16 = slice_by_index(begin = var_15001_begin_0, end = var_15001_end_0, end_mask = var_15001_end_mask_0, x = var_14652_cast_fp16)[name = tensor("op_15001_cast_fp16")]; tensor var_15008_begin_0 = const()[name = tensor("op_15008_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15008_end_0 = const()[name = tensor("op_15008_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15008_end_mask_0 = const()[name = tensor("op_15008_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15008_cast_fp16 = slice_by_index(begin = var_15008_begin_0, end = var_15008_end_0, end_mask = var_15008_end_mask_0, x = var_14652_cast_fp16)[name = tensor("op_15008_cast_fp16")]; tensor var_15015_begin_0 = const()[name = tensor("op_15015_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15015_end_0 = const()[name = tensor("op_15015_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15015_end_mask_0 = const()[name = tensor("op_15015_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15015_cast_fp16 = slice_by_index(begin = var_15015_begin_0, end = var_15015_end_0, end_mask = var_15015_end_mask_0, x = var_14652_cast_fp16)[name = tensor("op_15015_cast_fp16")]; tensor var_15022_begin_0 = const()[name = tensor("op_15022_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15022_end_0 = const()[name = tensor("op_15022_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15022_end_mask_0 = const()[name = tensor("op_15022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15022_cast_fp16 = slice_by_index(begin = var_15022_begin_0, end = var_15022_end_0, end_mask = var_15022_end_mask_0, x = var_14652_cast_fp16)[name = tensor("op_15022_cast_fp16")]; tensor var_15029_begin_0 = const()[name = tensor("op_15029_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15029_end_0 = const()[name = tensor("op_15029_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15029_end_mask_0 = const()[name = tensor("op_15029_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15029_cast_fp16 = slice_by_index(begin = var_15029_begin_0, end = var_15029_end_0, end_mask = var_15029_end_mask_0, x = var_14656_cast_fp16)[name = tensor("op_15029_cast_fp16")]; tensor var_15036_begin_0 = const()[name = tensor("op_15036_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15036_end_0 = const()[name = tensor("op_15036_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15036_end_mask_0 = const()[name = tensor("op_15036_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15036_cast_fp16 = slice_by_index(begin = var_15036_begin_0, end = var_15036_end_0, end_mask = var_15036_end_mask_0, x = var_14656_cast_fp16)[name = tensor("op_15036_cast_fp16")]; tensor var_15043_begin_0 = const()[name = tensor("op_15043_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15043_end_0 = const()[name = tensor("op_15043_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15043_end_mask_0 = const()[name = tensor("op_15043_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15043_cast_fp16 = slice_by_index(begin = var_15043_begin_0, end = var_15043_end_0, end_mask = var_15043_end_mask_0, x = var_14656_cast_fp16)[name = tensor("op_15043_cast_fp16")]; tensor var_15050_begin_0 = const()[name = tensor("op_15050_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15050_end_0 = const()[name = tensor("op_15050_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15050_end_mask_0 = const()[name = tensor("op_15050_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15050_cast_fp16 = slice_by_index(begin = var_15050_begin_0, end = var_15050_end_0, end_mask = var_15050_end_mask_0, x = var_14656_cast_fp16)[name = tensor("op_15050_cast_fp16")]; tensor var_15057_begin_0 = const()[name = tensor("op_15057_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15057_end_0 = const()[name = tensor("op_15057_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15057_end_mask_0 = const()[name = tensor("op_15057_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15057_cast_fp16 = slice_by_index(begin = var_15057_begin_0, end = var_15057_end_0, end_mask = var_15057_end_mask_0, x = var_14660_cast_fp16)[name = tensor("op_15057_cast_fp16")]; tensor var_15064_begin_0 = const()[name = tensor("op_15064_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15064_end_0 = const()[name = tensor("op_15064_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15064_end_mask_0 = const()[name = tensor("op_15064_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15064_cast_fp16 = slice_by_index(begin = var_15064_begin_0, end = var_15064_end_0, end_mask = var_15064_end_mask_0, x = var_14660_cast_fp16)[name = tensor("op_15064_cast_fp16")]; tensor var_15071_begin_0 = const()[name = tensor("op_15071_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15071_end_0 = const()[name = tensor("op_15071_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15071_end_mask_0 = const()[name = tensor("op_15071_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15071_cast_fp16 = slice_by_index(begin = var_15071_begin_0, end = var_15071_end_0, end_mask = var_15071_end_mask_0, x = var_14660_cast_fp16)[name = tensor("op_15071_cast_fp16")]; tensor var_15078_begin_0 = const()[name = tensor("op_15078_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15078_end_0 = const()[name = tensor("op_15078_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15078_end_mask_0 = const()[name = tensor("op_15078_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15078_cast_fp16 = slice_by_index(begin = var_15078_begin_0, end = var_15078_end_0, end_mask = var_15078_end_mask_0, x = var_14660_cast_fp16)[name = tensor("op_15078_cast_fp16")]; tensor var_15085_begin_0 = const()[name = tensor("op_15085_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15085_end_0 = const()[name = tensor("op_15085_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15085_end_mask_0 = const()[name = tensor("op_15085_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15085_cast_fp16 = slice_by_index(begin = var_15085_begin_0, end = var_15085_end_0, end_mask = var_15085_end_mask_0, x = var_14664_cast_fp16)[name = tensor("op_15085_cast_fp16")]; tensor var_15092_begin_0 = const()[name = tensor("op_15092_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15092_end_0 = const()[name = tensor("op_15092_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15092_end_mask_0 = const()[name = tensor("op_15092_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15092_cast_fp16 = slice_by_index(begin = var_15092_begin_0, end = var_15092_end_0, end_mask = var_15092_end_mask_0, x = var_14664_cast_fp16)[name = tensor("op_15092_cast_fp16")]; tensor var_15099_begin_0 = const()[name = tensor("op_15099_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15099_end_0 = const()[name = tensor("op_15099_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15099_end_mask_0 = const()[name = tensor("op_15099_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15099_cast_fp16 = slice_by_index(begin = var_15099_begin_0, end = var_15099_end_0, end_mask = var_15099_end_mask_0, x = var_14664_cast_fp16)[name = tensor("op_15099_cast_fp16")]; tensor var_15106_begin_0 = const()[name = tensor("op_15106_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15106_end_0 = const()[name = tensor("op_15106_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15106_end_mask_0 = const()[name = tensor("op_15106_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15106_cast_fp16 = slice_by_index(begin = var_15106_begin_0, end = var_15106_end_0, end_mask = var_15106_end_mask_0, x = var_14664_cast_fp16)[name = tensor("op_15106_cast_fp16")]; tensor var_15113_begin_0 = const()[name = tensor("op_15113_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15113_end_0 = const()[name = tensor("op_15113_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15113_end_mask_0 = const()[name = tensor("op_15113_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15113_cast_fp16 = slice_by_index(begin = var_15113_begin_0, end = var_15113_end_0, end_mask = var_15113_end_mask_0, x = var_14668_cast_fp16)[name = tensor("op_15113_cast_fp16")]; tensor var_15120_begin_0 = const()[name = tensor("op_15120_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15120_end_0 = const()[name = tensor("op_15120_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15120_end_mask_0 = const()[name = tensor("op_15120_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15120_cast_fp16 = slice_by_index(begin = var_15120_begin_0, end = var_15120_end_0, end_mask = var_15120_end_mask_0, x = var_14668_cast_fp16)[name = tensor("op_15120_cast_fp16")]; tensor var_15127_begin_0 = const()[name = tensor("op_15127_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15127_end_0 = const()[name = tensor("op_15127_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15127_end_mask_0 = const()[name = tensor("op_15127_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15127_cast_fp16 = slice_by_index(begin = var_15127_begin_0, end = var_15127_end_0, end_mask = var_15127_end_mask_0, x = var_14668_cast_fp16)[name = tensor("op_15127_cast_fp16")]; tensor var_15134_begin_0 = const()[name = tensor("op_15134_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15134_end_0 = const()[name = tensor("op_15134_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15134_end_mask_0 = const()[name = tensor("op_15134_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15134_cast_fp16 = slice_by_index(begin = var_15134_begin_0, end = var_15134_end_0, end_mask = var_15134_end_mask_0, x = var_14668_cast_fp16)[name = tensor("op_15134_cast_fp16")]; tensor var_15141_begin_0 = const()[name = tensor("op_15141_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15141_end_0 = const()[name = tensor("op_15141_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15141_end_mask_0 = const()[name = tensor("op_15141_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15141_cast_fp16 = slice_by_index(begin = var_15141_begin_0, end = var_15141_end_0, end_mask = var_15141_end_mask_0, x = var_14672_cast_fp16)[name = tensor("op_15141_cast_fp16")]; tensor var_15148_begin_0 = const()[name = tensor("op_15148_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15148_end_0 = const()[name = tensor("op_15148_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15148_end_mask_0 = const()[name = tensor("op_15148_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15148_cast_fp16 = slice_by_index(begin = var_15148_begin_0, end = var_15148_end_0, end_mask = var_15148_end_mask_0, x = var_14672_cast_fp16)[name = tensor("op_15148_cast_fp16")]; tensor var_15155_begin_0 = const()[name = tensor("op_15155_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15155_end_0 = const()[name = tensor("op_15155_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15155_end_mask_0 = const()[name = tensor("op_15155_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15155_cast_fp16 = slice_by_index(begin = var_15155_begin_0, end = var_15155_end_0, end_mask = var_15155_end_mask_0, x = var_14672_cast_fp16)[name = tensor("op_15155_cast_fp16")]; tensor var_15162_begin_0 = const()[name = tensor("op_15162_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15162_end_0 = const()[name = tensor("op_15162_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15162_end_mask_0 = const()[name = tensor("op_15162_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15162_cast_fp16 = slice_by_index(begin = var_15162_begin_0, end = var_15162_end_0, end_mask = var_15162_end_mask_0, x = var_14672_cast_fp16)[name = tensor("op_15162_cast_fp16")]; tensor var_15169_begin_0 = const()[name = tensor("op_15169_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15169_end_0 = const()[name = tensor("op_15169_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15169_end_mask_0 = const()[name = tensor("op_15169_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15169_cast_fp16 = slice_by_index(begin = var_15169_begin_0, end = var_15169_end_0, end_mask = var_15169_end_mask_0, x = var_14676_cast_fp16)[name = tensor("op_15169_cast_fp16")]; tensor var_15176_begin_0 = const()[name = tensor("op_15176_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15176_end_0 = const()[name = tensor("op_15176_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15176_end_mask_0 = const()[name = tensor("op_15176_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15176_cast_fp16 = slice_by_index(begin = var_15176_begin_0, end = var_15176_end_0, end_mask = var_15176_end_mask_0, x = var_14676_cast_fp16)[name = tensor("op_15176_cast_fp16")]; tensor var_15183_begin_0 = const()[name = tensor("op_15183_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15183_end_0 = const()[name = tensor("op_15183_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15183_end_mask_0 = const()[name = tensor("op_15183_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15183_cast_fp16 = slice_by_index(begin = var_15183_begin_0, end = var_15183_end_0, end_mask = var_15183_end_mask_0, x = var_14676_cast_fp16)[name = tensor("op_15183_cast_fp16")]; tensor var_15190_begin_0 = const()[name = tensor("op_15190_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15190_end_0 = const()[name = tensor("op_15190_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15190_end_mask_0 = const()[name = tensor("op_15190_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15190_cast_fp16 = slice_by_index(begin = var_15190_begin_0, end = var_15190_end_0, end_mask = var_15190_end_mask_0, x = var_14676_cast_fp16)[name = tensor("op_15190_cast_fp16")]; tensor var_15197_begin_0 = const()[name = tensor("op_15197_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15197_end_0 = const()[name = tensor("op_15197_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15197_end_mask_0 = const()[name = tensor("op_15197_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15197_cast_fp16 = slice_by_index(begin = var_15197_begin_0, end = var_15197_end_0, end_mask = var_15197_end_mask_0, x = var_14680_cast_fp16)[name = tensor("op_15197_cast_fp16")]; tensor var_15204_begin_0 = const()[name = tensor("op_15204_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15204_end_0 = const()[name = tensor("op_15204_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15204_end_mask_0 = const()[name = tensor("op_15204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15204_cast_fp16 = slice_by_index(begin = var_15204_begin_0, end = var_15204_end_0, end_mask = var_15204_end_mask_0, x = var_14680_cast_fp16)[name = tensor("op_15204_cast_fp16")]; tensor var_15211_begin_0 = const()[name = tensor("op_15211_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15211_end_0 = const()[name = tensor("op_15211_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15211_end_mask_0 = const()[name = tensor("op_15211_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15211_cast_fp16 = slice_by_index(begin = var_15211_begin_0, end = var_15211_end_0, end_mask = var_15211_end_mask_0, x = var_14680_cast_fp16)[name = tensor("op_15211_cast_fp16")]; tensor var_15218_begin_0 = const()[name = tensor("op_15218_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15218_end_0 = const()[name = tensor("op_15218_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15218_end_mask_0 = const()[name = tensor("op_15218_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15218_cast_fp16 = slice_by_index(begin = var_15218_begin_0, end = var_15218_end_0, end_mask = var_15218_end_mask_0, x = var_14680_cast_fp16)[name = tensor("op_15218_cast_fp16")]; tensor var_15225_begin_0 = const()[name = tensor("op_15225_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15225_end_0 = const()[name = tensor("op_15225_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_15225_end_mask_0 = const()[name = tensor("op_15225_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15225_cast_fp16 = slice_by_index(begin = var_15225_begin_0, end = var_15225_end_0, end_mask = var_15225_end_mask_0, x = var_14684_cast_fp16)[name = tensor("op_15225_cast_fp16")]; tensor var_15232_begin_0 = const()[name = tensor("op_15232_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_15232_end_0 = const()[name = tensor("op_15232_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_15232_end_mask_0 = const()[name = tensor("op_15232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15232_cast_fp16 = slice_by_index(begin = var_15232_begin_0, end = var_15232_end_0, end_mask = var_15232_end_mask_0, x = var_14684_cast_fp16)[name = tensor("op_15232_cast_fp16")]; tensor var_15239_begin_0 = const()[name = tensor("op_15239_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_15239_end_0 = const()[name = tensor("op_15239_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_15239_end_mask_0 = const()[name = tensor("op_15239_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15239_cast_fp16 = slice_by_index(begin = var_15239_begin_0, end = var_15239_end_0, end_mask = var_15239_end_mask_0, x = var_14684_cast_fp16)[name = tensor("op_15239_cast_fp16")]; tensor var_15246_begin_0 = const()[name = tensor("op_15246_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_15246_end_0 = const()[name = tensor("op_15246_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15246_end_mask_0 = const()[name = tensor("op_15246_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15246_cast_fp16 = slice_by_index(begin = var_15246_begin_0, end = var_15246_end_0, end_mask = var_15246_end_mask_0, x = var_14684_cast_fp16)[name = tensor("op_15246_cast_fp16")]; tensor k_19_perm_0 = const()[name = tensor("k_19_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_15251_begin_0 = const()[name = tensor("op_15251_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15251_end_0 = const()[name = tensor("op_15251_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_15251_end_mask_0 = const()[name = tensor("op_15251_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = key_19_cast_fp16)[name = tensor("transpose_22")]; tensor var_15251_cast_fp16 = slice_by_index(begin = var_15251_begin_0, end = var_15251_end_0, end_mask = var_15251_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15251_cast_fp16")]; tensor var_15255_begin_0 = const()[name = tensor("op_15255_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_15255_end_0 = const()[name = tensor("op_15255_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_15255_end_mask_0 = const()[name = tensor("op_15255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15255_cast_fp16 = slice_by_index(begin = var_15255_begin_0, end = var_15255_end_0, end_mask = var_15255_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15255_cast_fp16")]; tensor var_15259_begin_0 = const()[name = tensor("op_15259_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_15259_end_0 = const()[name = tensor("op_15259_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_15259_end_mask_0 = const()[name = tensor("op_15259_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15259_cast_fp16 = slice_by_index(begin = var_15259_begin_0, end = var_15259_end_0, end_mask = var_15259_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15259_cast_fp16")]; tensor var_15263_begin_0 = const()[name = tensor("op_15263_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_15263_end_0 = const()[name = tensor("op_15263_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_15263_end_mask_0 = const()[name = tensor("op_15263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15263_cast_fp16 = slice_by_index(begin = var_15263_begin_0, end = var_15263_end_0, end_mask = var_15263_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15263_cast_fp16")]; tensor var_15267_begin_0 = const()[name = tensor("op_15267_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_15267_end_0 = const()[name = tensor("op_15267_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_15267_end_mask_0 = const()[name = tensor("op_15267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15267_cast_fp16 = slice_by_index(begin = var_15267_begin_0, end = var_15267_end_0, end_mask = var_15267_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15267_cast_fp16")]; tensor var_15271_begin_0 = const()[name = tensor("op_15271_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_15271_end_0 = const()[name = tensor("op_15271_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_15271_end_mask_0 = const()[name = tensor("op_15271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15271_cast_fp16 = slice_by_index(begin = var_15271_begin_0, end = var_15271_end_0, end_mask = var_15271_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15271_cast_fp16")]; tensor var_15275_begin_0 = const()[name = tensor("op_15275_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_15275_end_0 = const()[name = tensor("op_15275_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_15275_end_mask_0 = const()[name = tensor("op_15275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15275_cast_fp16 = slice_by_index(begin = var_15275_begin_0, end = var_15275_end_0, end_mask = var_15275_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15275_cast_fp16")]; tensor var_15279_begin_0 = const()[name = tensor("op_15279_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_15279_end_0 = const()[name = tensor("op_15279_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_15279_end_mask_0 = const()[name = tensor("op_15279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15279_cast_fp16 = slice_by_index(begin = var_15279_begin_0, end = var_15279_end_0, end_mask = var_15279_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15279_cast_fp16")]; tensor var_15283_begin_0 = const()[name = tensor("op_15283_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_15283_end_0 = const()[name = tensor("op_15283_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_15283_end_mask_0 = const()[name = tensor("op_15283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15283_cast_fp16 = slice_by_index(begin = var_15283_begin_0, end = var_15283_end_0, end_mask = var_15283_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15283_cast_fp16")]; tensor var_15287_begin_0 = const()[name = tensor("op_15287_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_15287_end_0 = const()[name = tensor("op_15287_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_15287_end_mask_0 = const()[name = tensor("op_15287_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15287_cast_fp16 = slice_by_index(begin = var_15287_begin_0, end = var_15287_end_0, end_mask = var_15287_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15287_cast_fp16")]; tensor var_15291_begin_0 = const()[name = tensor("op_15291_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_15291_end_0 = const()[name = tensor("op_15291_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_15291_end_mask_0 = const()[name = tensor("op_15291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15291_cast_fp16 = slice_by_index(begin = var_15291_begin_0, end = var_15291_end_0, end_mask = var_15291_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15291_cast_fp16")]; tensor var_15295_begin_0 = const()[name = tensor("op_15295_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_15295_end_0 = const()[name = tensor("op_15295_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_15295_end_mask_0 = const()[name = tensor("op_15295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15295_cast_fp16 = slice_by_index(begin = var_15295_begin_0, end = var_15295_end_0, end_mask = var_15295_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15295_cast_fp16")]; tensor var_15299_begin_0 = const()[name = tensor("op_15299_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_15299_end_0 = const()[name = tensor("op_15299_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_15299_end_mask_0 = const()[name = tensor("op_15299_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15299_cast_fp16 = slice_by_index(begin = var_15299_begin_0, end = var_15299_end_0, end_mask = var_15299_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15299_cast_fp16")]; tensor var_15303_begin_0 = const()[name = tensor("op_15303_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_15303_end_0 = const()[name = tensor("op_15303_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_15303_end_mask_0 = const()[name = tensor("op_15303_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15303_cast_fp16 = slice_by_index(begin = var_15303_begin_0, end = var_15303_end_0, end_mask = var_15303_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15303_cast_fp16")]; tensor var_15307_begin_0 = const()[name = tensor("op_15307_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_15307_end_0 = const()[name = tensor("op_15307_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_15307_end_mask_0 = const()[name = tensor("op_15307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15307_cast_fp16 = slice_by_index(begin = var_15307_begin_0, end = var_15307_end_0, end_mask = var_15307_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15307_cast_fp16")]; tensor var_15311_begin_0 = const()[name = tensor("op_15311_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_15311_end_0 = const()[name = tensor("op_15311_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_15311_end_mask_0 = const()[name = tensor("op_15311_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15311_cast_fp16 = slice_by_index(begin = var_15311_begin_0, end = var_15311_end_0, end_mask = var_15311_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15311_cast_fp16")]; tensor var_15315_begin_0 = const()[name = tensor("op_15315_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_15315_end_0 = const()[name = tensor("op_15315_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_15315_end_mask_0 = const()[name = tensor("op_15315_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15315_cast_fp16 = slice_by_index(begin = var_15315_begin_0, end = var_15315_end_0, end_mask = var_15315_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15315_cast_fp16")]; tensor var_15319_begin_0 = const()[name = tensor("op_15319_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_15319_end_0 = const()[name = tensor("op_15319_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_15319_end_mask_0 = const()[name = tensor("op_15319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15319_cast_fp16 = slice_by_index(begin = var_15319_begin_0, end = var_15319_end_0, end_mask = var_15319_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15319_cast_fp16")]; tensor var_15323_begin_0 = const()[name = tensor("op_15323_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_15323_end_0 = const()[name = tensor("op_15323_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_15323_end_mask_0 = const()[name = tensor("op_15323_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15323_cast_fp16 = slice_by_index(begin = var_15323_begin_0, end = var_15323_end_0, end_mask = var_15323_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15323_cast_fp16")]; tensor var_15327_begin_0 = const()[name = tensor("op_15327_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_15327_end_0 = const()[name = tensor("op_15327_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_15327_end_mask_0 = const()[name = tensor("op_15327_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_15327_cast_fp16 = slice_by_index(begin = var_15327_begin_0, end = var_15327_end_0, end_mask = var_15327_end_mask_0, x = k_19_cast_fp16)[name = tensor("op_15327_cast_fp16")]; tensor var_15329_begin_0 = const()[name = tensor("op_15329_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_15329_end_0 = const()[name = tensor("op_15329_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_15329_end_mask_0 = const()[name = tensor("op_15329_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15329_cast_fp16 = slice_by_index(begin = var_15329_begin_0, end = var_15329_end_0, end_mask = var_15329_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15329_cast_fp16")]; tensor var_15333_begin_0 = const()[name = tensor("op_15333_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_15333_end_0 = const()[name = tensor("op_15333_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_15333_end_mask_0 = const()[name = tensor("op_15333_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15333_cast_fp16 = slice_by_index(begin = var_15333_begin_0, end = var_15333_end_0, end_mask = var_15333_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15333_cast_fp16")]; tensor var_15337_begin_0 = const()[name = tensor("op_15337_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_15337_end_0 = const()[name = tensor("op_15337_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_15337_end_mask_0 = const()[name = tensor("op_15337_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15337_cast_fp16 = slice_by_index(begin = var_15337_begin_0, end = var_15337_end_0, end_mask = var_15337_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15337_cast_fp16")]; tensor var_15341_begin_0 = const()[name = tensor("op_15341_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_15341_end_0 = const()[name = tensor("op_15341_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_15341_end_mask_0 = const()[name = tensor("op_15341_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15341_cast_fp16 = slice_by_index(begin = var_15341_begin_0, end = var_15341_end_0, end_mask = var_15341_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15341_cast_fp16")]; tensor var_15345_begin_0 = const()[name = tensor("op_15345_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_15345_end_0 = const()[name = tensor("op_15345_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_15345_end_mask_0 = const()[name = tensor("op_15345_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15345_cast_fp16 = slice_by_index(begin = var_15345_begin_0, end = var_15345_end_0, end_mask = var_15345_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15345_cast_fp16")]; tensor var_15349_begin_0 = const()[name = tensor("op_15349_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_15349_end_0 = const()[name = tensor("op_15349_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_15349_end_mask_0 = const()[name = tensor("op_15349_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15349_cast_fp16 = slice_by_index(begin = var_15349_begin_0, end = var_15349_end_0, end_mask = var_15349_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15349_cast_fp16")]; tensor var_15353_begin_0 = const()[name = tensor("op_15353_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_15353_end_0 = const()[name = tensor("op_15353_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_15353_end_mask_0 = const()[name = tensor("op_15353_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15353_cast_fp16 = slice_by_index(begin = var_15353_begin_0, end = var_15353_end_0, end_mask = var_15353_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15353_cast_fp16")]; tensor var_15357_begin_0 = const()[name = tensor("op_15357_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_15357_end_0 = const()[name = tensor("op_15357_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_15357_end_mask_0 = const()[name = tensor("op_15357_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15357_cast_fp16 = slice_by_index(begin = var_15357_begin_0, end = var_15357_end_0, end_mask = var_15357_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15357_cast_fp16")]; tensor var_15361_begin_0 = const()[name = tensor("op_15361_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_15361_end_0 = const()[name = tensor("op_15361_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_15361_end_mask_0 = const()[name = tensor("op_15361_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15361_cast_fp16 = slice_by_index(begin = var_15361_begin_0, end = var_15361_end_0, end_mask = var_15361_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15361_cast_fp16")]; tensor var_15365_begin_0 = const()[name = tensor("op_15365_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_15365_end_0 = const()[name = tensor("op_15365_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_15365_end_mask_0 = const()[name = tensor("op_15365_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15365_cast_fp16 = slice_by_index(begin = var_15365_begin_0, end = var_15365_end_0, end_mask = var_15365_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15365_cast_fp16")]; tensor var_15369_begin_0 = const()[name = tensor("op_15369_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_15369_end_0 = const()[name = tensor("op_15369_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_15369_end_mask_0 = const()[name = tensor("op_15369_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15369_cast_fp16 = slice_by_index(begin = var_15369_begin_0, end = var_15369_end_0, end_mask = var_15369_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15369_cast_fp16")]; tensor var_15373_begin_0 = const()[name = tensor("op_15373_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_15373_end_0 = const()[name = tensor("op_15373_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_15373_end_mask_0 = const()[name = tensor("op_15373_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15373_cast_fp16 = slice_by_index(begin = var_15373_begin_0, end = var_15373_end_0, end_mask = var_15373_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15373_cast_fp16")]; tensor var_15377_begin_0 = const()[name = tensor("op_15377_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_15377_end_0 = const()[name = tensor("op_15377_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_15377_end_mask_0 = const()[name = tensor("op_15377_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15377_cast_fp16 = slice_by_index(begin = var_15377_begin_0, end = var_15377_end_0, end_mask = var_15377_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15377_cast_fp16")]; tensor var_15381_begin_0 = const()[name = tensor("op_15381_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_15381_end_0 = const()[name = tensor("op_15381_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_15381_end_mask_0 = const()[name = tensor("op_15381_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15381_cast_fp16 = slice_by_index(begin = var_15381_begin_0, end = var_15381_end_0, end_mask = var_15381_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15381_cast_fp16")]; tensor var_15385_begin_0 = const()[name = tensor("op_15385_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_15385_end_0 = const()[name = tensor("op_15385_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_15385_end_mask_0 = const()[name = tensor("op_15385_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15385_cast_fp16 = slice_by_index(begin = var_15385_begin_0, end = var_15385_end_0, end_mask = var_15385_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15385_cast_fp16")]; tensor var_15389_begin_0 = const()[name = tensor("op_15389_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_15389_end_0 = const()[name = tensor("op_15389_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_15389_end_mask_0 = const()[name = tensor("op_15389_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15389_cast_fp16 = slice_by_index(begin = var_15389_begin_0, end = var_15389_end_0, end_mask = var_15389_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15389_cast_fp16")]; tensor var_15393_begin_0 = const()[name = tensor("op_15393_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_15393_end_0 = const()[name = tensor("op_15393_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_15393_end_mask_0 = const()[name = tensor("op_15393_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15393_cast_fp16 = slice_by_index(begin = var_15393_begin_0, end = var_15393_end_0, end_mask = var_15393_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15393_cast_fp16")]; tensor var_15397_begin_0 = const()[name = tensor("op_15397_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_15397_end_0 = const()[name = tensor("op_15397_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_15397_end_mask_0 = const()[name = tensor("op_15397_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15397_cast_fp16 = slice_by_index(begin = var_15397_begin_0, end = var_15397_end_0, end_mask = var_15397_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15397_cast_fp16")]; tensor var_15401_begin_0 = const()[name = tensor("op_15401_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_15401_end_0 = const()[name = tensor("op_15401_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_15401_end_mask_0 = const()[name = tensor("op_15401_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15401_cast_fp16 = slice_by_index(begin = var_15401_begin_0, end = var_15401_end_0, end_mask = var_15401_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15401_cast_fp16")]; tensor var_15405_begin_0 = const()[name = tensor("op_15405_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_15405_end_0 = const()[name = tensor("op_15405_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_15405_end_mask_0 = const()[name = tensor("op_15405_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_15405_cast_fp16 = slice_by_index(begin = var_15405_begin_0, end = var_15405_end_0, end_mask = var_15405_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_15405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1441_equation_0, values = (var_15251_cast_fp16, var_14693_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1443_equation_0, values = (var_15251_cast_fp16, var_14700_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1445_equation_0, values = (var_15251_cast_fp16, var_14707_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1447_equation_0, values = (var_15251_cast_fp16, var_14714_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1449_equation_0, values = (var_15255_cast_fp16, var_14721_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1451_equation_0, values = (var_15255_cast_fp16, var_14728_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1453_equation_0, values = (var_15255_cast_fp16, var_14735_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1455_equation_0, values = (var_15255_cast_fp16, var_14742_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1457_equation_0, values = (var_15259_cast_fp16, var_14749_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1459_equation_0, values = (var_15259_cast_fp16, var_14756_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1461_equation_0, values = (var_15259_cast_fp16, var_14763_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1463_equation_0, values = (var_15259_cast_fp16, var_14770_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1465_equation_0, values = (var_15263_cast_fp16, var_14777_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1467_equation_0, values = (var_15263_cast_fp16, var_14784_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1469_equation_0, values = (var_15263_cast_fp16, var_14791_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1471_equation_0, values = (var_15263_cast_fp16, var_14798_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1473_equation_0, values = (var_15267_cast_fp16, var_14805_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1475_equation_0, values = (var_15267_cast_fp16, var_14812_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1477_equation_0, values = (var_15267_cast_fp16, var_14819_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1479_equation_0, values = (var_15267_cast_fp16, var_14826_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1479_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1481_equation_0, values = (var_15271_cast_fp16, var_14833_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1483_equation_0, values = (var_15271_cast_fp16, var_14840_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1485_equation_0, values = (var_15271_cast_fp16, var_14847_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1487_equation_0, values = (var_15271_cast_fp16, var_14854_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1489_equation_0, values = (var_15275_cast_fp16, var_14861_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1491_equation_0, values = (var_15275_cast_fp16, var_14868_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1493_equation_0, values = (var_15275_cast_fp16, var_14875_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1495_equation_0, values = (var_15275_cast_fp16, var_14882_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1497_equation_0, values = (var_15279_cast_fp16, var_14889_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1499_equation_0, values = (var_15279_cast_fp16, var_14896_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1501_equation_0, values = (var_15279_cast_fp16, var_14903_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1503_equation_0, values = (var_15279_cast_fp16, var_14910_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1505_equation_0, values = (var_15283_cast_fp16, var_14917_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1507_equation_0, values = (var_15283_cast_fp16, var_14924_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1509_equation_0, values = (var_15283_cast_fp16, var_14931_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1511_equation_0, values = (var_15283_cast_fp16, var_14938_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1513_equation_0, values = (var_15287_cast_fp16, var_14945_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1515_equation_0, values = (var_15287_cast_fp16, var_14952_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1517_equation_0, values = (var_15287_cast_fp16, var_14959_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1519_equation_0, values = (var_15287_cast_fp16, var_14966_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1519_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1521_equation_0, values = (var_15291_cast_fp16, var_14973_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1523_equation_0, values = (var_15291_cast_fp16, var_14980_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1525_equation_0, values = (var_15291_cast_fp16, var_14987_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1527_equation_0, values = (var_15291_cast_fp16, var_14994_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1529_equation_0, values = (var_15295_cast_fp16, var_15001_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1531_equation_0, values = (var_15295_cast_fp16, var_15008_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1533_equation_0, values = (var_15295_cast_fp16, var_15015_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1535_equation_0, values = (var_15295_cast_fp16, var_15022_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1537_equation_0, values = (var_15299_cast_fp16, var_15029_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1539_equation_0, values = (var_15299_cast_fp16, var_15036_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1541_equation_0, values = (var_15299_cast_fp16, var_15043_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1543_equation_0, values = (var_15299_cast_fp16, var_15050_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1545_equation_0, values = (var_15303_cast_fp16, var_15057_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1547_equation_0, values = (var_15303_cast_fp16, var_15064_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1549_equation_0, values = (var_15303_cast_fp16, var_15071_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1551_equation_0, values = (var_15303_cast_fp16, var_15078_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1553_equation_0, values = (var_15307_cast_fp16, var_15085_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1555_equation_0, values = (var_15307_cast_fp16, var_15092_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1557_equation_0, values = (var_15307_cast_fp16, var_15099_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1559_equation_0, values = (var_15307_cast_fp16, var_15106_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1559_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1561_equation_0, values = (var_15311_cast_fp16, var_15113_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1563_equation_0, values = (var_15311_cast_fp16, var_15120_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1565_equation_0, values = (var_15311_cast_fp16, var_15127_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1567_equation_0, values = (var_15311_cast_fp16, var_15134_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1569_equation_0, values = (var_15315_cast_fp16, var_15141_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1571_equation_0, values = (var_15315_cast_fp16, var_15148_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1573_equation_0, values = (var_15315_cast_fp16, var_15155_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1575_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1575_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1575_equation_0, values = (var_15315_cast_fp16, var_15162_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1575_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1577_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1577_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1577_equation_0, values = (var_15319_cast_fp16, var_15169_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1577_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1579_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1579_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1579_equation_0, values = (var_15319_cast_fp16, var_15176_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1579_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1581_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1581_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1581_equation_0, values = (var_15319_cast_fp16, var_15183_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1581_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1583_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1583_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1583_equation_0, values = (var_15319_cast_fp16, var_15190_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1583_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1585_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1585_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1585_equation_0, values = (var_15323_cast_fp16, var_15197_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1585_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1587_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1587_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1587_equation_0, values = (var_15323_cast_fp16, var_15204_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1587_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1589_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1589_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1589_equation_0, values = (var_15323_cast_fp16, var_15211_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1589_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1591_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1591_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1591_equation_0, values = (var_15323_cast_fp16, var_15218_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1591_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1593_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1593_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1593_equation_0, values = (var_15327_cast_fp16, var_15225_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1593_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1595_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1595_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1595_equation_0, values = (var_15327_cast_fp16, var_15232_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1595_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1597_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1597_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1597_equation_0, values = (var_15327_cast_fp16, var_15239_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1597_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1599_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1599_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1599_equation_0, values = (var_15327_cast_fp16, var_15246_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1599_cast_fp16")]; tensor var_15568_to_fp16 = const()[name = tensor("op_15568_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1441_cast_fp16, y = var_15568_to_fp16)[name = tensor("aw_chunk_1441_cast_fp16")]; tensor var_15570_to_fp16 = const()[name = tensor("op_15570_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1443_cast_fp16, y = var_15570_to_fp16)[name = tensor("aw_chunk_1443_cast_fp16")]; tensor var_15572_to_fp16 = const()[name = tensor("op_15572_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1445_cast_fp16, y = var_15572_to_fp16)[name = tensor("aw_chunk_1445_cast_fp16")]; tensor var_15574_to_fp16 = const()[name = tensor("op_15574_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1447_cast_fp16, y = var_15574_to_fp16)[name = tensor("aw_chunk_1447_cast_fp16")]; tensor var_15576_to_fp16 = const()[name = tensor("op_15576_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1449_cast_fp16, y = var_15576_to_fp16)[name = tensor("aw_chunk_1449_cast_fp16")]; tensor var_15578_to_fp16 = const()[name = tensor("op_15578_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1451_cast_fp16, y = var_15578_to_fp16)[name = tensor("aw_chunk_1451_cast_fp16")]; tensor var_15580_to_fp16 = const()[name = tensor("op_15580_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1453_cast_fp16, y = var_15580_to_fp16)[name = tensor("aw_chunk_1453_cast_fp16")]; tensor var_15582_to_fp16 = const()[name = tensor("op_15582_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1455_cast_fp16, y = var_15582_to_fp16)[name = tensor("aw_chunk_1455_cast_fp16")]; tensor var_15584_to_fp16 = const()[name = tensor("op_15584_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1457_cast_fp16, y = var_15584_to_fp16)[name = tensor("aw_chunk_1457_cast_fp16")]; tensor var_15586_to_fp16 = const()[name = tensor("op_15586_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1459_cast_fp16, y = var_15586_to_fp16)[name = tensor("aw_chunk_1459_cast_fp16")]; tensor var_15588_to_fp16 = const()[name = tensor("op_15588_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1461_cast_fp16, y = var_15588_to_fp16)[name = tensor("aw_chunk_1461_cast_fp16")]; tensor var_15590_to_fp16 = const()[name = tensor("op_15590_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1463_cast_fp16, y = var_15590_to_fp16)[name = tensor("aw_chunk_1463_cast_fp16")]; tensor var_15592_to_fp16 = const()[name = tensor("op_15592_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1465_cast_fp16, y = var_15592_to_fp16)[name = tensor("aw_chunk_1465_cast_fp16")]; tensor var_15594_to_fp16 = const()[name = tensor("op_15594_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1467_cast_fp16, y = var_15594_to_fp16)[name = tensor("aw_chunk_1467_cast_fp16")]; tensor var_15596_to_fp16 = const()[name = tensor("op_15596_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1469_cast_fp16, y = var_15596_to_fp16)[name = tensor("aw_chunk_1469_cast_fp16")]; tensor var_15598_to_fp16 = const()[name = tensor("op_15598_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1471_cast_fp16, y = var_15598_to_fp16)[name = tensor("aw_chunk_1471_cast_fp16")]; tensor var_15600_to_fp16 = const()[name = tensor("op_15600_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1473_cast_fp16, y = var_15600_to_fp16)[name = tensor("aw_chunk_1473_cast_fp16")]; tensor var_15602_to_fp16 = const()[name = tensor("op_15602_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1475_cast_fp16, y = var_15602_to_fp16)[name = tensor("aw_chunk_1475_cast_fp16")]; tensor var_15604_to_fp16 = const()[name = tensor("op_15604_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1477_cast_fp16, y = var_15604_to_fp16)[name = tensor("aw_chunk_1477_cast_fp16")]; tensor var_15606_to_fp16 = const()[name = tensor("op_15606_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1479_cast_fp16, y = var_15606_to_fp16)[name = tensor("aw_chunk_1479_cast_fp16")]; tensor var_15608_to_fp16 = const()[name = tensor("op_15608_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1481_cast_fp16, y = var_15608_to_fp16)[name = tensor("aw_chunk_1481_cast_fp16")]; tensor var_15610_to_fp16 = const()[name = tensor("op_15610_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1483_cast_fp16, y = var_15610_to_fp16)[name = tensor("aw_chunk_1483_cast_fp16")]; tensor var_15612_to_fp16 = const()[name = tensor("op_15612_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1485_cast_fp16, y = var_15612_to_fp16)[name = tensor("aw_chunk_1485_cast_fp16")]; tensor var_15614_to_fp16 = const()[name = tensor("op_15614_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1487_cast_fp16, y = var_15614_to_fp16)[name = tensor("aw_chunk_1487_cast_fp16")]; tensor var_15616_to_fp16 = const()[name = tensor("op_15616_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1489_cast_fp16, y = var_15616_to_fp16)[name = tensor("aw_chunk_1489_cast_fp16")]; tensor var_15618_to_fp16 = const()[name = tensor("op_15618_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1491_cast_fp16, y = var_15618_to_fp16)[name = tensor("aw_chunk_1491_cast_fp16")]; tensor var_15620_to_fp16 = const()[name = tensor("op_15620_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1493_cast_fp16, y = var_15620_to_fp16)[name = tensor("aw_chunk_1493_cast_fp16")]; tensor var_15622_to_fp16 = const()[name = tensor("op_15622_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1495_cast_fp16, y = var_15622_to_fp16)[name = tensor("aw_chunk_1495_cast_fp16")]; tensor var_15624_to_fp16 = const()[name = tensor("op_15624_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1497_cast_fp16, y = var_15624_to_fp16)[name = tensor("aw_chunk_1497_cast_fp16")]; tensor var_15626_to_fp16 = const()[name = tensor("op_15626_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1499_cast_fp16, y = var_15626_to_fp16)[name = tensor("aw_chunk_1499_cast_fp16")]; tensor var_15628_to_fp16 = const()[name = tensor("op_15628_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1501_cast_fp16, y = var_15628_to_fp16)[name = tensor("aw_chunk_1501_cast_fp16")]; tensor var_15630_to_fp16 = const()[name = tensor("op_15630_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1503_cast_fp16, y = var_15630_to_fp16)[name = tensor("aw_chunk_1503_cast_fp16")]; tensor var_15632_to_fp16 = const()[name = tensor("op_15632_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1505_cast_fp16, y = var_15632_to_fp16)[name = tensor("aw_chunk_1505_cast_fp16")]; tensor var_15634_to_fp16 = const()[name = tensor("op_15634_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1507_cast_fp16, y = var_15634_to_fp16)[name = tensor("aw_chunk_1507_cast_fp16")]; tensor var_15636_to_fp16 = const()[name = tensor("op_15636_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1509_cast_fp16, y = var_15636_to_fp16)[name = tensor("aw_chunk_1509_cast_fp16")]; tensor var_15638_to_fp16 = const()[name = tensor("op_15638_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1511_cast_fp16, y = var_15638_to_fp16)[name = tensor("aw_chunk_1511_cast_fp16")]; tensor var_15640_to_fp16 = const()[name = tensor("op_15640_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1513_cast_fp16, y = var_15640_to_fp16)[name = tensor("aw_chunk_1513_cast_fp16")]; tensor var_15642_to_fp16 = const()[name = tensor("op_15642_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1515_cast_fp16, y = var_15642_to_fp16)[name = tensor("aw_chunk_1515_cast_fp16")]; tensor var_15644_to_fp16 = const()[name = tensor("op_15644_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1517_cast_fp16, y = var_15644_to_fp16)[name = tensor("aw_chunk_1517_cast_fp16")]; tensor var_15646_to_fp16 = const()[name = tensor("op_15646_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1519_cast_fp16, y = var_15646_to_fp16)[name = tensor("aw_chunk_1519_cast_fp16")]; tensor var_15648_to_fp16 = const()[name = tensor("op_15648_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1521_cast_fp16, y = var_15648_to_fp16)[name = tensor("aw_chunk_1521_cast_fp16")]; tensor var_15650_to_fp16 = const()[name = tensor("op_15650_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1523_cast_fp16, y = var_15650_to_fp16)[name = tensor("aw_chunk_1523_cast_fp16")]; tensor var_15652_to_fp16 = const()[name = tensor("op_15652_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1525_cast_fp16, y = var_15652_to_fp16)[name = tensor("aw_chunk_1525_cast_fp16")]; tensor var_15654_to_fp16 = const()[name = tensor("op_15654_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1527_cast_fp16, y = var_15654_to_fp16)[name = tensor("aw_chunk_1527_cast_fp16")]; tensor var_15656_to_fp16 = const()[name = tensor("op_15656_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1529_cast_fp16, y = var_15656_to_fp16)[name = tensor("aw_chunk_1529_cast_fp16")]; tensor var_15658_to_fp16 = const()[name = tensor("op_15658_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1531_cast_fp16, y = var_15658_to_fp16)[name = tensor("aw_chunk_1531_cast_fp16")]; tensor var_15660_to_fp16 = const()[name = tensor("op_15660_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1533_cast_fp16, y = var_15660_to_fp16)[name = tensor("aw_chunk_1533_cast_fp16")]; tensor var_15662_to_fp16 = const()[name = tensor("op_15662_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1535_cast_fp16, y = var_15662_to_fp16)[name = tensor("aw_chunk_1535_cast_fp16")]; tensor var_15664_to_fp16 = const()[name = tensor("op_15664_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1537_cast_fp16, y = var_15664_to_fp16)[name = tensor("aw_chunk_1537_cast_fp16")]; tensor var_15666_to_fp16 = const()[name = tensor("op_15666_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1539_cast_fp16, y = var_15666_to_fp16)[name = tensor("aw_chunk_1539_cast_fp16")]; tensor var_15668_to_fp16 = const()[name = tensor("op_15668_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1541_cast_fp16, y = var_15668_to_fp16)[name = tensor("aw_chunk_1541_cast_fp16")]; tensor var_15670_to_fp16 = const()[name = tensor("op_15670_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1543_cast_fp16, y = var_15670_to_fp16)[name = tensor("aw_chunk_1543_cast_fp16")]; tensor var_15672_to_fp16 = const()[name = tensor("op_15672_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1545_cast_fp16, y = var_15672_to_fp16)[name = tensor("aw_chunk_1545_cast_fp16")]; tensor var_15674_to_fp16 = const()[name = tensor("op_15674_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1547_cast_fp16, y = var_15674_to_fp16)[name = tensor("aw_chunk_1547_cast_fp16")]; tensor var_15676_to_fp16 = const()[name = tensor("op_15676_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1549_cast_fp16, y = var_15676_to_fp16)[name = tensor("aw_chunk_1549_cast_fp16")]; tensor var_15678_to_fp16 = const()[name = tensor("op_15678_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1551_cast_fp16, y = var_15678_to_fp16)[name = tensor("aw_chunk_1551_cast_fp16")]; tensor var_15680_to_fp16 = const()[name = tensor("op_15680_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1553_cast_fp16, y = var_15680_to_fp16)[name = tensor("aw_chunk_1553_cast_fp16")]; tensor var_15682_to_fp16 = const()[name = tensor("op_15682_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1555_cast_fp16, y = var_15682_to_fp16)[name = tensor("aw_chunk_1555_cast_fp16")]; tensor var_15684_to_fp16 = const()[name = tensor("op_15684_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1557_cast_fp16, y = var_15684_to_fp16)[name = tensor("aw_chunk_1557_cast_fp16")]; tensor var_15686_to_fp16 = const()[name = tensor("op_15686_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1559_cast_fp16, y = var_15686_to_fp16)[name = tensor("aw_chunk_1559_cast_fp16")]; tensor var_15688_to_fp16 = const()[name = tensor("op_15688_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1561_cast_fp16, y = var_15688_to_fp16)[name = tensor("aw_chunk_1561_cast_fp16")]; tensor var_15690_to_fp16 = const()[name = tensor("op_15690_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1563_cast_fp16, y = var_15690_to_fp16)[name = tensor("aw_chunk_1563_cast_fp16")]; tensor var_15692_to_fp16 = const()[name = tensor("op_15692_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1565_cast_fp16, y = var_15692_to_fp16)[name = tensor("aw_chunk_1565_cast_fp16")]; tensor var_15694_to_fp16 = const()[name = tensor("op_15694_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1567_cast_fp16, y = var_15694_to_fp16)[name = tensor("aw_chunk_1567_cast_fp16")]; tensor var_15696_to_fp16 = const()[name = tensor("op_15696_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1569_cast_fp16, y = var_15696_to_fp16)[name = tensor("aw_chunk_1569_cast_fp16")]; tensor var_15698_to_fp16 = const()[name = tensor("op_15698_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1571_cast_fp16, y = var_15698_to_fp16)[name = tensor("aw_chunk_1571_cast_fp16")]; tensor var_15700_to_fp16 = const()[name = tensor("op_15700_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1573_cast_fp16, y = var_15700_to_fp16)[name = tensor("aw_chunk_1573_cast_fp16")]; tensor var_15702_to_fp16 = const()[name = tensor("op_15702_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1575_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1575_cast_fp16, y = var_15702_to_fp16)[name = tensor("aw_chunk_1575_cast_fp16")]; tensor var_15704_to_fp16 = const()[name = tensor("op_15704_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1577_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1577_cast_fp16, y = var_15704_to_fp16)[name = tensor("aw_chunk_1577_cast_fp16")]; tensor var_15706_to_fp16 = const()[name = tensor("op_15706_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1579_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1579_cast_fp16, y = var_15706_to_fp16)[name = tensor("aw_chunk_1579_cast_fp16")]; tensor var_15708_to_fp16 = const()[name = tensor("op_15708_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1581_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1581_cast_fp16, y = var_15708_to_fp16)[name = tensor("aw_chunk_1581_cast_fp16")]; tensor var_15710_to_fp16 = const()[name = tensor("op_15710_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1583_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1583_cast_fp16, y = var_15710_to_fp16)[name = tensor("aw_chunk_1583_cast_fp16")]; tensor var_15712_to_fp16 = const()[name = tensor("op_15712_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1585_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1585_cast_fp16, y = var_15712_to_fp16)[name = tensor("aw_chunk_1585_cast_fp16")]; tensor var_15714_to_fp16 = const()[name = tensor("op_15714_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1587_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1587_cast_fp16, y = var_15714_to_fp16)[name = tensor("aw_chunk_1587_cast_fp16")]; tensor var_15716_to_fp16 = const()[name = tensor("op_15716_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1589_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1589_cast_fp16, y = var_15716_to_fp16)[name = tensor("aw_chunk_1589_cast_fp16")]; tensor var_15718_to_fp16 = const()[name = tensor("op_15718_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1591_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1591_cast_fp16, y = var_15718_to_fp16)[name = tensor("aw_chunk_1591_cast_fp16")]; tensor var_15720_to_fp16 = const()[name = tensor("op_15720_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1593_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1593_cast_fp16, y = var_15720_to_fp16)[name = tensor("aw_chunk_1593_cast_fp16")]; tensor var_15722_to_fp16 = const()[name = tensor("op_15722_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1595_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1595_cast_fp16, y = var_15722_to_fp16)[name = tensor("aw_chunk_1595_cast_fp16")]; tensor var_15724_to_fp16 = const()[name = tensor("op_15724_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1597_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1597_cast_fp16, y = var_15724_to_fp16)[name = tensor("aw_chunk_1597_cast_fp16")]; tensor var_15726_to_fp16 = const()[name = tensor("op_15726_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1599_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1599_cast_fp16, y = var_15726_to_fp16)[name = tensor("aw_chunk_1599_cast_fp16")]; tensor var_15728_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1441_cast_fp16)[name = tensor("op_15728_cast_fp16")]; tensor var_15729_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1443_cast_fp16)[name = tensor("op_15729_cast_fp16")]; tensor var_15730_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1445_cast_fp16)[name = tensor("op_15730_cast_fp16")]; tensor var_15731_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1447_cast_fp16)[name = tensor("op_15731_cast_fp16")]; tensor var_15732_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1449_cast_fp16)[name = tensor("op_15732_cast_fp16")]; tensor var_15733_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1451_cast_fp16)[name = tensor("op_15733_cast_fp16")]; tensor var_15734_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1453_cast_fp16)[name = tensor("op_15734_cast_fp16")]; tensor var_15735_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1455_cast_fp16)[name = tensor("op_15735_cast_fp16")]; tensor var_15736_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1457_cast_fp16)[name = tensor("op_15736_cast_fp16")]; tensor var_15737_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1459_cast_fp16)[name = tensor("op_15737_cast_fp16")]; tensor var_15738_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1461_cast_fp16)[name = tensor("op_15738_cast_fp16")]; tensor var_15739_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1463_cast_fp16)[name = tensor("op_15739_cast_fp16")]; tensor var_15740_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1465_cast_fp16)[name = tensor("op_15740_cast_fp16")]; tensor var_15741_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1467_cast_fp16)[name = tensor("op_15741_cast_fp16")]; tensor var_15742_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1469_cast_fp16)[name = tensor("op_15742_cast_fp16")]; tensor var_15743_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1471_cast_fp16)[name = tensor("op_15743_cast_fp16")]; tensor var_15744_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1473_cast_fp16)[name = tensor("op_15744_cast_fp16")]; tensor var_15745_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1475_cast_fp16)[name = tensor("op_15745_cast_fp16")]; tensor var_15746_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1477_cast_fp16)[name = tensor("op_15746_cast_fp16")]; tensor var_15747_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1479_cast_fp16)[name = tensor("op_15747_cast_fp16")]; tensor var_15748_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1481_cast_fp16)[name = tensor("op_15748_cast_fp16")]; tensor var_15749_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1483_cast_fp16)[name = tensor("op_15749_cast_fp16")]; tensor var_15750_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1485_cast_fp16)[name = tensor("op_15750_cast_fp16")]; tensor var_15751_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1487_cast_fp16)[name = tensor("op_15751_cast_fp16")]; tensor var_15752_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1489_cast_fp16)[name = tensor("op_15752_cast_fp16")]; tensor var_15753_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1491_cast_fp16)[name = tensor("op_15753_cast_fp16")]; tensor var_15754_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1493_cast_fp16)[name = tensor("op_15754_cast_fp16")]; tensor var_15755_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1495_cast_fp16)[name = tensor("op_15755_cast_fp16")]; tensor var_15756_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1497_cast_fp16)[name = tensor("op_15756_cast_fp16")]; tensor var_15757_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1499_cast_fp16)[name = tensor("op_15757_cast_fp16")]; tensor var_15758_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1501_cast_fp16)[name = tensor("op_15758_cast_fp16")]; tensor var_15759_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1503_cast_fp16)[name = tensor("op_15759_cast_fp16")]; tensor var_15760_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1505_cast_fp16)[name = tensor("op_15760_cast_fp16")]; tensor var_15761_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1507_cast_fp16)[name = tensor("op_15761_cast_fp16")]; tensor var_15762_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1509_cast_fp16)[name = tensor("op_15762_cast_fp16")]; tensor var_15763_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1511_cast_fp16)[name = tensor("op_15763_cast_fp16")]; tensor var_15764_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1513_cast_fp16)[name = tensor("op_15764_cast_fp16")]; tensor var_15765_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1515_cast_fp16)[name = tensor("op_15765_cast_fp16")]; tensor var_15766_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1517_cast_fp16)[name = tensor("op_15766_cast_fp16")]; tensor var_15767_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1519_cast_fp16)[name = tensor("op_15767_cast_fp16")]; tensor var_15768_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1521_cast_fp16)[name = tensor("op_15768_cast_fp16")]; tensor var_15769_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1523_cast_fp16)[name = tensor("op_15769_cast_fp16")]; tensor var_15770_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1525_cast_fp16)[name = tensor("op_15770_cast_fp16")]; tensor var_15771_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1527_cast_fp16)[name = tensor("op_15771_cast_fp16")]; tensor var_15772_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1529_cast_fp16)[name = tensor("op_15772_cast_fp16")]; tensor var_15773_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1531_cast_fp16)[name = tensor("op_15773_cast_fp16")]; tensor var_15774_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1533_cast_fp16)[name = tensor("op_15774_cast_fp16")]; tensor var_15775_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1535_cast_fp16)[name = tensor("op_15775_cast_fp16")]; tensor var_15776_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1537_cast_fp16)[name = tensor("op_15776_cast_fp16")]; tensor var_15777_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1539_cast_fp16)[name = tensor("op_15777_cast_fp16")]; tensor var_15778_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1541_cast_fp16)[name = tensor("op_15778_cast_fp16")]; tensor var_15779_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1543_cast_fp16)[name = tensor("op_15779_cast_fp16")]; tensor var_15780_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1545_cast_fp16)[name = tensor("op_15780_cast_fp16")]; tensor var_15781_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1547_cast_fp16)[name = tensor("op_15781_cast_fp16")]; tensor var_15782_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1549_cast_fp16)[name = tensor("op_15782_cast_fp16")]; tensor var_15783_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1551_cast_fp16)[name = tensor("op_15783_cast_fp16")]; tensor var_15784_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1553_cast_fp16)[name = tensor("op_15784_cast_fp16")]; tensor var_15785_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1555_cast_fp16)[name = tensor("op_15785_cast_fp16")]; tensor var_15786_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1557_cast_fp16)[name = tensor("op_15786_cast_fp16")]; tensor var_15787_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1559_cast_fp16)[name = tensor("op_15787_cast_fp16")]; tensor var_15788_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1561_cast_fp16)[name = tensor("op_15788_cast_fp16")]; tensor var_15789_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1563_cast_fp16)[name = tensor("op_15789_cast_fp16")]; tensor var_15790_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1565_cast_fp16)[name = tensor("op_15790_cast_fp16")]; tensor var_15791_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1567_cast_fp16)[name = tensor("op_15791_cast_fp16")]; tensor var_15792_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1569_cast_fp16)[name = tensor("op_15792_cast_fp16")]; tensor var_15793_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1571_cast_fp16)[name = tensor("op_15793_cast_fp16")]; tensor var_15794_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1573_cast_fp16)[name = tensor("op_15794_cast_fp16")]; tensor var_15795_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1575_cast_fp16)[name = tensor("op_15795_cast_fp16")]; tensor var_15796_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1577_cast_fp16)[name = tensor("op_15796_cast_fp16")]; tensor var_15797_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1579_cast_fp16)[name = tensor("op_15797_cast_fp16")]; tensor var_15798_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1581_cast_fp16)[name = tensor("op_15798_cast_fp16")]; tensor var_15799_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1583_cast_fp16)[name = tensor("op_15799_cast_fp16")]; tensor var_15800_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1585_cast_fp16)[name = tensor("op_15800_cast_fp16")]; tensor var_15801_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1587_cast_fp16)[name = tensor("op_15801_cast_fp16")]; tensor var_15802_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1589_cast_fp16)[name = tensor("op_15802_cast_fp16")]; tensor var_15803_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1591_cast_fp16)[name = tensor("op_15803_cast_fp16")]; tensor var_15804_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1593_cast_fp16)[name = tensor("op_15804_cast_fp16")]; tensor var_15805_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1595_cast_fp16)[name = tensor("op_15805_cast_fp16")]; tensor var_15806_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1597_cast_fp16)[name = tensor("op_15806_cast_fp16")]; tensor var_15807_cast_fp16 = softmax(axis = var_14526, x = aw_chunk_1599_cast_fp16)[name = tensor("op_15807_cast_fp16")]; tensor var_15809_equation_0 = const()[name = tensor("op_15809_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15809_cast_fp16 = einsum(equation = var_15809_equation_0, values = (var_15329_cast_fp16, var_15728_cast_fp16))[name = tensor("op_15809_cast_fp16")]; tensor var_15811_equation_0 = const()[name = tensor("op_15811_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15811_cast_fp16 = einsum(equation = var_15811_equation_0, values = (var_15329_cast_fp16, var_15729_cast_fp16))[name = tensor("op_15811_cast_fp16")]; tensor var_15813_equation_0 = const()[name = tensor("op_15813_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15813_cast_fp16 = einsum(equation = var_15813_equation_0, values = (var_15329_cast_fp16, var_15730_cast_fp16))[name = tensor("op_15813_cast_fp16")]; tensor var_15815_equation_0 = const()[name = tensor("op_15815_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15815_cast_fp16 = einsum(equation = var_15815_equation_0, values = (var_15329_cast_fp16, var_15731_cast_fp16))[name = tensor("op_15815_cast_fp16")]; tensor var_15817_equation_0 = const()[name = tensor("op_15817_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15817_cast_fp16 = einsum(equation = var_15817_equation_0, values = (var_15333_cast_fp16, var_15732_cast_fp16))[name = tensor("op_15817_cast_fp16")]; tensor var_15819_equation_0 = const()[name = tensor("op_15819_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15819_cast_fp16 = einsum(equation = var_15819_equation_0, values = (var_15333_cast_fp16, var_15733_cast_fp16))[name = tensor("op_15819_cast_fp16")]; tensor var_15821_equation_0 = const()[name = tensor("op_15821_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15821_cast_fp16 = einsum(equation = var_15821_equation_0, values = (var_15333_cast_fp16, var_15734_cast_fp16))[name = tensor("op_15821_cast_fp16")]; tensor var_15823_equation_0 = const()[name = tensor("op_15823_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15823_cast_fp16 = einsum(equation = var_15823_equation_0, values = (var_15333_cast_fp16, var_15735_cast_fp16))[name = tensor("op_15823_cast_fp16")]; tensor var_15825_equation_0 = const()[name = tensor("op_15825_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15825_cast_fp16 = einsum(equation = var_15825_equation_0, values = (var_15337_cast_fp16, var_15736_cast_fp16))[name = tensor("op_15825_cast_fp16")]; tensor var_15827_equation_0 = const()[name = tensor("op_15827_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15827_cast_fp16 = einsum(equation = var_15827_equation_0, values = (var_15337_cast_fp16, var_15737_cast_fp16))[name = tensor("op_15827_cast_fp16")]; tensor var_15829_equation_0 = const()[name = tensor("op_15829_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15829_cast_fp16 = einsum(equation = var_15829_equation_0, values = (var_15337_cast_fp16, var_15738_cast_fp16))[name = tensor("op_15829_cast_fp16")]; tensor var_15831_equation_0 = const()[name = tensor("op_15831_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15831_cast_fp16 = einsum(equation = var_15831_equation_0, values = (var_15337_cast_fp16, var_15739_cast_fp16))[name = tensor("op_15831_cast_fp16")]; tensor var_15833_equation_0 = const()[name = tensor("op_15833_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15833_cast_fp16 = einsum(equation = var_15833_equation_0, values = (var_15341_cast_fp16, var_15740_cast_fp16))[name = tensor("op_15833_cast_fp16")]; tensor var_15835_equation_0 = const()[name = tensor("op_15835_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15835_cast_fp16 = einsum(equation = var_15835_equation_0, values = (var_15341_cast_fp16, var_15741_cast_fp16))[name = tensor("op_15835_cast_fp16")]; tensor var_15837_equation_0 = const()[name = tensor("op_15837_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15837_cast_fp16 = einsum(equation = var_15837_equation_0, values = (var_15341_cast_fp16, var_15742_cast_fp16))[name = tensor("op_15837_cast_fp16")]; tensor var_15839_equation_0 = const()[name = tensor("op_15839_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15839_cast_fp16 = einsum(equation = var_15839_equation_0, values = (var_15341_cast_fp16, var_15743_cast_fp16))[name = tensor("op_15839_cast_fp16")]; tensor var_15841_equation_0 = const()[name = tensor("op_15841_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15841_cast_fp16 = einsum(equation = var_15841_equation_0, values = (var_15345_cast_fp16, var_15744_cast_fp16))[name = tensor("op_15841_cast_fp16")]; tensor var_15843_equation_0 = const()[name = tensor("op_15843_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15843_cast_fp16 = einsum(equation = var_15843_equation_0, values = (var_15345_cast_fp16, var_15745_cast_fp16))[name = tensor("op_15843_cast_fp16")]; tensor var_15845_equation_0 = const()[name = tensor("op_15845_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15845_cast_fp16 = einsum(equation = var_15845_equation_0, values = (var_15345_cast_fp16, var_15746_cast_fp16))[name = tensor("op_15845_cast_fp16")]; tensor var_15847_equation_0 = const()[name = tensor("op_15847_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15847_cast_fp16 = einsum(equation = var_15847_equation_0, values = (var_15345_cast_fp16, var_15747_cast_fp16))[name = tensor("op_15847_cast_fp16")]; tensor var_15849_equation_0 = const()[name = tensor("op_15849_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15849_cast_fp16 = einsum(equation = var_15849_equation_0, values = (var_15349_cast_fp16, var_15748_cast_fp16))[name = tensor("op_15849_cast_fp16")]; tensor var_15851_equation_0 = const()[name = tensor("op_15851_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15851_cast_fp16 = einsum(equation = var_15851_equation_0, values = (var_15349_cast_fp16, var_15749_cast_fp16))[name = tensor("op_15851_cast_fp16")]; tensor var_15853_equation_0 = const()[name = tensor("op_15853_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15853_cast_fp16 = einsum(equation = var_15853_equation_0, values = (var_15349_cast_fp16, var_15750_cast_fp16))[name = tensor("op_15853_cast_fp16")]; tensor var_15855_equation_0 = const()[name = tensor("op_15855_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15855_cast_fp16 = einsum(equation = var_15855_equation_0, values = (var_15349_cast_fp16, var_15751_cast_fp16))[name = tensor("op_15855_cast_fp16")]; tensor var_15857_equation_0 = const()[name = tensor("op_15857_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15857_cast_fp16 = einsum(equation = var_15857_equation_0, values = (var_15353_cast_fp16, var_15752_cast_fp16))[name = tensor("op_15857_cast_fp16")]; tensor var_15859_equation_0 = const()[name = tensor("op_15859_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15859_cast_fp16 = einsum(equation = var_15859_equation_0, values = (var_15353_cast_fp16, var_15753_cast_fp16))[name = tensor("op_15859_cast_fp16")]; tensor var_15861_equation_0 = const()[name = tensor("op_15861_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15861_cast_fp16 = einsum(equation = var_15861_equation_0, values = (var_15353_cast_fp16, var_15754_cast_fp16))[name = tensor("op_15861_cast_fp16")]; tensor var_15863_equation_0 = const()[name = tensor("op_15863_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15863_cast_fp16 = einsum(equation = var_15863_equation_0, values = (var_15353_cast_fp16, var_15755_cast_fp16))[name = tensor("op_15863_cast_fp16")]; tensor var_15865_equation_0 = const()[name = tensor("op_15865_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15865_cast_fp16 = einsum(equation = var_15865_equation_0, values = (var_15357_cast_fp16, var_15756_cast_fp16))[name = tensor("op_15865_cast_fp16")]; tensor var_15867_equation_0 = const()[name = tensor("op_15867_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15867_cast_fp16 = einsum(equation = var_15867_equation_0, values = (var_15357_cast_fp16, var_15757_cast_fp16))[name = tensor("op_15867_cast_fp16")]; tensor var_15869_equation_0 = const()[name = tensor("op_15869_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15869_cast_fp16 = einsum(equation = var_15869_equation_0, values = (var_15357_cast_fp16, var_15758_cast_fp16))[name = tensor("op_15869_cast_fp16")]; tensor var_15871_equation_0 = const()[name = tensor("op_15871_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15871_cast_fp16 = einsum(equation = var_15871_equation_0, values = (var_15357_cast_fp16, var_15759_cast_fp16))[name = tensor("op_15871_cast_fp16")]; tensor var_15873_equation_0 = const()[name = tensor("op_15873_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15873_cast_fp16 = einsum(equation = var_15873_equation_0, values = (var_15361_cast_fp16, var_15760_cast_fp16))[name = tensor("op_15873_cast_fp16")]; tensor var_15875_equation_0 = const()[name = tensor("op_15875_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15875_cast_fp16 = einsum(equation = var_15875_equation_0, values = (var_15361_cast_fp16, var_15761_cast_fp16))[name = tensor("op_15875_cast_fp16")]; tensor var_15877_equation_0 = const()[name = tensor("op_15877_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15877_cast_fp16 = einsum(equation = var_15877_equation_0, values = (var_15361_cast_fp16, var_15762_cast_fp16))[name = tensor("op_15877_cast_fp16")]; tensor var_15879_equation_0 = const()[name = tensor("op_15879_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15879_cast_fp16 = einsum(equation = var_15879_equation_0, values = (var_15361_cast_fp16, var_15763_cast_fp16))[name = tensor("op_15879_cast_fp16")]; tensor var_15881_equation_0 = const()[name = tensor("op_15881_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15881_cast_fp16 = einsum(equation = var_15881_equation_0, values = (var_15365_cast_fp16, var_15764_cast_fp16))[name = tensor("op_15881_cast_fp16")]; tensor var_15883_equation_0 = const()[name = tensor("op_15883_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15883_cast_fp16 = einsum(equation = var_15883_equation_0, values = (var_15365_cast_fp16, var_15765_cast_fp16))[name = tensor("op_15883_cast_fp16")]; tensor var_15885_equation_0 = const()[name = tensor("op_15885_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15885_cast_fp16 = einsum(equation = var_15885_equation_0, values = (var_15365_cast_fp16, var_15766_cast_fp16))[name = tensor("op_15885_cast_fp16")]; tensor var_15887_equation_0 = const()[name = tensor("op_15887_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15887_cast_fp16 = einsum(equation = var_15887_equation_0, values = (var_15365_cast_fp16, var_15767_cast_fp16))[name = tensor("op_15887_cast_fp16")]; tensor var_15889_equation_0 = const()[name = tensor("op_15889_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15889_cast_fp16 = einsum(equation = var_15889_equation_0, values = (var_15369_cast_fp16, var_15768_cast_fp16))[name = tensor("op_15889_cast_fp16")]; tensor var_15891_equation_0 = const()[name = tensor("op_15891_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15891_cast_fp16 = einsum(equation = var_15891_equation_0, values = (var_15369_cast_fp16, var_15769_cast_fp16))[name = tensor("op_15891_cast_fp16")]; tensor var_15893_equation_0 = const()[name = tensor("op_15893_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15893_cast_fp16 = einsum(equation = var_15893_equation_0, values = (var_15369_cast_fp16, var_15770_cast_fp16))[name = tensor("op_15893_cast_fp16")]; tensor var_15895_equation_0 = const()[name = tensor("op_15895_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15895_cast_fp16 = einsum(equation = var_15895_equation_0, values = (var_15369_cast_fp16, var_15771_cast_fp16))[name = tensor("op_15895_cast_fp16")]; tensor var_15897_equation_0 = const()[name = tensor("op_15897_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15897_cast_fp16 = einsum(equation = var_15897_equation_0, values = (var_15373_cast_fp16, var_15772_cast_fp16))[name = tensor("op_15897_cast_fp16")]; tensor var_15899_equation_0 = const()[name = tensor("op_15899_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15899_cast_fp16 = einsum(equation = var_15899_equation_0, values = (var_15373_cast_fp16, var_15773_cast_fp16))[name = tensor("op_15899_cast_fp16")]; tensor var_15901_equation_0 = const()[name = tensor("op_15901_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15901_cast_fp16 = einsum(equation = var_15901_equation_0, values = (var_15373_cast_fp16, var_15774_cast_fp16))[name = tensor("op_15901_cast_fp16")]; tensor var_15903_equation_0 = const()[name = tensor("op_15903_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15903_cast_fp16 = einsum(equation = var_15903_equation_0, values = (var_15373_cast_fp16, var_15775_cast_fp16))[name = tensor("op_15903_cast_fp16")]; tensor var_15905_equation_0 = const()[name = tensor("op_15905_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15905_cast_fp16 = einsum(equation = var_15905_equation_0, values = (var_15377_cast_fp16, var_15776_cast_fp16))[name = tensor("op_15905_cast_fp16")]; tensor var_15907_equation_0 = const()[name = tensor("op_15907_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15907_cast_fp16 = einsum(equation = var_15907_equation_0, values = (var_15377_cast_fp16, var_15777_cast_fp16))[name = tensor("op_15907_cast_fp16")]; tensor var_15909_equation_0 = const()[name = tensor("op_15909_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15909_cast_fp16 = einsum(equation = var_15909_equation_0, values = (var_15377_cast_fp16, var_15778_cast_fp16))[name = tensor("op_15909_cast_fp16")]; tensor var_15911_equation_0 = const()[name = tensor("op_15911_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15911_cast_fp16 = einsum(equation = var_15911_equation_0, values = (var_15377_cast_fp16, var_15779_cast_fp16))[name = tensor("op_15911_cast_fp16")]; tensor var_15913_equation_0 = const()[name = tensor("op_15913_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15913_cast_fp16 = einsum(equation = var_15913_equation_0, values = (var_15381_cast_fp16, var_15780_cast_fp16))[name = tensor("op_15913_cast_fp16")]; tensor var_15915_equation_0 = const()[name = tensor("op_15915_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15915_cast_fp16 = einsum(equation = var_15915_equation_0, values = (var_15381_cast_fp16, var_15781_cast_fp16))[name = tensor("op_15915_cast_fp16")]; tensor var_15917_equation_0 = const()[name = tensor("op_15917_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15917_cast_fp16 = einsum(equation = var_15917_equation_0, values = (var_15381_cast_fp16, var_15782_cast_fp16))[name = tensor("op_15917_cast_fp16")]; tensor var_15919_equation_0 = const()[name = tensor("op_15919_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15919_cast_fp16 = einsum(equation = var_15919_equation_0, values = (var_15381_cast_fp16, var_15783_cast_fp16))[name = tensor("op_15919_cast_fp16")]; tensor var_15921_equation_0 = const()[name = tensor("op_15921_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15921_cast_fp16 = einsum(equation = var_15921_equation_0, values = (var_15385_cast_fp16, var_15784_cast_fp16))[name = tensor("op_15921_cast_fp16")]; tensor var_15923_equation_0 = const()[name = tensor("op_15923_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15923_cast_fp16 = einsum(equation = var_15923_equation_0, values = (var_15385_cast_fp16, var_15785_cast_fp16))[name = tensor("op_15923_cast_fp16")]; tensor var_15925_equation_0 = const()[name = tensor("op_15925_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15925_cast_fp16 = einsum(equation = var_15925_equation_0, values = (var_15385_cast_fp16, var_15786_cast_fp16))[name = tensor("op_15925_cast_fp16")]; tensor var_15927_equation_0 = const()[name = tensor("op_15927_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15927_cast_fp16 = einsum(equation = var_15927_equation_0, values = (var_15385_cast_fp16, var_15787_cast_fp16))[name = tensor("op_15927_cast_fp16")]; tensor var_15929_equation_0 = const()[name = tensor("op_15929_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15929_cast_fp16 = einsum(equation = var_15929_equation_0, values = (var_15389_cast_fp16, var_15788_cast_fp16))[name = tensor("op_15929_cast_fp16")]; tensor var_15931_equation_0 = const()[name = tensor("op_15931_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15931_cast_fp16 = einsum(equation = var_15931_equation_0, values = (var_15389_cast_fp16, var_15789_cast_fp16))[name = tensor("op_15931_cast_fp16")]; tensor var_15933_equation_0 = const()[name = tensor("op_15933_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15933_cast_fp16 = einsum(equation = var_15933_equation_0, values = (var_15389_cast_fp16, var_15790_cast_fp16))[name = tensor("op_15933_cast_fp16")]; tensor var_15935_equation_0 = const()[name = tensor("op_15935_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15935_cast_fp16 = einsum(equation = var_15935_equation_0, values = (var_15389_cast_fp16, var_15791_cast_fp16))[name = tensor("op_15935_cast_fp16")]; tensor var_15937_equation_0 = const()[name = tensor("op_15937_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15937_cast_fp16 = einsum(equation = var_15937_equation_0, values = (var_15393_cast_fp16, var_15792_cast_fp16))[name = tensor("op_15937_cast_fp16")]; tensor var_15939_equation_0 = const()[name = tensor("op_15939_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15939_cast_fp16 = einsum(equation = var_15939_equation_0, values = (var_15393_cast_fp16, var_15793_cast_fp16))[name = tensor("op_15939_cast_fp16")]; tensor var_15941_equation_0 = const()[name = tensor("op_15941_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15941_cast_fp16 = einsum(equation = var_15941_equation_0, values = (var_15393_cast_fp16, var_15794_cast_fp16))[name = tensor("op_15941_cast_fp16")]; tensor var_15943_equation_0 = const()[name = tensor("op_15943_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15943_cast_fp16 = einsum(equation = var_15943_equation_0, values = (var_15393_cast_fp16, var_15795_cast_fp16))[name = tensor("op_15943_cast_fp16")]; tensor var_15945_equation_0 = const()[name = tensor("op_15945_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15945_cast_fp16 = einsum(equation = var_15945_equation_0, values = (var_15397_cast_fp16, var_15796_cast_fp16))[name = tensor("op_15945_cast_fp16")]; tensor var_15947_equation_0 = const()[name = tensor("op_15947_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15947_cast_fp16 = einsum(equation = var_15947_equation_0, values = (var_15397_cast_fp16, var_15797_cast_fp16))[name = tensor("op_15947_cast_fp16")]; tensor var_15949_equation_0 = const()[name = tensor("op_15949_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15949_cast_fp16 = einsum(equation = var_15949_equation_0, values = (var_15397_cast_fp16, var_15798_cast_fp16))[name = tensor("op_15949_cast_fp16")]; tensor var_15951_equation_0 = const()[name = tensor("op_15951_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15951_cast_fp16 = einsum(equation = var_15951_equation_0, values = (var_15397_cast_fp16, var_15799_cast_fp16))[name = tensor("op_15951_cast_fp16")]; tensor var_15953_equation_0 = const()[name = tensor("op_15953_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15953_cast_fp16 = einsum(equation = var_15953_equation_0, values = (var_15401_cast_fp16, var_15800_cast_fp16))[name = tensor("op_15953_cast_fp16")]; tensor var_15955_equation_0 = const()[name = tensor("op_15955_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15955_cast_fp16 = einsum(equation = var_15955_equation_0, values = (var_15401_cast_fp16, var_15801_cast_fp16))[name = tensor("op_15955_cast_fp16")]; tensor var_15957_equation_0 = const()[name = tensor("op_15957_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15957_cast_fp16 = einsum(equation = var_15957_equation_0, values = (var_15401_cast_fp16, var_15802_cast_fp16))[name = tensor("op_15957_cast_fp16")]; tensor var_15959_equation_0 = const()[name = tensor("op_15959_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15959_cast_fp16 = einsum(equation = var_15959_equation_0, values = (var_15401_cast_fp16, var_15803_cast_fp16))[name = tensor("op_15959_cast_fp16")]; tensor var_15961_equation_0 = const()[name = tensor("op_15961_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15961_cast_fp16 = einsum(equation = var_15961_equation_0, values = (var_15405_cast_fp16, var_15804_cast_fp16))[name = tensor("op_15961_cast_fp16")]; tensor var_15963_equation_0 = const()[name = tensor("op_15963_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15963_cast_fp16 = einsum(equation = var_15963_equation_0, values = (var_15405_cast_fp16, var_15805_cast_fp16))[name = tensor("op_15963_cast_fp16")]; tensor var_15965_equation_0 = const()[name = tensor("op_15965_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15965_cast_fp16 = einsum(equation = var_15965_equation_0, values = (var_15405_cast_fp16, var_15806_cast_fp16))[name = tensor("op_15965_cast_fp16")]; tensor var_15967_equation_0 = const()[name = tensor("op_15967_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_15967_cast_fp16 = einsum(equation = var_15967_equation_0, values = (var_15405_cast_fp16, var_15807_cast_fp16))[name = tensor("op_15967_cast_fp16")]; tensor var_15969_interleave_0 = const()[name = tensor("op_15969_interleave_0"), val = tensor(false)]; tensor var_15969_cast_fp16 = concat(axis = var_14501, interleave = var_15969_interleave_0, values = (var_15809_cast_fp16, var_15811_cast_fp16, var_15813_cast_fp16, var_15815_cast_fp16))[name = tensor("op_15969_cast_fp16")]; tensor var_15971_interleave_0 = const()[name = tensor("op_15971_interleave_0"), val = tensor(false)]; tensor var_15971_cast_fp16 = concat(axis = var_14501, interleave = var_15971_interleave_0, values = (var_15817_cast_fp16, var_15819_cast_fp16, var_15821_cast_fp16, var_15823_cast_fp16))[name = tensor("op_15971_cast_fp16")]; tensor var_15973_interleave_0 = const()[name = tensor("op_15973_interleave_0"), val = tensor(false)]; tensor var_15973_cast_fp16 = concat(axis = var_14501, interleave = var_15973_interleave_0, values = (var_15825_cast_fp16, var_15827_cast_fp16, var_15829_cast_fp16, var_15831_cast_fp16))[name = tensor("op_15973_cast_fp16")]; tensor var_15975_interleave_0 = const()[name = tensor("op_15975_interleave_0"), val = tensor(false)]; tensor var_15975_cast_fp16 = concat(axis = var_14501, interleave = var_15975_interleave_0, values = (var_15833_cast_fp16, var_15835_cast_fp16, var_15837_cast_fp16, var_15839_cast_fp16))[name = tensor("op_15975_cast_fp16")]; tensor var_15977_interleave_0 = const()[name = tensor("op_15977_interleave_0"), val = tensor(false)]; tensor var_15977_cast_fp16 = concat(axis = var_14501, interleave = var_15977_interleave_0, values = (var_15841_cast_fp16, var_15843_cast_fp16, var_15845_cast_fp16, var_15847_cast_fp16))[name = tensor("op_15977_cast_fp16")]; tensor var_15979_interleave_0 = const()[name = tensor("op_15979_interleave_0"), val = tensor(false)]; tensor var_15979_cast_fp16 = concat(axis = var_14501, interleave = var_15979_interleave_0, values = (var_15849_cast_fp16, var_15851_cast_fp16, var_15853_cast_fp16, var_15855_cast_fp16))[name = tensor("op_15979_cast_fp16")]; tensor var_15981_interleave_0 = const()[name = tensor("op_15981_interleave_0"), val = tensor(false)]; tensor var_15981_cast_fp16 = concat(axis = var_14501, interleave = var_15981_interleave_0, values = (var_15857_cast_fp16, var_15859_cast_fp16, var_15861_cast_fp16, var_15863_cast_fp16))[name = tensor("op_15981_cast_fp16")]; tensor var_15983_interleave_0 = const()[name = tensor("op_15983_interleave_0"), val = tensor(false)]; tensor var_15983_cast_fp16 = concat(axis = var_14501, interleave = var_15983_interleave_0, values = (var_15865_cast_fp16, var_15867_cast_fp16, var_15869_cast_fp16, var_15871_cast_fp16))[name = tensor("op_15983_cast_fp16")]; tensor var_15985_interleave_0 = const()[name = tensor("op_15985_interleave_0"), val = tensor(false)]; tensor var_15985_cast_fp16 = concat(axis = var_14501, interleave = var_15985_interleave_0, values = (var_15873_cast_fp16, var_15875_cast_fp16, var_15877_cast_fp16, var_15879_cast_fp16))[name = tensor("op_15985_cast_fp16")]; tensor var_15987_interleave_0 = const()[name = tensor("op_15987_interleave_0"), val = tensor(false)]; tensor var_15987_cast_fp16 = concat(axis = var_14501, interleave = var_15987_interleave_0, values = (var_15881_cast_fp16, var_15883_cast_fp16, var_15885_cast_fp16, var_15887_cast_fp16))[name = tensor("op_15987_cast_fp16")]; tensor var_15989_interleave_0 = const()[name = tensor("op_15989_interleave_0"), val = tensor(false)]; tensor var_15989_cast_fp16 = concat(axis = var_14501, interleave = var_15989_interleave_0, values = (var_15889_cast_fp16, var_15891_cast_fp16, var_15893_cast_fp16, var_15895_cast_fp16))[name = tensor("op_15989_cast_fp16")]; tensor var_15991_interleave_0 = const()[name = tensor("op_15991_interleave_0"), val = tensor(false)]; tensor var_15991_cast_fp16 = concat(axis = var_14501, interleave = var_15991_interleave_0, values = (var_15897_cast_fp16, var_15899_cast_fp16, var_15901_cast_fp16, var_15903_cast_fp16))[name = tensor("op_15991_cast_fp16")]; tensor var_15993_interleave_0 = const()[name = tensor("op_15993_interleave_0"), val = tensor(false)]; tensor var_15993_cast_fp16 = concat(axis = var_14501, interleave = var_15993_interleave_0, values = (var_15905_cast_fp16, var_15907_cast_fp16, var_15909_cast_fp16, var_15911_cast_fp16))[name = tensor("op_15993_cast_fp16")]; tensor var_15995_interleave_0 = const()[name = tensor("op_15995_interleave_0"), val = tensor(false)]; tensor var_15995_cast_fp16 = concat(axis = var_14501, interleave = var_15995_interleave_0, values = (var_15913_cast_fp16, var_15915_cast_fp16, var_15917_cast_fp16, var_15919_cast_fp16))[name = tensor("op_15995_cast_fp16")]; tensor var_15997_interleave_0 = const()[name = tensor("op_15997_interleave_0"), val = tensor(false)]; tensor var_15997_cast_fp16 = concat(axis = var_14501, interleave = var_15997_interleave_0, values = (var_15921_cast_fp16, var_15923_cast_fp16, var_15925_cast_fp16, var_15927_cast_fp16))[name = tensor("op_15997_cast_fp16")]; tensor var_15999_interleave_0 = const()[name = tensor("op_15999_interleave_0"), val = tensor(false)]; tensor var_15999_cast_fp16 = concat(axis = var_14501, interleave = var_15999_interleave_0, values = (var_15929_cast_fp16, var_15931_cast_fp16, var_15933_cast_fp16, var_15935_cast_fp16))[name = tensor("op_15999_cast_fp16")]; tensor var_16001_interleave_0 = const()[name = tensor("op_16001_interleave_0"), val = tensor(false)]; tensor var_16001_cast_fp16 = concat(axis = var_14501, interleave = var_16001_interleave_0, values = (var_15937_cast_fp16, var_15939_cast_fp16, var_15941_cast_fp16, var_15943_cast_fp16))[name = tensor("op_16001_cast_fp16")]; tensor var_16003_interleave_0 = const()[name = tensor("op_16003_interleave_0"), val = tensor(false)]; tensor var_16003_cast_fp16 = concat(axis = var_14501, interleave = var_16003_interleave_0, values = (var_15945_cast_fp16, var_15947_cast_fp16, var_15949_cast_fp16, var_15951_cast_fp16))[name = tensor("op_16003_cast_fp16")]; tensor var_16005_interleave_0 = const()[name = tensor("op_16005_interleave_0"), val = tensor(false)]; tensor var_16005_cast_fp16 = concat(axis = var_14501, interleave = var_16005_interleave_0, values = (var_15953_cast_fp16, var_15955_cast_fp16, var_15957_cast_fp16, var_15959_cast_fp16))[name = tensor("op_16005_cast_fp16")]; tensor var_16007_interleave_0 = const()[name = tensor("op_16007_interleave_0"), val = tensor(false)]; tensor var_16007_cast_fp16 = concat(axis = var_14501, interleave = var_16007_interleave_0, values = (var_15961_cast_fp16, var_15963_cast_fp16, var_15965_cast_fp16, var_15967_cast_fp16))[name = tensor("op_16007_cast_fp16")]; tensor input_73_interleave_0 = const()[name = tensor("input_73_interleave_0"), val = tensor(false)]; tensor input_73_cast_fp16 = concat(axis = var_14526, interleave = input_73_interleave_0, values = (var_15969_cast_fp16, var_15971_cast_fp16, var_15973_cast_fp16, var_15975_cast_fp16, var_15977_cast_fp16, var_15979_cast_fp16, var_15981_cast_fp16, var_15983_cast_fp16, var_15985_cast_fp16, var_15987_cast_fp16, var_15989_cast_fp16, var_15991_cast_fp16, var_15993_cast_fp16, var_15995_cast_fp16, var_15997_cast_fp16, var_15999_cast_fp16, var_16001_cast_fp16, var_16003_cast_fp16, var_16005_cast_fp16, var_16007_cast_fp16))[name = tensor("input_73_cast_fp16")]; tensor var_16018_pad_type_0 = const()[name = tensor("op_16018_pad_type_0"), val = tensor("valid")]; tensor var_16018_strides_0 = const()[name = tensor("op_16018_strides_0"), val = tensor([1, 1])]; tensor var_16018_pad_0 = const()[name = tensor("op_16018_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16018_dilations_0 = const()[name = tensor("op_16018_dilations_0"), val = tensor([1, 1])]; tensor var_16018_groups_0 = const()[name = tensor("op_16018_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135705024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136524288))), name = tensor("layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_9_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136524416)))]; tensor var_16018_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_16018_dilations_0, groups = var_16018_groups_0, pad = var_16018_pad_0, pad_type = var_16018_pad_type_0, strides = var_16018_strides_0, weight = layers_9_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_73_cast_fp16)[name = tensor("op_16018_cast_fp16")]; tensor var_16024_pad_type_0 = const()[name = tensor("op_16024_pad_type_0"), val = tensor("valid")]; tensor var_16024_strides_0 = const()[name = tensor("op_16024_strides_0"), val = tensor([1, 1])]; tensor var_16024_pad_0 = const()[name = tensor("op_16024_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16024_dilations_0 = const()[name = tensor("op_16024_dilations_0"), val = tensor([1, 1])]; tensor var_16024_groups_0 = const()[name = tensor("op_16024_groups_0"), val = tensor(1)]; tensor layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136545344))), name = tensor("layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136527040))), shape = tensor([1280, 1280, 1, 1])]; tensor var_16024_cast_fp16 = conv(dilations = var_16024_dilations_0, groups = var_16024_groups_0, pad = var_16024_pad_0, pad_type = var_16024_pad_type_0, strides = var_16024_strides_0, weight = layers_9_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_73_cast_fp16)[name = tensor("op_16024_cast_fp16")]; tensor obj_39_cast_fp16 = add(x = var_16018_cast_fp16, y = var_16024_cast_fp16)[name = tensor("obj_39_cast_fp16")]; tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; tensor out_39_axes_0 = const()[name = tensor("out_39_axes_0"), val = tensor([1])]; tensor var_16035_to_fp16 = const()[name = tensor("op_16035_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_39_cast_fp16 = layer_norm(axes = out_39_axes_0, epsilon = var_16035_to_fp16, x = inputs_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136750208)))]; tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136752832)))]; tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; tensor var_16053_pad_type_0 = const()[name = tensor("op_16053_pad_type_0"), val = tensor("valid")]; tensor var_16053_strides_0 = const()[name = tensor("op_16053_strides_0"), val = tensor([1, 1])]; tensor var_16053_pad_0 = const()[name = tensor("op_16053_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16053_dilations_0 = const()[name = tensor("op_16053_dilations_0"), val = tensor([1, 1])]; tensor var_16053_groups_0 = const()[name = tensor("op_16053_groups_0"), val = tensor(1)]; tensor layers_9_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136755456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140032320))), name = tensor("layers_9_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_9_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140032448)))]; tensor var_16053_cast_fp16 = conv(bias = layers_9_fc1_inlier_module_bias_to_fp16, dilations = var_16053_dilations_0, groups = var_16053_groups_0, pad = var_16053_pad_0, pad_type = var_16053_pad_type_0, strides = var_16053_strides_0, weight = layers_9_fc1_inlier_module_weight_to_fp16_palettized, x = input_75_cast_fp16)[name = tensor("op_16053_cast_fp16")]; tensor var_16059_pad_type_0 = const()[name = tensor("op_16059_pad_type_0"), val = tensor("valid")]; tensor var_16059_strides_0 = const()[name = tensor("op_16059_strides_0"), val = tensor([1, 1])]; tensor var_16059_pad_0 = const()[name = tensor("op_16059_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16059_dilations_0 = const()[name = tensor("op_16059_dilations_0"), val = tensor([1, 1])]; tensor var_16059_groups_0 = const()[name = tensor("op_16059_groups_0"), val = tensor(1)]; tensor layers_9_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140073152))), name = tensor("layers_9_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140042752))), shape = tensor([5120, 1280, 1, 1])]; tensor var_16059_cast_fp16 = conv(dilations = var_16059_dilations_0, groups = var_16059_groups_0, pad = var_16059_pad_0, pad_type = var_16059_pad_type_0, strides = var_16059_strides_0, weight = layers_9_fc1_outlier_module_weight_to_fp16_sparsified, x = input_75_cast_fp16)[name = tensor("op_16059_cast_fp16")]; tensor input_77_cast_fp16 = add(x = var_16053_cast_fp16, y = var_16059_cast_fp16)[name = tensor("input_77_cast_fp16")]; tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; tensor var_16070_pad_type_0 = const()[name = tensor("op_16070_pad_type_0"), val = tensor("valid")]; tensor var_16070_strides_0 = const()[name = tensor("op_16070_strides_0"), val = tensor([1, 1])]; tensor var_16070_pad_0 = const()[name = tensor("op_16070_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16070_dilations_0 = const()[name = tensor("op_16070_dilations_0"), val = tensor([1, 1])]; tensor var_16070_groups_0 = const()[name = tensor("op_16070_groups_0"), val = tensor(1)]; tensor layers_9_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(140892416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144169280))), name = tensor("layers_9_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_9_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_9_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144169408)))]; tensor var_16070_cast_fp16 = conv(bias = layers_9_fc2_inlier_module_bias_to_fp16, dilations = var_16070_dilations_0, groups = var_16070_groups_0, pad = var_16070_pad_0, pad_type = var_16070_pad_type_0, strides = var_16070_strides_0, weight = layers_9_fc2_inlier_module_weight_to_fp16_palettized, x = input_79_cast_fp16)[name = tensor("op_16070_cast_fp16")]; tensor var_16076_pad_type_0 = const()[name = tensor("op_16076_pad_type_0"), val = tensor("valid")]; tensor var_16076_strides_0 = const()[name = tensor("op_16076_strides_0"), val = tensor([1, 1])]; tensor var_16076_pad_0 = const()[name = tensor("op_16076_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16076_dilations_0 = const()[name = tensor("op_16076_dilations_0"), val = tensor([1, 1])]; tensor var_16076_groups_0 = const()[name = tensor("op_16076_groups_0"), val = tensor(1)]; tensor layers_9_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144438144))), name = tensor("layers_9_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(144172032))), shape = tensor([1280, 5120, 1, 1])]; tensor var_16076_cast_fp16 = conv(dilations = var_16076_dilations_0, groups = var_16076_groups_0, pad = var_16076_pad_0, pad_type = var_16076_pad_type_0, strides = var_16076_strides_0, weight = layers_9_fc2_outlier_module_weight_to_fp16_sparsified, x = input_79_cast_fp16)[name = tensor("op_16076_cast_fp16")]; tensor hidden_states_23_cast_fp16 = add(x = var_16070_cast_fp16, y = var_16076_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; tensor var_16082 = const()[name = tensor("op_16082"), val = tensor(3)]; tensor var_16107 = const()[name = tensor("op_16107"), val = tensor(1)]; tensor out_41_axes_0 = const()[name = tensor("out_41_axes_0"), val = tensor([1])]; tensor var_16124_to_fp16 = const()[name = tensor("op_16124_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_41_cast_fp16 = layer_norm(axes = out_41_axes_0, epsilon = var_16124_to_fp16, x = inputs_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145257408)))]; tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145260032)))]; tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; tensor var_16146_pad_type_0 = const()[name = tensor("op_16146_pad_type_0"), val = tensor("valid")]; tensor var_16146_strides_0 = const()[name = tensor("op_16146_strides_0"), val = tensor([1, 1])]; tensor var_16146_pad_0 = const()[name = tensor("op_16146_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16146_dilations_0 = const()[name = tensor("op_16146_dilations_0"), val = tensor([1, 1])]; tensor var_16146_groups_0 = const()[name = tensor("op_16146_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145262656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146081920))), name = tensor("layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_10_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146082048)))]; tensor var_16146_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_16146_dilations_0, groups = var_16146_groups_0, pad = var_16146_pad_0, pad_type = var_16146_pad_type_0, strides = var_16146_strides_0, weight = layers_10_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_16146_cast_fp16")]; tensor var_16152_pad_type_0 = const()[name = tensor("op_16152_pad_type_0"), val = tensor("valid")]; tensor var_16152_strides_0 = const()[name = tensor("op_16152_strides_0"), val = tensor([1, 1])]; tensor var_16152_pad_0 = const()[name = tensor("op_16152_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16152_dilations_0 = const()[name = tensor("op_16152_dilations_0"), val = tensor([1, 1])]; tensor var_16152_groups_0 = const()[name = tensor("op_16152_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146130624))), name = tensor("layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146084672))), shape = tensor([1280, 1280, 1, 1])]; tensor var_16152_cast_fp16 = conv(dilations = var_16152_dilations_0, groups = var_16152_groups_0, pad = var_16152_pad_0, pad_type = var_16152_pad_type_0, strides = var_16152_strides_0, weight = layers_10_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_16152_cast_fp16")]; tensor query_21_cast_fp16 = add(x = var_16146_cast_fp16, y = var_16152_cast_fp16)[name = tensor("query_21_cast_fp16")]; tensor var_16161_pad_type_0 = const()[name = tensor("op_16161_pad_type_0"), val = tensor("valid")]; tensor var_16161_strides_0 = const()[name = tensor("op_16161_strides_0"), val = tensor([1, 1])]; tensor var_16161_pad_0 = const()[name = tensor("op_16161_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16161_dilations_0 = const()[name = tensor("op_16161_dilations_0"), val = tensor([1, 1])]; tensor var_16161_groups_0 = const()[name = tensor("op_16161_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(146335488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147154752))), name = tensor("layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_16161_cast_fp16 = conv(dilations = var_16161_dilations_0, groups = var_16161_groups_0, pad = var_16161_pad_0, pad_type = var_16161_pad_type_0, strides = var_16161_strides_0, weight = layers_10_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_16161_cast_fp16")]; tensor var_16167_pad_type_0 = const()[name = tensor("op_16167_pad_type_0"), val = tensor("valid")]; tensor var_16167_strides_0 = const()[name = tensor("op_16167_strides_0"), val = tensor([1, 1])]; tensor var_16167_pad_0 = const()[name = tensor("op_16167_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16167_dilations_0 = const()[name = tensor("op_16167_dilations_0"), val = tensor([1, 1])]; tensor var_16167_groups_0 = const()[name = tensor("op_16167_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147187392))), name = tensor("layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147154880))), shape = tensor([1280, 1280, 1, 1])]; tensor var_16167_cast_fp16 = conv(dilations = var_16167_dilations_0, groups = var_16167_groups_0, pad = var_16167_pad_0, pad_type = var_16167_pad_type_0, strides = var_16167_strides_0, weight = layers_10_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_16167_cast_fp16")]; tensor key_21_cast_fp16 = add(x = var_16161_cast_fp16, y = var_16167_cast_fp16)[name = tensor("key_21_cast_fp16")]; tensor var_16177_pad_type_0 = const()[name = tensor("op_16177_pad_type_0"), val = tensor("valid")]; tensor var_16177_strides_0 = const()[name = tensor("op_16177_strides_0"), val = tensor([1, 1])]; tensor var_16177_pad_0 = const()[name = tensor("op_16177_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16177_dilations_0 = const()[name = tensor("op_16177_dilations_0"), val = tensor([1, 1])]; tensor var_16177_groups_0 = const()[name = tensor("op_16177_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147392256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148211520))), name = tensor("layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_10_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148211648)))]; tensor var_16177_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_16177_dilations_0, groups = var_16177_groups_0, pad = var_16177_pad_0, pad_type = var_16177_pad_type_0, strides = var_16177_strides_0, weight = layers_10_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_41_cast_fp16)[name = tensor("op_16177_cast_fp16")]; tensor var_16183_pad_type_0 = const()[name = tensor("op_16183_pad_type_0"), val = tensor("valid")]; tensor var_16183_strides_0 = const()[name = tensor("op_16183_strides_0"), val = tensor([1, 1])]; tensor var_16183_pad_0 = const()[name = tensor("op_16183_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_16183_dilations_0 = const()[name = tensor("op_16183_dilations_0"), val = tensor([1, 1])]; tensor var_16183_groups_0 = const()[name = tensor("op_16183_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148234688))), name = tensor("layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148214272))), shape = tensor([1280, 1280, 1, 1])]; tensor var_16183_cast_fp16 = conv(dilations = var_16183_dilations_0, groups = var_16183_groups_0, pad = var_16183_pad_0, pad_type = var_16183_pad_type_0, strides = var_16183_strides_0, weight = layers_10_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_41_cast_fp16)[name = tensor("op_16183_cast_fp16")]; tensor value_21_cast_fp16 = add(x = var_16177_cast_fp16, y = var_16183_cast_fp16)[name = tensor("value_21_cast_fp16")]; tensor var_16189_begin_0 = const()[name = tensor("op_16189_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16189_end_0 = const()[name = tensor("op_16189_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16189_end_mask_0 = const()[name = tensor("op_16189_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16189_cast_fp16 = slice_by_index(begin = var_16189_begin_0, end = var_16189_end_0, end_mask = var_16189_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16189_cast_fp16")]; tensor var_16193_begin_0 = const()[name = tensor("op_16193_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_16193_end_0 = const()[name = tensor("op_16193_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_16193_end_mask_0 = const()[name = tensor("op_16193_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16193_cast_fp16 = slice_by_index(begin = var_16193_begin_0, end = var_16193_end_0, end_mask = var_16193_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16193_cast_fp16")]; tensor var_16197_begin_0 = const()[name = tensor("op_16197_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_16197_end_0 = const()[name = tensor("op_16197_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_16197_end_mask_0 = const()[name = tensor("op_16197_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16197_cast_fp16 = slice_by_index(begin = var_16197_begin_0, end = var_16197_end_0, end_mask = var_16197_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16197_cast_fp16")]; tensor var_16201_begin_0 = const()[name = tensor("op_16201_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_16201_end_0 = const()[name = tensor("op_16201_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_16201_end_mask_0 = const()[name = tensor("op_16201_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16201_cast_fp16 = slice_by_index(begin = var_16201_begin_0, end = var_16201_end_0, end_mask = var_16201_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16201_cast_fp16")]; tensor var_16205_begin_0 = const()[name = tensor("op_16205_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_16205_end_0 = const()[name = tensor("op_16205_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_16205_end_mask_0 = const()[name = tensor("op_16205_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16205_cast_fp16 = slice_by_index(begin = var_16205_begin_0, end = var_16205_end_0, end_mask = var_16205_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16205_cast_fp16")]; tensor var_16209_begin_0 = const()[name = tensor("op_16209_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_16209_end_0 = const()[name = tensor("op_16209_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_16209_end_mask_0 = const()[name = tensor("op_16209_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16209_cast_fp16 = slice_by_index(begin = var_16209_begin_0, end = var_16209_end_0, end_mask = var_16209_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16209_cast_fp16")]; tensor var_16213_begin_0 = const()[name = tensor("op_16213_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_16213_end_0 = const()[name = tensor("op_16213_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_16213_end_mask_0 = const()[name = tensor("op_16213_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16213_cast_fp16 = slice_by_index(begin = var_16213_begin_0, end = var_16213_end_0, end_mask = var_16213_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16213_cast_fp16")]; tensor var_16217_begin_0 = const()[name = tensor("op_16217_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_16217_end_0 = const()[name = tensor("op_16217_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_16217_end_mask_0 = const()[name = tensor("op_16217_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16217_cast_fp16 = slice_by_index(begin = var_16217_begin_0, end = var_16217_end_0, end_mask = var_16217_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16217_cast_fp16")]; tensor var_16221_begin_0 = const()[name = tensor("op_16221_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_16221_end_0 = const()[name = tensor("op_16221_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_16221_end_mask_0 = const()[name = tensor("op_16221_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16221_cast_fp16 = slice_by_index(begin = var_16221_begin_0, end = var_16221_end_0, end_mask = var_16221_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16221_cast_fp16")]; tensor var_16225_begin_0 = const()[name = tensor("op_16225_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_16225_end_0 = const()[name = tensor("op_16225_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_16225_end_mask_0 = const()[name = tensor("op_16225_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16225_cast_fp16 = slice_by_index(begin = var_16225_begin_0, end = var_16225_end_0, end_mask = var_16225_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16225_cast_fp16")]; tensor var_16229_begin_0 = const()[name = tensor("op_16229_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_16229_end_0 = const()[name = tensor("op_16229_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_16229_end_mask_0 = const()[name = tensor("op_16229_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16229_cast_fp16 = slice_by_index(begin = var_16229_begin_0, end = var_16229_end_0, end_mask = var_16229_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16229_cast_fp16")]; tensor var_16233_begin_0 = const()[name = tensor("op_16233_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_16233_end_0 = const()[name = tensor("op_16233_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_16233_end_mask_0 = const()[name = tensor("op_16233_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16233_cast_fp16 = slice_by_index(begin = var_16233_begin_0, end = var_16233_end_0, end_mask = var_16233_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16233_cast_fp16")]; tensor var_16237_begin_0 = const()[name = tensor("op_16237_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_16237_end_0 = const()[name = tensor("op_16237_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_16237_end_mask_0 = const()[name = tensor("op_16237_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16237_cast_fp16 = slice_by_index(begin = var_16237_begin_0, end = var_16237_end_0, end_mask = var_16237_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16237_cast_fp16")]; tensor var_16241_begin_0 = const()[name = tensor("op_16241_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_16241_end_0 = const()[name = tensor("op_16241_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_16241_end_mask_0 = const()[name = tensor("op_16241_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16241_cast_fp16 = slice_by_index(begin = var_16241_begin_0, end = var_16241_end_0, end_mask = var_16241_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16241_cast_fp16")]; tensor var_16245_begin_0 = const()[name = tensor("op_16245_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_16245_end_0 = const()[name = tensor("op_16245_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_16245_end_mask_0 = const()[name = tensor("op_16245_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16245_cast_fp16 = slice_by_index(begin = var_16245_begin_0, end = var_16245_end_0, end_mask = var_16245_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16245_cast_fp16")]; tensor var_16249_begin_0 = const()[name = tensor("op_16249_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_16249_end_0 = const()[name = tensor("op_16249_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_16249_end_mask_0 = const()[name = tensor("op_16249_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16249_cast_fp16 = slice_by_index(begin = var_16249_begin_0, end = var_16249_end_0, end_mask = var_16249_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16249_cast_fp16")]; tensor var_16253_begin_0 = const()[name = tensor("op_16253_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_16253_end_0 = const()[name = tensor("op_16253_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_16253_end_mask_0 = const()[name = tensor("op_16253_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16253_cast_fp16 = slice_by_index(begin = var_16253_begin_0, end = var_16253_end_0, end_mask = var_16253_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16253_cast_fp16")]; tensor var_16257_begin_0 = const()[name = tensor("op_16257_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_16257_end_0 = const()[name = tensor("op_16257_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_16257_end_mask_0 = const()[name = tensor("op_16257_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16257_cast_fp16 = slice_by_index(begin = var_16257_begin_0, end = var_16257_end_0, end_mask = var_16257_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16257_cast_fp16")]; tensor var_16261_begin_0 = const()[name = tensor("op_16261_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_16261_end_0 = const()[name = tensor("op_16261_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_16261_end_mask_0 = const()[name = tensor("op_16261_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16261_cast_fp16 = slice_by_index(begin = var_16261_begin_0, end = var_16261_end_0, end_mask = var_16261_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16261_cast_fp16")]; tensor var_16265_begin_0 = const()[name = tensor("op_16265_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_16265_end_0 = const()[name = tensor("op_16265_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_16265_end_mask_0 = const()[name = tensor("op_16265_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16265_cast_fp16 = slice_by_index(begin = var_16265_begin_0, end = var_16265_end_0, end_mask = var_16265_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_16265_cast_fp16")]; tensor var_16274_begin_0 = const()[name = tensor("op_16274_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16274_end_0 = const()[name = tensor("op_16274_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16274_end_mask_0 = const()[name = tensor("op_16274_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16274_cast_fp16 = slice_by_index(begin = var_16274_begin_0, end = var_16274_end_0, end_mask = var_16274_end_mask_0, x = var_16189_cast_fp16)[name = tensor("op_16274_cast_fp16")]; tensor var_16281_begin_0 = const()[name = tensor("op_16281_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16281_end_0 = const()[name = tensor("op_16281_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16281_end_mask_0 = const()[name = tensor("op_16281_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16281_cast_fp16 = slice_by_index(begin = var_16281_begin_0, end = var_16281_end_0, end_mask = var_16281_end_mask_0, x = var_16189_cast_fp16)[name = tensor("op_16281_cast_fp16")]; tensor var_16288_begin_0 = const()[name = tensor("op_16288_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16288_end_0 = const()[name = tensor("op_16288_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16288_end_mask_0 = const()[name = tensor("op_16288_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16288_cast_fp16 = slice_by_index(begin = var_16288_begin_0, end = var_16288_end_0, end_mask = var_16288_end_mask_0, x = var_16189_cast_fp16)[name = tensor("op_16288_cast_fp16")]; tensor var_16295_begin_0 = const()[name = tensor("op_16295_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16295_end_0 = const()[name = tensor("op_16295_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16295_end_mask_0 = const()[name = tensor("op_16295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16295_cast_fp16 = slice_by_index(begin = var_16295_begin_0, end = var_16295_end_0, end_mask = var_16295_end_mask_0, x = var_16189_cast_fp16)[name = tensor("op_16295_cast_fp16")]; tensor var_16302_begin_0 = const()[name = tensor("op_16302_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16302_end_0 = const()[name = tensor("op_16302_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16302_end_mask_0 = const()[name = tensor("op_16302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16302_cast_fp16 = slice_by_index(begin = var_16302_begin_0, end = var_16302_end_0, end_mask = var_16302_end_mask_0, x = var_16193_cast_fp16)[name = tensor("op_16302_cast_fp16")]; tensor var_16309_begin_0 = const()[name = tensor("op_16309_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16309_end_0 = const()[name = tensor("op_16309_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16309_end_mask_0 = const()[name = tensor("op_16309_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16309_cast_fp16 = slice_by_index(begin = var_16309_begin_0, end = var_16309_end_0, end_mask = var_16309_end_mask_0, x = var_16193_cast_fp16)[name = tensor("op_16309_cast_fp16")]; tensor var_16316_begin_0 = const()[name = tensor("op_16316_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16316_end_0 = const()[name = tensor("op_16316_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16316_end_mask_0 = const()[name = tensor("op_16316_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16316_cast_fp16 = slice_by_index(begin = var_16316_begin_0, end = var_16316_end_0, end_mask = var_16316_end_mask_0, x = var_16193_cast_fp16)[name = tensor("op_16316_cast_fp16")]; tensor var_16323_begin_0 = const()[name = tensor("op_16323_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16323_end_0 = const()[name = tensor("op_16323_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16323_end_mask_0 = const()[name = tensor("op_16323_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16323_cast_fp16 = slice_by_index(begin = var_16323_begin_0, end = var_16323_end_0, end_mask = var_16323_end_mask_0, x = var_16193_cast_fp16)[name = tensor("op_16323_cast_fp16")]; tensor var_16330_begin_0 = const()[name = tensor("op_16330_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16330_end_0 = const()[name = tensor("op_16330_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16330_end_mask_0 = const()[name = tensor("op_16330_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16330_cast_fp16 = slice_by_index(begin = var_16330_begin_0, end = var_16330_end_0, end_mask = var_16330_end_mask_0, x = var_16197_cast_fp16)[name = tensor("op_16330_cast_fp16")]; tensor var_16337_begin_0 = const()[name = tensor("op_16337_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16337_end_0 = const()[name = tensor("op_16337_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16337_end_mask_0 = const()[name = tensor("op_16337_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16337_cast_fp16 = slice_by_index(begin = var_16337_begin_0, end = var_16337_end_0, end_mask = var_16337_end_mask_0, x = var_16197_cast_fp16)[name = tensor("op_16337_cast_fp16")]; tensor var_16344_begin_0 = const()[name = tensor("op_16344_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16344_end_0 = const()[name = tensor("op_16344_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16344_end_mask_0 = const()[name = tensor("op_16344_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16344_cast_fp16 = slice_by_index(begin = var_16344_begin_0, end = var_16344_end_0, end_mask = var_16344_end_mask_0, x = var_16197_cast_fp16)[name = tensor("op_16344_cast_fp16")]; tensor var_16351_begin_0 = const()[name = tensor("op_16351_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16351_end_0 = const()[name = tensor("op_16351_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16351_end_mask_0 = const()[name = tensor("op_16351_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16351_cast_fp16 = slice_by_index(begin = var_16351_begin_0, end = var_16351_end_0, end_mask = var_16351_end_mask_0, x = var_16197_cast_fp16)[name = tensor("op_16351_cast_fp16")]; tensor var_16358_begin_0 = const()[name = tensor("op_16358_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16358_end_0 = const()[name = tensor("op_16358_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16358_end_mask_0 = const()[name = tensor("op_16358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16358_cast_fp16 = slice_by_index(begin = var_16358_begin_0, end = var_16358_end_0, end_mask = var_16358_end_mask_0, x = var_16201_cast_fp16)[name = tensor("op_16358_cast_fp16")]; tensor var_16365_begin_0 = const()[name = tensor("op_16365_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16365_end_0 = const()[name = tensor("op_16365_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16365_end_mask_0 = const()[name = tensor("op_16365_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16365_cast_fp16 = slice_by_index(begin = var_16365_begin_0, end = var_16365_end_0, end_mask = var_16365_end_mask_0, x = var_16201_cast_fp16)[name = tensor("op_16365_cast_fp16")]; tensor var_16372_begin_0 = const()[name = tensor("op_16372_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16372_end_0 = const()[name = tensor("op_16372_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16372_end_mask_0 = const()[name = tensor("op_16372_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16372_cast_fp16 = slice_by_index(begin = var_16372_begin_0, end = var_16372_end_0, end_mask = var_16372_end_mask_0, x = var_16201_cast_fp16)[name = tensor("op_16372_cast_fp16")]; tensor var_16379_begin_0 = const()[name = tensor("op_16379_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16379_end_0 = const()[name = tensor("op_16379_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16379_end_mask_0 = const()[name = tensor("op_16379_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16379_cast_fp16 = slice_by_index(begin = var_16379_begin_0, end = var_16379_end_0, end_mask = var_16379_end_mask_0, x = var_16201_cast_fp16)[name = tensor("op_16379_cast_fp16")]; tensor var_16386_begin_0 = const()[name = tensor("op_16386_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16386_end_0 = const()[name = tensor("op_16386_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16386_end_mask_0 = const()[name = tensor("op_16386_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16386_cast_fp16 = slice_by_index(begin = var_16386_begin_0, end = var_16386_end_0, end_mask = var_16386_end_mask_0, x = var_16205_cast_fp16)[name = tensor("op_16386_cast_fp16")]; tensor var_16393_begin_0 = const()[name = tensor("op_16393_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16393_end_0 = const()[name = tensor("op_16393_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16393_end_mask_0 = const()[name = tensor("op_16393_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16393_cast_fp16 = slice_by_index(begin = var_16393_begin_0, end = var_16393_end_0, end_mask = var_16393_end_mask_0, x = var_16205_cast_fp16)[name = tensor("op_16393_cast_fp16")]; tensor var_16400_begin_0 = const()[name = tensor("op_16400_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16400_end_0 = const()[name = tensor("op_16400_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16400_end_mask_0 = const()[name = tensor("op_16400_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16400_cast_fp16 = slice_by_index(begin = var_16400_begin_0, end = var_16400_end_0, end_mask = var_16400_end_mask_0, x = var_16205_cast_fp16)[name = tensor("op_16400_cast_fp16")]; tensor var_16407_begin_0 = const()[name = tensor("op_16407_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16407_end_0 = const()[name = tensor("op_16407_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16407_end_mask_0 = const()[name = tensor("op_16407_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16407_cast_fp16 = slice_by_index(begin = var_16407_begin_0, end = var_16407_end_0, end_mask = var_16407_end_mask_0, x = var_16205_cast_fp16)[name = tensor("op_16407_cast_fp16")]; tensor var_16414_begin_0 = const()[name = tensor("op_16414_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16414_end_0 = const()[name = tensor("op_16414_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16414_end_mask_0 = const()[name = tensor("op_16414_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16414_cast_fp16 = slice_by_index(begin = var_16414_begin_0, end = var_16414_end_0, end_mask = var_16414_end_mask_0, x = var_16209_cast_fp16)[name = tensor("op_16414_cast_fp16")]; tensor var_16421_begin_0 = const()[name = tensor("op_16421_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16421_end_0 = const()[name = tensor("op_16421_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16421_end_mask_0 = const()[name = tensor("op_16421_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16421_cast_fp16 = slice_by_index(begin = var_16421_begin_0, end = var_16421_end_0, end_mask = var_16421_end_mask_0, x = var_16209_cast_fp16)[name = tensor("op_16421_cast_fp16")]; tensor var_16428_begin_0 = const()[name = tensor("op_16428_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16428_end_0 = const()[name = tensor("op_16428_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16428_end_mask_0 = const()[name = tensor("op_16428_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16428_cast_fp16 = slice_by_index(begin = var_16428_begin_0, end = var_16428_end_0, end_mask = var_16428_end_mask_0, x = var_16209_cast_fp16)[name = tensor("op_16428_cast_fp16")]; tensor var_16435_begin_0 = const()[name = tensor("op_16435_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16435_end_0 = const()[name = tensor("op_16435_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16435_end_mask_0 = const()[name = tensor("op_16435_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16435_cast_fp16 = slice_by_index(begin = var_16435_begin_0, end = var_16435_end_0, end_mask = var_16435_end_mask_0, x = var_16209_cast_fp16)[name = tensor("op_16435_cast_fp16")]; tensor var_16442_begin_0 = const()[name = tensor("op_16442_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16442_end_0 = const()[name = tensor("op_16442_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16442_end_mask_0 = const()[name = tensor("op_16442_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16442_cast_fp16 = slice_by_index(begin = var_16442_begin_0, end = var_16442_end_0, end_mask = var_16442_end_mask_0, x = var_16213_cast_fp16)[name = tensor("op_16442_cast_fp16")]; tensor var_16449_begin_0 = const()[name = tensor("op_16449_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16449_end_0 = const()[name = tensor("op_16449_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16449_end_mask_0 = const()[name = tensor("op_16449_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16449_cast_fp16 = slice_by_index(begin = var_16449_begin_0, end = var_16449_end_0, end_mask = var_16449_end_mask_0, x = var_16213_cast_fp16)[name = tensor("op_16449_cast_fp16")]; tensor var_16456_begin_0 = const()[name = tensor("op_16456_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16456_end_0 = const()[name = tensor("op_16456_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16456_end_mask_0 = const()[name = tensor("op_16456_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16456_cast_fp16 = slice_by_index(begin = var_16456_begin_0, end = var_16456_end_0, end_mask = var_16456_end_mask_0, x = var_16213_cast_fp16)[name = tensor("op_16456_cast_fp16")]; tensor var_16463_begin_0 = const()[name = tensor("op_16463_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16463_end_0 = const()[name = tensor("op_16463_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16463_end_mask_0 = const()[name = tensor("op_16463_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16463_cast_fp16 = slice_by_index(begin = var_16463_begin_0, end = var_16463_end_0, end_mask = var_16463_end_mask_0, x = var_16213_cast_fp16)[name = tensor("op_16463_cast_fp16")]; tensor var_16470_begin_0 = const()[name = tensor("op_16470_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16470_end_0 = const()[name = tensor("op_16470_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16470_end_mask_0 = const()[name = tensor("op_16470_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16470_cast_fp16 = slice_by_index(begin = var_16470_begin_0, end = var_16470_end_0, end_mask = var_16470_end_mask_0, x = var_16217_cast_fp16)[name = tensor("op_16470_cast_fp16")]; tensor var_16477_begin_0 = const()[name = tensor("op_16477_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16477_end_0 = const()[name = tensor("op_16477_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16477_end_mask_0 = const()[name = tensor("op_16477_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16477_cast_fp16 = slice_by_index(begin = var_16477_begin_0, end = var_16477_end_0, end_mask = var_16477_end_mask_0, x = var_16217_cast_fp16)[name = tensor("op_16477_cast_fp16")]; tensor var_16484_begin_0 = const()[name = tensor("op_16484_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16484_end_0 = const()[name = tensor("op_16484_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16484_end_mask_0 = const()[name = tensor("op_16484_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16484_cast_fp16 = slice_by_index(begin = var_16484_begin_0, end = var_16484_end_0, end_mask = var_16484_end_mask_0, x = var_16217_cast_fp16)[name = tensor("op_16484_cast_fp16")]; tensor var_16491_begin_0 = const()[name = tensor("op_16491_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16491_end_0 = const()[name = tensor("op_16491_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16491_end_mask_0 = const()[name = tensor("op_16491_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16491_cast_fp16 = slice_by_index(begin = var_16491_begin_0, end = var_16491_end_0, end_mask = var_16491_end_mask_0, x = var_16217_cast_fp16)[name = tensor("op_16491_cast_fp16")]; tensor var_16498_begin_0 = const()[name = tensor("op_16498_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16498_end_0 = const()[name = tensor("op_16498_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16498_end_mask_0 = const()[name = tensor("op_16498_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16498_cast_fp16 = slice_by_index(begin = var_16498_begin_0, end = var_16498_end_0, end_mask = var_16498_end_mask_0, x = var_16221_cast_fp16)[name = tensor("op_16498_cast_fp16")]; tensor var_16505_begin_0 = const()[name = tensor("op_16505_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16505_end_0 = const()[name = tensor("op_16505_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16505_end_mask_0 = const()[name = tensor("op_16505_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16505_cast_fp16 = slice_by_index(begin = var_16505_begin_0, end = var_16505_end_0, end_mask = var_16505_end_mask_0, x = var_16221_cast_fp16)[name = tensor("op_16505_cast_fp16")]; tensor var_16512_begin_0 = const()[name = tensor("op_16512_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16512_end_0 = const()[name = tensor("op_16512_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16512_end_mask_0 = const()[name = tensor("op_16512_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16512_cast_fp16 = slice_by_index(begin = var_16512_begin_0, end = var_16512_end_0, end_mask = var_16512_end_mask_0, x = var_16221_cast_fp16)[name = tensor("op_16512_cast_fp16")]; tensor var_16519_begin_0 = const()[name = tensor("op_16519_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16519_end_0 = const()[name = tensor("op_16519_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16519_end_mask_0 = const()[name = tensor("op_16519_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16519_cast_fp16 = slice_by_index(begin = var_16519_begin_0, end = var_16519_end_0, end_mask = var_16519_end_mask_0, x = var_16221_cast_fp16)[name = tensor("op_16519_cast_fp16")]; tensor var_16526_begin_0 = const()[name = tensor("op_16526_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16526_end_0 = const()[name = tensor("op_16526_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16526_end_mask_0 = const()[name = tensor("op_16526_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16526_cast_fp16 = slice_by_index(begin = var_16526_begin_0, end = var_16526_end_0, end_mask = var_16526_end_mask_0, x = var_16225_cast_fp16)[name = tensor("op_16526_cast_fp16")]; tensor var_16533_begin_0 = const()[name = tensor("op_16533_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16533_end_0 = const()[name = tensor("op_16533_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16533_end_mask_0 = const()[name = tensor("op_16533_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16533_cast_fp16 = slice_by_index(begin = var_16533_begin_0, end = var_16533_end_0, end_mask = var_16533_end_mask_0, x = var_16225_cast_fp16)[name = tensor("op_16533_cast_fp16")]; tensor var_16540_begin_0 = const()[name = tensor("op_16540_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16540_end_0 = const()[name = tensor("op_16540_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16540_end_mask_0 = const()[name = tensor("op_16540_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16540_cast_fp16 = slice_by_index(begin = var_16540_begin_0, end = var_16540_end_0, end_mask = var_16540_end_mask_0, x = var_16225_cast_fp16)[name = tensor("op_16540_cast_fp16")]; tensor var_16547_begin_0 = const()[name = tensor("op_16547_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16547_end_0 = const()[name = tensor("op_16547_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16547_end_mask_0 = const()[name = tensor("op_16547_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16547_cast_fp16 = slice_by_index(begin = var_16547_begin_0, end = var_16547_end_0, end_mask = var_16547_end_mask_0, x = var_16225_cast_fp16)[name = tensor("op_16547_cast_fp16")]; tensor var_16554_begin_0 = const()[name = tensor("op_16554_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16554_end_0 = const()[name = tensor("op_16554_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16554_end_mask_0 = const()[name = tensor("op_16554_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16554_cast_fp16 = slice_by_index(begin = var_16554_begin_0, end = var_16554_end_0, end_mask = var_16554_end_mask_0, x = var_16229_cast_fp16)[name = tensor("op_16554_cast_fp16")]; tensor var_16561_begin_0 = const()[name = tensor("op_16561_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16561_end_0 = const()[name = tensor("op_16561_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16561_end_mask_0 = const()[name = tensor("op_16561_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16561_cast_fp16 = slice_by_index(begin = var_16561_begin_0, end = var_16561_end_0, end_mask = var_16561_end_mask_0, x = var_16229_cast_fp16)[name = tensor("op_16561_cast_fp16")]; tensor var_16568_begin_0 = const()[name = tensor("op_16568_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16568_end_0 = const()[name = tensor("op_16568_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16568_end_mask_0 = const()[name = tensor("op_16568_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16568_cast_fp16 = slice_by_index(begin = var_16568_begin_0, end = var_16568_end_0, end_mask = var_16568_end_mask_0, x = var_16229_cast_fp16)[name = tensor("op_16568_cast_fp16")]; tensor var_16575_begin_0 = const()[name = tensor("op_16575_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16575_end_0 = const()[name = tensor("op_16575_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16575_end_mask_0 = const()[name = tensor("op_16575_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16575_cast_fp16 = slice_by_index(begin = var_16575_begin_0, end = var_16575_end_0, end_mask = var_16575_end_mask_0, x = var_16229_cast_fp16)[name = tensor("op_16575_cast_fp16")]; tensor var_16582_begin_0 = const()[name = tensor("op_16582_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16582_end_0 = const()[name = tensor("op_16582_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16582_end_mask_0 = const()[name = tensor("op_16582_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16582_cast_fp16 = slice_by_index(begin = var_16582_begin_0, end = var_16582_end_0, end_mask = var_16582_end_mask_0, x = var_16233_cast_fp16)[name = tensor("op_16582_cast_fp16")]; tensor var_16589_begin_0 = const()[name = tensor("op_16589_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16589_end_0 = const()[name = tensor("op_16589_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16589_end_mask_0 = const()[name = tensor("op_16589_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16589_cast_fp16 = slice_by_index(begin = var_16589_begin_0, end = var_16589_end_0, end_mask = var_16589_end_mask_0, x = var_16233_cast_fp16)[name = tensor("op_16589_cast_fp16")]; tensor var_16596_begin_0 = const()[name = tensor("op_16596_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16596_end_0 = const()[name = tensor("op_16596_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16596_end_mask_0 = const()[name = tensor("op_16596_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16596_cast_fp16 = slice_by_index(begin = var_16596_begin_0, end = var_16596_end_0, end_mask = var_16596_end_mask_0, x = var_16233_cast_fp16)[name = tensor("op_16596_cast_fp16")]; tensor var_16603_begin_0 = const()[name = tensor("op_16603_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16603_end_0 = const()[name = tensor("op_16603_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16603_end_mask_0 = const()[name = tensor("op_16603_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16603_cast_fp16 = slice_by_index(begin = var_16603_begin_0, end = var_16603_end_0, end_mask = var_16603_end_mask_0, x = var_16233_cast_fp16)[name = tensor("op_16603_cast_fp16")]; tensor var_16610_begin_0 = const()[name = tensor("op_16610_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16610_end_0 = const()[name = tensor("op_16610_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16610_end_mask_0 = const()[name = tensor("op_16610_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16610_cast_fp16 = slice_by_index(begin = var_16610_begin_0, end = var_16610_end_0, end_mask = var_16610_end_mask_0, x = var_16237_cast_fp16)[name = tensor("op_16610_cast_fp16")]; tensor var_16617_begin_0 = const()[name = tensor("op_16617_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16617_end_0 = const()[name = tensor("op_16617_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16617_end_mask_0 = const()[name = tensor("op_16617_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16617_cast_fp16 = slice_by_index(begin = var_16617_begin_0, end = var_16617_end_0, end_mask = var_16617_end_mask_0, x = var_16237_cast_fp16)[name = tensor("op_16617_cast_fp16")]; tensor var_16624_begin_0 = const()[name = tensor("op_16624_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16624_end_0 = const()[name = tensor("op_16624_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16624_end_mask_0 = const()[name = tensor("op_16624_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16624_cast_fp16 = slice_by_index(begin = var_16624_begin_0, end = var_16624_end_0, end_mask = var_16624_end_mask_0, x = var_16237_cast_fp16)[name = tensor("op_16624_cast_fp16")]; tensor var_16631_begin_0 = const()[name = tensor("op_16631_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16631_end_0 = const()[name = tensor("op_16631_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16631_end_mask_0 = const()[name = tensor("op_16631_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16631_cast_fp16 = slice_by_index(begin = var_16631_begin_0, end = var_16631_end_0, end_mask = var_16631_end_mask_0, x = var_16237_cast_fp16)[name = tensor("op_16631_cast_fp16")]; tensor var_16638_begin_0 = const()[name = tensor("op_16638_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16638_end_0 = const()[name = tensor("op_16638_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16638_end_mask_0 = const()[name = tensor("op_16638_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16638_cast_fp16 = slice_by_index(begin = var_16638_begin_0, end = var_16638_end_0, end_mask = var_16638_end_mask_0, x = var_16241_cast_fp16)[name = tensor("op_16638_cast_fp16")]; tensor var_16645_begin_0 = const()[name = tensor("op_16645_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16645_end_0 = const()[name = tensor("op_16645_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16645_end_mask_0 = const()[name = tensor("op_16645_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16645_cast_fp16 = slice_by_index(begin = var_16645_begin_0, end = var_16645_end_0, end_mask = var_16645_end_mask_0, x = var_16241_cast_fp16)[name = tensor("op_16645_cast_fp16")]; tensor var_16652_begin_0 = const()[name = tensor("op_16652_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16652_end_0 = const()[name = tensor("op_16652_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16652_end_mask_0 = const()[name = tensor("op_16652_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16652_cast_fp16 = slice_by_index(begin = var_16652_begin_0, end = var_16652_end_0, end_mask = var_16652_end_mask_0, x = var_16241_cast_fp16)[name = tensor("op_16652_cast_fp16")]; tensor var_16659_begin_0 = const()[name = tensor("op_16659_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16659_end_0 = const()[name = tensor("op_16659_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16659_end_mask_0 = const()[name = tensor("op_16659_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16659_cast_fp16 = slice_by_index(begin = var_16659_begin_0, end = var_16659_end_0, end_mask = var_16659_end_mask_0, x = var_16241_cast_fp16)[name = tensor("op_16659_cast_fp16")]; tensor var_16666_begin_0 = const()[name = tensor("op_16666_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16666_end_0 = const()[name = tensor("op_16666_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16666_end_mask_0 = const()[name = tensor("op_16666_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16666_cast_fp16 = slice_by_index(begin = var_16666_begin_0, end = var_16666_end_0, end_mask = var_16666_end_mask_0, x = var_16245_cast_fp16)[name = tensor("op_16666_cast_fp16")]; tensor var_16673_begin_0 = const()[name = tensor("op_16673_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16673_end_0 = const()[name = tensor("op_16673_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16673_end_mask_0 = const()[name = tensor("op_16673_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16673_cast_fp16 = slice_by_index(begin = var_16673_begin_0, end = var_16673_end_0, end_mask = var_16673_end_mask_0, x = var_16245_cast_fp16)[name = tensor("op_16673_cast_fp16")]; tensor var_16680_begin_0 = const()[name = tensor("op_16680_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16680_end_0 = const()[name = tensor("op_16680_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16680_end_mask_0 = const()[name = tensor("op_16680_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16680_cast_fp16 = slice_by_index(begin = var_16680_begin_0, end = var_16680_end_0, end_mask = var_16680_end_mask_0, x = var_16245_cast_fp16)[name = tensor("op_16680_cast_fp16")]; tensor var_16687_begin_0 = const()[name = tensor("op_16687_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16687_end_0 = const()[name = tensor("op_16687_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16687_end_mask_0 = const()[name = tensor("op_16687_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16687_cast_fp16 = slice_by_index(begin = var_16687_begin_0, end = var_16687_end_0, end_mask = var_16687_end_mask_0, x = var_16245_cast_fp16)[name = tensor("op_16687_cast_fp16")]; tensor var_16694_begin_0 = const()[name = tensor("op_16694_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16694_end_0 = const()[name = tensor("op_16694_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16694_end_mask_0 = const()[name = tensor("op_16694_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16694_cast_fp16 = slice_by_index(begin = var_16694_begin_0, end = var_16694_end_0, end_mask = var_16694_end_mask_0, x = var_16249_cast_fp16)[name = tensor("op_16694_cast_fp16")]; tensor var_16701_begin_0 = const()[name = tensor("op_16701_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16701_end_0 = const()[name = tensor("op_16701_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16701_end_mask_0 = const()[name = tensor("op_16701_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16701_cast_fp16 = slice_by_index(begin = var_16701_begin_0, end = var_16701_end_0, end_mask = var_16701_end_mask_0, x = var_16249_cast_fp16)[name = tensor("op_16701_cast_fp16")]; tensor var_16708_begin_0 = const()[name = tensor("op_16708_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16708_end_0 = const()[name = tensor("op_16708_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16708_end_mask_0 = const()[name = tensor("op_16708_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16708_cast_fp16 = slice_by_index(begin = var_16708_begin_0, end = var_16708_end_0, end_mask = var_16708_end_mask_0, x = var_16249_cast_fp16)[name = tensor("op_16708_cast_fp16")]; tensor var_16715_begin_0 = const()[name = tensor("op_16715_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16715_end_0 = const()[name = tensor("op_16715_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16715_end_mask_0 = const()[name = tensor("op_16715_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16715_cast_fp16 = slice_by_index(begin = var_16715_begin_0, end = var_16715_end_0, end_mask = var_16715_end_mask_0, x = var_16249_cast_fp16)[name = tensor("op_16715_cast_fp16")]; tensor var_16722_begin_0 = const()[name = tensor("op_16722_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16722_end_0 = const()[name = tensor("op_16722_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16722_end_mask_0 = const()[name = tensor("op_16722_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16722_cast_fp16 = slice_by_index(begin = var_16722_begin_0, end = var_16722_end_0, end_mask = var_16722_end_mask_0, x = var_16253_cast_fp16)[name = tensor("op_16722_cast_fp16")]; tensor var_16729_begin_0 = const()[name = tensor("op_16729_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16729_end_0 = const()[name = tensor("op_16729_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16729_end_mask_0 = const()[name = tensor("op_16729_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16729_cast_fp16 = slice_by_index(begin = var_16729_begin_0, end = var_16729_end_0, end_mask = var_16729_end_mask_0, x = var_16253_cast_fp16)[name = tensor("op_16729_cast_fp16")]; tensor var_16736_begin_0 = const()[name = tensor("op_16736_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16736_end_0 = const()[name = tensor("op_16736_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16736_end_mask_0 = const()[name = tensor("op_16736_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16736_cast_fp16 = slice_by_index(begin = var_16736_begin_0, end = var_16736_end_0, end_mask = var_16736_end_mask_0, x = var_16253_cast_fp16)[name = tensor("op_16736_cast_fp16")]; tensor var_16743_begin_0 = const()[name = tensor("op_16743_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16743_end_0 = const()[name = tensor("op_16743_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16743_end_mask_0 = const()[name = tensor("op_16743_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16743_cast_fp16 = slice_by_index(begin = var_16743_begin_0, end = var_16743_end_0, end_mask = var_16743_end_mask_0, x = var_16253_cast_fp16)[name = tensor("op_16743_cast_fp16")]; tensor var_16750_begin_0 = const()[name = tensor("op_16750_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16750_end_0 = const()[name = tensor("op_16750_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16750_end_mask_0 = const()[name = tensor("op_16750_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16750_cast_fp16 = slice_by_index(begin = var_16750_begin_0, end = var_16750_end_0, end_mask = var_16750_end_mask_0, x = var_16257_cast_fp16)[name = tensor("op_16750_cast_fp16")]; tensor var_16757_begin_0 = const()[name = tensor("op_16757_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16757_end_0 = const()[name = tensor("op_16757_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16757_end_mask_0 = const()[name = tensor("op_16757_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16757_cast_fp16 = slice_by_index(begin = var_16757_begin_0, end = var_16757_end_0, end_mask = var_16757_end_mask_0, x = var_16257_cast_fp16)[name = tensor("op_16757_cast_fp16")]; tensor var_16764_begin_0 = const()[name = tensor("op_16764_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16764_end_0 = const()[name = tensor("op_16764_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16764_end_mask_0 = const()[name = tensor("op_16764_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16764_cast_fp16 = slice_by_index(begin = var_16764_begin_0, end = var_16764_end_0, end_mask = var_16764_end_mask_0, x = var_16257_cast_fp16)[name = tensor("op_16764_cast_fp16")]; tensor var_16771_begin_0 = const()[name = tensor("op_16771_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16771_end_0 = const()[name = tensor("op_16771_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16771_end_mask_0 = const()[name = tensor("op_16771_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16771_cast_fp16 = slice_by_index(begin = var_16771_begin_0, end = var_16771_end_0, end_mask = var_16771_end_mask_0, x = var_16257_cast_fp16)[name = tensor("op_16771_cast_fp16")]; tensor var_16778_begin_0 = const()[name = tensor("op_16778_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16778_end_0 = const()[name = tensor("op_16778_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16778_end_mask_0 = const()[name = tensor("op_16778_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16778_cast_fp16 = slice_by_index(begin = var_16778_begin_0, end = var_16778_end_0, end_mask = var_16778_end_mask_0, x = var_16261_cast_fp16)[name = tensor("op_16778_cast_fp16")]; tensor var_16785_begin_0 = const()[name = tensor("op_16785_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16785_end_0 = const()[name = tensor("op_16785_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16785_end_mask_0 = const()[name = tensor("op_16785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16785_cast_fp16 = slice_by_index(begin = var_16785_begin_0, end = var_16785_end_0, end_mask = var_16785_end_mask_0, x = var_16261_cast_fp16)[name = tensor("op_16785_cast_fp16")]; tensor var_16792_begin_0 = const()[name = tensor("op_16792_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16792_end_0 = const()[name = tensor("op_16792_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16792_end_mask_0 = const()[name = tensor("op_16792_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16792_cast_fp16 = slice_by_index(begin = var_16792_begin_0, end = var_16792_end_0, end_mask = var_16792_end_mask_0, x = var_16261_cast_fp16)[name = tensor("op_16792_cast_fp16")]; tensor var_16799_begin_0 = const()[name = tensor("op_16799_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16799_end_0 = const()[name = tensor("op_16799_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16799_end_mask_0 = const()[name = tensor("op_16799_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16799_cast_fp16 = slice_by_index(begin = var_16799_begin_0, end = var_16799_end_0, end_mask = var_16799_end_mask_0, x = var_16261_cast_fp16)[name = tensor("op_16799_cast_fp16")]; tensor var_16806_begin_0 = const()[name = tensor("op_16806_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16806_end_0 = const()[name = tensor("op_16806_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_16806_end_mask_0 = const()[name = tensor("op_16806_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16806_cast_fp16 = slice_by_index(begin = var_16806_begin_0, end = var_16806_end_0, end_mask = var_16806_end_mask_0, x = var_16265_cast_fp16)[name = tensor("op_16806_cast_fp16")]; tensor var_16813_begin_0 = const()[name = tensor("op_16813_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_16813_end_0 = const()[name = tensor("op_16813_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_16813_end_mask_0 = const()[name = tensor("op_16813_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16813_cast_fp16 = slice_by_index(begin = var_16813_begin_0, end = var_16813_end_0, end_mask = var_16813_end_mask_0, x = var_16265_cast_fp16)[name = tensor("op_16813_cast_fp16")]; tensor var_16820_begin_0 = const()[name = tensor("op_16820_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_16820_end_0 = const()[name = tensor("op_16820_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_16820_end_mask_0 = const()[name = tensor("op_16820_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16820_cast_fp16 = slice_by_index(begin = var_16820_begin_0, end = var_16820_end_0, end_mask = var_16820_end_mask_0, x = var_16265_cast_fp16)[name = tensor("op_16820_cast_fp16")]; tensor var_16827_begin_0 = const()[name = tensor("op_16827_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_16827_end_0 = const()[name = tensor("op_16827_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16827_end_mask_0 = const()[name = tensor("op_16827_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16827_cast_fp16 = slice_by_index(begin = var_16827_begin_0, end = var_16827_end_0, end_mask = var_16827_end_mask_0, x = var_16265_cast_fp16)[name = tensor("op_16827_cast_fp16")]; tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_16832_begin_0 = const()[name = tensor("op_16832_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16832_end_0 = const()[name = tensor("op_16832_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_16832_end_mask_0 = const()[name = tensor("op_16832_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = key_21_cast_fp16)[name = tensor("transpose_21")]; tensor var_16832_cast_fp16 = slice_by_index(begin = var_16832_begin_0, end = var_16832_end_0, end_mask = var_16832_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16832_cast_fp16")]; tensor var_16836_begin_0 = const()[name = tensor("op_16836_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_16836_end_0 = const()[name = tensor("op_16836_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_16836_end_mask_0 = const()[name = tensor("op_16836_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16836_cast_fp16 = slice_by_index(begin = var_16836_begin_0, end = var_16836_end_0, end_mask = var_16836_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16836_cast_fp16")]; tensor var_16840_begin_0 = const()[name = tensor("op_16840_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_16840_end_0 = const()[name = tensor("op_16840_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_16840_end_mask_0 = const()[name = tensor("op_16840_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16840_cast_fp16 = slice_by_index(begin = var_16840_begin_0, end = var_16840_end_0, end_mask = var_16840_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16840_cast_fp16")]; tensor var_16844_begin_0 = const()[name = tensor("op_16844_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_16844_end_0 = const()[name = tensor("op_16844_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_16844_end_mask_0 = const()[name = tensor("op_16844_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16844_cast_fp16 = slice_by_index(begin = var_16844_begin_0, end = var_16844_end_0, end_mask = var_16844_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16844_cast_fp16")]; tensor var_16848_begin_0 = const()[name = tensor("op_16848_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_16848_end_0 = const()[name = tensor("op_16848_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_16848_end_mask_0 = const()[name = tensor("op_16848_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16848_cast_fp16 = slice_by_index(begin = var_16848_begin_0, end = var_16848_end_0, end_mask = var_16848_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16848_cast_fp16")]; tensor var_16852_begin_0 = const()[name = tensor("op_16852_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_16852_end_0 = const()[name = tensor("op_16852_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_16852_end_mask_0 = const()[name = tensor("op_16852_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16852_cast_fp16 = slice_by_index(begin = var_16852_begin_0, end = var_16852_end_0, end_mask = var_16852_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16852_cast_fp16")]; tensor var_16856_begin_0 = const()[name = tensor("op_16856_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_16856_end_0 = const()[name = tensor("op_16856_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_16856_end_mask_0 = const()[name = tensor("op_16856_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16856_cast_fp16 = slice_by_index(begin = var_16856_begin_0, end = var_16856_end_0, end_mask = var_16856_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16856_cast_fp16")]; tensor var_16860_begin_0 = const()[name = tensor("op_16860_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_16860_end_0 = const()[name = tensor("op_16860_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_16860_end_mask_0 = const()[name = tensor("op_16860_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16860_cast_fp16 = slice_by_index(begin = var_16860_begin_0, end = var_16860_end_0, end_mask = var_16860_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16860_cast_fp16")]; tensor var_16864_begin_0 = const()[name = tensor("op_16864_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_16864_end_0 = const()[name = tensor("op_16864_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_16864_end_mask_0 = const()[name = tensor("op_16864_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16864_cast_fp16 = slice_by_index(begin = var_16864_begin_0, end = var_16864_end_0, end_mask = var_16864_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16864_cast_fp16")]; tensor var_16868_begin_0 = const()[name = tensor("op_16868_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_16868_end_0 = const()[name = tensor("op_16868_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_16868_end_mask_0 = const()[name = tensor("op_16868_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16868_cast_fp16 = slice_by_index(begin = var_16868_begin_0, end = var_16868_end_0, end_mask = var_16868_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16868_cast_fp16")]; tensor var_16872_begin_0 = const()[name = tensor("op_16872_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_16872_end_0 = const()[name = tensor("op_16872_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_16872_end_mask_0 = const()[name = tensor("op_16872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16872_cast_fp16 = slice_by_index(begin = var_16872_begin_0, end = var_16872_end_0, end_mask = var_16872_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16872_cast_fp16")]; tensor var_16876_begin_0 = const()[name = tensor("op_16876_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_16876_end_0 = const()[name = tensor("op_16876_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_16876_end_mask_0 = const()[name = tensor("op_16876_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16876_cast_fp16 = slice_by_index(begin = var_16876_begin_0, end = var_16876_end_0, end_mask = var_16876_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16876_cast_fp16")]; tensor var_16880_begin_0 = const()[name = tensor("op_16880_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_16880_end_0 = const()[name = tensor("op_16880_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_16880_end_mask_0 = const()[name = tensor("op_16880_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16880_cast_fp16 = slice_by_index(begin = var_16880_begin_0, end = var_16880_end_0, end_mask = var_16880_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16880_cast_fp16")]; tensor var_16884_begin_0 = const()[name = tensor("op_16884_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_16884_end_0 = const()[name = tensor("op_16884_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_16884_end_mask_0 = const()[name = tensor("op_16884_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16884_cast_fp16 = slice_by_index(begin = var_16884_begin_0, end = var_16884_end_0, end_mask = var_16884_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16884_cast_fp16")]; tensor var_16888_begin_0 = const()[name = tensor("op_16888_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_16888_end_0 = const()[name = tensor("op_16888_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_16888_end_mask_0 = const()[name = tensor("op_16888_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16888_cast_fp16 = slice_by_index(begin = var_16888_begin_0, end = var_16888_end_0, end_mask = var_16888_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16888_cast_fp16")]; tensor var_16892_begin_0 = const()[name = tensor("op_16892_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_16892_end_0 = const()[name = tensor("op_16892_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_16892_end_mask_0 = const()[name = tensor("op_16892_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16892_cast_fp16 = slice_by_index(begin = var_16892_begin_0, end = var_16892_end_0, end_mask = var_16892_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16892_cast_fp16")]; tensor var_16896_begin_0 = const()[name = tensor("op_16896_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_16896_end_0 = const()[name = tensor("op_16896_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_16896_end_mask_0 = const()[name = tensor("op_16896_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16896_cast_fp16 = slice_by_index(begin = var_16896_begin_0, end = var_16896_end_0, end_mask = var_16896_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16896_cast_fp16")]; tensor var_16900_begin_0 = const()[name = tensor("op_16900_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_16900_end_0 = const()[name = tensor("op_16900_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_16900_end_mask_0 = const()[name = tensor("op_16900_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16900_cast_fp16 = slice_by_index(begin = var_16900_begin_0, end = var_16900_end_0, end_mask = var_16900_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16900_cast_fp16")]; tensor var_16904_begin_0 = const()[name = tensor("op_16904_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_16904_end_0 = const()[name = tensor("op_16904_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_16904_end_mask_0 = const()[name = tensor("op_16904_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16904_cast_fp16 = slice_by_index(begin = var_16904_begin_0, end = var_16904_end_0, end_mask = var_16904_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16904_cast_fp16")]; tensor var_16908_begin_0 = const()[name = tensor("op_16908_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_16908_end_0 = const()[name = tensor("op_16908_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_16908_end_mask_0 = const()[name = tensor("op_16908_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_16908_cast_fp16 = slice_by_index(begin = var_16908_begin_0, end = var_16908_end_0, end_mask = var_16908_end_mask_0, x = k_21_cast_fp16)[name = tensor("op_16908_cast_fp16")]; tensor var_16910_begin_0 = const()[name = tensor("op_16910_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_16910_end_0 = const()[name = tensor("op_16910_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_16910_end_mask_0 = const()[name = tensor("op_16910_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16910_cast_fp16 = slice_by_index(begin = var_16910_begin_0, end = var_16910_end_0, end_mask = var_16910_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16910_cast_fp16")]; tensor var_16914_begin_0 = const()[name = tensor("op_16914_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_16914_end_0 = const()[name = tensor("op_16914_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_16914_end_mask_0 = const()[name = tensor("op_16914_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16914_cast_fp16 = slice_by_index(begin = var_16914_begin_0, end = var_16914_end_0, end_mask = var_16914_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16914_cast_fp16")]; tensor var_16918_begin_0 = const()[name = tensor("op_16918_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_16918_end_0 = const()[name = tensor("op_16918_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_16918_end_mask_0 = const()[name = tensor("op_16918_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16918_cast_fp16 = slice_by_index(begin = var_16918_begin_0, end = var_16918_end_0, end_mask = var_16918_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16918_cast_fp16")]; tensor var_16922_begin_0 = const()[name = tensor("op_16922_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_16922_end_0 = const()[name = tensor("op_16922_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_16922_end_mask_0 = const()[name = tensor("op_16922_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16922_cast_fp16 = slice_by_index(begin = var_16922_begin_0, end = var_16922_end_0, end_mask = var_16922_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16922_cast_fp16")]; tensor var_16926_begin_0 = const()[name = tensor("op_16926_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_16926_end_0 = const()[name = tensor("op_16926_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_16926_end_mask_0 = const()[name = tensor("op_16926_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16926_cast_fp16 = slice_by_index(begin = var_16926_begin_0, end = var_16926_end_0, end_mask = var_16926_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16926_cast_fp16")]; tensor var_16930_begin_0 = const()[name = tensor("op_16930_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_16930_end_0 = const()[name = tensor("op_16930_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_16930_end_mask_0 = const()[name = tensor("op_16930_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16930_cast_fp16 = slice_by_index(begin = var_16930_begin_0, end = var_16930_end_0, end_mask = var_16930_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16930_cast_fp16")]; tensor var_16934_begin_0 = const()[name = tensor("op_16934_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_16934_end_0 = const()[name = tensor("op_16934_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_16934_end_mask_0 = const()[name = tensor("op_16934_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16934_cast_fp16 = slice_by_index(begin = var_16934_begin_0, end = var_16934_end_0, end_mask = var_16934_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16934_cast_fp16")]; tensor var_16938_begin_0 = const()[name = tensor("op_16938_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_16938_end_0 = const()[name = tensor("op_16938_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_16938_end_mask_0 = const()[name = tensor("op_16938_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16938_cast_fp16 = slice_by_index(begin = var_16938_begin_0, end = var_16938_end_0, end_mask = var_16938_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16938_cast_fp16")]; tensor var_16942_begin_0 = const()[name = tensor("op_16942_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_16942_end_0 = const()[name = tensor("op_16942_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_16942_end_mask_0 = const()[name = tensor("op_16942_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16942_cast_fp16 = slice_by_index(begin = var_16942_begin_0, end = var_16942_end_0, end_mask = var_16942_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16942_cast_fp16")]; tensor var_16946_begin_0 = const()[name = tensor("op_16946_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_16946_end_0 = const()[name = tensor("op_16946_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_16946_end_mask_0 = const()[name = tensor("op_16946_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16946_cast_fp16 = slice_by_index(begin = var_16946_begin_0, end = var_16946_end_0, end_mask = var_16946_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16946_cast_fp16")]; tensor var_16950_begin_0 = const()[name = tensor("op_16950_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_16950_end_0 = const()[name = tensor("op_16950_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_16950_end_mask_0 = const()[name = tensor("op_16950_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16950_cast_fp16 = slice_by_index(begin = var_16950_begin_0, end = var_16950_end_0, end_mask = var_16950_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16950_cast_fp16")]; tensor var_16954_begin_0 = const()[name = tensor("op_16954_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_16954_end_0 = const()[name = tensor("op_16954_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_16954_end_mask_0 = const()[name = tensor("op_16954_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16954_cast_fp16 = slice_by_index(begin = var_16954_begin_0, end = var_16954_end_0, end_mask = var_16954_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16954_cast_fp16")]; tensor var_16958_begin_0 = const()[name = tensor("op_16958_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_16958_end_0 = const()[name = tensor("op_16958_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_16958_end_mask_0 = const()[name = tensor("op_16958_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16958_cast_fp16 = slice_by_index(begin = var_16958_begin_0, end = var_16958_end_0, end_mask = var_16958_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16958_cast_fp16")]; tensor var_16962_begin_0 = const()[name = tensor("op_16962_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_16962_end_0 = const()[name = tensor("op_16962_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_16962_end_mask_0 = const()[name = tensor("op_16962_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16962_cast_fp16 = slice_by_index(begin = var_16962_begin_0, end = var_16962_end_0, end_mask = var_16962_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16962_cast_fp16")]; tensor var_16966_begin_0 = const()[name = tensor("op_16966_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_16966_end_0 = const()[name = tensor("op_16966_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_16966_end_mask_0 = const()[name = tensor("op_16966_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16966_cast_fp16 = slice_by_index(begin = var_16966_begin_0, end = var_16966_end_0, end_mask = var_16966_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16966_cast_fp16")]; tensor var_16970_begin_0 = const()[name = tensor("op_16970_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_16970_end_0 = const()[name = tensor("op_16970_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_16970_end_mask_0 = const()[name = tensor("op_16970_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16970_cast_fp16 = slice_by_index(begin = var_16970_begin_0, end = var_16970_end_0, end_mask = var_16970_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16970_cast_fp16")]; tensor var_16974_begin_0 = const()[name = tensor("op_16974_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_16974_end_0 = const()[name = tensor("op_16974_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_16974_end_mask_0 = const()[name = tensor("op_16974_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16974_cast_fp16 = slice_by_index(begin = var_16974_begin_0, end = var_16974_end_0, end_mask = var_16974_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16974_cast_fp16")]; tensor var_16978_begin_0 = const()[name = tensor("op_16978_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_16978_end_0 = const()[name = tensor("op_16978_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_16978_end_mask_0 = const()[name = tensor("op_16978_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16978_cast_fp16 = slice_by_index(begin = var_16978_begin_0, end = var_16978_end_0, end_mask = var_16978_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16978_cast_fp16")]; tensor var_16982_begin_0 = const()[name = tensor("op_16982_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_16982_end_0 = const()[name = tensor("op_16982_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_16982_end_mask_0 = const()[name = tensor("op_16982_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16982_cast_fp16 = slice_by_index(begin = var_16982_begin_0, end = var_16982_end_0, end_mask = var_16982_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16982_cast_fp16")]; tensor var_16986_begin_0 = const()[name = tensor("op_16986_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_16986_end_0 = const()[name = tensor("op_16986_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_16986_end_mask_0 = const()[name = tensor("op_16986_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_16986_cast_fp16 = slice_by_index(begin = var_16986_begin_0, end = var_16986_end_0, end_mask = var_16986_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16986_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1601_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1601_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1601_equation_0, values = (var_16832_cast_fp16, var_16274_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1601_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1603_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1603_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1603_equation_0, values = (var_16832_cast_fp16, var_16281_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1603_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1605_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1605_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1605_equation_0, values = (var_16832_cast_fp16, var_16288_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1605_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1607_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1607_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1607_equation_0, values = (var_16832_cast_fp16, var_16295_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1607_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1609_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1609_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1609_equation_0, values = (var_16836_cast_fp16, var_16302_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1609_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1611_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1611_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1611_equation_0, values = (var_16836_cast_fp16, var_16309_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1611_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1613_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1613_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1613_equation_0, values = (var_16836_cast_fp16, var_16316_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1613_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1615_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1615_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1615_equation_0, values = (var_16836_cast_fp16, var_16323_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1615_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1617_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1617_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1617_equation_0, values = (var_16840_cast_fp16, var_16330_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1617_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1619_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1619_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1619_equation_0, values = (var_16840_cast_fp16, var_16337_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1619_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1621_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1621_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1621_equation_0, values = (var_16840_cast_fp16, var_16344_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1621_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1623_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1623_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1623_equation_0, values = (var_16840_cast_fp16, var_16351_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1623_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1625_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1625_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1625_equation_0, values = (var_16844_cast_fp16, var_16358_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1625_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1627_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1627_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1627_equation_0, values = (var_16844_cast_fp16, var_16365_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1627_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1629_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1629_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1629_equation_0, values = (var_16844_cast_fp16, var_16372_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1629_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1631_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1631_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1631_equation_0, values = (var_16844_cast_fp16, var_16379_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1631_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1633_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1633_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1633_equation_0, values = (var_16848_cast_fp16, var_16386_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1633_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1635_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1635_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1635_equation_0, values = (var_16848_cast_fp16, var_16393_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1635_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1637_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1637_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1637_equation_0, values = (var_16848_cast_fp16, var_16400_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1637_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1639_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1639_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1639_equation_0, values = (var_16848_cast_fp16, var_16407_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1639_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1641_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1641_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1641_equation_0, values = (var_16852_cast_fp16, var_16414_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1641_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1643_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1643_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1643_equation_0, values = (var_16852_cast_fp16, var_16421_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1643_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1645_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1645_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1645_equation_0, values = (var_16852_cast_fp16, var_16428_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1645_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1647_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1647_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1647_equation_0, values = (var_16852_cast_fp16, var_16435_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1647_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1649_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1649_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1649_equation_0, values = (var_16856_cast_fp16, var_16442_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1649_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1651_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1651_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1651_equation_0, values = (var_16856_cast_fp16, var_16449_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1651_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1653_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1653_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1653_equation_0, values = (var_16856_cast_fp16, var_16456_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1653_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1655_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1655_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1655_equation_0, values = (var_16856_cast_fp16, var_16463_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1655_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1657_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1657_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1657_equation_0, values = (var_16860_cast_fp16, var_16470_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1657_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1659_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1659_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1659_equation_0, values = (var_16860_cast_fp16, var_16477_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1659_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1661_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1661_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1661_equation_0, values = (var_16860_cast_fp16, var_16484_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1661_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1663_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1663_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1663_equation_0, values = (var_16860_cast_fp16, var_16491_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1663_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1665_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1665_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1665_equation_0, values = (var_16864_cast_fp16, var_16498_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1665_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1667_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1667_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1667_equation_0, values = (var_16864_cast_fp16, var_16505_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1667_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1669_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1669_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1669_equation_0, values = (var_16864_cast_fp16, var_16512_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1669_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1671_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1671_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1671_equation_0, values = (var_16864_cast_fp16, var_16519_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1671_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1673_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1673_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1673_equation_0, values = (var_16868_cast_fp16, var_16526_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1673_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1675_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1675_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1675_equation_0, values = (var_16868_cast_fp16, var_16533_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1675_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1677_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1677_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1677_equation_0, values = (var_16868_cast_fp16, var_16540_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1677_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1679_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1679_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1679_equation_0, values = (var_16868_cast_fp16, var_16547_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1679_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1681_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1681_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1681_equation_0, values = (var_16872_cast_fp16, var_16554_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1681_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1683_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1683_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1683_equation_0, values = (var_16872_cast_fp16, var_16561_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1683_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1685_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1685_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1685_equation_0, values = (var_16872_cast_fp16, var_16568_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1685_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1687_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1687_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1687_equation_0, values = (var_16872_cast_fp16, var_16575_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1687_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1689_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1689_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1689_equation_0, values = (var_16876_cast_fp16, var_16582_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1689_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1691_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1691_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1691_equation_0, values = (var_16876_cast_fp16, var_16589_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1691_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1693_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1693_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1693_equation_0, values = (var_16876_cast_fp16, var_16596_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1693_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1695_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1695_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1695_equation_0, values = (var_16876_cast_fp16, var_16603_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1695_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1697_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1697_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1697_equation_0, values = (var_16880_cast_fp16, var_16610_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1697_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1699_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1699_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1699_equation_0, values = (var_16880_cast_fp16, var_16617_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1699_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1701_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1701_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1701_equation_0, values = (var_16880_cast_fp16, var_16624_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1703_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1703_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1703_equation_0, values = (var_16880_cast_fp16, var_16631_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1703_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1705_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1705_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1705_equation_0, values = (var_16884_cast_fp16, var_16638_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1705_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1707_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1707_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1707_equation_0, values = (var_16884_cast_fp16, var_16645_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1707_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1709_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1709_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1709_equation_0, values = (var_16884_cast_fp16, var_16652_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1709_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1711_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1711_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1711_equation_0, values = (var_16884_cast_fp16, var_16659_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1711_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1713_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1713_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1713_equation_0, values = (var_16888_cast_fp16, var_16666_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1713_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1715_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1715_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1715_equation_0, values = (var_16888_cast_fp16, var_16673_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1715_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1717_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1717_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1717_equation_0, values = (var_16888_cast_fp16, var_16680_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1717_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1719_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1719_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1719_equation_0, values = (var_16888_cast_fp16, var_16687_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1719_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1721_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1721_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1721_equation_0, values = (var_16892_cast_fp16, var_16694_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1721_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1723_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1723_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1723_equation_0, values = (var_16892_cast_fp16, var_16701_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1723_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1725_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1725_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1725_equation_0, values = (var_16892_cast_fp16, var_16708_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1725_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1727_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1727_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1727_equation_0, values = (var_16892_cast_fp16, var_16715_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1727_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1729_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1729_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1729_equation_0, values = (var_16896_cast_fp16, var_16722_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1729_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1731_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1731_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1731_equation_0, values = (var_16896_cast_fp16, var_16729_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1731_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1733_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1733_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1733_equation_0, values = (var_16896_cast_fp16, var_16736_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1733_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1735_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1735_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1735_equation_0, values = (var_16896_cast_fp16, var_16743_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1735_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1737_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1737_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1737_equation_0, values = (var_16900_cast_fp16, var_16750_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1737_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1739_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1739_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1739_equation_0, values = (var_16900_cast_fp16, var_16757_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1739_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1741_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1741_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1741_equation_0, values = (var_16900_cast_fp16, var_16764_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1741_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1743_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1743_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1743_equation_0, values = (var_16900_cast_fp16, var_16771_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1743_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1745_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1745_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1745_equation_0, values = (var_16904_cast_fp16, var_16778_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1745_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1747_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1747_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1747_equation_0, values = (var_16904_cast_fp16, var_16785_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1747_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1749_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1749_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1749_equation_0, values = (var_16904_cast_fp16, var_16792_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1749_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1751_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1751_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1751_equation_0, values = (var_16904_cast_fp16, var_16799_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1751_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1753_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1753_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1753_equation_0, values = (var_16908_cast_fp16, var_16806_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1753_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1755_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1755_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1755_equation_0, values = (var_16908_cast_fp16, var_16813_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1755_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1757_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1757_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1757_equation_0, values = (var_16908_cast_fp16, var_16820_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1757_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1759_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1759_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1759_equation_0, values = (var_16908_cast_fp16, var_16827_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1759_cast_fp16")]; tensor var_17149_to_fp16 = const()[name = tensor("op_17149_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1601_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1601_cast_fp16, y = var_17149_to_fp16)[name = tensor("aw_chunk_1601_cast_fp16")]; tensor var_17151_to_fp16 = const()[name = tensor("op_17151_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1603_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1603_cast_fp16, y = var_17151_to_fp16)[name = tensor("aw_chunk_1603_cast_fp16")]; tensor var_17153_to_fp16 = const()[name = tensor("op_17153_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1605_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1605_cast_fp16, y = var_17153_to_fp16)[name = tensor("aw_chunk_1605_cast_fp16")]; tensor var_17155_to_fp16 = const()[name = tensor("op_17155_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1607_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1607_cast_fp16, y = var_17155_to_fp16)[name = tensor("aw_chunk_1607_cast_fp16")]; tensor var_17157_to_fp16 = const()[name = tensor("op_17157_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1609_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1609_cast_fp16, y = var_17157_to_fp16)[name = tensor("aw_chunk_1609_cast_fp16")]; tensor var_17159_to_fp16 = const()[name = tensor("op_17159_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1611_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1611_cast_fp16, y = var_17159_to_fp16)[name = tensor("aw_chunk_1611_cast_fp16")]; tensor var_17161_to_fp16 = const()[name = tensor("op_17161_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1613_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1613_cast_fp16, y = var_17161_to_fp16)[name = tensor("aw_chunk_1613_cast_fp16")]; tensor var_17163_to_fp16 = const()[name = tensor("op_17163_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1615_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1615_cast_fp16, y = var_17163_to_fp16)[name = tensor("aw_chunk_1615_cast_fp16")]; tensor var_17165_to_fp16 = const()[name = tensor("op_17165_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1617_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1617_cast_fp16, y = var_17165_to_fp16)[name = tensor("aw_chunk_1617_cast_fp16")]; tensor var_17167_to_fp16 = const()[name = tensor("op_17167_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1619_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1619_cast_fp16, y = var_17167_to_fp16)[name = tensor("aw_chunk_1619_cast_fp16")]; tensor var_17169_to_fp16 = const()[name = tensor("op_17169_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1621_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1621_cast_fp16, y = var_17169_to_fp16)[name = tensor("aw_chunk_1621_cast_fp16")]; tensor var_17171_to_fp16 = const()[name = tensor("op_17171_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1623_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1623_cast_fp16, y = var_17171_to_fp16)[name = tensor("aw_chunk_1623_cast_fp16")]; tensor var_17173_to_fp16 = const()[name = tensor("op_17173_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1625_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1625_cast_fp16, y = var_17173_to_fp16)[name = tensor("aw_chunk_1625_cast_fp16")]; tensor var_17175_to_fp16 = const()[name = tensor("op_17175_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1627_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1627_cast_fp16, y = var_17175_to_fp16)[name = tensor("aw_chunk_1627_cast_fp16")]; tensor var_17177_to_fp16 = const()[name = tensor("op_17177_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1629_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1629_cast_fp16, y = var_17177_to_fp16)[name = tensor("aw_chunk_1629_cast_fp16")]; tensor var_17179_to_fp16 = const()[name = tensor("op_17179_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1631_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1631_cast_fp16, y = var_17179_to_fp16)[name = tensor("aw_chunk_1631_cast_fp16")]; tensor var_17181_to_fp16 = const()[name = tensor("op_17181_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1633_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1633_cast_fp16, y = var_17181_to_fp16)[name = tensor("aw_chunk_1633_cast_fp16")]; tensor var_17183_to_fp16 = const()[name = tensor("op_17183_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1635_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1635_cast_fp16, y = var_17183_to_fp16)[name = tensor("aw_chunk_1635_cast_fp16")]; tensor var_17185_to_fp16 = const()[name = tensor("op_17185_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1637_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1637_cast_fp16, y = var_17185_to_fp16)[name = tensor("aw_chunk_1637_cast_fp16")]; tensor var_17187_to_fp16 = const()[name = tensor("op_17187_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1639_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1639_cast_fp16, y = var_17187_to_fp16)[name = tensor("aw_chunk_1639_cast_fp16")]; tensor var_17189_to_fp16 = const()[name = tensor("op_17189_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1641_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1641_cast_fp16, y = var_17189_to_fp16)[name = tensor("aw_chunk_1641_cast_fp16")]; tensor var_17191_to_fp16 = const()[name = tensor("op_17191_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1643_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1643_cast_fp16, y = var_17191_to_fp16)[name = tensor("aw_chunk_1643_cast_fp16")]; tensor var_17193_to_fp16 = const()[name = tensor("op_17193_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1645_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1645_cast_fp16, y = var_17193_to_fp16)[name = tensor("aw_chunk_1645_cast_fp16")]; tensor var_17195_to_fp16 = const()[name = tensor("op_17195_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1647_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1647_cast_fp16, y = var_17195_to_fp16)[name = tensor("aw_chunk_1647_cast_fp16")]; tensor var_17197_to_fp16 = const()[name = tensor("op_17197_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1649_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1649_cast_fp16, y = var_17197_to_fp16)[name = tensor("aw_chunk_1649_cast_fp16")]; tensor var_17199_to_fp16 = const()[name = tensor("op_17199_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1651_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1651_cast_fp16, y = var_17199_to_fp16)[name = tensor("aw_chunk_1651_cast_fp16")]; tensor var_17201_to_fp16 = const()[name = tensor("op_17201_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1653_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1653_cast_fp16, y = var_17201_to_fp16)[name = tensor("aw_chunk_1653_cast_fp16")]; tensor var_17203_to_fp16 = const()[name = tensor("op_17203_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1655_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1655_cast_fp16, y = var_17203_to_fp16)[name = tensor("aw_chunk_1655_cast_fp16")]; tensor var_17205_to_fp16 = const()[name = tensor("op_17205_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1657_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1657_cast_fp16, y = var_17205_to_fp16)[name = tensor("aw_chunk_1657_cast_fp16")]; tensor var_17207_to_fp16 = const()[name = tensor("op_17207_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1659_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1659_cast_fp16, y = var_17207_to_fp16)[name = tensor("aw_chunk_1659_cast_fp16")]; tensor var_17209_to_fp16 = const()[name = tensor("op_17209_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1661_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1661_cast_fp16, y = var_17209_to_fp16)[name = tensor("aw_chunk_1661_cast_fp16")]; tensor var_17211_to_fp16 = const()[name = tensor("op_17211_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1663_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1663_cast_fp16, y = var_17211_to_fp16)[name = tensor("aw_chunk_1663_cast_fp16")]; tensor var_17213_to_fp16 = const()[name = tensor("op_17213_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1665_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1665_cast_fp16, y = var_17213_to_fp16)[name = tensor("aw_chunk_1665_cast_fp16")]; tensor var_17215_to_fp16 = const()[name = tensor("op_17215_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1667_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1667_cast_fp16, y = var_17215_to_fp16)[name = tensor("aw_chunk_1667_cast_fp16")]; tensor var_17217_to_fp16 = const()[name = tensor("op_17217_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1669_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1669_cast_fp16, y = var_17217_to_fp16)[name = tensor("aw_chunk_1669_cast_fp16")]; tensor var_17219_to_fp16 = const()[name = tensor("op_17219_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1671_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1671_cast_fp16, y = var_17219_to_fp16)[name = tensor("aw_chunk_1671_cast_fp16")]; tensor var_17221_to_fp16 = const()[name = tensor("op_17221_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1673_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1673_cast_fp16, y = var_17221_to_fp16)[name = tensor("aw_chunk_1673_cast_fp16")]; tensor var_17223_to_fp16 = const()[name = tensor("op_17223_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1675_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1675_cast_fp16, y = var_17223_to_fp16)[name = tensor("aw_chunk_1675_cast_fp16")]; tensor var_17225_to_fp16 = const()[name = tensor("op_17225_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1677_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1677_cast_fp16, y = var_17225_to_fp16)[name = tensor("aw_chunk_1677_cast_fp16")]; tensor var_17227_to_fp16 = const()[name = tensor("op_17227_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1679_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1679_cast_fp16, y = var_17227_to_fp16)[name = tensor("aw_chunk_1679_cast_fp16")]; tensor var_17229_to_fp16 = const()[name = tensor("op_17229_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1681_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1681_cast_fp16, y = var_17229_to_fp16)[name = tensor("aw_chunk_1681_cast_fp16")]; tensor var_17231_to_fp16 = const()[name = tensor("op_17231_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1683_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1683_cast_fp16, y = var_17231_to_fp16)[name = tensor("aw_chunk_1683_cast_fp16")]; tensor var_17233_to_fp16 = const()[name = tensor("op_17233_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1685_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1685_cast_fp16, y = var_17233_to_fp16)[name = tensor("aw_chunk_1685_cast_fp16")]; tensor var_17235_to_fp16 = const()[name = tensor("op_17235_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1687_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1687_cast_fp16, y = var_17235_to_fp16)[name = tensor("aw_chunk_1687_cast_fp16")]; tensor var_17237_to_fp16 = const()[name = tensor("op_17237_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1689_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1689_cast_fp16, y = var_17237_to_fp16)[name = tensor("aw_chunk_1689_cast_fp16")]; tensor var_17239_to_fp16 = const()[name = tensor("op_17239_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1691_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1691_cast_fp16, y = var_17239_to_fp16)[name = tensor("aw_chunk_1691_cast_fp16")]; tensor var_17241_to_fp16 = const()[name = tensor("op_17241_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1693_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1693_cast_fp16, y = var_17241_to_fp16)[name = tensor("aw_chunk_1693_cast_fp16")]; tensor var_17243_to_fp16 = const()[name = tensor("op_17243_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1695_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1695_cast_fp16, y = var_17243_to_fp16)[name = tensor("aw_chunk_1695_cast_fp16")]; tensor var_17245_to_fp16 = const()[name = tensor("op_17245_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1697_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1697_cast_fp16, y = var_17245_to_fp16)[name = tensor("aw_chunk_1697_cast_fp16")]; tensor var_17247_to_fp16 = const()[name = tensor("op_17247_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1699_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1699_cast_fp16, y = var_17247_to_fp16)[name = tensor("aw_chunk_1699_cast_fp16")]; tensor var_17249_to_fp16 = const()[name = tensor("op_17249_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1701_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1701_cast_fp16, y = var_17249_to_fp16)[name = tensor("aw_chunk_1701_cast_fp16")]; tensor var_17251_to_fp16 = const()[name = tensor("op_17251_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1703_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1703_cast_fp16, y = var_17251_to_fp16)[name = tensor("aw_chunk_1703_cast_fp16")]; tensor var_17253_to_fp16 = const()[name = tensor("op_17253_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1705_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1705_cast_fp16, y = var_17253_to_fp16)[name = tensor("aw_chunk_1705_cast_fp16")]; tensor var_17255_to_fp16 = const()[name = tensor("op_17255_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1707_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1707_cast_fp16, y = var_17255_to_fp16)[name = tensor("aw_chunk_1707_cast_fp16")]; tensor var_17257_to_fp16 = const()[name = tensor("op_17257_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1709_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1709_cast_fp16, y = var_17257_to_fp16)[name = tensor("aw_chunk_1709_cast_fp16")]; tensor var_17259_to_fp16 = const()[name = tensor("op_17259_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1711_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1711_cast_fp16, y = var_17259_to_fp16)[name = tensor("aw_chunk_1711_cast_fp16")]; tensor var_17261_to_fp16 = const()[name = tensor("op_17261_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1713_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1713_cast_fp16, y = var_17261_to_fp16)[name = tensor("aw_chunk_1713_cast_fp16")]; tensor var_17263_to_fp16 = const()[name = tensor("op_17263_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1715_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1715_cast_fp16, y = var_17263_to_fp16)[name = tensor("aw_chunk_1715_cast_fp16")]; tensor var_17265_to_fp16 = const()[name = tensor("op_17265_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1717_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1717_cast_fp16, y = var_17265_to_fp16)[name = tensor("aw_chunk_1717_cast_fp16")]; tensor var_17267_to_fp16 = const()[name = tensor("op_17267_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1719_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1719_cast_fp16, y = var_17267_to_fp16)[name = tensor("aw_chunk_1719_cast_fp16")]; tensor var_17269_to_fp16 = const()[name = tensor("op_17269_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1721_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1721_cast_fp16, y = var_17269_to_fp16)[name = tensor("aw_chunk_1721_cast_fp16")]; tensor var_17271_to_fp16 = const()[name = tensor("op_17271_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1723_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1723_cast_fp16, y = var_17271_to_fp16)[name = tensor("aw_chunk_1723_cast_fp16")]; tensor var_17273_to_fp16 = const()[name = tensor("op_17273_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1725_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1725_cast_fp16, y = var_17273_to_fp16)[name = tensor("aw_chunk_1725_cast_fp16")]; tensor var_17275_to_fp16 = const()[name = tensor("op_17275_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1727_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1727_cast_fp16, y = var_17275_to_fp16)[name = tensor("aw_chunk_1727_cast_fp16")]; tensor var_17277_to_fp16 = const()[name = tensor("op_17277_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1729_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1729_cast_fp16, y = var_17277_to_fp16)[name = tensor("aw_chunk_1729_cast_fp16")]; tensor var_17279_to_fp16 = const()[name = tensor("op_17279_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1731_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1731_cast_fp16, y = var_17279_to_fp16)[name = tensor("aw_chunk_1731_cast_fp16")]; tensor var_17281_to_fp16 = const()[name = tensor("op_17281_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1733_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1733_cast_fp16, y = var_17281_to_fp16)[name = tensor("aw_chunk_1733_cast_fp16")]; tensor var_17283_to_fp16 = const()[name = tensor("op_17283_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1735_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1735_cast_fp16, y = var_17283_to_fp16)[name = tensor("aw_chunk_1735_cast_fp16")]; tensor var_17285_to_fp16 = const()[name = tensor("op_17285_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1737_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1737_cast_fp16, y = var_17285_to_fp16)[name = tensor("aw_chunk_1737_cast_fp16")]; tensor var_17287_to_fp16 = const()[name = tensor("op_17287_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1739_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1739_cast_fp16, y = var_17287_to_fp16)[name = tensor("aw_chunk_1739_cast_fp16")]; tensor var_17289_to_fp16 = const()[name = tensor("op_17289_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1741_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1741_cast_fp16, y = var_17289_to_fp16)[name = tensor("aw_chunk_1741_cast_fp16")]; tensor var_17291_to_fp16 = const()[name = tensor("op_17291_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1743_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1743_cast_fp16, y = var_17291_to_fp16)[name = tensor("aw_chunk_1743_cast_fp16")]; tensor var_17293_to_fp16 = const()[name = tensor("op_17293_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1745_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1745_cast_fp16, y = var_17293_to_fp16)[name = tensor("aw_chunk_1745_cast_fp16")]; tensor var_17295_to_fp16 = const()[name = tensor("op_17295_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1747_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1747_cast_fp16, y = var_17295_to_fp16)[name = tensor("aw_chunk_1747_cast_fp16")]; tensor var_17297_to_fp16 = const()[name = tensor("op_17297_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1749_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1749_cast_fp16, y = var_17297_to_fp16)[name = tensor("aw_chunk_1749_cast_fp16")]; tensor var_17299_to_fp16 = const()[name = tensor("op_17299_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1751_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1751_cast_fp16, y = var_17299_to_fp16)[name = tensor("aw_chunk_1751_cast_fp16")]; tensor var_17301_to_fp16 = const()[name = tensor("op_17301_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1753_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1753_cast_fp16, y = var_17301_to_fp16)[name = tensor("aw_chunk_1753_cast_fp16")]; tensor var_17303_to_fp16 = const()[name = tensor("op_17303_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1755_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1755_cast_fp16, y = var_17303_to_fp16)[name = tensor("aw_chunk_1755_cast_fp16")]; tensor var_17305_to_fp16 = const()[name = tensor("op_17305_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1757_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1757_cast_fp16, y = var_17305_to_fp16)[name = tensor("aw_chunk_1757_cast_fp16")]; tensor var_17307_to_fp16 = const()[name = tensor("op_17307_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1759_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1759_cast_fp16, y = var_17307_to_fp16)[name = tensor("aw_chunk_1759_cast_fp16")]; tensor var_17309_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1601_cast_fp16)[name = tensor("op_17309_cast_fp16")]; tensor var_17310_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1603_cast_fp16)[name = tensor("op_17310_cast_fp16")]; tensor var_17311_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1605_cast_fp16)[name = tensor("op_17311_cast_fp16")]; tensor var_17312_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1607_cast_fp16)[name = tensor("op_17312_cast_fp16")]; tensor var_17313_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1609_cast_fp16)[name = tensor("op_17313_cast_fp16")]; tensor var_17314_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1611_cast_fp16)[name = tensor("op_17314_cast_fp16")]; tensor var_17315_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1613_cast_fp16)[name = tensor("op_17315_cast_fp16")]; tensor var_17316_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1615_cast_fp16)[name = tensor("op_17316_cast_fp16")]; tensor var_17317_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1617_cast_fp16)[name = tensor("op_17317_cast_fp16")]; tensor var_17318_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1619_cast_fp16)[name = tensor("op_17318_cast_fp16")]; tensor var_17319_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1621_cast_fp16)[name = tensor("op_17319_cast_fp16")]; tensor var_17320_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1623_cast_fp16)[name = tensor("op_17320_cast_fp16")]; tensor var_17321_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1625_cast_fp16)[name = tensor("op_17321_cast_fp16")]; tensor var_17322_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1627_cast_fp16)[name = tensor("op_17322_cast_fp16")]; tensor var_17323_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1629_cast_fp16)[name = tensor("op_17323_cast_fp16")]; tensor var_17324_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1631_cast_fp16)[name = tensor("op_17324_cast_fp16")]; tensor var_17325_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1633_cast_fp16)[name = tensor("op_17325_cast_fp16")]; tensor var_17326_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1635_cast_fp16)[name = tensor("op_17326_cast_fp16")]; tensor var_17327_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1637_cast_fp16)[name = tensor("op_17327_cast_fp16")]; tensor var_17328_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1639_cast_fp16)[name = tensor("op_17328_cast_fp16")]; tensor var_17329_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1641_cast_fp16)[name = tensor("op_17329_cast_fp16")]; tensor var_17330_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1643_cast_fp16)[name = tensor("op_17330_cast_fp16")]; tensor var_17331_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1645_cast_fp16)[name = tensor("op_17331_cast_fp16")]; tensor var_17332_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1647_cast_fp16)[name = tensor("op_17332_cast_fp16")]; tensor var_17333_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1649_cast_fp16)[name = tensor("op_17333_cast_fp16")]; tensor var_17334_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1651_cast_fp16)[name = tensor("op_17334_cast_fp16")]; tensor var_17335_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1653_cast_fp16)[name = tensor("op_17335_cast_fp16")]; tensor var_17336_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1655_cast_fp16)[name = tensor("op_17336_cast_fp16")]; tensor var_17337_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1657_cast_fp16)[name = tensor("op_17337_cast_fp16")]; tensor var_17338_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1659_cast_fp16)[name = tensor("op_17338_cast_fp16")]; tensor var_17339_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1661_cast_fp16)[name = tensor("op_17339_cast_fp16")]; tensor var_17340_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1663_cast_fp16)[name = tensor("op_17340_cast_fp16")]; tensor var_17341_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1665_cast_fp16)[name = tensor("op_17341_cast_fp16")]; tensor var_17342_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1667_cast_fp16)[name = tensor("op_17342_cast_fp16")]; tensor var_17343_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1669_cast_fp16)[name = tensor("op_17343_cast_fp16")]; tensor var_17344_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1671_cast_fp16)[name = tensor("op_17344_cast_fp16")]; tensor var_17345_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1673_cast_fp16)[name = tensor("op_17345_cast_fp16")]; tensor var_17346_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1675_cast_fp16)[name = tensor("op_17346_cast_fp16")]; tensor var_17347_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1677_cast_fp16)[name = tensor("op_17347_cast_fp16")]; tensor var_17348_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1679_cast_fp16)[name = tensor("op_17348_cast_fp16")]; tensor var_17349_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1681_cast_fp16)[name = tensor("op_17349_cast_fp16")]; tensor var_17350_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1683_cast_fp16)[name = tensor("op_17350_cast_fp16")]; tensor var_17351_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1685_cast_fp16)[name = tensor("op_17351_cast_fp16")]; tensor var_17352_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1687_cast_fp16)[name = tensor("op_17352_cast_fp16")]; tensor var_17353_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1689_cast_fp16)[name = tensor("op_17353_cast_fp16")]; tensor var_17354_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1691_cast_fp16)[name = tensor("op_17354_cast_fp16")]; tensor var_17355_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1693_cast_fp16)[name = tensor("op_17355_cast_fp16")]; tensor var_17356_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1695_cast_fp16)[name = tensor("op_17356_cast_fp16")]; tensor var_17357_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1697_cast_fp16)[name = tensor("op_17357_cast_fp16")]; tensor var_17358_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1699_cast_fp16)[name = tensor("op_17358_cast_fp16")]; tensor var_17359_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1701_cast_fp16)[name = tensor("op_17359_cast_fp16")]; tensor var_17360_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1703_cast_fp16)[name = tensor("op_17360_cast_fp16")]; tensor var_17361_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1705_cast_fp16)[name = tensor("op_17361_cast_fp16")]; tensor var_17362_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1707_cast_fp16)[name = tensor("op_17362_cast_fp16")]; tensor var_17363_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1709_cast_fp16)[name = tensor("op_17363_cast_fp16")]; tensor var_17364_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1711_cast_fp16)[name = tensor("op_17364_cast_fp16")]; tensor var_17365_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1713_cast_fp16)[name = tensor("op_17365_cast_fp16")]; tensor var_17366_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1715_cast_fp16)[name = tensor("op_17366_cast_fp16")]; tensor var_17367_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1717_cast_fp16)[name = tensor("op_17367_cast_fp16")]; tensor var_17368_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1719_cast_fp16)[name = tensor("op_17368_cast_fp16")]; tensor var_17369_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1721_cast_fp16)[name = tensor("op_17369_cast_fp16")]; tensor var_17370_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1723_cast_fp16)[name = tensor("op_17370_cast_fp16")]; tensor var_17371_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1725_cast_fp16)[name = tensor("op_17371_cast_fp16")]; tensor var_17372_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1727_cast_fp16)[name = tensor("op_17372_cast_fp16")]; tensor var_17373_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1729_cast_fp16)[name = tensor("op_17373_cast_fp16")]; tensor var_17374_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1731_cast_fp16)[name = tensor("op_17374_cast_fp16")]; tensor var_17375_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1733_cast_fp16)[name = tensor("op_17375_cast_fp16")]; tensor var_17376_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1735_cast_fp16)[name = tensor("op_17376_cast_fp16")]; tensor var_17377_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1737_cast_fp16)[name = tensor("op_17377_cast_fp16")]; tensor var_17378_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1739_cast_fp16)[name = tensor("op_17378_cast_fp16")]; tensor var_17379_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1741_cast_fp16)[name = tensor("op_17379_cast_fp16")]; tensor var_17380_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1743_cast_fp16)[name = tensor("op_17380_cast_fp16")]; tensor var_17381_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1745_cast_fp16)[name = tensor("op_17381_cast_fp16")]; tensor var_17382_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1747_cast_fp16)[name = tensor("op_17382_cast_fp16")]; tensor var_17383_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1749_cast_fp16)[name = tensor("op_17383_cast_fp16")]; tensor var_17384_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1751_cast_fp16)[name = tensor("op_17384_cast_fp16")]; tensor var_17385_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1753_cast_fp16)[name = tensor("op_17385_cast_fp16")]; tensor var_17386_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1755_cast_fp16)[name = tensor("op_17386_cast_fp16")]; tensor var_17387_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1757_cast_fp16)[name = tensor("op_17387_cast_fp16")]; tensor var_17388_cast_fp16 = softmax(axis = var_16107, x = aw_chunk_1759_cast_fp16)[name = tensor("op_17388_cast_fp16")]; tensor var_17390_equation_0 = const()[name = tensor("op_17390_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17390_cast_fp16 = einsum(equation = var_17390_equation_0, values = (var_16910_cast_fp16, var_17309_cast_fp16))[name = tensor("op_17390_cast_fp16")]; tensor var_17392_equation_0 = const()[name = tensor("op_17392_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17392_cast_fp16 = einsum(equation = var_17392_equation_0, values = (var_16910_cast_fp16, var_17310_cast_fp16))[name = tensor("op_17392_cast_fp16")]; tensor var_17394_equation_0 = const()[name = tensor("op_17394_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17394_cast_fp16 = einsum(equation = var_17394_equation_0, values = (var_16910_cast_fp16, var_17311_cast_fp16))[name = tensor("op_17394_cast_fp16")]; tensor var_17396_equation_0 = const()[name = tensor("op_17396_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17396_cast_fp16 = einsum(equation = var_17396_equation_0, values = (var_16910_cast_fp16, var_17312_cast_fp16))[name = tensor("op_17396_cast_fp16")]; tensor var_17398_equation_0 = const()[name = tensor("op_17398_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17398_cast_fp16 = einsum(equation = var_17398_equation_0, values = (var_16914_cast_fp16, var_17313_cast_fp16))[name = tensor("op_17398_cast_fp16")]; tensor var_17400_equation_0 = const()[name = tensor("op_17400_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17400_cast_fp16 = einsum(equation = var_17400_equation_0, values = (var_16914_cast_fp16, var_17314_cast_fp16))[name = tensor("op_17400_cast_fp16")]; tensor var_17402_equation_0 = const()[name = tensor("op_17402_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17402_cast_fp16 = einsum(equation = var_17402_equation_0, values = (var_16914_cast_fp16, var_17315_cast_fp16))[name = tensor("op_17402_cast_fp16")]; tensor var_17404_equation_0 = const()[name = tensor("op_17404_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17404_cast_fp16 = einsum(equation = var_17404_equation_0, values = (var_16914_cast_fp16, var_17316_cast_fp16))[name = tensor("op_17404_cast_fp16")]; tensor var_17406_equation_0 = const()[name = tensor("op_17406_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17406_cast_fp16 = einsum(equation = var_17406_equation_0, values = (var_16918_cast_fp16, var_17317_cast_fp16))[name = tensor("op_17406_cast_fp16")]; tensor var_17408_equation_0 = const()[name = tensor("op_17408_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17408_cast_fp16 = einsum(equation = var_17408_equation_0, values = (var_16918_cast_fp16, var_17318_cast_fp16))[name = tensor("op_17408_cast_fp16")]; tensor var_17410_equation_0 = const()[name = tensor("op_17410_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17410_cast_fp16 = einsum(equation = var_17410_equation_0, values = (var_16918_cast_fp16, var_17319_cast_fp16))[name = tensor("op_17410_cast_fp16")]; tensor var_17412_equation_0 = const()[name = tensor("op_17412_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17412_cast_fp16 = einsum(equation = var_17412_equation_0, values = (var_16918_cast_fp16, var_17320_cast_fp16))[name = tensor("op_17412_cast_fp16")]; tensor var_17414_equation_0 = const()[name = tensor("op_17414_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17414_cast_fp16 = einsum(equation = var_17414_equation_0, values = (var_16922_cast_fp16, var_17321_cast_fp16))[name = tensor("op_17414_cast_fp16")]; tensor var_17416_equation_0 = const()[name = tensor("op_17416_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17416_cast_fp16 = einsum(equation = var_17416_equation_0, values = (var_16922_cast_fp16, var_17322_cast_fp16))[name = tensor("op_17416_cast_fp16")]; tensor var_17418_equation_0 = const()[name = tensor("op_17418_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17418_cast_fp16 = einsum(equation = var_17418_equation_0, values = (var_16922_cast_fp16, var_17323_cast_fp16))[name = tensor("op_17418_cast_fp16")]; tensor var_17420_equation_0 = const()[name = tensor("op_17420_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17420_cast_fp16 = einsum(equation = var_17420_equation_0, values = (var_16922_cast_fp16, var_17324_cast_fp16))[name = tensor("op_17420_cast_fp16")]; tensor var_17422_equation_0 = const()[name = tensor("op_17422_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17422_cast_fp16 = einsum(equation = var_17422_equation_0, values = (var_16926_cast_fp16, var_17325_cast_fp16))[name = tensor("op_17422_cast_fp16")]; tensor var_17424_equation_0 = const()[name = tensor("op_17424_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17424_cast_fp16 = einsum(equation = var_17424_equation_0, values = (var_16926_cast_fp16, var_17326_cast_fp16))[name = tensor("op_17424_cast_fp16")]; tensor var_17426_equation_0 = const()[name = tensor("op_17426_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17426_cast_fp16 = einsum(equation = var_17426_equation_0, values = (var_16926_cast_fp16, var_17327_cast_fp16))[name = tensor("op_17426_cast_fp16")]; tensor var_17428_equation_0 = const()[name = tensor("op_17428_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17428_cast_fp16 = einsum(equation = var_17428_equation_0, values = (var_16926_cast_fp16, var_17328_cast_fp16))[name = tensor("op_17428_cast_fp16")]; tensor var_17430_equation_0 = const()[name = tensor("op_17430_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17430_cast_fp16 = einsum(equation = var_17430_equation_0, values = (var_16930_cast_fp16, var_17329_cast_fp16))[name = tensor("op_17430_cast_fp16")]; tensor var_17432_equation_0 = const()[name = tensor("op_17432_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17432_cast_fp16 = einsum(equation = var_17432_equation_0, values = (var_16930_cast_fp16, var_17330_cast_fp16))[name = tensor("op_17432_cast_fp16")]; tensor var_17434_equation_0 = const()[name = tensor("op_17434_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17434_cast_fp16 = einsum(equation = var_17434_equation_0, values = (var_16930_cast_fp16, var_17331_cast_fp16))[name = tensor("op_17434_cast_fp16")]; tensor var_17436_equation_0 = const()[name = tensor("op_17436_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17436_cast_fp16 = einsum(equation = var_17436_equation_0, values = (var_16930_cast_fp16, var_17332_cast_fp16))[name = tensor("op_17436_cast_fp16")]; tensor var_17438_equation_0 = const()[name = tensor("op_17438_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17438_cast_fp16 = einsum(equation = var_17438_equation_0, values = (var_16934_cast_fp16, var_17333_cast_fp16))[name = tensor("op_17438_cast_fp16")]; tensor var_17440_equation_0 = const()[name = tensor("op_17440_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17440_cast_fp16 = einsum(equation = var_17440_equation_0, values = (var_16934_cast_fp16, var_17334_cast_fp16))[name = tensor("op_17440_cast_fp16")]; tensor var_17442_equation_0 = const()[name = tensor("op_17442_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17442_cast_fp16 = einsum(equation = var_17442_equation_0, values = (var_16934_cast_fp16, var_17335_cast_fp16))[name = tensor("op_17442_cast_fp16")]; tensor var_17444_equation_0 = const()[name = tensor("op_17444_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17444_cast_fp16 = einsum(equation = var_17444_equation_0, values = (var_16934_cast_fp16, var_17336_cast_fp16))[name = tensor("op_17444_cast_fp16")]; tensor var_17446_equation_0 = const()[name = tensor("op_17446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17446_cast_fp16 = einsum(equation = var_17446_equation_0, values = (var_16938_cast_fp16, var_17337_cast_fp16))[name = tensor("op_17446_cast_fp16")]; tensor var_17448_equation_0 = const()[name = tensor("op_17448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17448_cast_fp16 = einsum(equation = var_17448_equation_0, values = (var_16938_cast_fp16, var_17338_cast_fp16))[name = tensor("op_17448_cast_fp16")]; tensor var_17450_equation_0 = const()[name = tensor("op_17450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17450_cast_fp16 = einsum(equation = var_17450_equation_0, values = (var_16938_cast_fp16, var_17339_cast_fp16))[name = tensor("op_17450_cast_fp16")]; tensor var_17452_equation_0 = const()[name = tensor("op_17452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17452_cast_fp16 = einsum(equation = var_17452_equation_0, values = (var_16938_cast_fp16, var_17340_cast_fp16))[name = tensor("op_17452_cast_fp16")]; tensor var_17454_equation_0 = const()[name = tensor("op_17454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17454_cast_fp16 = einsum(equation = var_17454_equation_0, values = (var_16942_cast_fp16, var_17341_cast_fp16))[name = tensor("op_17454_cast_fp16")]; tensor var_17456_equation_0 = const()[name = tensor("op_17456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17456_cast_fp16 = einsum(equation = var_17456_equation_0, values = (var_16942_cast_fp16, var_17342_cast_fp16))[name = tensor("op_17456_cast_fp16")]; tensor var_17458_equation_0 = const()[name = tensor("op_17458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17458_cast_fp16 = einsum(equation = var_17458_equation_0, values = (var_16942_cast_fp16, var_17343_cast_fp16))[name = tensor("op_17458_cast_fp16")]; tensor var_17460_equation_0 = const()[name = tensor("op_17460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17460_cast_fp16 = einsum(equation = var_17460_equation_0, values = (var_16942_cast_fp16, var_17344_cast_fp16))[name = tensor("op_17460_cast_fp16")]; tensor var_17462_equation_0 = const()[name = tensor("op_17462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17462_cast_fp16 = einsum(equation = var_17462_equation_0, values = (var_16946_cast_fp16, var_17345_cast_fp16))[name = tensor("op_17462_cast_fp16")]; tensor var_17464_equation_0 = const()[name = tensor("op_17464_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17464_cast_fp16 = einsum(equation = var_17464_equation_0, values = (var_16946_cast_fp16, var_17346_cast_fp16))[name = tensor("op_17464_cast_fp16")]; tensor var_17466_equation_0 = const()[name = tensor("op_17466_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17466_cast_fp16 = einsum(equation = var_17466_equation_0, values = (var_16946_cast_fp16, var_17347_cast_fp16))[name = tensor("op_17466_cast_fp16")]; tensor var_17468_equation_0 = const()[name = tensor("op_17468_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17468_cast_fp16 = einsum(equation = var_17468_equation_0, values = (var_16946_cast_fp16, var_17348_cast_fp16))[name = tensor("op_17468_cast_fp16")]; tensor var_17470_equation_0 = const()[name = tensor("op_17470_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17470_cast_fp16 = einsum(equation = var_17470_equation_0, values = (var_16950_cast_fp16, var_17349_cast_fp16))[name = tensor("op_17470_cast_fp16")]; tensor var_17472_equation_0 = const()[name = tensor("op_17472_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17472_cast_fp16 = einsum(equation = var_17472_equation_0, values = (var_16950_cast_fp16, var_17350_cast_fp16))[name = tensor("op_17472_cast_fp16")]; tensor var_17474_equation_0 = const()[name = tensor("op_17474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17474_cast_fp16 = einsum(equation = var_17474_equation_0, values = (var_16950_cast_fp16, var_17351_cast_fp16))[name = tensor("op_17474_cast_fp16")]; tensor var_17476_equation_0 = const()[name = tensor("op_17476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17476_cast_fp16 = einsum(equation = var_17476_equation_0, values = (var_16950_cast_fp16, var_17352_cast_fp16))[name = tensor("op_17476_cast_fp16")]; tensor var_17478_equation_0 = const()[name = tensor("op_17478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17478_cast_fp16 = einsum(equation = var_17478_equation_0, values = (var_16954_cast_fp16, var_17353_cast_fp16))[name = tensor("op_17478_cast_fp16")]; tensor var_17480_equation_0 = const()[name = tensor("op_17480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17480_cast_fp16 = einsum(equation = var_17480_equation_0, values = (var_16954_cast_fp16, var_17354_cast_fp16))[name = tensor("op_17480_cast_fp16")]; tensor var_17482_equation_0 = const()[name = tensor("op_17482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17482_cast_fp16 = einsum(equation = var_17482_equation_0, values = (var_16954_cast_fp16, var_17355_cast_fp16))[name = tensor("op_17482_cast_fp16")]; tensor var_17484_equation_0 = const()[name = tensor("op_17484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17484_cast_fp16 = einsum(equation = var_17484_equation_0, values = (var_16954_cast_fp16, var_17356_cast_fp16))[name = tensor("op_17484_cast_fp16")]; tensor var_17486_equation_0 = const()[name = tensor("op_17486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17486_cast_fp16 = einsum(equation = var_17486_equation_0, values = (var_16958_cast_fp16, var_17357_cast_fp16))[name = tensor("op_17486_cast_fp16")]; tensor var_17488_equation_0 = const()[name = tensor("op_17488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17488_cast_fp16 = einsum(equation = var_17488_equation_0, values = (var_16958_cast_fp16, var_17358_cast_fp16))[name = tensor("op_17488_cast_fp16")]; tensor var_17490_equation_0 = const()[name = tensor("op_17490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17490_cast_fp16 = einsum(equation = var_17490_equation_0, values = (var_16958_cast_fp16, var_17359_cast_fp16))[name = tensor("op_17490_cast_fp16")]; tensor var_17492_equation_0 = const()[name = tensor("op_17492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17492_cast_fp16 = einsum(equation = var_17492_equation_0, values = (var_16958_cast_fp16, var_17360_cast_fp16))[name = tensor("op_17492_cast_fp16")]; tensor var_17494_equation_0 = const()[name = tensor("op_17494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17494_cast_fp16 = einsum(equation = var_17494_equation_0, values = (var_16962_cast_fp16, var_17361_cast_fp16))[name = tensor("op_17494_cast_fp16")]; tensor var_17496_equation_0 = const()[name = tensor("op_17496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17496_cast_fp16 = einsum(equation = var_17496_equation_0, values = (var_16962_cast_fp16, var_17362_cast_fp16))[name = tensor("op_17496_cast_fp16")]; tensor var_17498_equation_0 = const()[name = tensor("op_17498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17498_cast_fp16 = einsum(equation = var_17498_equation_0, values = (var_16962_cast_fp16, var_17363_cast_fp16))[name = tensor("op_17498_cast_fp16")]; tensor var_17500_equation_0 = const()[name = tensor("op_17500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17500_cast_fp16 = einsum(equation = var_17500_equation_0, values = (var_16962_cast_fp16, var_17364_cast_fp16))[name = tensor("op_17500_cast_fp16")]; tensor var_17502_equation_0 = const()[name = tensor("op_17502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17502_cast_fp16 = einsum(equation = var_17502_equation_0, values = (var_16966_cast_fp16, var_17365_cast_fp16))[name = tensor("op_17502_cast_fp16")]; tensor var_17504_equation_0 = const()[name = tensor("op_17504_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17504_cast_fp16 = einsum(equation = var_17504_equation_0, values = (var_16966_cast_fp16, var_17366_cast_fp16))[name = tensor("op_17504_cast_fp16")]; tensor var_17506_equation_0 = const()[name = tensor("op_17506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17506_cast_fp16 = einsum(equation = var_17506_equation_0, values = (var_16966_cast_fp16, var_17367_cast_fp16))[name = tensor("op_17506_cast_fp16")]; tensor var_17508_equation_0 = const()[name = tensor("op_17508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17508_cast_fp16 = einsum(equation = var_17508_equation_0, values = (var_16966_cast_fp16, var_17368_cast_fp16))[name = tensor("op_17508_cast_fp16")]; tensor var_17510_equation_0 = const()[name = tensor("op_17510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17510_cast_fp16 = einsum(equation = var_17510_equation_0, values = (var_16970_cast_fp16, var_17369_cast_fp16))[name = tensor("op_17510_cast_fp16")]; tensor var_17512_equation_0 = const()[name = tensor("op_17512_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17512_cast_fp16 = einsum(equation = var_17512_equation_0, values = (var_16970_cast_fp16, var_17370_cast_fp16))[name = tensor("op_17512_cast_fp16")]; tensor var_17514_equation_0 = const()[name = tensor("op_17514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17514_cast_fp16 = einsum(equation = var_17514_equation_0, values = (var_16970_cast_fp16, var_17371_cast_fp16))[name = tensor("op_17514_cast_fp16")]; tensor var_17516_equation_0 = const()[name = tensor("op_17516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17516_cast_fp16 = einsum(equation = var_17516_equation_0, values = (var_16970_cast_fp16, var_17372_cast_fp16))[name = tensor("op_17516_cast_fp16")]; tensor var_17518_equation_0 = const()[name = tensor("op_17518_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17518_cast_fp16 = einsum(equation = var_17518_equation_0, values = (var_16974_cast_fp16, var_17373_cast_fp16))[name = tensor("op_17518_cast_fp16")]; tensor var_17520_equation_0 = const()[name = tensor("op_17520_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17520_cast_fp16 = einsum(equation = var_17520_equation_0, values = (var_16974_cast_fp16, var_17374_cast_fp16))[name = tensor("op_17520_cast_fp16")]; tensor var_17522_equation_0 = const()[name = tensor("op_17522_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17522_cast_fp16 = einsum(equation = var_17522_equation_0, values = (var_16974_cast_fp16, var_17375_cast_fp16))[name = tensor("op_17522_cast_fp16")]; tensor var_17524_equation_0 = const()[name = tensor("op_17524_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17524_cast_fp16 = einsum(equation = var_17524_equation_0, values = (var_16974_cast_fp16, var_17376_cast_fp16))[name = tensor("op_17524_cast_fp16")]; tensor var_17526_equation_0 = const()[name = tensor("op_17526_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17526_cast_fp16 = einsum(equation = var_17526_equation_0, values = (var_16978_cast_fp16, var_17377_cast_fp16))[name = tensor("op_17526_cast_fp16")]; tensor var_17528_equation_0 = const()[name = tensor("op_17528_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17528_cast_fp16 = einsum(equation = var_17528_equation_0, values = (var_16978_cast_fp16, var_17378_cast_fp16))[name = tensor("op_17528_cast_fp16")]; tensor var_17530_equation_0 = const()[name = tensor("op_17530_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17530_cast_fp16 = einsum(equation = var_17530_equation_0, values = (var_16978_cast_fp16, var_17379_cast_fp16))[name = tensor("op_17530_cast_fp16")]; tensor var_17532_equation_0 = const()[name = tensor("op_17532_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17532_cast_fp16 = einsum(equation = var_17532_equation_0, values = (var_16978_cast_fp16, var_17380_cast_fp16))[name = tensor("op_17532_cast_fp16")]; tensor var_17534_equation_0 = const()[name = tensor("op_17534_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17534_cast_fp16 = einsum(equation = var_17534_equation_0, values = (var_16982_cast_fp16, var_17381_cast_fp16))[name = tensor("op_17534_cast_fp16")]; tensor var_17536_equation_0 = const()[name = tensor("op_17536_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17536_cast_fp16 = einsum(equation = var_17536_equation_0, values = (var_16982_cast_fp16, var_17382_cast_fp16))[name = tensor("op_17536_cast_fp16")]; tensor var_17538_equation_0 = const()[name = tensor("op_17538_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17538_cast_fp16 = einsum(equation = var_17538_equation_0, values = (var_16982_cast_fp16, var_17383_cast_fp16))[name = tensor("op_17538_cast_fp16")]; tensor var_17540_equation_0 = const()[name = tensor("op_17540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17540_cast_fp16 = einsum(equation = var_17540_equation_0, values = (var_16982_cast_fp16, var_17384_cast_fp16))[name = tensor("op_17540_cast_fp16")]; tensor var_17542_equation_0 = const()[name = tensor("op_17542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17542_cast_fp16 = einsum(equation = var_17542_equation_0, values = (var_16986_cast_fp16, var_17385_cast_fp16))[name = tensor("op_17542_cast_fp16")]; tensor var_17544_equation_0 = const()[name = tensor("op_17544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17544_cast_fp16 = einsum(equation = var_17544_equation_0, values = (var_16986_cast_fp16, var_17386_cast_fp16))[name = tensor("op_17544_cast_fp16")]; tensor var_17546_equation_0 = const()[name = tensor("op_17546_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17546_cast_fp16 = einsum(equation = var_17546_equation_0, values = (var_16986_cast_fp16, var_17387_cast_fp16))[name = tensor("op_17546_cast_fp16")]; tensor var_17548_equation_0 = const()[name = tensor("op_17548_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_17548_cast_fp16 = einsum(equation = var_17548_equation_0, values = (var_16986_cast_fp16, var_17388_cast_fp16))[name = tensor("op_17548_cast_fp16")]; tensor var_17550_interleave_0 = const()[name = tensor("op_17550_interleave_0"), val = tensor(false)]; tensor var_17550_cast_fp16 = concat(axis = var_16082, interleave = var_17550_interleave_0, values = (var_17390_cast_fp16, var_17392_cast_fp16, var_17394_cast_fp16, var_17396_cast_fp16))[name = tensor("op_17550_cast_fp16")]; tensor var_17552_interleave_0 = const()[name = tensor("op_17552_interleave_0"), val = tensor(false)]; tensor var_17552_cast_fp16 = concat(axis = var_16082, interleave = var_17552_interleave_0, values = (var_17398_cast_fp16, var_17400_cast_fp16, var_17402_cast_fp16, var_17404_cast_fp16))[name = tensor("op_17552_cast_fp16")]; tensor var_17554_interleave_0 = const()[name = tensor("op_17554_interleave_0"), val = tensor(false)]; tensor var_17554_cast_fp16 = concat(axis = var_16082, interleave = var_17554_interleave_0, values = (var_17406_cast_fp16, var_17408_cast_fp16, var_17410_cast_fp16, var_17412_cast_fp16))[name = tensor("op_17554_cast_fp16")]; tensor var_17556_interleave_0 = const()[name = tensor("op_17556_interleave_0"), val = tensor(false)]; tensor var_17556_cast_fp16 = concat(axis = var_16082, interleave = var_17556_interleave_0, values = (var_17414_cast_fp16, var_17416_cast_fp16, var_17418_cast_fp16, var_17420_cast_fp16))[name = tensor("op_17556_cast_fp16")]; tensor var_17558_interleave_0 = const()[name = tensor("op_17558_interleave_0"), val = tensor(false)]; tensor var_17558_cast_fp16 = concat(axis = var_16082, interleave = var_17558_interleave_0, values = (var_17422_cast_fp16, var_17424_cast_fp16, var_17426_cast_fp16, var_17428_cast_fp16))[name = tensor("op_17558_cast_fp16")]; tensor var_17560_interleave_0 = const()[name = tensor("op_17560_interleave_0"), val = tensor(false)]; tensor var_17560_cast_fp16 = concat(axis = var_16082, interleave = var_17560_interleave_0, values = (var_17430_cast_fp16, var_17432_cast_fp16, var_17434_cast_fp16, var_17436_cast_fp16))[name = tensor("op_17560_cast_fp16")]; tensor var_17562_interleave_0 = const()[name = tensor("op_17562_interleave_0"), val = tensor(false)]; tensor var_17562_cast_fp16 = concat(axis = var_16082, interleave = var_17562_interleave_0, values = (var_17438_cast_fp16, var_17440_cast_fp16, var_17442_cast_fp16, var_17444_cast_fp16))[name = tensor("op_17562_cast_fp16")]; tensor var_17564_interleave_0 = const()[name = tensor("op_17564_interleave_0"), val = tensor(false)]; tensor var_17564_cast_fp16 = concat(axis = var_16082, interleave = var_17564_interleave_0, values = (var_17446_cast_fp16, var_17448_cast_fp16, var_17450_cast_fp16, var_17452_cast_fp16))[name = tensor("op_17564_cast_fp16")]; tensor var_17566_interleave_0 = const()[name = tensor("op_17566_interleave_0"), val = tensor(false)]; tensor var_17566_cast_fp16 = concat(axis = var_16082, interleave = var_17566_interleave_0, values = (var_17454_cast_fp16, var_17456_cast_fp16, var_17458_cast_fp16, var_17460_cast_fp16))[name = tensor("op_17566_cast_fp16")]; tensor var_17568_interleave_0 = const()[name = tensor("op_17568_interleave_0"), val = tensor(false)]; tensor var_17568_cast_fp16 = concat(axis = var_16082, interleave = var_17568_interleave_0, values = (var_17462_cast_fp16, var_17464_cast_fp16, var_17466_cast_fp16, var_17468_cast_fp16))[name = tensor("op_17568_cast_fp16")]; tensor var_17570_interleave_0 = const()[name = tensor("op_17570_interleave_0"), val = tensor(false)]; tensor var_17570_cast_fp16 = concat(axis = var_16082, interleave = var_17570_interleave_0, values = (var_17470_cast_fp16, var_17472_cast_fp16, var_17474_cast_fp16, var_17476_cast_fp16))[name = tensor("op_17570_cast_fp16")]; tensor var_17572_interleave_0 = const()[name = tensor("op_17572_interleave_0"), val = tensor(false)]; tensor var_17572_cast_fp16 = concat(axis = var_16082, interleave = var_17572_interleave_0, values = (var_17478_cast_fp16, var_17480_cast_fp16, var_17482_cast_fp16, var_17484_cast_fp16))[name = tensor("op_17572_cast_fp16")]; tensor var_17574_interleave_0 = const()[name = tensor("op_17574_interleave_0"), val = tensor(false)]; tensor var_17574_cast_fp16 = concat(axis = var_16082, interleave = var_17574_interleave_0, values = (var_17486_cast_fp16, var_17488_cast_fp16, var_17490_cast_fp16, var_17492_cast_fp16))[name = tensor("op_17574_cast_fp16")]; tensor var_17576_interleave_0 = const()[name = tensor("op_17576_interleave_0"), val = tensor(false)]; tensor var_17576_cast_fp16 = concat(axis = var_16082, interleave = var_17576_interleave_0, values = (var_17494_cast_fp16, var_17496_cast_fp16, var_17498_cast_fp16, var_17500_cast_fp16))[name = tensor("op_17576_cast_fp16")]; tensor var_17578_interleave_0 = const()[name = tensor("op_17578_interleave_0"), val = tensor(false)]; tensor var_17578_cast_fp16 = concat(axis = var_16082, interleave = var_17578_interleave_0, values = (var_17502_cast_fp16, var_17504_cast_fp16, var_17506_cast_fp16, var_17508_cast_fp16))[name = tensor("op_17578_cast_fp16")]; tensor var_17580_interleave_0 = const()[name = tensor("op_17580_interleave_0"), val = tensor(false)]; tensor var_17580_cast_fp16 = concat(axis = var_16082, interleave = var_17580_interleave_0, values = (var_17510_cast_fp16, var_17512_cast_fp16, var_17514_cast_fp16, var_17516_cast_fp16))[name = tensor("op_17580_cast_fp16")]; tensor var_17582_interleave_0 = const()[name = tensor("op_17582_interleave_0"), val = tensor(false)]; tensor var_17582_cast_fp16 = concat(axis = var_16082, interleave = var_17582_interleave_0, values = (var_17518_cast_fp16, var_17520_cast_fp16, var_17522_cast_fp16, var_17524_cast_fp16))[name = tensor("op_17582_cast_fp16")]; tensor var_17584_interleave_0 = const()[name = tensor("op_17584_interleave_0"), val = tensor(false)]; tensor var_17584_cast_fp16 = concat(axis = var_16082, interleave = var_17584_interleave_0, values = (var_17526_cast_fp16, var_17528_cast_fp16, var_17530_cast_fp16, var_17532_cast_fp16))[name = tensor("op_17584_cast_fp16")]; tensor var_17586_interleave_0 = const()[name = tensor("op_17586_interleave_0"), val = tensor(false)]; tensor var_17586_cast_fp16 = concat(axis = var_16082, interleave = var_17586_interleave_0, values = (var_17534_cast_fp16, var_17536_cast_fp16, var_17538_cast_fp16, var_17540_cast_fp16))[name = tensor("op_17586_cast_fp16")]; tensor var_17588_interleave_0 = const()[name = tensor("op_17588_interleave_0"), val = tensor(false)]; tensor var_17588_cast_fp16 = concat(axis = var_16082, interleave = var_17588_interleave_0, values = (var_17542_cast_fp16, var_17544_cast_fp16, var_17546_cast_fp16, var_17548_cast_fp16))[name = tensor("op_17588_cast_fp16")]; tensor input_81_interleave_0 = const()[name = tensor("input_81_interleave_0"), val = tensor(false)]; tensor input_81_cast_fp16 = concat(axis = var_16107, interleave = input_81_interleave_0, values = (var_17550_cast_fp16, var_17552_cast_fp16, var_17554_cast_fp16, var_17556_cast_fp16, var_17558_cast_fp16, var_17560_cast_fp16, var_17562_cast_fp16, var_17564_cast_fp16, var_17566_cast_fp16, var_17568_cast_fp16, var_17570_cast_fp16, var_17572_cast_fp16, var_17574_cast_fp16, var_17576_cast_fp16, var_17578_cast_fp16, var_17580_cast_fp16, var_17582_cast_fp16, var_17584_cast_fp16, var_17586_cast_fp16, var_17588_cast_fp16))[name = tensor("input_81_cast_fp16")]; tensor var_17599_pad_type_0 = const()[name = tensor("op_17599_pad_type_0"), val = tensor("valid")]; tensor var_17599_strides_0 = const()[name = tensor("op_17599_strides_0"), val = tensor([1, 1])]; tensor var_17599_pad_0 = const()[name = tensor("op_17599_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17599_dilations_0 = const()[name = tensor("op_17599_dilations_0"), val = tensor([1, 1])]; tensor var_17599_groups_0 = const()[name = tensor("op_17599_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(148439552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149258816))), name = tensor("layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_10_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149258944)))]; tensor var_17599_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_17599_dilations_0, groups = var_17599_groups_0, pad = var_17599_pad_0, pad_type = var_17599_pad_type_0, strides = var_17599_strides_0, weight = layers_10_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_81_cast_fp16)[name = tensor("op_17599_cast_fp16")]; tensor var_17605_pad_type_0 = const()[name = tensor("op_17605_pad_type_0"), val = tensor("valid")]; tensor var_17605_strides_0 = const()[name = tensor("op_17605_strides_0"), val = tensor([1, 1])]; tensor var_17605_pad_0 = const()[name = tensor("op_17605_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17605_dilations_0 = const()[name = tensor("op_17605_dilations_0"), val = tensor([1, 1])]; tensor var_17605_groups_0 = const()[name = tensor("op_17605_groups_0"), val = tensor(1)]; tensor layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149280384))), name = tensor("layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149261568))), shape = tensor([1280, 1280, 1, 1])]; tensor var_17605_cast_fp16 = conv(dilations = var_17605_dilations_0, groups = var_17605_groups_0, pad = var_17605_pad_0, pad_type = var_17605_pad_type_0, strides = var_17605_strides_0, weight = layers_10_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_81_cast_fp16)[name = tensor("op_17605_cast_fp16")]; tensor obj_43_cast_fp16 = add(x = var_17599_cast_fp16, y = var_17605_cast_fp16)[name = tensor("obj_43_cast_fp16")]; tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; tensor out_43_axes_0 = const()[name = tensor("out_43_axes_0"), val = tensor([1])]; tensor var_17616_to_fp16 = const()[name = tensor("op_17616_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_43_cast_fp16 = layer_norm(axes = out_43_axes_0, epsilon = var_17616_to_fp16, x = inputs_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149485248)))]; tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149487872)))]; tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; tensor var_17634_pad_type_0 = const()[name = tensor("op_17634_pad_type_0"), val = tensor("valid")]; tensor var_17634_strides_0 = const()[name = tensor("op_17634_strides_0"), val = tensor([1, 1])]; tensor var_17634_pad_0 = const()[name = tensor("op_17634_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17634_dilations_0 = const()[name = tensor("op_17634_dilations_0"), val = tensor([1, 1])]; tensor var_17634_groups_0 = const()[name = tensor("op_17634_groups_0"), val = tensor(1)]; tensor layers_10_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149490496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152767360))), name = tensor("layers_10_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_10_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152767488)))]; tensor var_17634_cast_fp16 = conv(bias = layers_10_fc1_inlier_module_bias_to_fp16, dilations = var_17634_dilations_0, groups = var_17634_groups_0, pad = var_17634_pad_0, pad_type = var_17634_pad_type_0, strides = var_17634_strides_0, weight = layers_10_fc1_inlier_module_weight_to_fp16_palettized, x = input_83_cast_fp16)[name = tensor("op_17634_cast_fp16")]; tensor var_17640_pad_type_0 = const()[name = tensor("op_17640_pad_type_0"), val = tensor("valid")]; tensor var_17640_strides_0 = const()[name = tensor("op_17640_strides_0"), val = tensor([1, 1])]; tensor var_17640_pad_0 = const()[name = tensor("op_17640_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17640_dilations_0 = const()[name = tensor("op_17640_dilations_0"), val = tensor([1, 1])]; tensor var_17640_groups_0 = const()[name = tensor("op_17640_groups_0"), val = tensor(1)]; tensor layers_10_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152808704))), name = tensor("layers_10_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152777792))), shape = tensor([5120, 1280, 1, 1])]; tensor var_17640_cast_fp16 = conv(dilations = var_17640_dilations_0, groups = var_17640_groups_0, pad = var_17640_pad_0, pad_type = var_17640_pad_type_0, strides = var_17640_strides_0, weight = layers_10_fc1_outlier_module_weight_to_fp16_sparsified, x = input_83_cast_fp16)[name = tensor("op_17640_cast_fp16")]; tensor input_85_cast_fp16 = add(x = var_17634_cast_fp16, y = var_17640_cast_fp16)[name = tensor("input_85_cast_fp16")]; tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; tensor var_17651_pad_type_0 = const()[name = tensor("op_17651_pad_type_0"), val = tensor("valid")]; tensor var_17651_strides_0 = const()[name = tensor("op_17651_strides_0"), val = tensor([1, 1])]; tensor var_17651_pad_0 = const()[name = tensor("op_17651_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17651_dilations_0 = const()[name = tensor("op_17651_dilations_0"), val = tensor([1, 1])]; tensor var_17651_groups_0 = const()[name = tensor("op_17651_groups_0"), val = tensor(1)]; tensor layers_10_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(153627968))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156904832))), name = tensor("layers_10_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_10_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_10_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156904960)))]; tensor var_17651_cast_fp16 = conv(bias = layers_10_fc2_inlier_module_bias_to_fp16, dilations = var_17651_dilations_0, groups = var_17651_groups_0, pad = var_17651_pad_0, pad_type = var_17651_pad_type_0, strides = var_17651_strides_0, weight = layers_10_fc2_inlier_module_weight_to_fp16_palettized, x = input_87_cast_fp16)[name = tensor("op_17651_cast_fp16")]; tensor var_17657_pad_type_0 = const()[name = tensor("op_17657_pad_type_0"), val = tensor("valid")]; tensor var_17657_strides_0 = const()[name = tensor("op_17657_strides_0"), val = tensor([1, 1])]; tensor var_17657_pad_0 = const()[name = tensor("op_17657_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17657_dilations_0 = const()[name = tensor("op_17657_dilations_0"), val = tensor([1, 1])]; tensor var_17657_groups_0 = const()[name = tensor("op_17657_groups_0"), val = tensor(1)]; tensor layers_10_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157157056))), name = tensor("layers_10_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(156907584))), shape = tensor([1280, 5120, 1, 1])]; tensor var_17657_cast_fp16 = conv(dilations = var_17657_dilations_0, groups = var_17657_groups_0, pad = var_17657_pad_0, pad_type = var_17657_pad_type_0, strides = var_17657_strides_0, weight = layers_10_fc2_outlier_module_weight_to_fp16_sparsified, x = input_87_cast_fp16)[name = tensor("op_17657_cast_fp16")]; tensor hidden_states_25_cast_fp16 = add(x = var_17651_cast_fp16, y = var_17657_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; tensor var_17663 = const()[name = tensor("op_17663"), val = tensor(3)]; tensor var_17688 = const()[name = tensor("op_17688"), val = tensor(1)]; tensor out_45_axes_0 = const()[name = tensor("out_45_axes_0"), val = tensor([1])]; tensor var_17705_to_fp16 = const()[name = tensor("op_17705_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_45_cast_fp16 = layer_norm(axes = out_45_axes_0, epsilon = var_17705_to_fp16, x = inputs_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157976320)))]; tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157978944)))]; tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; tensor var_17727_pad_type_0 = const()[name = tensor("op_17727_pad_type_0"), val = tensor("valid")]; tensor var_17727_strides_0 = const()[name = tensor("op_17727_strides_0"), val = tensor([1, 1])]; tensor var_17727_pad_0 = const()[name = tensor("op_17727_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17727_dilations_0 = const()[name = tensor("op_17727_dilations_0"), val = tensor([1, 1])]; tensor var_17727_groups_0 = const()[name = tensor("op_17727_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157981568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158800832))), name = tensor("layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_11_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158800960)))]; tensor var_17727_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_17727_dilations_0, groups = var_17727_groups_0, pad = var_17727_pad_0, pad_type = var_17727_pad_type_0, strides = var_17727_strides_0, weight = layers_11_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_17727_cast_fp16")]; tensor var_17733_pad_type_0 = const()[name = tensor("op_17733_pad_type_0"), val = tensor("valid")]; tensor var_17733_strides_0 = const()[name = tensor("op_17733_strides_0"), val = tensor([1, 1])]; tensor var_17733_pad_0 = const()[name = tensor("op_17733_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17733_dilations_0 = const()[name = tensor("op_17733_dilations_0"), val = tensor([1, 1])]; tensor var_17733_groups_0 = const()[name = tensor("op_17733_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158852864))), name = tensor("layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158803584))), shape = tensor([1280, 1280, 1, 1])]; tensor var_17733_cast_fp16 = conv(dilations = var_17733_dilations_0, groups = var_17733_groups_0, pad = var_17733_pad_0, pad_type = var_17733_pad_type_0, strides = var_17733_strides_0, weight = layers_11_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_17733_cast_fp16")]; tensor query_23_cast_fp16 = add(x = var_17727_cast_fp16, y = var_17733_cast_fp16)[name = tensor("query_23_cast_fp16")]; tensor var_17742_pad_type_0 = const()[name = tensor("op_17742_pad_type_0"), val = tensor("valid")]; tensor var_17742_strides_0 = const()[name = tensor("op_17742_strides_0"), val = tensor([1, 1])]; tensor var_17742_pad_0 = const()[name = tensor("op_17742_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17742_dilations_0 = const()[name = tensor("op_17742_dilations_0"), val = tensor([1, 1])]; tensor var_17742_groups_0 = const()[name = tensor("op_17742_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159057728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159876992))), name = tensor("layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_17742_cast_fp16 = conv(dilations = var_17742_dilations_0, groups = var_17742_groups_0, pad = var_17742_pad_0, pad_type = var_17742_pad_type_0, strides = var_17742_strides_0, weight = layers_11_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_17742_cast_fp16")]; tensor var_17748_pad_type_0 = const()[name = tensor("op_17748_pad_type_0"), val = tensor("valid")]; tensor var_17748_strides_0 = const()[name = tensor("op_17748_strides_0"), val = tensor([1, 1])]; tensor var_17748_pad_0 = const()[name = tensor("op_17748_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17748_dilations_0 = const()[name = tensor("op_17748_dilations_0"), val = tensor([1, 1])]; tensor var_17748_groups_0 = const()[name = tensor("op_17748_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159906048))), name = tensor("layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(159877120))), shape = tensor([1280, 1280, 1, 1])]; tensor var_17748_cast_fp16 = conv(dilations = var_17748_dilations_0, groups = var_17748_groups_0, pad = var_17748_pad_0, pad_type = var_17748_pad_type_0, strides = var_17748_strides_0, weight = layers_11_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_17748_cast_fp16")]; tensor key_23_cast_fp16 = add(x = var_17742_cast_fp16, y = var_17748_cast_fp16)[name = tensor("key_23_cast_fp16")]; tensor var_17758_pad_type_0 = const()[name = tensor("op_17758_pad_type_0"), val = tensor("valid")]; tensor var_17758_strides_0 = const()[name = tensor("op_17758_strides_0"), val = tensor([1, 1])]; tensor var_17758_pad_0 = const()[name = tensor("op_17758_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17758_dilations_0 = const()[name = tensor("op_17758_dilations_0"), val = tensor([1, 1])]; tensor var_17758_groups_0 = const()[name = tensor("op_17758_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160110912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160930176))), name = tensor("layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_11_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160930304)))]; tensor var_17758_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_17758_dilations_0, groups = var_17758_groups_0, pad = var_17758_pad_0, pad_type = var_17758_pad_type_0, strides = var_17758_strides_0, weight = layers_11_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_45_cast_fp16)[name = tensor("op_17758_cast_fp16")]; tensor var_17764_pad_type_0 = const()[name = tensor("op_17764_pad_type_0"), val = tensor("valid")]; tensor var_17764_strides_0 = const()[name = tensor("op_17764_strides_0"), val = tensor([1, 1])]; tensor var_17764_pad_0 = const()[name = tensor("op_17764_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_17764_dilations_0 = const()[name = tensor("op_17764_dilations_0"), val = tensor([1, 1])]; tensor var_17764_groups_0 = const()[name = tensor("op_17764_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160953216))), name = tensor("layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(160932928))), shape = tensor([1280, 1280, 1, 1])]; tensor var_17764_cast_fp16 = conv(dilations = var_17764_dilations_0, groups = var_17764_groups_0, pad = var_17764_pad_0, pad_type = var_17764_pad_type_0, strides = var_17764_strides_0, weight = layers_11_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_45_cast_fp16)[name = tensor("op_17764_cast_fp16")]; tensor value_23_cast_fp16 = add(x = var_17758_cast_fp16, y = var_17764_cast_fp16)[name = tensor("value_23_cast_fp16")]; tensor var_17770_begin_0 = const()[name = tensor("op_17770_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_17770_end_0 = const()[name = tensor("op_17770_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_17770_end_mask_0 = const()[name = tensor("op_17770_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17770_cast_fp16 = slice_by_index(begin = var_17770_begin_0, end = var_17770_end_0, end_mask = var_17770_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17770_cast_fp16")]; tensor var_17774_begin_0 = const()[name = tensor("op_17774_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_17774_end_0 = const()[name = tensor("op_17774_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_17774_end_mask_0 = const()[name = tensor("op_17774_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17774_cast_fp16 = slice_by_index(begin = var_17774_begin_0, end = var_17774_end_0, end_mask = var_17774_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17774_cast_fp16")]; tensor var_17778_begin_0 = const()[name = tensor("op_17778_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_17778_end_0 = const()[name = tensor("op_17778_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_17778_end_mask_0 = const()[name = tensor("op_17778_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17778_cast_fp16 = slice_by_index(begin = var_17778_begin_0, end = var_17778_end_0, end_mask = var_17778_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17778_cast_fp16")]; tensor var_17782_begin_0 = const()[name = tensor("op_17782_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_17782_end_0 = const()[name = tensor("op_17782_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_17782_end_mask_0 = const()[name = tensor("op_17782_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17782_cast_fp16 = slice_by_index(begin = var_17782_begin_0, end = var_17782_end_0, end_mask = var_17782_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17782_cast_fp16")]; tensor var_17786_begin_0 = const()[name = tensor("op_17786_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_17786_end_0 = const()[name = tensor("op_17786_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_17786_end_mask_0 = const()[name = tensor("op_17786_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17786_cast_fp16 = slice_by_index(begin = var_17786_begin_0, end = var_17786_end_0, end_mask = var_17786_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17786_cast_fp16")]; tensor var_17790_begin_0 = const()[name = tensor("op_17790_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_17790_end_0 = const()[name = tensor("op_17790_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_17790_end_mask_0 = const()[name = tensor("op_17790_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17790_cast_fp16 = slice_by_index(begin = var_17790_begin_0, end = var_17790_end_0, end_mask = var_17790_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17790_cast_fp16")]; tensor var_17794_begin_0 = const()[name = tensor("op_17794_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_17794_end_0 = const()[name = tensor("op_17794_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_17794_end_mask_0 = const()[name = tensor("op_17794_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17794_cast_fp16 = slice_by_index(begin = var_17794_begin_0, end = var_17794_end_0, end_mask = var_17794_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17794_cast_fp16")]; tensor var_17798_begin_0 = const()[name = tensor("op_17798_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_17798_end_0 = const()[name = tensor("op_17798_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_17798_end_mask_0 = const()[name = tensor("op_17798_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17798_cast_fp16 = slice_by_index(begin = var_17798_begin_0, end = var_17798_end_0, end_mask = var_17798_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17798_cast_fp16")]; tensor var_17802_begin_0 = const()[name = tensor("op_17802_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_17802_end_0 = const()[name = tensor("op_17802_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_17802_end_mask_0 = const()[name = tensor("op_17802_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17802_cast_fp16 = slice_by_index(begin = var_17802_begin_0, end = var_17802_end_0, end_mask = var_17802_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17802_cast_fp16")]; tensor var_17806_begin_0 = const()[name = tensor("op_17806_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_17806_end_0 = const()[name = tensor("op_17806_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_17806_end_mask_0 = const()[name = tensor("op_17806_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17806_cast_fp16 = slice_by_index(begin = var_17806_begin_0, end = var_17806_end_0, end_mask = var_17806_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17806_cast_fp16")]; tensor var_17810_begin_0 = const()[name = tensor("op_17810_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_17810_end_0 = const()[name = tensor("op_17810_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_17810_end_mask_0 = const()[name = tensor("op_17810_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17810_cast_fp16 = slice_by_index(begin = var_17810_begin_0, end = var_17810_end_0, end_mask = var_17810_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17810_cast_fp16")]; tensor var_17814_begin_0 = const()[name = tensor("op_17814_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_17814_end_0 = const()[name = tensor("op_17814_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_17814_end_mask_0 = const()[name = tensor("op_17814_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17814_cast_fp16 = slice_by_index(begin = var_17814_begin_0, end = var_17814_end_0, end_mask = var_17814_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17814_cast_fp16")]; tensor var_17818_begin_0 = const()[name = tensor("op_17818_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_17818_end_0 = const()[name = tensor("op_17818_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_17818_end_mask_0 = const()[name = tensor("op_17818_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17818_cast_fp16 = slice_by_index(begin = var_17818_begin_0, end = var_17818_end_0, end_mask = var_17818_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17818_cast_fp16")]; tensor var_17822_begin_0 = const()[name = tensor("op_17822_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_17822_end_0 = const()[name = tensor("op_17822_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_17822_end_mask_0 = const()[name = tensor("op_17822_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17822_cast_fp16 = slice_by_index(begin = var_17822_begin_0, end = var_17822_end_0, end_mask = var_17822_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17822_cast_fp16")]; tensor var_17826_begin_0 = const()[name = tensor("op_17826_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_17826_end_0 = const()[name = tensor("op_17826_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_17826_end_mask_0 = const()[name = tensor("op_17826_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17826_cast_fp16 = slice_by_index(begin = var_17826_begin_0, end = var_17826_end_0, end_mask = var_17826_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17826_cast_fp16")]; tensor var_17830_begin_0 = const()[name = tensor("op_17830_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_17830_end_0 = const()[name = tensor("op_17830_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_17830_end_mask_0 = const()[name = tensor("op_17830_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17830_cast_fp16 = slice_by_index(begin = var_17830_begin_0, end = var_17830_end_0, end_mask = var_17830_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17830_cast_fp16")]; tensor var_17834_begin_0 = const()[name = tensor("op_17834_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_17834_end_0 = const()[name = tensor("op_17834_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_17834_end_mask_0 = const()[name = tensor("op_17834_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17834_cast_fp16 = slice_by_index(begin = var_17834_begin_0, end = var_17834_end_0, end_mask = var_17834_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17834_cast_fp16")]; tensor var_17838_begin_0 = const()[name = tensor("op_17838_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_17838_end_0 = const()[name = tensor("op_17838_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_17838_end_mask_0 = const()[name = tensor("op_17838_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17838_cast_fp16 = slice_by_index(begin = var_17838_begin_0, end = var_17838_end_0, end_mask = var_17838_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17838_cast_fp16")]; tensor var_17842_begin_0 = const()[name = tensor("op_17842_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_17842_end_0 = const()[name = tensor("op_17842_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_17842_end_mask_0 = const()[name = tensor("op_17842_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17842_cast_fp16 = slice_by_index(begin = var_17842_begin_0, end = var_17842_end_0, end_mask = var_17842_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17842_cast_fp16")]; tensor var_17846_begin_0 = const()[name = tensor("op_17846_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_17846_end_0 = const()[name = tensor("op_17846_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_17846_end_mask_0 = const()[name = tensor("op_17846_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_17846_cast_fp16 = slice_by_index(begin = var_17846_begin_0, end = var_17846_end_0, end_mask = var_17846_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17846_cast_fp16")]; tensor var_17855_begin_0 = const()[name = tensor("op_17855_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_17855_end_0 = const()[name = tensor("op_17855_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_17855_end_mask_0 = const()[name = tensor("op_17855_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17855_cast_fp16 = slice_by_index(begin = var_17855_begin_0, end = var_17855_end_0, end_mask = var_17855_end_mask_0, x = var_17770_cast_fp16)[name = tensor("op_17855_cast_fp16")]; tensor var_17862_begin_0 = const()[name = tensor("op_17862_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_17862_end_0 = const()[name = tensor("op_17862_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_17862_end_mask_0 = const()[name = tensor("op_17862_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17862_cast_fp16 = slice_by_index(begin = var_17862_begin_0, end = var_17862_end_0, end_mask = var_17862_end_mask_0, x = var_17770_cast_fp16)[name = tensor("op_17862_cast_fp16")]; tensor var_17869_begin_0 = const()[name = tensor("op_17869_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_17869_end_0 = const()[name = tensor("op_17869_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_17869_end_mask_0 = const()[name = tensor("op_17869_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17869_cast_fp16 = slice_by_index(begin = var_17869_begin_0, end = var_17869_end_0, end_mask = var_17869_end_mask_0, x = var_17770_cast_fp16)[name = tensor("op_17869_cast_fp16")]; tensor var_17876_begin_0 = const()[name = tensor("op_17876_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_17876_end_0 = const()[name = tensor("op_17876_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_17876_end_mask_0 = const()[name = tensor("op_17876_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17876_cast_fp16 = slice_by_index(begin = var_17876_begin_0, end = var_17876_end_0, end_mask = var_17876_end_mask_0, x = var_17770_cast_fp16)[name = tensor("op_17876_cast_fp16")]; tensor var_17883_begin_0 = const()[name = tensor("op_17883_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_17883_end_0 = const()[name = tensor("op_17883_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_17883_end_mask_0 = const()[name = tensor("op_17883_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17883_cast_fp16 = slice_by_index(begin = var_17883_begin_0, end = var_17883_end_0, end_mask = var_17883_end_mask_0, x = var_17774_cast_fp16)[name = tensor("op_17883_cast_fp16")]; tensor var_17890_begin_0 = const()[name = tensor("op_17890_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_17890_end_0 = const()[name = tensor("op_17890_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_17890_end_mask_0 = const()[name = tensor("op_17890_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17890_cast_fp16 = slice_by_index(begin = var_17890_begin_0, end = var_17890_end_0, end_mask = var_17890_end_mask_0, x = var_17774_cast_fp16)[name = tensor("op_17890_cast_fp16")]; tensor var_17897_begin_0 = const()[name = tensor("op_17897_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_17897_end_0 = const()[name = tensor("op_17897_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_17897_end_mask_0 = const()[name = tensor("op_17897_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17897_cast_fp16 = slice_by_index(begin = var_17897_begin_0, end = var_17897_end_0, end_mask = var_17897_end_mask_0, x = var_17774_cast_fp16)[name = tensor("op_17897_cast_fp16")]; tensor var_17904_begin_0 = const()[name = tensor("op_17904_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_17904_end_0 = const()[name = tensor("op_17904_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_17904_end_mask_0 = const()[name = tensor("op_17904_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17904_cast_fp16 = slice_by_index(begin = var_17904_begin_0, end = var_17904_end_0, end_mask = var_17904_end_mask_0, x = var_17774_cast_fp16)[name = tensor("op_17904_cast_fp16")]; tensor var_17911_begin_0 = const()[name = tensor("op_17911_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_17911_end_0 = const()[name = tensor("op_17911_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_17911_end_mask_0 = const()[name = tensor("op_17911_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17911_cast_fp16 = slice_by_index(begin = var_17911_begin_0, end = var_17911_end_0, end_mask = var_17911_end_mask_0, x = var_17778_cast_fp16)[name = tensor("op_17911_cast_fp16")]; tensor var_17918_begin_0 = const()[name = tensor("op_17918_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_17918_end_0 = const()[name = tensor("op_17918_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_17918_end_mask_0 = const()[name = tensor("op_17918_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17918_cast_fp16 = slice_by_index(begin = var_17918_begin_0, end = var_17918_end_0, end_mask = var_17918_end_mask_0, x = var_17778_cast_fp16)[name = tensor("op_17918_cast_fp16")]; tensor var_17925_begin_0 = const()[name = tensor("op_17925_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_17925_end_0 = const()[name = tensor("op_17925_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_17925_end_mask_0 = const()[name = tensor("op_17925_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17925_cast_fp16 = slice_by_index(begin = var_17925_begin_0, end = var_17925_end_0, end_mask = var_17925_end_mask_0, x = var_17778_cast_fp16)[name = tensor("op_17925_cast_fp16")]; tensor var_17932_begin_0 = const()[name = tensor("op_17932_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_17932_end_0 = const()[name = tensor("op_17932_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_17932_end_mask_0 = const()[name = tensor("op_17932_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17932_cast_fp16 = slice_by_index(begin = var_17932_begin_0, end = var_17932_end_0, end_mask = var_17932_end_mask_0, x = var_17778_cast_fp16)[name = tensor("op_17932_cast_fp16")]; tensor var_17939_begin_0 = const()[name = tensor("op_17939_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_17939_end_0 = const()[name = tensor("op_17939_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_17939_end_mask_0 = const()[name = tensor("op_17939_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17939_cast_fp16 = slice_by_index(begin = var_17939_begin_0, end = var_17939_end_0, end_mask = var_17939_end_mask_0, x = var_17782_cast_fp16)[name = tensor("op_17939_cast_fp16")]; tensor var_17946_begin_0 = const()[name = tensor("op_17946_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_17946_end_0 = const()[name = tensor("op_17946_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_17946_end_mask_0 = const()[name = tensor("op_17946_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17946_cast_fp16 = slice_by_index(begin = var_17946_begin_0, end = var_17946_end_0, end_mask = var_17946_end_mask_0, x = var_17782_cast_fp16)[name = tensor("op_17946_cast_fp16")]; tensor var_17953_begin_0 = const()[name = tensor("op_17953_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_17953_end_0 = const()[name = tensor("op_17953_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_17953_end_mask_0 = const()[name = tensor("op_17953_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17953_cast_fp16 = slice_by_index(begin = var_17953_begin_0, end = var_17953_end_0, end_mask = var_17953_end_mask_0, x = var_17782_cast_fp16)[name = tensor("op_17953_cast_fp16")]; tensor var_17960_begin_0 = const()[name = tensor("op_17960_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_17960_end_0 = const()[name = tensor("op_17960_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_17960_end_mask_0 = const()[name = tensor("op_17960_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17960_cast_fp16 = slice_by_index(begin = var_17960_begin_0, end = var_17960_end_0, end_mask = var_17960_end_mask_0, x = var_17782_cast_fp16)[name = tensor("op_17960_cast_fp16")]; tensor var_17967_begin_0 = const()[name = tensor("op_17967_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_17967_end_0 = const()[name = tensor("op_17967_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_17967_end_mask_0 = const()[name = tensor("op_17967_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17967_cast_fp16 = slice_by_index(begin = var_17967_begin_0, end = var_17967_end_0, end_mask = var_17967_end_mask_0, x = var_17786_cast_fp16)[name = tensor("op_17967_cast_fp16")]; tensor var_17974_begin_0 = const()[name = tensor("op_17974_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_17974_end_0 = const()[name = tensor("op_17974_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_17974_end_mask_0 = const()[name = tensor("op_17974_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17974_cast_fp16 = slice_by_index(begin = var_17974_begin_0, end = var_17974_end_0, end_mask = var_17974_end_mask_0, x = var_17786_cast_fp16)[name = tensor("op_17974_cast_fp16")]; tensor var_17981_begin_0 = const()[name = tensor("op_17981_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_17981_end_0 = const()[name = tensor("op_17981_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_17981_end_mask_0 = const()[name = tensor("op_17981_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17981_cast_fp16 = slice_by_index(begin = var_17981_begin_0, end = var_17981_end_0, end_mask = var_17981_end_mask_0, x = var_17786_cast_fp16)[name = tensor("op_17981_cast_fp16")]; tensor var_17988_begin_0 = const()[name = tensor("op_17988_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_17988_end_0 = const()[name = tensor("op_17988_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_17988_end_mask_0 = const()[name = tensor("op_17988_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17988_cast_fp16 = slice_by_index(begin = var_17988_begin_0, end = var_17988_end_0, end_mask = var_17988_end_mask_0, x = var_17786_cast_fp16)[name = tensor("op_17988_cast_fp16")]; tensor var_17995_begin_0 = const()[name = tensor("op_17995_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_17995_end_0 = const()[name = tensor("op_17995_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_17995_end_mask_0 = const()[name = tensor("op_17995_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_17995_cast_fp16 = slice_by_index(begin = var_17995_begin_0, end = var_17995_end_0, end_mask = var_17995_end_mask_0, x = var_17790_cast_fp16)[name = tensor("op_17995_cast_fp16")]; tensor var_18002_begin_0 = const()[name = tensor("op_18002_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18002_end_0 = const()[name = tensor("op_18002_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18002_end_mask_0 = const()[name = tensor("op_18002_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18002_cast_fp16 = slice_by_index(begin = var_18002_begin_0, end = var_18002_end_0, end_mask = var_18002_end_mask_0, x = var_17790_cast_fp16)[name = tensor("op_18002_cast_fp16")]; tensor var_18009_begin_0 = const()[name = tensor("op_18009_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18009_end_0 = const()[name = tensor("op_18009_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18009_end_mask_0 = const()[name = tensor("op_18009_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18009_cast_fp16 = slice_by_index(begin = var_18009_begin_0, end = var_18009_end_0, end_mask = var_18009_end_mask_0, x = var_17790_cast_fp16)[name = tensor("op_18009_cast_fp16")]; tensor var_18016_begin_0 = const()[name = tensor("op_18016_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18016_end_0 = const()[name = tensor("op_18016_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18016_end_mask_0 = const()[name = tensor("op_18016_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18016_cast_fp16 = slice_by_index(begin = var_18016_begin_0, end = var_18016_end_0, end_mask = var_18016_end_mask_0, x = var_17790_cast_fp16)[name = tensor("op_18016_cast_fp16")]; tensor var_18023_begin_0 = const()[name = tensor("op_18023_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18023_end_0 = const()[name = tensor("op_18023_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18023_end_mask_0 = const()[name = tensor("op_18023_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18023_cast_fp16 = slice_by_index(begin = var_18023_begin_0, end = var_18023_end_0, end_mask = var_18023_end_mask_0, x = var_17794_cast_fp16)[name = tensor("op_18023_cast_fp16")]; tensor var_18030_begin_0 = const()[name = tensor("op_18030_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18030_end_0 = const()[name = tensor("op_18030_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18030_end_mask_0 = const()[name = tensor("op_18030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18030_cast_fp16 = slice_by_index(begin = var_18030_begin_0, end = var_18030_end_0, end_mask = var_18030_end_mask_0, x = var_17794_cast_fp16)[name = tensor("op_18030_cast_fp16")]; tensor var_18037_begin_0 = const()[name = tensor("op_18037_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18037_end_0 = const()[name = tensor("op_18037_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18037_end_mask_0 = const()[name = tensor("op_18037_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18037_cast_fp16 = slice_by_index(begin = var_18037_begin_0, end = var_18037_end_0, end_mask = var_18037_end_mask_0, x = var_17794_cast_fp16)[name = tensor("op_18037_cast_fp16")]; tensor var_18044_begin_0 = const()[name = tensor("op_18044_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18044_end_0 = const()[name = tensor("op_18044_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18044_end_mask_0 = const()[name = tensor("op_18044_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18044_cast_fp16 = slice_by_index(begin = var_18044_begin_0, end = var_18044_end_0, end_mask = var_18044_end_mask_0, x = var_17794_cast_fp16)[name = tensor("op_18044_cast_fp16")]; tensor var_18051_begin_0 = const()[name = tensor("op_18051_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18051_end_0 = const()[name = tensor("op_18051_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18051_end_mask_0 = const()[name = tensor("op_18051_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18051_cast_fp16 = slice_by_index(begin = var_18051_begin_0, end = var_18051_end_0, end_mask = var_18051_end_mask_0, x = var_17798_cast_fp16)[name = tensor("op_18051_cast_fp16")]; tensor var_18058_begin_0 = const()[name = tensor("op_18058_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18058_end_0 = const()[name = tensor("op_18058_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18058_end_mask_0 = const()[name = tensor("op_18058_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18058_cast_fp16 = slice_by_index(begin = var_18058_begin_0, end = var_18058_end_0, end_mask = var_18058_end_mask_0, x = var_17798_cast_fp16)[name = tensor("op_18058_cast_fp16")]; tensor var_18065_begin_0 = const()[name = tensor("op_18065_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18065_end_0 = const()[name = tensor("op_18065_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18065_end_mask_0 = const()[name = tensor("op_18065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18065_cast_fp16 = slice_by_index(begin = var_18065_begin_0, end = var_18065_end_0, end_mask = var_18065_end_mask_0, x = var_17798_cast_fp16)[name = tensor("op_18065_cast_fp16")]; tensor var_18072_begin_0 = const()[name = tensor("op_18072_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18072_end_0 = const()[name = tensor("op_18072_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18072_end_mask_0 = const()[name = tensor("op_18072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18072_cast_fp16 = slice_by_index(begin = var_18072_begin_0, end = var_18072_end_0, end_mask = var_18072_end_mask_0, x = var_17798_cast_fp16)[name = tensor("op_18072_cast_fp16")]; tensor var_18079_begin_0 = const()[name = tensor("op_18079_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18079_end_0 = const()[name = tensor("op_18079_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18079_end_mask_0 = const()[name = tensor("op_18079_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18079_cast_fp16 = slice_by_index(begin = var_18079_begin_0, end = var_18079_end_0, end_mask = var_18079_end_mask_0, x = var_17802_cast_fp16)[name = tensor("op_18079_cast_fp16")]; tensor var_18086_begin_0 = const()[name = tensor("op_18086_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18086_end_0 = const()[name = tensor("op_18086_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18086_end_mask_0 = const()[name = tensor("op_18086_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18086_cast_fp16 = slice_by_index(begin = var_18086_begin_0, end = var_18086_end_0, end_mask = var_18086_end_mask_0, x = var_17802_cast_fp16)[name = tensor("op_18086_cast_fp16")]; tensor var_18093_begin_0 = const()[name = tensor("op_18093_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18093_end_0 = const()[name = tensor("op_18093_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18093_end_mask_0 = const()[name = tensor("op_18093_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18093_cast_fp16 = slice_by_index(begin = var_18093_begin_0, end = var_18093_end_0, end_mask = var_18093_end_mask_0, x = var_17802_cast_fp16)[name = tensor("op_18093_cast_fp16")]; tensor var_18100_begin_0 = const()[name = tensor("op_18100_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18100_end_0 = const()[name = tensor("op_18100_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18100_end_mask_0 = const()[name = tensor("op_18100_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18100_cast_fp16 = slice_by_index(begin = var_18100_begin_0, end = var_18100_end_0, end_mask = var_18100_end_mask_0, x = var_17802_cast_fp16)[name = tensor("op_18100_cast_fp16")]; tensor var_18107_begin_0 = const()[name = tensor("op_18107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18107_end_0 = const()[name = tensor("op_18107_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18107_end_mask_0 = const()[name = tensor("op_18107_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18107_cast_fp16 = slice_by_index(begin = var_18107_begin_0, end = var_18107_end_0, end_mask = var_18107_end_mask_0, x = var_17806_cast_fp16)[name = tensor("op_18107_cast_fp16")]; tensor var_18114_begin_0 = const()[name = tensor("op_18114_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18114_end_0 = const()[name = tensor("op_18114_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18114_end_mask_0 = const()[name = tensor("op_18114_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18114_cast_fp16 = slice_by_index(begin = var_18114_begin_0, end = var_18114_end_0, end_mask = var_18114_end_mask_0, x = var_17806_cast_fp16)[name = tensor("op_18114_cast_fp16")]; tensor var_18121_begin_0 = const()[name = tensor("op_18121_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18121_end_0 = const()[name = tensor("op_18121_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18121_end_mask_0 = const()[name = tensor("op_18121_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18121_cast_fp16 = slice_by_index(begin = var_18121_begin_0, end = var_18121_end_0, end_mask = var_18121_end_mask_0, x = var_17806_cast_fp16)[name = tensor("op_18121_cast_fp16")]; tensor var_18128_begin_0 = const()[name = tensor("op_18128_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18128_end_0 = const()[name = tensor("op_18128_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18128_end_mask_0 = const()[name = tensor("op_18128_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18128_cast_fp16 = slice_by_index(begin = var_18128_begin_0, end = var_18128_end_0, end_mask = var_18128_end_mask_0, x = var_17806_cast_fp16)[name = tensor("op_18128_cast_fp16")]; tensor var_18135_begin_0 = const()[name = tensor("op_18135_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18135_end_0 = const()[name = tensor("op_18135_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18135_end_mask_0 = const()[name = tensor("op_18135_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18135_cast_fp16 = slice_by_index(begin = var_18135_begin_0, end = var_18135_end_0, end_mask = var_18135_end_mask_0, x = var_17810_cast_fp16)[name = tensor("op_18135_cast_fp16")]; tensor var_18142_begin_0 = const()[name = tensor("op_18142_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18142_end_0 = const()[name = tensor("op_18142_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18142_end_mask_0 = const()[name = tensor("op_18142_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18142_cast_fp16 = slice_by_index(begin = var_18142_begin_0, end = var_18142_end_0, end_mask = var_18142_end_mask_0, x = var_17810_cast_fp16)[name = tensor("op_18142_cast_fp16")]; tensor var_18149_begin_0 = const()[name = tensor("op_18149_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18149_end_0 = const()[name = tensor("op_18149_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18149_end_mask_0 = const()[name = tensor("op_18149_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18149_cast_fp16 = slice_by_index(begin = var_18149_begin_0, end = var_18149_end_0, end_mask = var_18149_end_mask_0, x = var_17810_cast_fp16)[name = tensor("op_18149_cast_fp16")]; tensor var_18156_begin_0 = const()[name = tensor("op_18156_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18156_end_0 = const()[name = tensor("op_18156_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18156_end_mask_0 = const()[name = tensor("op_18156_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18156_cast_fp16 = slice_by_index(begin = var_18156_begin_0, end = var_18156_end_0, end_mask = var_18156_end_mask_0, x = var_17810_cast_fp16)[name = tensor("op_18156_cast_fp16")]; tensor var_18163_begin_0 = const()[name = tensor("op_18163_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18163_end_0 = const()[name = tensor("op_18163_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18163_end_mask_0 = const()[name = tensor("op_18163_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18163_cast_fp16 = slice_by_index(begin = var_18163_begin_0, end = var_18163_end_0, end_mask = var_18163_end_mask_0, x = var_17814_cast_fp16)[name = tensor("op_18163_cast_fp16")]; tensor var_18170_begin_0 = const()[name = tensor("op_18170_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18170_end_0 = const()[name = tensor("op_18170_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18170_end_mask_0 = const()[name = tensor("op_18170_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18170_cast_fp16 = slice_by_index(begin = var_18170_begin_0, end = var_18170_end_0, end_mask = var_18170_end_mask_0, x = var_17814_cast_fp16)[name = tensor("op_18170_cast_fp16")]; tensor var_18177_begin_0 = const()[name = tensor("op_18177_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18177_end_0 = const()[name = tensor("op_18177_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18177_end_mask_0 = const()[name = tensor("op_18177_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18177_cast_fp16 = slice_by_index(begin = var_18177_begin_0, end = var_18177_end_0, end_mask = var_18177_end_mask_0, x = var_17814_cast_fp16)[name = tensor("op_18177_cast_fp16")]; tensor var_18184_begin_0 = const()[name = tensor("op_18184_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18184_end_0 = const()[name = tensor("op_18184_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18184_end_mask_0 = const()[name = tensor("op_18184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18184_cast_fp16 = slice_by_index(begin = var_18184_begin_0, end = var_18184_end_0, end_mask = var_18184_end_mask_0, x = var_17814_cast_fp16)[name = tensor("op_18184_cast_fp16")]; tensor var_18191_begin_0 = const()[name = tensor("op_18191_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18191_end_0 = const()[name = tensor("op_18191_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18191_end_mask_0 = const()[name = tensor("op_18191_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18191_cast_fp16 = slice_by_index(begin = var_18191_begin_0, end = var_18191_end_0, end_mask = var_18191_end_mask_0, x = var_17818_cast_fp16)[name = tensor("op_18191_cast_fp16")]; tensor var_18198_begin_0 = const()[name = tensor("op_18198_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18198_end_0 = const()[name = tensor("op_18198_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18198_end_mask_0 = const()[name = tensor("op_18198_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18198_cast_fp16 = slice_by_index(begin = var_18198_begin_0, end = var_18198_end_0, end_mask = var_18198_end_mask_0, x = var_17818_cast_fp16)[name = tensor("op_18198_cast_fp16")]; tensor var_18205_begin_0 = const()[name = tensor("op_18205_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18205_end_0 = const()[name = tensor("op_18205_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18205_end_mask_0 = const()[name = tensor("op_18205_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18205_cast_fp16 = slice_by_index(begin = var_18205_begin_0, end = var_18205_end_0, end_mask = var_18205_end_mask_0, x = var_17818_cast_fp16)[name = tensor("op_18205_cast_fp16")]; tensor var_18212_begin_0 = const()[name = tensor("op_18212_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18212_end_0 = const()[name = tensor("op_18212_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18212_end_mask_0 = const()[name = tensor("op_18212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18212_cast_fp16 = slice_by_index(begin = var_18212_begin_0, end = var_18212_end_0, end_mask = var_18212_end_mask_0, x = var_17818_cast_fp16)[name = tensor("op_18212_cast_fp16")]; tensor var_18219_begin_0 = const()[name = tensor("op_18219_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18219_end_0 = const()[name = tensor("op_18219_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18219_end_mask_0 = const()[name = tensor("op_18219_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18219_cast_fp16 = slice_by_index(begin = var_18219_begin_0, end = var_18219_end_0, end_mask = var_18219_end_mask_0, x = var_17822_cast_fp16)[name = tensor("op_18219_cast_fp16")]; tensor var_18226_begin_0 = const()[name = tensor("op_18226_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18226_end_0 = const()[name = tensor("op_18226_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18226_end_mask_0 = const()[name = tensor("op_18226_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18226_cast_fp16 = slice_by_index(begin = var_18226_begin_0, end = var_18226_end_0, end_mask = var_18226_end_mask_0, x = var_17822_cast_fp16)[name = tensor("op_18226_cast_fp16")]; tensor var_18233_begin_0 = const()[name = tensor("op_18233_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18233_end_0 = const()[name = tensor("op_18233_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18233_end_mask_0 = const()[name = tensor("op_18233_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18233_cast_fp16 = slice_by_index(begin = var_18233_begin_0, end = var_18233_end_0, end_mask = var_18233_end_mask_0, x = var_17822_cast_fp16)[name = tensor("op_18233_cast_fp16")]; tensor var_18240_begin_0 = const()[name = tensor("op_18240_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18240_end_0 = const()[name = tensor("op_18240_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18240_end_mask_0 = const()[name = tensor("op_18240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18240_cast_fp16 = slice_by_index(begin = var_18240_begin_0, end = var_18240_end_0, end_mask = var_18240_end_mask_0, x = var_17822_cast_fp16)[name = tensor("op_18240_cast_fp16")]; tensor var_18247_begin_0 = const()[name = tensor("op_18247_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18247_end_0 = const()[name = tensor("op_18247_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18247_end_mask_0 = const()[name = tensor("op_18247_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18247_cast_fp16 = slice_by_index(begin = var_18247_begin_0, end = var_18247_end_0, end_mask = var_18247_end_mask_0, x = var_17826_cast_fp16)[name = tensor("op_18247_cast_fp16")]; tensor var_18254_begin_0 = const()[name = tensor("op_18254_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18254_end_0 = const()[name = tensor("op_18254_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18254_end_mask_0 = const()[name = tensor("op_18254_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18254_cast_fp16 = slice_by_index(begin = var_18254_begin_0, end = var_18254_end_0, end_mask = var_18254_end_mask_0, x = var_17826_cast_fp16)[name = tensor("op_18254_cast_fp16")]; tensor var_18261_begin_0 = const()[name = tensor("op_18261_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18261_end_0 = const()[name = tensor("op_18261_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18261_end_mask_0 = const()[name = tensor("op_18261_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18261_cast_fp16 = slice_by_index(begin = var_18261_begin_0, end = var_18261_end_0, end_mask = var_18261_end_mask_0, x = var_17826_cast_fp16)[name = tensor("op_18261_cast_fp16")]; tensor var_18268_begin_0 = const()[name = tensor("op_18268_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18268_end_0 = const()[name = tensor("op_18268_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18268_end_mask_0 = const()[name = tensor("op_18268_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18268_cast_fp16 = slice_by_index(begin = var_18268_begin_0, end = var_18268_end_0, end_mask = var_18268_end_mask_0, x = var_17826_cast_fp16)[name = tensor("op_18268_cast_fp16")]; tensor var_18275_begin_0 = const()[name = tensor("op_18275_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18275_end_0 = const()[name = tensor("op_18275_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18275_end_mask_0 = const()[name = tensor("op_18275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18275_cast_fp16 = slice_by_index(begin = var_18275_begin_0, end = var_18275_end_0, end_mask = var_18275_end_mask_0, x = var_17830_cast_fp16)[name = tensor("op_18275_cast_fp16")]; tensor var_18282_begin_0 = const()[name = tensor("op_18282_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18282_end_0 = const()[name = tensor("op_18282_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18282_end_mask_0 = const()[name = tensor("op_18282_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18282_cast_fp16 = slice_by_index(begin = var_18282_begin_0, end = var_18282_end_0, end_mask = var_18282_end_mask_0, x = var_17830_cast_fp16)[name = tensor("op_18282_cast_fp16")]; tensor var_18289_begin_0 = const()[name = tensor("op_18289_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18289_end_0 = const()[name = tensor("op_18289_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18289_end_mask_0 = const()[name = tensor("op_18289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18289_cast_fp16 = slice_by_index(begin = var_18289_begin_0, end = var_18289_end_0, end_mask = var_18289_end_mask_0, x = var_17830_cast_fp16)[name = tensor("op_18289_cast_fp16")]; tensor var_18296_begin_0 = const()[name = tensor("op_18296_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18296_end_0 = const()[name = tensor("op_18296_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18296_end_mask_0 = const()[name = tensor("op_18296_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18296_cast_fp16 = slice_by_index(begin = var_18296_begin_0, end = var_18296_end_0, end_mask = var_18296_end_mask_0, x = var_17830_cast_fp16)[name = tensor("op_18296_cast_fp16")]; tensor var_18303_begin_0 = const()[name = tensor("op_18303_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18303_end_0 = const()[name = tensor("op_18303_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18303_end_mask_0 = const()[name = tensor("op_18303_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18303_cast_fp16 = slice_by_index(begin = var_18303_begin_0, end = var_18303_end_0, end_mask = var_18303_end_mask_0, x = var_17834_cast_fp16)[name = tensor("op_18303_cast_fp16")]; tensor var_18310_begin_0 = const()[name = tensor("op_18310_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18310_end_0 = const()[name = tensor("op_18310_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18310_end_mask_0 = const()[name = tensor("op_18310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18310_cast_fp16 = slice_by_index(begin = var_18310_begin_0, end = var_18310_end_0, end_mask = var_18310_end_mask_0, x = var_17834_cast_fp16)[name = tensor("op_18310_cast_fp16")]; tensor var_18317_begin_0 = const()[name = tensor("op_18317_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18317_end_0 = const()[name = tensor("op_18317_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18317_end_mask_0 = const()[name = tensor("op_18317_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18317_cast_fp16 = slice_by_index(begin = var_18317_begin_0, end = var_18317_end_0, end_mask = var_18317_end_mask_0, x = var_17834_cast_fp16)[name = tensor("op_18317_cast_fp16")]; tensor var_18324_begin_0 = const()[name = tensor("op_18324_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18324_end_0 = const()[name = tensor("op_18324_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18324_end_mask_0 = const()[name = tensor("op_18324_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18324_cast_fp16 = slice_by_index(begin = var_18324_begin_0, end = var_18324_end_0, end_mask = var_18324_end_mask_0, x = var_17834_cast_fp16)[name = tensor("op_18324_cast_fp16")]; tensor var_18331_begin_0 = const()[name = tensor("op_18331_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18331_end_0 = const()[name = tensor("op_18331_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18331_end_mask_0 = const()[name = tensor("op_18331_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18331_cast_fp16 = slice_by_index(begin = var_18331_begin_0, end = var_18331_end_0, end_mask = var_18331_end_mask_0, x = var_17838_cast_fp16)[name = tensor("op_18331_cast_fp16")]; tensor var_18338_begin_0 = const()[name = tensor("op_18338_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18338_end_0 = const()[name = tensor("op_18338_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18338_end_mask_0 = const()[name = tensor("op_18338_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18338_cast_fp16 = slice_by_index(begin = var_18338_begin_0, end = var_18338_end_0, end_mask = var_18338_end_mask_0, x = var_17838_cast_fp16)[name = tensor("op_18338_cast_fp16")]; tensor var_18345_begin_0 = const()[name = tensor("op_18345_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18345_end_0 = const()[name = tensor("op_18345_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18345_end_mask_0 = const()[name = tensor("op_18345_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18345_cast_fp16 = slice_by_index(begin = var_18345_begin_0, end = var_18345_end_0, end_mask = var_18345_end_mask_0, x = var_17838_cast_fp16)[name = tensor("op_18345_cast_fp16")]; tensor var_18352_begin_0 = const()[name = tensor("op_18352_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18352_end_0 = const()[name = tensor("op_18352_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18352_end_mask_0 = const()[name = tensor("op_18352_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18352_cast_fp16 = slice_by_index(begin = var_18352_begin_0, end = var_18352_end_0, end_mask = var_18352_end_mask_0, x = var_17838_cast_fp16)[name = tensor("op_18352_cast_fp16")]; tensor var_18359_begin_0 = const()[name = tensor("op_18359_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18359_end_0 = const()[name = tensor("op_18359_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18359_end_mask_0 = const()[name = tensor("op_18359_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18359_cast_fp16 = slice_by_index(begin = var_18359_begin_0, end = var_18359_end_0, end_mask = var_18359_end_mask_0, x = var_17842_cast_fp16)[name = tensor("op_18359_cast_fp16")]; tensor var_18366_begin_0 = const()[name = tensor("op_18366_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18366_end_0 = const()[name = tensor("op_18366_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18366_end_mask_0 = const()[name = tensor("op_18366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18366_cast_fp16 = slice_by_index(begin = var_18366_begin_0, end = var_18366_end_0, end_mask = var_18366_end_mask_0, x = var_17842_cast_fp16)[name = tensor("op_18366_cast_fp16")]; tensor var_18373_begin_0 = const()[name = tensor("op_18373_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18373_end_0 = const()[name = tensor("op_18373_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18373_end_mask_0 = const()[name = tensor("op_18373_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18373_cast_fp16 = slice_by_index(begin = var_18373_begin_0, end = var_18373_end_0, end_mask = var_18373_end_mask_0, x = var_17842_cast_fp16)[name = tensor("op_18373_cast_fp16")]; tensor var_18380_begin_0 = const()[name = tensor("op_18380_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18380_end_0 = const()[name = tensor("op_18380_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18380_end_mask_0 = const()[name = tensor("op_18380_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18380_cast_fp16 = slice_by_index(begin = var_18380_begin_0, end = var_18380_end_0, end_mask = var_18380_end_mask_0, x = var_17842_cast_fp16)[name = tensor("op_18380_cast_fp16")]; tensor var_18387_begin_0 = const()[name = tensor("op_18387_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18387_end_0 = const()[name = tensor("op_18387_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_18387_end_mask_0 = const()[name = tensor("op_18387_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18387_cast_fp16 = slice_by_index(begin = var_18387_begin_0, end = var_18387_end_0, end_mask = var_18387_end_mask_0, x = var_17846_cast_fp16)[name = tensor("op_18387_cast_fp16")]; tensor var_18394_begin_0 = const()[name = tensor("op_18394_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_18394_end_0 = const()[name = tensor("op_18394_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_18394_end_mask_0 = const()[name = tensor("op_18394_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18394_cast_fp16 = slice_by_index(begin = var_18394_begin_0, end = var_18394_end_0, end_mask = var_18394_end_mask_0, x = var_17846_cast_fp16)[name = tensor("op_18394_cast_fp16")]; tensor var_18401_begin_0 = const()[name = tensor("op_18401_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_18401_end_0 = const()[name = tensor("op_18401_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_18401_end_mask_0 = const()[name = tensor("op_18401_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18401_cast_fp16 = slice_by_index(begin = var_18401_begin_0, end = var_18401_end_0, end_mask = var_18401_end_mask_0, x = var_17846_cast_fp16)[name = tensor("op_18401_cast_fp16")]; tensor var_18408_begin_0 = const()[name = tensor("op_18408_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_18408_end_0 = const()[name = tensor("op_18408_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18408_end_mask_0 = const()[name = tensor("op_18408_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18408_cast_fp16 = slice_by_index(begin = var_18408_begin_0, end = var_18408_end_0, end_mask = var_18408_end_mask_0, x = var_17846_cast_fp16)[name = tensor("op_18408_cast_fp16")]; tensor k_23_perm_0 = const()[name = tensor("k_23_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_18413_begin_0 = const()[name = tensor("op_18413_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18413_end_0 = const()[name = tensor("op_18413_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_18413_end_mask_0 = const()[name = tensor("op_18413_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = key_23_cast_fp16)[name = tensor("transpose_20")]; tensor var_18413_cast_fp16 = slice_by_index(begin = var_18413_begin_0, end = var_18413_end_0, end_mask = var_18413_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18413_cast_fp16")]; tensor var_18417_begin_0 = const()[name = tensor("op_18417_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_18417_end_0 = const()[name = tensor("op_18417_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_18417_end_mask_0 = const()[name = tensor("op_18417_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18417_cast_fp16 = slice_by_index(begin = var_18417_begin_0, end = var_18417_end_0, end_mask = var_18417_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18417_cast_fp16")]; tensor var_18421_begin_0 = const()[name = tensor("op_18421_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_18421_end_0 = const()[name = tensor("op_18421_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_18421_end_mask_0 = const()[name = tensor("op_18421_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18421_cast_fp16 = slice_by_index(begin = var_18421_begin_0, end = var_18421_end_0, end_mask = var_18421_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18421_cast_fp16")]; tensor var_18425_begin_0 = const()[name = tensor("op_18425_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_18425_end_0 = const()[name = tensor("op_18425_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_18425_end_mask_0 = const()[name = tensor("op_18425_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18425_cast_fp16 = slice_by_index(begin = var_18425_begin_0, end = var_18425_end_0, end_mask = var_18425_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18425_cast_fp16")]; tensor var_18429_begin_0 = const()[name = tensor("op_18429_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_18429_end_0 = const()[name = tensor("op_18429_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_18429_end_mask_0 = const()[name = tensor("op_18429_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18429_cast_fp16 = slice_by_index(begin = var_18429_begin_0, end = var_18429_end_0, end_mask = var_18429_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18429_cast_fp16")]; tensor var_18433_begin_0 = const()[name = tensor("op_18433_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_18433_end_0 = const()[name = tensor("op_18433_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_18433_end_mask_0 = const()[name = tensor("op_18433_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18433_cast_fp16 = slice_by_index(begin = var_18433_begin_0, end = var_18433_end_0, end_mask = var_18433_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18433_cast_fp16")]; tensor var_18437_begin_0 = const()[name = tensor("op_18437_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_18437_end_0 = const()[name = tensor("op_18437_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_18437_end_mask_0 = const()[name = tensor("op_18437_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18437_cast_fp16 = slice_by_index(begin = var_18437_begin_0, end = var_18437_end_0, end_mask = var_18437_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18437_cast_fp16")]; tensor var_18441_begin_0 = const()[name = tensor("op_18441_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_18441_end_0 = const()[name = tensor("op_18441_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_18441_end_mask_0 = const()[name = tensor("op_18441_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18441_cast_fp16 = slice_by_index(begin = var_18441_begin_0, end = var_18441_end_0, end_mask = var_18441_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18441_cast_fp16")]; tensor var_18445_begin_0 = const()[name = tensor("op_18445_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_18445_end_0 = const()[name = tensor("op_18445_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_18445_end_mask_0 = const()[name = tensor("op_18445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18445_cast_fp16 = slice_by_index(begin = var_18445_begin_0, end = var_18445_end_0, end_mask = var_18445_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18445_cast_fp16")]; tensor var_18449_begin_0 = const()[name = tensor("op_18449_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_18449_end_0 = const()[name = tensor("op_18449_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_18449_end_mask_0 = const()[name = tensor("op_18449_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18449_cast_fp16 = slice_by_index(begin = var_18449_begin_0, end = var_18449_end_0, end_mask = var_18449_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18449_cast_fp16")]; tensor var_18453_begin_0 = const()[name = tensor("op_18453_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_18453_end_0 = const()[name = tensor("op_18453_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_18453_end_mask_0 = const()[name = tensor("op_18453_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18453_cast_fp16 = slice_by_index(begin = var_18453_begin_0, end = var_18453_end_0, end_mask = var_18453_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18453_cast_fp16")]; tensor var_18457_begin_0 = const()[name = tensor("op_18457_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_18457_end_0 = const()[name = tensor("op_18457_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_18457_end_mask_0 = const()[name = tensor("op_18457_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18457_cast_fp16 = slice_by_index(begin = var_18457_begin_0, end = var_18457_end_0, end_mask = var_18457_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18457_cast_fp16")]; tensor var_18461_begin_0 = const()[name = tensor("op_18461_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_18461_end_0 = const()[name = tensor("op_18461_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_18461_end_mask_0 = const()[name = tensor("op_18461_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18461_cast_fp16 = slice_by_index(begin = var_18461_begin_0, end = var_18461_end_0, end_mask = var_18461_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18461_cast_fp16")]; tensor var_18465_begin_0 = const()[name = tensor("op_18465_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_18465_end_0 = const()[name = tensor("op_18465_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_18465_end_mask_0 = const()[name = tensor("op_18465_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18465_cast_fp16 = slice_by_index(begin = var_18465_begin_0, end = var_18465_end_0, end_mask = var_18465_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18465_cast_fp16")]; tensor var_18469_begin_0 = const()[name = tensor("op_18469_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_18469_end_0 = const()[name = tensor("op_18469_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_18469_end_mask_0 = const()[name = tensor("op_18469_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18469_cast_fp16 = slice_by_index(begin = var_18469_begin_0, end = var_18469_end_0, end_mask = var_18469_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18469_cast_fp16")]; tensor var_18473_begin_0 = const()[name = tensor("op_18473_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_18473_end_0 = const()[name = tensor("op_18473_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_18473_end_mask_0 = const()[name = tensor("op_18473_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18473_cast_fp16 = slice_by_index(begin = var_18473_begin_0, end = var_18473_end_0, end_mask = var_18473_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18473_cast_fp16")]; tensor var_18477_begin_0 = const()[name = tensor("op_18477_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_18477_end_0 = const()[name = tensor("op_18477_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_18477_end_mask_0 = const()[name = tensor("op_18477_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18477_cast_fp16 = slice_by_index(begin = var_18477_begin_0, end = var_18477_end_0, end_mask = var_18477_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18477_cast_fp16")]; tensor var_18481_begin_0 = const()[name = tensor("op_18481_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_18481_end_0 = const()[name = tensor("op_18481_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_18481_end_mask_0 = const()[name = tensor("op_18481_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18481_cast_fp16 = slice_by_index(begin = var_18481_begin_0, end = var_18481_end_0, end_mask = var_18481_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18481_cast_fp16")]; tensor var_18485_begin_0 = const()[name = tensor("op_18485_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_18485_end_0 = const()[name = tensor("op_18485_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_18485_end_mask_0 = const()[name = tensor("op_18485_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18485_cast_fp16 = slice_by_index(begin = var_18485_begin_0, end = var_18485_end_0, end_mask = var_18485_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18485_cast_fp16")]; tensor var_18489_begin_0 = const()[name = tensor("op_18489_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_18489_end_0 = const()[name = tensor("op_18489_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_18489_end_mask_0 = const()[name = tensor("op_18489_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_18489_cast_fp16 = slice_by_index(begin = var_18489_begin_0, end = var_18489_end_0, end_mask = var_18489_end_mask_0, x = k_23_cast_fp16)[name = tensor("op_18489_cast_fp16")]; tensor var_18491_begin_0 = const()[name = tensor("op_18491_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_18491_end_0 = const()[name = tensor("op_18491_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_18491_end_mask_0 = const()[name = tensor("op_18491_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18491_cast_fp16 = slice_by_index(begin = var_18491_begin_0, end = var_18491_end_0, end_mask = var_18491_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18491_cast_fp16")]; tensor var_18495_begin_0 = const()[name = tensor("op_18495_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_18495_end_0 = const()[name = tensor("op_18495_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_18495_end_mask_0 = const()[name = tensor("op_18495_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18495_cast_fp16 = slice_by_index(begin = var_18495_begin_0, end = var_18495_end_0, end_mask = var_18495_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18495_cast_fp16")]; tensor var_18499_begin_0 = const()[name = tensor("op_18499_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_18499_end_0 = const()[name = tensor("op_18499_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_18499_end_mask_0 = const()[name = tensor("op_18499_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18499_cast_fp16 = slice_by_index(begin = var_18499_begin_0, end = var_18499_end_0, end_mask = var_18499_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18499_cast_fp16")]; tensor var_18503_begin_0 = const()[name = tensor("op_18503_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_18503_end_0 = const()[name = tensor("op_18503_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_18503_end_mask_0 = const()[name = tensor("op_18503_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18503_cast_fp16 = slice_by_index(begin = var_18503_begin_0, end = var_18503_end_0, end_mask = var_18503_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18503_cast_fp16")]; tensor var_18507_begin_0 = const()[name = tensor("op_18507_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_18507_end_0 = const()[name = tensor("op_18507_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_18507_end_mask_0 = const()[name = tensor("op_18507_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18507_cast_fp16 = slice_by_index(begin = var_18507_begin_0, end = var_18507_end_0, end_mask = var_18507_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18507_cast_fp16")]; tensor var_18511_begin_0 = const()[name = tensor("op_18511_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_18511_end_0 = const()[name = tensor("op_18511_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_18511_end_mask_0 = const()[name = tensor("op_18511_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18511_cast_fp16 = slice_by_index(begin = var_18511_begin_0, end = var_18511_end_0, end_mask = var_18511_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18511_cast_fp16")]; tensor var_18515_begin_0 = const()[name = tensor("op_18515_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_18515_end_0 = const()[name = tensor("op_18515_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_18515_end_mask_0 = const()[name = tensor("op_18515_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18515_cast_fp16 = slice_by_index(begin = var_18515_begin_0, end = var_18515_end_0, end_mask = var_18515_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18515_cast_fp16")]; tensor var_18519_begin_0 = const()[name = tensor("op_18519_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_18519_end_0 = const()[name = tensor("op_18519_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_18519_end_mask_0 = const()[name = tensor("op_18519_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18519_cast_fp16 = slice_by_index(begin = var_18519_begin_0, end = var_18519_end_0, end_mask = var_18519_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18519_cast_fp16")]; tensor var_18523_begin_0 = const()[name = tensor("op_18523_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_18523_end_0 = const()[name = tensor("op_18523_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_18523_end_mask_0 = const()[name = tensor("op_18523_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18523_cast_fp16 = slice_by_index(begin = var_18523_begin_0, end = var_18523_end_0, end_mask = var_18523_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18523_cast_fp16")]; tensor var_18527_begin_0 = const()[name = tensor("op_18527_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_18527_end_0 = const()[name = tensor("op_18527_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_18527_end_mask_0 = const()[name = tensor("op_18527_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18527_cast_fp16 = slice_by_index(begin = var_18527_begin_0, end = var_18527_end_0, end_mask = var_18527_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18527_cast_fp16")]; tensor var_18531_begin_0 = const()[name = tensor("op_18531_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_18531_end_0 = const()[name = tensor("op_18531_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_18531_end_mask_0 = const()[name = tensor("op_18531_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18531_cast_fp16 = slice_by_index(begin = var_18531_begin_0, end = var_18531_end_0, end_mask = var_18531_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18531_cast_fp16")]; tensor var_18535_begin_0 = const()[name = tensor("op_18535_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_18535_end_0 = const()[name = tensor("op_18535_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_18535_end_mask_0 = const()[name = tensor("op_18535_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18535_cast_fp16 = slice_by_index(begin = var_18535_begin_0, end = var_18535_end_0, end_mask = var_18535_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18535_cast_fp16")]; tensor var_18539_begin_0 = const()[name = tensor("op_18539_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_18539_end_0 = const()[name = tensor("op_18539_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_18539_end_mask_0 = const()[name = tensor("op_18539_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18539_cast_fp16 = slice_by_index(begin = var_18539_begin_0, end = var_18539_end_0, end_mask = var_18539_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18539_cast_fp16")]; tensor var_18543_begin_0 = const()[name = tensor("op_18543_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_18543_end_0 = const()[name = tensor("op_18543_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_18543_end_mask_0 = const()[name = tensor("op_18543_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18543_cast_fp16 = slice_by_index(begin = var_18543_begin_0, end = var_18543_end_0, end_mask = var_18543_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18543_cast_fp16")]; tensor var_18547_begin_0 = const()[name = tensor("op_18547_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_18547_end_0 = const()[name = tensor("op_18547_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_18547_end_mask_0 = const()[name = tensor("op_18547_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18547_cast_fp16 = slice_by_index(begin = var_18547_begin_0, end = var_18547_end_0, end_mask = var_18547_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18547_cast_fp16")]; tensor var_18551_begin_0 = const()[name = tensor("op_18551_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_18551_end_0 = const()[name = tensor("op_18551_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_18551_end_mask_0 = const()[name = tensor("op_18551_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18551_cast_fp16 = slice_by_index(begin = var_18551_begin_0, end = var_18551_end_0, end_mask = var_18551_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18551_cast_fp16")]; tensor var_18555_begin_0 = const()[name = tensor("op_18555_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_18555_end_0 = const()[name = tensor("op_18555_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_18555_end_mask_0 = const()[name = tensor("op_18555_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18555_cast_fp16 = slice_by_index(begin = var_18555_begin_0, end = var_18555_end_0, end_mask = var_18555_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18555_cast_fp16")]; tensor var_18559_begin_0 = const()[name = tensor("op_18559_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_18559_end_0 = const()[name = tensor("op_18559_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_18559_end_mask_0 = const()[name = tensor("op_18559_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18559_cast_fp16 = slice_by_index(begin = var_18559_begin_0, end = var_18559_end_0, end_mask = var_18559_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18559_cast_fp16")]; tensor var_18563_begin_0 = const()[name = tensor("op_18563_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_18563_end_0 = const()[name = tensor("op_18563_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_18563_end_mask_0 = const()[name = tensor("op_18563_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18563_cast_fp16 = slice_by_index(begin = var_18563_begin_0, end = var_18563_end_0, end_mask = var_18563_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18563_cast_fp16")]; tensor var_18567_begin_0 = const()[name = tensor("op_18567_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_18567_end_0 = const()[name = tensor("op_18567_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_18567_end_mask_0 = const()[name = tensor("op_18567_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_18567_cast_fp16 = slice_by_index(begin = var_18567_begin_0, end = var_18567_end_0, end_mask = var_18567_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_18567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1761_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1761_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1761_equation_0, values = (var_18413_cast_fp16, var_17855_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1761_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1763_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1763_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1763_equation_0, values = (var_18413_cast_fp16, var_17862_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1763_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1765_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1765_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1765_equation_0, values = (var_18413_cast_fp16, var_17869_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1765_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1767_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1767_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1767_equation_0, values = (var_18413_cast_fp16, var_17876_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1767_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1769_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1769_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1769_equation_0, values = (var_18417_cast_fp16, var_17883_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1769_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1771_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1771_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1771_equation_0, values = (var_18417_cast_fp16, var_17890_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1771_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1773_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1773_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1773_equation_0, values = (var_18417_cast_fp16, var_17897_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1773_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1775_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1775_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1775_equation_0, values = (var_18417_cast_fp16, var_17904_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1775_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1777_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1777_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1777_equation_0, values = (var_18421_cast_fp16, var_17911_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1777_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1779_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1779_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1779_equation_0, values = (var_18421_cast_fp16, var_17918_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1779_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1781_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1781_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1781_equation_0, values = (var_18421_cast_fp16, var_17925_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1781_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1783_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1783_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1783_equation_0, values = (var_18421_cast_fp16, var_17932_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1783_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1785_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1785_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1785_equation_0, values = (var_18425_cast_fp16, var_17939_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1785_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1787_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1787_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1787_equation_0, values = (var_18425_cast_fp16, var_17946_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1787_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1789_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1789_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1789_equation_0, values = (var_18425_cast_fp16, var_17953_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1789_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1791_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1791_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1791_equation_0, values = (var_18425_cast_fp16, var_17960_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1791_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1793_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1793_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1793_equation_0, values = (var_18429_cast_fp16, var_17967_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1793_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1795_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1795_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1795_equation_0, values = (var_18429_cast_fp16, var_17974_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1795_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1797_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1797_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1797_equation_0, values = (var_18429_cast_fp16, var_17981_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1797_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1799_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1799_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1799_equation_0, values = (var_18429_cast_fp16, var_17988_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1799_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1801_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1801_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1801_equation_0, values = (var_18433_cast_fp16, var_17995_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1801_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1803_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1803_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1803_equation_0, values = (var_18433_cast_fp16, var_18002_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1803_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1805_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1805_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1805_equation_0, values = (var_18433_cast_fp16, var_18009_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1805_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1807_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1807_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1807_equation_0, values = (var_18433_cast_fp16, var_18016_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1807_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1809_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1809_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1809_equation_0, values = (var_18437_cast_fp16, var_18023_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1809_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1811_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1811_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1811_equation_0, values = (var_18437_cast_fp16, var_18030_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1811_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1813_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1813_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1813_equation_0, values = (var_18437_cast_fp16, var_18037_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1813_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1815_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1815_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1815_equation_0, values = (var_18437_cast_fp16, var_18044_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1815_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1817_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1817_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1817_equation_0, values = (var_18441_cast_fp16, var_18051_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1817_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1819_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1819_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1819_equation_0, values = (var_18441_cast_fp16, var_18058_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1819_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1821_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1821_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1821_equation_0, values = (var_18441_cast_fp16, var_18065_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1821_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1823_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1823_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1823_equation_0, values = (var_18441_cast_fp16, var_18072_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1823_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1825_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1825_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1825_equation_0, values = (var_18445_cast_fp16, var_18079_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1825_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1827_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1827_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1827_equation_0, values = (var_18445_cast_fp16, var_18086_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1827_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1829_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1829_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1829_equation_0, values = (var_18445_cast_fp16, var_18093_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1829_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1831_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1831_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1831_equation_0, values = (var_18445_cast_fp16, var_18100_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1831_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1833_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1833_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1833_equation_0, values = (var_18449_cast_fp16, var_18107_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1833_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1835_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1835_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1835_equation_0, values = (var_18449_cast_fp16, var_18114_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1835_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1837_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1837_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1837_equation_0, values = (var_18449_cast_fp16, var_18121_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1837_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1839_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1839_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1839_equation_0, values = (var_18449_cast_fp16, var_18128_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1839_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1841_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1841_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1841_equation_0, values = (var_18453_cast_fp16, var_18135_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1841_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1843_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1843_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1843_equation_0, values = (var_18453_cast_fp16, var_18142_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1843_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1845_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1845_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1845_equation_0, values = (var_18453_cast_fp16, var_18149_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1845_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1847_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1847_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1847_equation_0, values = (var_18453_cast_fp16, var_18156_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1847_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1849_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1849_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1849_equation_0, values = (var_18457_cast_fp16, var_18163_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1849_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1851_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1851_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1851_equation_0, values = (var_18457_cast_fp16, var_18170_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1851_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1853_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1853_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1853_equation_0, values = (var_18457_cast_fp16, var_18177_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1853_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1855_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1855_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1855_equation_0, values = (var_18457_cast_fp16, var_18184_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1855_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1857_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1857_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1857_equation_0, values = (var_18461_cast_fp16, var_18191_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1857_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1859_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1859_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1859_equation_0, values = (var_18461_cast_fp16, var_18198_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1859_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1861_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1861_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1861_equation_0, values = (var_18461_cast_fp16, var_18205_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1861_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1863_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1863_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1863_equation_0, values = (var_18461_cast_fp16, var_18212_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1863_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1865_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1865_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1865_equation_0, values = (var_18465_cast_fp16, var_18219_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1865_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1867_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1867_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1867_equation_0, values = (var_18465_cast_fp16, var_18226_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1867_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1869_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1869_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1869_equation_0, values = (var_18465_cast_fp16, var_18233_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1869_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1871_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1871_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1871_equation_0, values = (var_18465_cast_fp16, var_18240_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1871_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1873_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1873_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1873_equation_0, values = (var_18469_cast_fp16, var_18247_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1873_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1875_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1875_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1875_equation_0, values = (var_18469_cast_fp16, var_18254_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1875_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1877_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1877_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1877_equation_0, values = (var_18469_cast_fp16, var_18261_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1877_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1879_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1879_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1879_equation_0, values = (var_18469_cast_fp16, var_18268_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1879_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1881_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1881_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1881_equation_0, values = (var_18473_cast_fp16, var_18275_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1881_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1883_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1883_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1883_equation_0, values = (var_18473_cast_fp16, var_18282_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1883_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1885_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1885_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1885_equation_0, values = (var_18473_cast_fp16, var_18289_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1885_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1887_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1887_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1887_equation_0, values = (var_18473_cast_fp16, var_18296_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1887_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1889_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1889_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1889_equation_0, values = (var_18477_cast_fp16, var_18303_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1889_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1891_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1891_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1891_equation_0, values = (var_18477_cast_fp16, var_18310_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1891_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1893_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1893_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1893_equation_0, values = (var_18477_cast_fp16, var_18317_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1893_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1895_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1895_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1895_equation_0, values = (var_18477_cast_fp16, var_18324_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1895_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1897_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1897_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1897_equation_0, values = (var_18481_cast_fp16, var_18331_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1897_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1899_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1899_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1899_equation_0, values = (var_18481_cast_fp16, var_18338_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1899_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1901_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1901_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1901_equation_0, values = (var_18481_cast_fp16, var_18345_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1901_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1903_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1903_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1903_equation_0, values = (var_18481_cast_fp16, var_18352_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1903_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1905_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1905_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1905_equation_0, values = (var_18485_cast_fp16, var_18359_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1905_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1907_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1907_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1907_equation_0, values = (var_18485_cast_fp16, var_18366_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1907_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1909_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1909_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1909_equation_0, values = (var_18485_cast_fp16, var_18373_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1909_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1911_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1911_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1911_equation_0, values = (var_18485_cast_fp16, var_18380_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1911_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1913_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1913_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1913_equation_0, values = (var_18489_cast_fp16, var_18387_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1913_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1915_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1915_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1915_equation_0, values = (var_18489_cast_fp16, var_18394_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1915_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1917_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1917_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1917_equation_0, values = (var_18489_cast_fp16, var_18401_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1917_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1919_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1919_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1919_equation_0, values = (var_18489_cast_fp16, var_18408_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1919_cast_fp16")]; tensor var_18730_to_fp16 = const()[name = tensor("op_18730_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1761_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1761_cast_fp16, y = var_18730_to_fp16)[name = tensor("aw_chunk_1761_cast_fp16")]; tensor var_18732_to_fp16 = const()[name = tensor("op_18732_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1763_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1763_cast_fp16, y = var_18732_to_fp16)[name = tensor("aw_chunk_1763_cast_fp16")]; tensor var_18734_to_fp16 = const()[name = tensor("op_18734_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1765_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1765_cast_fp16, y = var_18734_to_fp16)[name = tensor("aw_chunk_1765_cast_fp16")]; tensor var_18736_to_fp16 = const()[name = tensor("op_18736_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1767_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1767_cast_fp16, y = var_18736_to_fp16)[name = tensor("aw_chunk_1767_cast_fp16")]; tensor var_18738_to_fp16 = const()[name = tensor("op_18738_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1769_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1769_cast_fp16, y = var_18738_to_fp16)[name = tensor("aw_chunk_1769_cast_fp16")]; tensor var_18740_to_fp16 = const()[name = tensor("op_18740_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1771_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1771_cast_fp16, y = var_18740_to_fp16)[name = tensor("aw_chunk_1771_cast_fp16")]; tensor var_18742_to_fp16 = const()[name = tensor("op_18742_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1773_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1773_cast_fp16, y = var_18742_to_fp16)[name = tensor("aw_chunk_1773_cast_fp16")]; tensor var_18744_to_fp16 = const()[name = tensor("op_18744_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1775_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1775_cast_fp16, y = var_18744_to_fp16)[name = tensor("aw_chunk_1775_cast_fp16")]; tensor var_18746_to_fp16 = const()[name = tensor("op_18746_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1777_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1777_cast_fp16, y = var_18746_to_fp16)[name = tensor("aw_chunk_1777_cast_fp16")]; tensor var_18748_to_fp16 = const()[name = tensor("op_18748_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1779_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1779_cast_fp16, y = var_18748_to_fp16)[name = tensor("aw_chunk_1779_cast_fp16")]; tensor var_18750_to_fp16 = const()[name = tensor("op_18750_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1781_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1781_cast_fp16, y = var_18750_to_fp16)[name = tensor("aw_chunk_1781_cast_fp16")]; tensor var_18752_to_fp16 = const()[name = tensor("op_18752_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1783_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1783_cast_fp16, y = var_18752_to_fp16)[name = tensor("aw_chunk_1783_cast_fp16")]; tensor var_18754_to_fp16 = const()[name = tensor("op_18754_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1785_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1785_cast_fp16, y = var_18754_to_fp16)[name = tensor("aw_chunk_1785_cast_fp16")]; tensor var_18756_to_fp16 = const()[name = tensor("op_18756_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1787_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1787_cast_fp16, y = var_18756_to_fp16)[name = tensor("aw_chunk_1787_cast_fp16")]; tensor var_18758_to_fp16 = const()[name = tensor("op_18758_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1789_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1789_cast_fp16, y = var_18758_to_fp16)[name = tensor("aw_chunk_1789_cast_fp16")]; tensor var_18760_to_fp16 = const()[name = tensor("op_18760_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1791_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1791_cast_fp16, y = var_18760_to_fp16)[name = tensor("aw_chunk_1791_cast_fp16")]; tensor var_18762_to_fp16 = const()[name = tensor("op_18762_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1793_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1793_cast_fp16, y = var_18762_to_fp16)[name = tensor("aw_chunk_1793_cast_fp16")]; tensor var_18764_to_fp16 = const()[name = tensor("op_18764_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1795_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1795_cast_fp16, y = var_18764_to_fp16)[name = tensor("aw_chunk_1795_cast_fp16")]; tensor var_18766_to_fp16 = const()[name = tensor("op_18766_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1797_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1797_cast_fp16, y = var_18766_to_fp16)[name = tensor("aw_chunk_1797_cast_fp16")]; tensor var_18768_to_fp16 = const()[name = tensor("op_18768_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1799_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1799_cast_fp16, y = var_18768_to_fp16)[name = tensor("aw_chunk_1799_cast_fp16")]; tensor var_18770_to_fp16 = const()[name = tensor("op_18770_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1801_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1801_cast_fp16, y = var_18770_to_fp16)[name = tensor("aw_chunk_1801_cast_fp16")]; tensor var_18772_to_fp16 = const()[name = tensor("op_18772_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1803_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1803_cast_fp16, y = var_18772_to_fp16)[name = tensor("aw_chunk_1803_cast_fp16")]; tensor var_18774_to_fp16 = const()[name = tensor("op_18774_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1805_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1805_cast_fp16, y = var_18774_to_fp16)[name = tensor("aw_chunk_1805_cast_fp16")]; tensor var_18776_to_fp16 = const()[name = tensor("op_18776_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1807_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1807_cast_fp16, y = var_18776_to_fp16)[name = tensor("aw_chunk_1807_cast_fp16")]; tensor var_18778_to_fp16 = const()[name = tensor("op_18778_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1809_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1809_cast_fp16, y = var_18778_to_fp16)[name = tensor("aw_chunk_1809_cast_fp16")]; tensor var_18780_to_fp16 = const()[name = tensor("op_18780_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1811_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1811_cast_fp16, y = var_18780_to_fp16)[name = tensor("aw_chunk_1811_cast_fp16")]; tensor var_18782_to_fp16 = const()[name = tensor("op_18782_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1813_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1813_cast_fp16, y = var_18782_to_fp16)[name = tensor("aw_chunk_1813_cast_fp16")]; tensor var_18784_to_fp16 = const()[name = tensor("op_18784_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1815_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1815_cast_fp16, y = var_18784_to_fp16)[name = tensor("aw_chunk_1815_cast_fp16")]; tensor var_18786_to_fp16 = const()[name = tensor("op_18786_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1817_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1817_cast_fp16, y = var_18786_to_fp16)[name = tensor("aw_chunk_1817_cast_fp16")]; tensor var_18788_to_fp16 = const()[name = tensor("op_18788_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1819_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1819_cast_fp16, y = var_18788_to_fp16)[name = tensor("aw_chunk_1819_cast_fp16")]; tensor var_18790_to_fp16 = const()[name = tensor("op_18790_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1821_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1821_cast_fp16, y = var_18790_to_fp16)[name = tensor("aw_chunk_1821_cast_fp16")]; tensor var_18792_to_fp16 = const()[name = tensor("op_18792_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1823_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1823_cast_fp16, y = var_18792_to_fp16)[name = tensor("aw_chunk_1823_cast_fp16")]; tensor var_18794_to_fp16 = const()[name = tensor("op_18794_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1825_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1825_cast_fp16, y = var_18794_to_fp16)[name = tensor("aw_chunk_1825_cast_fp16")]; tensor var_18796_to_fp16 = const()[name = tensor("op_18796_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1827_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1827_cast_fp16, y = var_18796_to_fp16)[name = tensor("aw_chunk_1827_cast_fp16")]; tensor var_18798_to_fp16 = const()[name = tensor("op_18798_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1829_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1829_cast_fp16, y = var_18798_to_fp16)[name = tensor("aw_chunk_1829_cast_fp16")]; tensor var_18800_to_fp16 = const()[name = tensor("op_18800_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1831_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1831_cast_fp16, y = var_18800_to_fp16)[name = tensor("aw_chunk_1831_cast_fp16")]; tensor var_18802_to_fp16 = const()[name = tensor("op_18802_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1833_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1833_cast_fp16, y = var_18802_to_fp16)[name = tensor("aw_chunk_1833_cast_fp16")]; tensor var_18804_to_fp16 = const()[name = tensor("op_18804_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1835_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1835_cast_fp16, y = var_18804_to_fp16)[name = tensor("aw_chunk_1835_cast_fp16")]; tensor var_18806_to_fp16 = const()[name = tensor("op_18806_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1837_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1837_cast_fp16, y = var_18806_to_fp16)[name = tensor("aw_chunk_1837_cast_fp16")]; tensor var_18808_to_fp16 = const()[name = tensor("op_18808_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1839_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1839_cast_fp16, y = var_18808_to_fp16)[name = tensor("aw_chunk_1839_cast_fp16")]; tensor var_18810_to_fp16 = const()[name = tensor("op_18810_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1841_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1841_cast_fp16, y = var_18810_to_fp16)[name = tensor("aw_chunk_1841_cast_fp16")]; tensor var_18812_to_fp16 = const()[name = tensor("op_18812_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1843_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1843_cast_fp16, y = var_18812_to_fp16)[name = tensor("aw_chunk_1843_cast_fp16")]; tensor var_18814_to_fp16 = const()[name = tensor("op_18814_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1845_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1845_cast_fp16, y = var_18814_to_fp16)[name = tensor("aw_chunk_1845_cast_fp16")]; tensor var_18816_to_fp16 = const()[name = tensor("op_18816_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1847_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1847_cast_fp16, y = var_18816_to_fp16)[name = tensor("aw_chunk_1847_cast_fp16")]; tensor var_18818_to_fp16 = const()[name = tensor("op_18818_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1849_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1849_cast_fp16, y = var_18818_to_fp16)[name = tensor("aw_chunk_1849_cast_fp16")]; tensor var_18820_to_fp16 = const()[name = tensor("op_18820_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1851_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1851_cast_fp16, y = var_18820_to_fp16)[name = tensor("aw_chunk_1851_cast_fp16")]; tensor var_18822_to_fp16 = const()[name = tensor("op_18822_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1853_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1853_cast_fp16, y = var_18822_to_fp16)[name = tensor("aw_chunk_1853_cast_fp16")]; tensor var_18824_to_fp16 = const()[name = tensor("op_18824_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1855_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1855_cast_fp16, y = var_18824_to_fp16)[name = tensor("aw_chunk_1855_cast_fp16")]; tensor var_18826_to_fp16 = const()[name = tensor("op_18826_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1857_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1857_cast_fp16, y = var_18826_to_fp16)[name = tensor("aw_chunk_1857_cast_fp16")]; tensor var_18828_to_fp16 = const()[name = tensor("op_18828_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1859_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1859_cast_fp16, y = var_18828_to_fp16)[name = tensor("aw_chunk_1859_cast_fp16")]; tensor var_18830_to_fp16 = const()[name = tensor("op_18830_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1861_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1861_cast_fp16, y = var_18830_to_fp16)[name = tensor("aw_chunk_1861_cast_fp16")]; tensor var_18832_to_fp16 = const()[name = tensor("op_18832_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1863_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1863_cast_fp16, y = var_18832_to_fp16)[name = tensor("aw_chunk_1863_cast_fp16")]; tensor var_18834_to_fp16 = const()[name = tensor("op_18834_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1865_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1865_cast_fp16, y = var_18834_to_fp16)[name = tensor("aw_chunk_1865_cast_fp16")]; tensor var_18836_to_fp16 = const()[name = tensor("op_18836_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1867_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1867_cast_fp16, y = var_18836_to_fp16)[name = tensor("aw_chunk_1867_cast_fp16")]; tensor var_18838_to_fp16 = const()[name = tensor("op_18838_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1869_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1869_cast_fp16, y = var_18838_to_fp16)[name = tensor("aw_chunk_1869_cast_fp16")]; tensor var_18840_to_fp16 = const()[name = tensor("op_18840_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1871_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1871_cast_fp16, y = var_18840_to_fp16)[name = tensor("aw_chunk_1871_cast_fp16")]; tensor var_18842_to_fp16 = const()[name = tensor("op_18842_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1873_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1873_cast_fp16, y = var_18842_to_fp16)[name = tensor("aw_chunk_1873_cast_fp16")]; tensor var_18844_to_fp16 = const()[name = tensor("op_18844_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1875_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1875_cast_fp16, y = var_18844_to_fp16)[name = tensor("aw_chunk_1875_cast_fp16")]; tensor var_18846_to_fp16 = const()[name = tensor("op_18846_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1877_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1877_cast_fp16, y = var_18846_to_fp16)[name = tensor("aw_chunk_1877_cast_fp16")]; tensor var_18848_to_fp16 = const()[name = tensor("op_18848_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1879_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1879_cast_fp16, y = var_18848_to_fp16)[name = tensor("aw_chunk_1879_cast_fp16")]; tensor var_18850_to_fp16 = const()[name = tensor("op_18850_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1881_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1881_cast_fp16, y = var_18850_to_fp16)[name = tensor("aw_chunk_1881_cast_fp16")]; tensor var_18852_to_fp16 = const()[name = tensor("op_18852_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1883_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1883_cast_fp16, y = var_18852_to_fp16)[name = tensor("aw_chunk_1883_cast_fp16")]; tensor var_18854_to_fp16 = const()[name = tensor("op_18854_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1885_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1885_cast_fp16, y = var_18854_to_fp16)[name = tensor("aw_chunk_1885_cast_fp16")]; tensor var_18856_to_fp16 = const()[name = tensor("op_18856_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1887_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1887_cast_fp16, y = var_18856_to_fp16)[name = tensor("aw_chunk_1887_cast_fp16")]; tensor var_18858_to_fp16 = const()[name = tensor("op_18858_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1889_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1889_cast_fp16, y = var_18858_to_fp16)[name = tensor("aw_chunk_1889_cast_fp16")]; tensor var_18860_to_fp16 = const()[name = tensor("op_18860_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1891_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1891_cast_fp16, y = var_18860_to_fp16)[name = tensor("aw_chunk_1891_cast_fp16")]; tensor var_18862_to_fp16 = const()[name = tensor("op_18862_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1893_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1893_cast_fp16, y = var_18862_to_fp16)[name = tensor("aw_chunk_1893_cast_fp16")]; tensor var_18864_to_fp16 = const()[name = tensor("op_18864_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1895_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1895_cast_fp16, y = var_18864_to_fp16)[name = tensor("aw_chunk_1895_cast_fp16")]; tensor var_18866_to_fp16 = const()[name = tensor("op_18866_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1897_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1897_cast_fp16, y = var_18866_to_fp16)[name = tensor("aw_chunk_1897_cast_fp16")]; tensor var_18868_to_fp16 = const()[name = tensor("op_18868_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1899_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1899_cast_fp16, y = var_18868_to_fp16)[name = tensor("aw_chunk_1899_cast_fp16")]; tensor var_18870_to_fp16 = const()[name = tensor("op_18870_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1901_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1901_cast_fp16, y = var_18870_to_fp16)[name = tensor("aw_chunk_1901_cast_fp16")]; tensor var_18872_to_fp16 = const()[name = tensor("op_18872_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1903_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1903_cast_fp16, y = var_18872_to_fp16)[name = tensor("aw_chunk_1903_cast_fp16")]; tensor var_18874_to_fp16 = const()[name = tensor("op_18874_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1905_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1905_cast_fp16, y = var_18874_to_fp16)[name = tensor("aw_chunk_1905_cast_fp16")]; tensor var_18876_to_fp16 = const()[name = tensor("op_18876_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1907_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1907_cast_fp16, y = var_18876_to_fp16)[name = tensor("aw_chunk_1907_cast_fp16")]; tensor var_18878_to_fp16 = const()[name = tensor("op_18878_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1909_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1909_cast_fp16, y = var_18878_to_fp16)[name = tensor("aw_chunk_1909_cast_fp16")]; tensor var_18880_to_fp16 = const()[name = tensor("op_18880_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1911_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1911_cast_fp16, y = var_18880_to_fp16)[name = tensor("aw_chunk_1911_cast_fp16")]; tensor var_18882_to_fp16 = const()[name = tensor("op_18882_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1913_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1913_cast_fp16, y = var_18882_to_fp16)[name = tensor("aw_chunk_1913_cast_fp16")]; tensor var_18884_to_fp16 = const()[name = tensor("op_18884_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1915_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1915_cast_fp16, y = var_18884_to_fp16)[name = tensor("aw_chunk_1915_cast_fp16")]; tensor var_18886_to_fp16 = const()[name = tensor("op_18886_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1917_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1917_cast_fp16, y = var_18886_to_fp16)[name = tensor("aw_chunk_1917_cast_fp16")]; tensor var_18888_to_fp16 = const()[name = tensor("op_18888_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1919_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1919_cast_fp16, y = var_18888_to_fp16)[name = tensor("aw_chunk_1919_cast_fp16")]; tensor var_18890_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1761_cast_fp16)[name = tensor("op_18890_cast_fp16")]; tensor var_18891_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1763_cast_fp16)[name = tensor("op_18891_cast_fp16")]; tensor var_18892_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1765_cast_fp16)[name = tensor("op_18892_cast_fp16")]; tensor var_18893_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1767_cast_fp16)[name = tensor("op_18893_cast_fp16")]; tensor var_18894_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1769_cast_fp16)[name = tensor("op_18894_cast_fp16")]; tensor var_18895_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1771_cast_fp16)[name = tensor("op_18895_cast_fp16")]; tensor var_18896_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1773_cast_fp16)[name = tensor("op_18896_cast_fp16")]; tensor var_18897_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1775_cast_fp16)[name = tensor("op_18897_cast_fp16")]; tensor var_18898_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1777_cast_fp16)[name = tensor("op_18898_cast_fp16")]; tensor var_18899_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1779_cast_fp16)[name = tensor("op_18899_cast_fp16")]; tensor var_18900_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1781_cast_fp16)[name = tensor("op_18900_cast_fp16")]; tensor var_18901_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1783_cast_fp16)[name = tensor("op_18901_cast_fp16")]; tensor var_18902_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1785_cast_fp16)[name = tensor("op_18902_cast_fp16")]; tensor var_18903_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1787_cast_fp16)[name = tensor("op_18903_cast_fp16")]; tensor var_18904_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1789_cast_fp16)[name = tensor("op_18904_cast_fp16")]; tensor var_18905_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1791_cast_fp16)[name = tensor("op_18905_cast_fp16")]; tensor var_18906_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1793_cast_fp16)[name = tensor("op_18906_cast_fp16")]; tensor var_18907_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1795_cast_fp16)[name = tensor("op_18907_cast_fp16")]; tensor var_18908_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1797_cast_fp16)[name = tensor("op_18908_cast_fp16")]; tensor var_18909_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1799_cast_fp16)[name = tensor("op_18909_cast_fp16")]; tensor var_18910_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1801_cast_fp16)[name = tensor("op_18910_cast_fp16")]; tensor var_18911_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1803_cast_fp16)[name = tensor("op_18911_cast_fp16")]; tensor var_18912_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1805_cast_fp16)[name = tensor("op_18912_cast_fp16")]; tensor var_18913_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1807_cast_fp16)[name = tensor("op_18913_cast_fp16")]; tensor var_18914_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1809_cast_fp16)[name = tensor("op_18914_cast_fp16")]; tensor var_18915_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1811_cast_fp16)[name = tensor("op_18915_cast_fp16")]; tensor var_18916_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1813_cast_fp16)[name = tensor("op_18916_cast_fp16")]; tensor var_18917_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1815_cast_fp16)[name = tensor("op_18917_cast_fp16")]; tensor var_18918_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1817_cast_fp16)[name = tensor("op_18918_cast_fp16")]; tensor var_18919_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1819_cast_fp16)[name = tensor("op_18919_cast_fp16")]; tensor var_18920_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1821_cast_fp16)[name = tensor("op_18920_cast_fp16")]; tensor var_18921_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1823_cast_fp16)[name = tensor("op_18921_cast_fp16")]; tensor var_18922_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1825_cast_fp16)[name = tensor("op_18922_cast_fp16")]; tensor var_18923_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1827_cast_fp16)[name = tensor("op_18923_cast_fp16")]; tensor var_18924_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1829_cast_fp16)[name = tensor("op_18924_cast_fp16")]; tensor var_18925_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1831_cast_fp16)[name = tensor("op_18925_cast_fp16")]; tensor var_18926_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1833_cast_fp16)[name = tensor("op_18926_cast_fp16")]; tensor var_18927_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1835_cast_fp16)[name = tensor("op_18927_cast_fp16")]; tensor var_18928_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1837_cast_fp16)[name = tensor("op_18928_cast_fp16")]; tensor var_18929_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1839_cast_fp16)[name = tensor("op_18929_cast_fp16")]; tensor var_18930_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1841_cast_fp16)[name = tensor("op_18930_cast_fp16")]; tensor var_18931_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1843_cast_fp16)[name = tensor("op_18931_cast_fp16")]; tensor var_18932_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1845_cast_fp16)[name = tensor("op_18932_cast_fp16")]; tensor var_18933_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1847_cast_fp16)[name = tensor("op_18933_cast_fp16")]; tensor var_18934_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1849_cast_fp16)[name = tensor("op_18934_cast_fp16")]; tensor var_18935_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1851_cast_fp16)[name = tensor("op_18935_cast_fp16")]; tensor var_18936_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1853_cast_fp16)[name = tensor("op_18936_cast_fp16")]; tensor var_18937_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1855_cast_fp16)[name = tensor("op_18937_cast_fp16")]; tensor var_18938_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1857_cast_fp16)[name = tensor("op_18938_cast_fp16")]; tensor var_18939_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1859_cast_fp16)[name = tensor("op_18939_cast_fp16")]; tensor var_18940_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1861_cast_fp16)[name = tensor("op_18940_cast_fp16")]; tensor var_18941_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1863_cast_fp16)[name = tensor("op_18941_cast_fp16")]; tensor var_18942_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1865_cast_fp16)[name = tensor("op_18942_cast_fp16")]; tensor var_18943_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1867_cast_fp16)[name = tensor("op_18943_cast_fp16")]; tensor var_18944_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1869_cast_fp16)[name = tensor("op_18944_cast_fp16")]; tensor var_18945_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1871_cast_fp16)[name = tensor("op_18945_cast_fp16")]; tensor var_18946_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1873_cast_fp16)[name = tensor("op_18946_cast_fp16")]; tensor var_18947_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1875_cast_fp16)[name = tensor("op_18947_cast_fp16")]; tensor var_18948_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1877_cast_fp16)[name = tensor("op_18948_cast_fp16")]; tensor var_18949_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1879_cast_fp16)[name = tensor("op_18949_cast_fp16")]; tensor var_18950_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1881_cast_fp16)[name = tensor("op_18950_cast_fp16")]; tensor var_18951_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1883_cast_fp16)[name = tensor("op_18951_cast_fp16")]; tensor var_18952_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1885_cast_fp16)[name = tensor("op_18952_cast_fp16")]; tensor var_18953_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1887_cast_fp16)[name = tensor("op_18953_cast_fp16")]; tensor var_18954_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1889_cast_fp16)[name = tensor("op_18954_cast_fp16")]; tensor var_18955_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1891_cast_fp16)[name = tensor("op_18955_cast_fp16")]; tensor var_18956_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1893_cast_fp16)[name = tensor("op_18956_cast_fp16")]; tensor var_18957_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1895_cast_fp16)[name = tensor("op_18957_cast_fp16")]; tensor var_18958_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1897_cast_fp16)[name = tensor("op_18958_cast_fp16")]; tensor var_18959_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1899_cast_fp16)[name = tensor("op_18959_cast_fp16")]; tensor var_18960_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1901_cast_fp16)[name = tensor("op_18960_cast_fp16")]; tensor var_18961_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1903_cast_fp16)[name = tensor("op_18961_cast_fp16")]; tensor var_18962_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1905_cast_fp16)[name = tensor("op_18962_cast_fp16")]; tensor var_18963_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1907_cast_fp16)[name = tensor("op_18963_cast_fp16")]; tensor var_18964_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1909_cast_fp16)[name = tensor("op_18964_cast_fp16")]; tensor var_18965_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1911_cast_fp16)[name = tensor("op_18965_cast_fp16")]; tensor var_18966_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1913_cast_fp16)[name = tensor("op_18966_cast_fp16")]; tensor var_18967_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1915_cast_fp16)[name = tensor("op_18967_cast_fp16")]; tensor var_18968_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1917_cast_fp16)[name = tensor("op_18968_cast_fp16")]; tensor var_18969_cast_fp16 = softmax(axis = var_17688, x = aw_chunk_1919_cast_fp16)[name = tensor("op_18969_cast_fp16")]; tensor var_18971_equation_0 = const()[name = tensor("op_18971_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18971_cast_fp16 = einsum(equation = var_18971_equation_0, values = (var_18491_cast_fp16, var_18890_cast_fp16))[name = tensor("op_18971_cast_fp16")]; tensor var_18973_equation_0 = const()[name = tensor("op_18973_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18973_cast_fp16 = einsum(equation = var_18973_equation_0, values = (var_18491_cast_fp16, var_18891_cast_fp16))[name = tensor("op_18973_cast_fp16")]; tensor var_18975_equation_0 = const()[name = tensor("op_18975_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18975_cast_fp16 = einsum(equation = var_18975_equation_0, values = (var_18491_cast_fp16, var_18892_cast_fp16))[name = tensor("op_18975_cast_fp16")]; tensor var_18977_equation_0 = const()[name = tensor("op_18977_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18977_cast_fp16 = einsum(equation = var_18977_equation_0, values = (var_18491_cast_fp16, var_18893_cast_fp16))[name = tensor("op_18977_cast_fp16")]; tensor var_18979_equation_0 = const()[name = tensor("op_18979_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18979_cast_fp16 = einsum(equation = var_18979_equation_0, values = (var_18495_cast_fp16, var_18894_cast_fp16))[name = tensor("op_18979_cast_fp16")]; tensor var_18981_equation_0 = const()[name = tensor("op_18981_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18981_cast_fp16 = einsum(equation = var_18981_equation_0, values = (var_18495_cast_fp16, var_18895_cast_fp16))[name = tensor("op_18981_cast_fp16")]; tensor var_18983_equation_0 = const()[name = tensor("op_18983_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18983_cast_fp16 = einsum(equation = var_18983_equation_0, values = (var_18495_cast_fp16, var_18896_cast_fp16))[name = tensor("op_18983_cast_fp16")]; tensor var_18985_equation_0 = const()[name = tensor("op_18985_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18985_cast_fp16 = einsum(equation = var_18985_equation_0, values = (var_18495_cast_fp16, var_18897_cast_fp16))[name = tensor("op_18985_cast_fp16")]; tensor var_18987_equation_0 = const()[name = tensor("op_18987_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18987_cast_fp16 = einsum(equation = var_18987_equation_0, values = (var_18499_cast_fp16, var_18898_cast_fp16))[name = tensor("op_18987_cast_fp16")]; tensor var_18989_equation_0 = const()[name = tensor("op_18989_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18989_cast_fp16 = einsum(equation = var_18989_equation_0, values = (var_18499_cast_fp16, var_18899_cast_fp16))[name = tensor("op_18989_cast_fp16")]; tensor var_18991_equation_0 = const()[name = tensor("op_18991_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18991_cast_fp16 = einsum(equation = var_18991_equation_0, values = (var_18499_cast_fp16, var_18900_cast_fp16))[name = tensor("op_18991_cast_fp16")]; tensor var_18993_equation_0 = const()[name = tensor("op_18993_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18993_cast_fp16 = einsum(equation = var_18993_equation_0, values = (var_18499_cast_fp16, var_18901_cast_fp16))[name = tensor("op_18993_cast_fp16")]; tensor var_18995_equation_0 = const()[name = tensor("op_18995_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18995_cast_fp16 = einsum(equation = var_18995_equation_0, values = (var_18503_cast_fp16, var_18902_cast_fp16))[name = tensor("op_18995_cast_fp16")]; tensor var_18997_equation_0 = const()[name = tensor("op_18997_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18997_cast_fp16 = einsum(equation = var_18997_equation_0, values = (var_18503_cast_fp16, var_18903_cast_fp16))[name = tensor("op_18997_cast_fp16")]; tensor var_18999_equation_0 = const()[name = tensor("op_18999_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_18999_cast_fp16 = einsum(equation = var_18999_equation_0, values = (var_18503_cast_fp16, var_18904_cast_fp16))[name = tensor("op_18999_cast_fp16")]; tensor var_19001_equation_0 = const()[name = tensor("op_19001_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19001_cast_fp16 = einsum(equation = var_19001_equation_0, values = (var_18503_cast_fp16, var_18905_cast_fp16))[name = tensor("op_19001_cast_fp16")]; tensor var_19003_equation_0 = const()[name = tensor("op_19003_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19003_cast_fp16 = einsum(equation = var_19003_equation_0, values = (var_18507_cast_fp16, var_18906_cast_fp16))[name = tensor("op_19003_cast_fp16")]; tensor var_19005_equation_0 = const()[name = tensor("op_19005_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19005_cast_fp16 = einsum(equation = var_19005_equation_0, values = (var_18507_cast_fp16, var_18907_cast_fp16))[name = tensor("op_19005_cast_fp16")]; tensor var_19007_equation_0 = const()[name = tensor("op_19007_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19007_cast_fp16 = einsum(equation = var_19007_equation_0, values = (var_18507_cast_fp16, var_18908_cast_fp16))[name = tensor("op_19007_cast_fp16")]; tensor var_19009_equation_0 = const()[name = tensor("op_19009_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19009_cast_fp16 = einsum(equation = var_19009_equation_0, values = (var_18507_cast_fp16, var_18909_cast_fp16))[name = tensor("op_19009_cast_fp16")]; tensor var_19011_equation_0 = const()[name = tensor("op_19011_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19011_cast_fp16 = einsum(equation = var_19011_equation_0, values = (var_18511_cast_fp16, var_18910_cast_fp16))[name = tensor("op_19011_cast_fp16")]; tensor var_19013_equation_0 = const()[name = tensor("op_19013_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19013_cast_fp16 = einsum(equation = var_19013_equation_0, values = (var_18511_cast_fp16, var_18911_cast_fp16))[name = tensor("op_19013_cast_fp16")]; tensor var_19015_equation_0 = const()[name = tensor("op_19015_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19015_cast_fp16 = einsum(equation = var_19015_equation_0, values = (var_18511_cast_fp16, var_18912_cast_fp16))[name = tensor("op_19015_cast_fp16")]; tensor var_19017_equation_0 = const()[name = tensor("op_19017_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19017_cast_fp16 = einsum(equation = var_19017_equation_0, values = (var_18511_cast_fp16, var_18913_cast_fp16))[name = tensor("op_19017_cast_fp16")]; tensor var_19019_equation_0 = const()[name = tensor("op_19019_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19019_cast_fp16 = einsum(equation = var_19019_equation_0, values = (var_18515_cast_fp16, var_18914_cast_fp16))[name = tensor("op_19019_cast_fp16")]; tensor var_19021_equation_0 = const()[name = tensor("op_19021_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19021_cast_fp16 = einsum(equation = var_19021_equation_0, values = (var_18515_cast_fp16, var_18915_cast_fp16))[name = tensor("op_19021_cast_fp16")]; tensor var_19023_equation_0 = const()[name = tensor("op_19023_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19023_cast_fp16 = einsum(equation = var_19023_equation_0, values = (var_18515_cast_fp16, var_18916_cast_fp16))[name = tensor("op_19023_cast_fp16")]; tensor var_19025_equation_0 = const()[name = tensor("op_19025_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19025_cast_fp16 = einsum(equation = var_19025_equation_0, values = (var_18515_cast_fp16, var_18917_cast_fp16))[name = tensor("op_19025_cast_fp16")]; tensor var_19027_equation_0 = const()[name = tensor("op_19027_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19027_cast_fp16 = einsum(equation = var_19027_equation_0, values = (var_18519_cast_fp16, var_18918_cast_fp16))[name = tensor("op_19027_cast_fp16")]; tensor var_19029_equation_0 = const()[name = tensor("op_19029_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19029_cast_fp16 = einsum(equation = var_19029_equation_0, values = (var_18519_cast_fp16, var_18919_cast_fp16))[name = tensor("op_19029_cast_fp16")]; tensor var_19031_equation_0 = const()[name = tensor("op_19031_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19031_cast_fp16 = einsum(equation = var_19031_equation_0, values = (var_18519_cast_fp16, var_18920_cast_fp16))[name = tensor("op_19031_cast_fp16")]; tensor var_19033_equation_0 = const()[name = tensor("op_19033_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19033_cast_fp16 = einsum(equation = var_19033_equation_0, values = (var_18519_cast_fp16, var_18921_cast_fp16))[name = tensor("op_19033_cast_fp16")]; tensor var_19035_equation_0 = const()[name = tensor("op_19035_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19035_cast_fp16 = einsum(equation = var_19035_equation_0, values = (var_18523_cast_fp16, var_18922_cast_fp16))[name = tensor("op_19035_cast_fp16")]; tensor var_19037_equation_0 = const()[name = tensor("op_19037_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19037_cast_fp16 = einsum(equation = var_19037_equation_0, values = (var_18523_cast_fp16, var_18923_cast_fp16))[name = tensor("op_19037_cast_fp16")]; tensor var_19039_equation_0 = const()[name = tensor("op_19039_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19039_cast_fp16 = einsum(equation = var_19039_equation_0, values = (var_18523_cast_fp16, var_18924_cast_fp16))[name = tensor("op_19039_cast_fp16")]; tensor var_19041_equation_0 = const()[name = tensor("op_19041_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19041_cast_fp16 = einsum(equation = var_19041_equation_0, values = (var_18523_cast_fp16, var_18925_cast_fp16))[name = tensor("op_19041_cast_fp16")]; tensor var_19043_equation_0 = const()[name = tensor("op_19043_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19043_cast_fp16 = einsum(equation = var_19043_equation_0, values = (var_18527_cast_fp16, var_18926_cast_fp16))[name = tensor("op_19043_cast_fp16")]; tensor var_19045_equation_0 = const()[name = tensor("op_19045_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19045_cast_fp16 = einsum(equation = var_19045_equation_0, values = (var_18527_cast_fp16, var_18927_cast_fp16))[name = tensor("op_19045_cast_fp16")]; tensor var_19047_equation_0 = const()[name = tensor("op_19047_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19047_cast_fp16 = einsum(equation = var_19047_equation_0, values = (var_18527_cast_fp16, var_18928_cast_fp16))[name = tensor("op_19047_cast_fp16")]; tensor var_19049_equation_0 = const()[name = tensor("op_19049_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19049_cast_fp16 = einsum(equation = var_19049_equation_0, values = (var_18527_cast_fp16, var_18929_cast_fp16))[name = tensor("op_19049_cast_fp16")]; tensor var_19051_equation_0 = const()[name = tensor("op_19051_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19051_cast_fp16 = einsum(equation = var_19051_equation_0, values = (var_18531_cast_fp16, var_18930_cast_fp16))[name = tensor("op_19051_cast_fp16")]; tensor var_19053_equation_0 = const()[name = tensor("op_19053_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19053_cast_fp16 = einsum(equation = var_19053_equation_0, values = (var_18531_cast_fp16, var_18931_cast_fp16))[name = tensor("op_19053_cast_fp16")]; tensor var_19055_equation_0 = const()[name = tensor("op_19055_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19055_cast_fp16 = einsum(equation = var_19055_equation_0, values = (var_18531_cast_fp16, var_18932_cast_fp16))[name = tensor("op_19055_cast_fp16")]; tensor var_19057_equation_0 = const()[name = tensor("op_19057_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19057_cast_fp16 = einsum(equation = var_19057_equation_0, values = (var_18531_cast_fp16, var_18933_cast_fp16))[name = tensor("op_19057_cast_fp16")]; tensor var_19059_equation_0 = const()[name = tensor("op_19059_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19059_cast_fp16 = einsum(equation = var_19059_equation_0, values = (var_18535_cast_fp16, var_18934_cast_fp16))[name = tensor("op_19059_cast_fp16")]; tensor var_19061_equation_0 = const()[name = tensor("op_19061_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19061_cast_fp16 = einsum(equation = var_19061_equation_0, values = (var_18535_cast_fp16, var_18935_cast_fp16))[name = tensor("op_19061_cast_fp16")]; tensor var_19063_equation_0 = const()[name = tensor("op_19063_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19063_cast_fp16 = einsum(equation = var_19063_equation_0, values = (var_18535_cast_fp16, var_18936_cast_fp16))[name = tensor("op_19063_cast_fp16")]; tensor var_19065_equation_0 = const()[name = tensor("op_19065_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19065_cast_fp16 = einsum(equation = var_19065_equation_0, values = (var_18535_cast_fp16, var_18937_cast_fp16))[name = tensor("op_19065_cast_fp16")]; tensor var_19067_equation_0 = const()[name = tensor("op_19067_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19067_cast_fp16 = einsum(equation = var_19067_equation_0, values = (var_18539_cast_fp16, var_18938_cast_fp16))[name = tensor("op_19067_cast_fp16")]; tensor var_19069_equation_0 = const()[name = tensor("op_19069_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19069_cast_fp16 = einsum(equation = var_19069_equation_0, values = (var_18539_cast_fp16, var_18939_cast_fp16))[name = tensor("op_19069_cast_fp16")]; tensor var_19071_equation_0 = const()[name = tensor("op_19071_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19071_cast_fp16 = einsum(equation = var_19071_equation_0, values = (var_18539_cast_fp16, var_18940_cast_fp16))[name = tensor("op_19071_cast_fp16")]; tensor var_19073_equation_0 = const()[name = tensor("op_19073_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19073_cast_fp16 = einsum(equation = var_19073_equation_0, values = (var_18539_cast_fp16, var_18941_cast_fp16))[name = tensor("op_19073_cast_fp16")]; tensor var_19075_equation_0 = const()[name = tensor("op_19075_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19075_cast_fp16 = einsum(equation = var_19075_equation_0, values = (var_18543_cast_fp16, var_18942_cast_fp16))[name = tensor("op_19075_cast_fp16")]; tensor var_19077_equation_0 = const()[name = tensor("op_19077_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19077_cast_fp16 = einsum(equation = var_19077_equation_0, values = (var_18543_cast_fp16, var_18943_cast_fp16))[name = tensor("op_19077_cast_fp16")]; tensor var_19079_equation_0 = const()[name = tensor("op_19079_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19079_cast_fp16 = einsum(equation = var_19079_equation_0, values = (var_18543_cast_fp16, var_18944_cast_fp16))[name = tensor("op_19079_cast_fp16")]; tensor var_19081_equation_0 = const()[name = tensor("op_19081_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19081_cast_fp16 = einsum(equation = var_19081_equation_0, values = (var_18543_cast_fp16, var_18945_cast_fp16))[name = tensor("op_19081_cast_fp16")]; tensor var_19083_equation_0 = const()[name = tensor("op_19083_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19083_cast_fp16 = einsum(equation = var_19083_equation_0, values = (var_18547_cast_fp16, var_18946_cast_fp16))[name = tensor("op_19083_cast_fp16")]; tensor var_19085_equation_0 = const()[name = tensor("op_19085_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19085_cast_fp16 = einsum(equation = var_19085_equation_0, values = (var_18547_cast_fp16, var_18947_cast_fp16))[name = tensor("op_19085_cast_fp16")]; tensor var_19087_equation_0 = const()[name = tensor("op_19087_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19087_cast_fp16 = einsum(equation = var_19087_equation_0, values = (var_18547_cast_fp16, var_18948_cast_fp16))[name = tensor("op_19087_cast_fp16")]; tensor var_19089_equation_0 = const()[name = tensor("op_19089_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19089_cast_fp16 = einsum(equation = var_19089_equation_0, values = (var_18547_cast_fp16, var_18949_cast_fp16))[name = tensor("op_19089_cast_fp16")]; tensor var_19091_equation_0 = const()[name = tensor("op_19091_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19091_cast_fp16 = einsum(equation = var_19091_equation_0, values = (var_18551_cast_fp16, var_18950_cast_fp16))[name = tensor("op_19091_cast_fp16")]; tensor var_19093_equation_0 = const()[name = tensor("op_19093_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19093_cast_fp16 = einsum(equation = var_19093_equation_0, values = (var_18551_cast_fp16, var_18951_cast_fp16))[name = tensor("op_19093_cast_fp16")]; tensor var_19095_equation_0 = const()[name = tensor("op_19095_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19095_cast_fp16 = einsum(equation = var_19095_equation_0, values = (var_18551_cast_fp16, var_18952_cast_fp16))[name = tensor("op_19095_cast_fp16")]; tensor var_19097_equation_0 = const()[name = tensor("op_19097_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19097_cast_fp16 = einsum(equation = var_19097_equation_0, values = (var_18551_cast_fp16, var_18953_cast_fp16))[name = tensor("op_19097_cast_fp16")]; tensor var_19099_equation_0 = const()[name = tensor("op_19099_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19099_cast_fp16 = einsum(equation = var_19099_equation_0, values = (var_18555_cast_fp16, var_18954_cast_fp16))[name = tensor("op_19099_cast_fp16")]; tensor var_19101_equation_0 = const()[name = tensor("op_19101_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19101_cast_fp16 = einsum(equation = var_19101_equation_0, values = (var_18555_cast_fp16, var_18955_cast_fp16))[name = tensor("op_19101_cast_fp16")]; tensor var_19103_equation_0 = const()[name = tensor("op_19103_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19103_cast_fp16 = einsum(equation = var_19103_equation_0, values = (var_18555_cast_fp16, var_18956_cast_fp16))[name = tensor("op_19103_cast_fp16")]; tensor var_19105_equation_0 = const()[name = tensor("op_19105_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19105_cast_fp16 = einsum(equation = var_19105_equation_0, values = (var_18555_cast_fp16, var_18957_cast_fp16))[name = tensor("op_19105_cast_fp16")]; tensor var_19107_equation_0 = const()[name = tensor("op_19107_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19107_cast_fp16 = einsum(equation = var_19107_equation_0, values = (var_18559_cast_fp16, var_18958_cast_fp16))[name = tensor("op_19107_cast_fp16")]; tensor var_19109_equation_0 = const()[name = tensor("op_19109_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19109_cast_fp16 = einsum(equation = var_19109_equation_0, values = (var_18559_cast_fp16, var_18959_cast_fp16))[name = tensor("op_19109_cast_fp16")]; tensor var_19111_equation_0 = const()[name = tensor("op_19111_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19111_cast_fp16 = einsum(equation = var_19111_equation_0, values = (var_18559_cast_fp16, var_18960_cast_fp16))[name = tensor("op_19111_cast_fp16")]; tensor var_19113_equation_0 = const()[name = tensor("op_19113_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19113_cast_fp16 = einsum(equation = var_19113_equation_0, values = (var_18559_cast_fp16, var_18961_cast_fp16))[name = tensor("op_19113_cast_fp16")]; tensor var_19115_equation_0 = const()[name = tensor("op_19115_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19115_cast_fp16 = einsum(equation = var_19115_equation_0, values = (var_18563_cast_fp16, var_18962_cast_fp16))[name = tensor("op_19115_cast_fp16")]; tensor var_19117_equation_0 = const()[name = tensor("op_19117_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19117_cast_fp16 = einsum(equation = var_19117_equation_0, values = (var_18563_cast_fp16, var_18963_cast_fp16))[name = tensor("op_19117_cast_fp16")]; tensor var_19119_equation_0 = const()[name = tensor("op_19119_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19119_cast_fp16 = einsum(equation = var_19119_equation_0, values = (var_18563_cast_fp16, var_18964_cast_fp16))[name = tensor("op_19119_cast_fp16")]; tensor var_19121_equation_0 = const()[name = tensor("op_19121_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19121_cast_fp16 = einsum(equation = var_19121_equation_0, values = (var_18563_cast_fp16, var_18965_cast_fp16))[name = tensor("op_19121_cast_fp16")]; tensor var_19123_equation_0 = const()[name = tensor("op_19123_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19123_cast_fp16 = einsum(equation = var_19123_equation_0, values = (var_18567_cast_fp16, var_18966_cast_fp16))[name = tensor("op_19123_cast_fp16")]; tensor var_19125_equation_0 = const()[name = tensor("op_19125_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19125_cast_fp16 = einsum(equation = var_19125_equation_0, values = (var_18567_cast_fp16, var_18967_cast_fp16))[name = tensor("op_19125_cast_fp16")]; tensor var_19127_equation_0 = const()[name = tensor("op_19127_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19127_cast_fp16 = einsum(equation = var_19127_equation_0, values = (var_18567_cast_fp16, var_18968_cast_fp16))[name = tensor("op_19127_cast_fp16")]; tensor var_19129_equation_0 = const()[name = tensor("op_19129_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_19129_cast_fp16 = einsum(equation = var_19129_equation_0, values = (var_18567_cast_fp16, var_18969_cast_fp16))[name = tensor("op_19129_cast_fp16")]; tensor var_19131_interleave_0 = const()[name = tensor("op_19131_interleave_0"), val = tensor(false)]; tensor var_19131_cast_fp16 = concat(axis = var_17663, interleave = var_19131_interleave_0, values = (var_18971_cast_fp16, var_18973_cast_fp16, var_18975_cast_fp16, var_18977_cast_fp16))[name = tensor("op_19131_cast_fp16")]; tensor var_19133_interleave_0 = const()[name = tensor("op_19133_interleave_0"), val = tensor(false)]; tensor var_19133_cast_fp16 = concat(axis = var_17663, interleave = var_19133_interleave_0, values = (var_18979_cast_fp16, var_18981_cast_fp16, var_18983_cast_fp16, var_18985_cast_fp16))[name = tensor("op_19133_cast_fp16")]; tensor var_19135_interleave_0 = const()[name = tensor("op_19135_interleave_0"), val = tensor(false)]; tensor var_19135_cast_fp16 = concat(axis = var_17663, interleave = var_19135_interleave_0, values = (var_18987_cast_fp16, var_18989_cast_fp16, var_18991_cast_fp16, var_18993_cast_fp16))[name = tensor("op_19135_cast_fp16")]; tensor var_19137_interleave_0 = const()[name = tensor("op_19137_interleave_0"), val = tensor(false)]; tensor var_19137_cast_fp16 = concat(axis = var_17663, interleave = var_19137_interleave_0, values = (var_18995_cast_fp16, var_18997_cast_fp16, var_18999_cast_fp16, var_19001_cast_fp16))[name = tensor("op_19137_cast_fp16")]; tensor var_19139_interleave_0 = const()[name = tensor("op_19139_interleave_0"), val = tensor(false)]; tensor var_19139_cast_fp16 = concat(axis = var_17663, interleave = var_19139_interleave_0, values = (var_19003_cast_fp16, var_19005_cast_fp16, var_19007_cast_fp16, var_19009_cast_fp16))[name = tensor("op_19139_cast_fp16")]; tensor var_19141_interleave_0 = const()[name = tensor("op_19141_interleave_0"), val = tensor(false)]; tensor var_19141_cast_fp16 = concat(axis = var_17663, interleave = var_19141_interleave_0, values = (var_19011_cast_fp16, var_19013_cast_fp16, var_19015_cast_fp16, var_19017_cast_fp16))[name = tensor("op_19141_cast_fp16")]; tensor var_19143_interleave_0 = const()[name = tensor("op_19143_interleave_0"), val = tensor(false)]; tensor var_19143_cast_fp16 = concat(axis = var_17663, interleave = var_19143_interleave_0, values = (var_19019_cast_fp16, var_19021_cast_fp16, var_19023_cast_fp16, var_19025_cast_fp16))[name = tensor("op_19143_cast_fp16")]; tensor var_19145_interleave_0 = const()[name = tensor("op_19145_interleave_0"), val = tensor(false)]; tensor var_19145_cast_fp16 = concat(axis = var_17663, interleave = var_19145_interleave_0, values = (var_19027_cast_fp16, var_19029_cast_fp16, var_19031_cast_fp16, var_19033_cast_fp16))[name = tensor("op_19145_cast_fp16")]; tensor var_19147_interleave_0 = const()[name = tensor("op_19147_interleave_0"), val = tensor(false)]; tensor var_19147_cast_fp16 = concat(axis = var_17663, interleave = var_19147_interleave_0, values = (var_19035_cast_fp16, var_19037_cast_fp16, var_19039_cast_fp16, var_19041_cast_fp16))[name = tensor("op_19147_cast_fp16")]; tensor var_19149_interleave_0 = const()[name = tensor("op_19149_interleave_0"), val = tensor(false)]; tensor var_19149_cast_fp16 = concat(axis = var_17663, interleave = var_19149_interleave_0, values = (var_19043_cast_fp16, var_19045_cast_fp16, var_19047_cast_fp16, var_19049_cast_fp16))[name = tensor("op_19149_cast_fp16")]; tensor var_19151_interleave_0 = const()[name = tensor("op_19151_interleave_0"), val = tensor(false)]; tensor var_19151_cast_fp16 = concat(axis = var_17663, interleave = var_19151_interleave_0, values = (var_19051_cast_fp16, var_19053_cast_fp16, var_19055_cast_fp16, var_19057_cast_fp16))[name = tensor("op_19151_cast_fp16")]; tensor var_19153_interleave_0 = const()[name = tensor("op_19153_interleave_0"), val = tensor(false)]; tensor var_19153_cast_fp16 = concat(axis = var_17663, interleave = var_19153_interleave_0, values = (var_19059_cast_fp16, var_19061_cast_fp16, var_19063_cast_fp16, var_19065_cast_fp16))[name = tensor("op_19153_cast_fp16")]; tensor var_19155_interleave_0 = const()[name = tensor("op_19155_interleave_0"), val = tensor(false)]; tensor var_19155_cast_fp16 = concat(axis = var_17663, interleave = var_19155_interleave_0, values = (var_19067_cast_fp16, var_19069_cast_fp16, var_19071_cast_fp16, var_19073_cast_fp16))[name = tensor("op_19155_cast_fp16")]; tensor var_19157_interleave_0 = const()[name = tensor("op_19157_interleave_0"), val = tensor(false)]; tensor var_19157_cast_fp16 = concat(axis = var_17663, interleave = var_19157_interleave_0, values = (var_19075_cast_fp16, var_19077_cast_fp16, var_19079_cast_fp16, var_19081_cast_fp16))[name = tensor("op_19157_cast_fp16")]; tensor var_19159_interleave_0 = const()[name = tensor("op_19159_interleave_0"), val = tensor(false)]; tensor var_19159_cast_fp16 = concat(axis = var_17663, interleave = var_19159_interleave_0, values = (var_19083_cast_fp16, var_19085_cast_fp16, var_19087_cast_fp16, var_19089_cast_fp16))[name = tensor("op_19159_cast_fp16")]; tensor var_19161_interleave_0 = const()[name = tensor("op_19161_interleave_0"), val = tensor(false)]; tensor var_19161_cast_fp16 = concat(axis = var_17663, interleave = var_19161_interleave_0, values = (var_19091_cast_fp16, var_19093_cast_fp16, var_19095_cast_fp16, var_19097_cast_fp16))[name = tensor("op_19161_cast_fp16")]; tensor var_19163_interleave_0 = const()[name = tensor("op_19163_interleave_0"), val = tensor(false)]; tensor var_19163_cast_fp16 = concat(axis = var_17663, interleave = var_19163_interleave_0, values = (var_19099_cast_fp16, var_19101_cast_fp16, var_19103_cast_fp16, var_19105_cast_fp16))[name = tensor("op_19163_cast_fp16")]; tensor var_19165_interleave_0 = const()[name = tensor("op_19165_interleave_0"), val = tensor(false)]; tensor var_19165_cast_fp16 = concat(axis = var_17663, interleave = var_19165_interleave_0, values = (var_19107_cast_fp16, var_19109_cast_fp16, var_19111_cast_fp16, var_19113_cast_fp16))[name = tensor("op_19165_cast_fp16")]; tensor var_19167_interleave_0 = const()[name = tensor("op_19167_interleave_0"), val = tensor(false)]; tensor var_19167_cast_fp16 = concat(axis = var_17663, interleave = var_19167_interleave_0, values = (var_19115_cast_fp16, var_19117_cast_fp16, var_19119_cast_fp16, var_19121_cast_fp16))[name = tensor("op_19167_cast_fp16")]; tensor var_19169_interleave_0 = const()[name = tensor("op_19169_interleave_0"), val = tensor(false)]; tensor var_19169_cast_fp16 = concat(axis = var_17663, interleave = var_19169_interleave_0, values = (var_19123_cast_fp16, var_19125_cast_fp16, var_19127_cast_fp16, var_19129_cast_fp16))[name = tensor("op_19169_cast_fp16")]; tensor input_89_interleave_0 = const()[name = tensor("input_89_interleave_0"), val = tensor(false)]; tensor input_89_cast_fp16 = concat(axis = var_17688, interleave = input_89_interleave_0, values = (var_19131_cast_fp16, var_19133_cast_fp16, var_19135_cast_fp16, var_19137_cast_fp16, var_19139_cast_fp16, var_19141_cast_fp16, var_19143_cast_fp16, var_19145_cast_fp16, var_19147_cast_fp16, var_19149_cast_fp16, var_19151_cast_fp16, var_19153_cast_fp16, var_19155_cast_fp16, var_19157_cast_fp16, var_19159_cast_fp16, var_19161_cast_fp16, var_19163_cast_fp16, var_19165_cast_fp16, var_19167_cast_fp16, var_19169_cast_fp16))[name = tensor("input_89_cast_fp16")]; tensor var_19180_pad_type_0 = const()[name = tensor("op_19180_pad_type_0"), val = tensor("valid")]; tensor var_19180_strides_0 = const()[name = tensor("op_19180_strides_0"), val = tensor([1, 1])]; tensor var_19180_pad_0 = const()[name = tensor("op_19180_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19180_dilations_0 = const()[name = tensor("op_19180_dilations_0"), val = tensor([1, 1])]; tensor var_19180_groups_0 = const()[name = tensor("op_19180_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161158080))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161977344))), name = tensor("layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_11_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161977472)))]; tensor var_19180_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_19180_dilations_0, groups = var_19180_groups_0, pad = var_19180_pad_0, pad_type = var_19180_pad_type_0, strides = var_19180_strides_0, weight = layers_11_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_89_cast_fp16)[name = tensor("op_19180_cast_fp16")]; tensor var_19186_pad_type_0 = const()[name = tensor("op_19186_pad_type_0"), val = tensor("valid")]; tensor var_19186_strides_0 = const()[name = tensor("op_19186_strides_0"), val = tensor([1, 1])]; tensor var_19186_pad_0 = const()[name = tensor("op_19186_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19186_dilations_0 = const()[name = tensor("op_19186_dilations_0"), val = tensor([1, 1])]; tensor var_19186_groups_0 = const()[name = tensor("op_19186_groups_0"), val = tensor(1)]; tensor layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161997632))), name = tensor("layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(161980096))), shape = tensor([1280, 1280, 1, 1])]; tensor var_19186_cast_fp16 = conv(dilations = var_19186_dilations_0, groups = var_19186_groups_0, pad = var_19186_pad_0, pad_type = var_19186_pad_type_0, strides = var_19186_strides_0, weight = layers_11_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_89_cast_fp16)[name = tensor("op_19186_cast_fp16")]; tensor obj_47_cast_fp16 = add(x = var_19180_cast_fp16, y = var_19186_cast_fp16)[name = tensor("obj_47_cast_fp16")]; tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_47_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; tensor out_47_axes_0 = const()[name = tensor("out_47_axes_0"), val = tensor([1])]; tensor var_19197_to_fp16 = const()[name = tensor("op_19197_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_47_cast_fp16 = layer_norm(axes = out_47_axes_0, epsilon = var_19197_to_fp16, x = inputs_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162202496)))]; tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162205120)))]; tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; tensor var_19215_pad_type_0 = const()[name = tensor("op_19215_pad_type_0"), val = tensor("valid")]; tensor var_19215_strides_0 = const()[name = tensor("op_19215_strides_0"), val = tensor([1, 1])]; tensor var_19215_pad_0 = const()[name = tensor("op_19215_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19215_dilations_0 = const()[name = tensor("op_19215_dilations_0"), val = tensor([1, 1])]; tensor var_19215_groups_0 = const()[name = tensor("op_19215_groups_0"), val = tensor(1)]; tensor layers_11_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162207744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165484608))), name = tensor("layers_11_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_11_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165484736)))]; tensor var_19215_cast_fp16 = conv(bias = layers_11_fc1_inlier_module_bias_to_fp16, dilations = var_19215_dilations_0, groups = var_19215_groups_0, pad = var_19215_pad_0, pad_type = var_19215_pad_type_0, strides = var_19215_strides_0, weight = layers_11_fc1_inlier_module_weight_to_fp16_palettized, x = input_91_cast_fp16)[name = tensor("op_19215_cast_fp16")]; tensor var_19221_pad_type_0 = const()[name = tensor("op_19221_pad_type_0"), val = tensor("valid")]; tensor var_19221_strides_0 = const()[name = tensor("op_19221_strides_0"), val = tensor([1, 1])]; tensor var_19221_pad_0 = const()[name = tensor("op_19221_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19221_dilations_0 = const()[name = tensor("op_19221_dilations_0"), val = tensor([1, 1])]; tensor var_19221_groups_0 = const()[name = tensor("op_19221_groups_0"), val = tensor(1)]; tensor layers_11_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165526016))), name = tensor("layers_11_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165495040))), shape = tensor([5120, 1280, 1, 1])]; tensor var_19221_cast_fp16 = conv(dilations = var_19221_dilations_0, groups = var_19221_groups_0, pad = var_19221_pad_0, pad_type = var_19221_pad_type_0, strides = var_19221_strides_0, weight = layers_11_fc1_outlier_module_weight_to_fp16_sparsified, x = input_91_cast_fp16)[name = tensor("op_19221_cast_fp16")]; tensor input_93_cast_fp16 = add(x = var_19215_cast_fp16, y = var_19221_cast_fp16)[name = tensor("input_93_cast_fp16")]; tensor input_95_mode_0 = const()[name = tensor("input_95_mode_0"), val = tensor("EXACT")]; tensor input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = tensor("input_95_cast_fp16")]; tensor var_19232_pad_type_0 = const()[name = tensor("op_19232_pad_type_0"), val = tensor("valid")]; tensor var_19232_strides_0 = const()[name = tensor("op_19232_strides_0"), val = tensor([1, 1])]; tensor var_19232_pad_0 = const()[name = tensor("op_19232_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19232_dilations_0 = const()[name = tensor("op_19232_dilations_0"), val = tensor([1, 1])]; tensor var_19232_groups_0 = const()[name = tensor("op_19232_groups_0"), val = tensor(1)]; tensor layers_11_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166345280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169622144))), name = tensor("layers_11_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_11_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_11_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169622272)))]; tensor var_19232_cast_fp16 = conv(bias = layers_11_fc2_inlier_module_bias_to_fp16, dilations = var_19232_dilations_0, groups = var_19232_groups_0, pad = var_19232_pad_0, pad_type = var_19232_pad_type_0, strides = var_19232_strides_0, weight = layers_11_fc2_inlier_module_weight_to_fp16_palettized, x = input_95_cast_fp16)[name = tensor("op_19232_cast_fp16")]; tensor var_19238_pad_type_0 = const()[name = tensor("op_19238_pad_type_0"), val = tensor("valid")]; tensor var_19238_strides_0 = const()[name = tensor("op_19238_strides_0"), val = tensor([1, 1])]; tensor var_19238_pad_0 = const()[name = tensor("op_19238_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19238_dilations_0 = const()[name = tensor("op_19238_dilations_0"), val = tensor([1, 1])]; tensor var_19238_groups_0 = const()[name = tensor("op_19238_groups_0"), val = tensor(1)]; tensor layers_11_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169852352))), name = tensor("layers_11_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(169624896))), shape = tensor([1280, 5120, 1, 1])]; tensor var_19238_cast_fp16 = conv(dilations = var_19238_dilations_0, groups = var_19238_groups_0, pad = var_19238_pad_0, pad_type = var_19238_pad_type_0, strides = var_19238_strides_0, weight = layers_11_fc2_outlier_module_weight_to_fp16_sparsified, x = input_95_cast_fp16)[name = tensor("op_19238_cast_fp16")]; tensor hidden_states_27_cast_fp16 = add(x = var_19232_cast_fp16, y = var_19238_cast_fp16)[name = tensor("hidden_states_27_cast_fp16")]; tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_27_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; tensor var_19244 = const()[name = tensor("op_19244"), val = tensor(3)]; tensor var_19269 = const()[name = tensor("op_19269"), val = tensor(1)]; tensor out_49_axes_0 = const()[name = tensor("out_49_axes_0"), val = tensor([1])]; tensor var_19286_to_fp16 = const()[name = tensor("op_19286_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_49_cast_fp16 = layer_norm(axes = out_49_axes_0, epsilon = var_19286_to_fp16, x = inputs_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; tensor obj_49_gamma_0_to_fp16 = const()[name = tensor("obj_49_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170671616)))]; tensor obj_49_beta_0_to_fp16 = const()[name = tensor("obj_49_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170674240)))]; tensor obj_49_epsilon_0_to_fp16 = const()[name = tensor("obj_49_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_49_cast_fp16 = batch_norm(beta = obj_49_beta_0_to_fp16, epsilon = obj_49_epsilon_0_to_fp16, gamma = obj_49_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_49_cast_fp16")]; tensor var_19308_pad_type_0 = const()[name = tensor("op_19308_pad_type_0"), val = tensor("valid")]; tensor var_19308_strides_0 = const()[name = tensor("op_19308_strides_0"), val = tensor([1, 1])]; tensor var_19308_pad_0 = const()[name = tensor("op_19308_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19308_dilations_0 = const()[name = tensor("op_19308_dilations_0"), val = tensor([1, 1])]; tensor var_19308_groups_0 = const()[name = tensor("op_19308_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(170676864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171496128))), name = tensor("layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_12_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171496256)))]; tensor var_19308_cast_fp16 = conv(bias = layers_12_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_19308_dilations_0, groups = var_19308_groups_0, pad = var_19308_pad_0, pad_type = var_19308_pad_type_0, strides = var_19308_strides_0, weight = layers_12_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = tensor("op_19308_cast_fp16")]; tensor var_19314_pad_type_0 = const()[name = tensor("op_19314_pad_type_0"), val = tensor("valid")]; tensor var_19314_strides_0 = const()[name = tensor("op_19314_strides_0"), val = tensor([1, 1])]; tensor var_19314_pad_0 = const()[name = tensor("op_19314_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19314_dilations_0 = const()[name = tensor("op_19314_dilations_0"), val = tensor([1, 1])]; tensor var_19314_groups_0 = const()[name = tensor("op_19314_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171547328))), name = tensor("layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171498880))), shape = tensor([1280, 1280, 1, 1])]; tensor var_19314_cast_fp16 = conv(dilations = var_19314_dilations_0, groups = var_19314_groups_0, pad = var_19314_pad_0, pad_type = var_19314_pad_type_0, strides = var_19314_strides_0, weight = layers_12_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = tensor("op_19314_cast_fp16")]; tensor query_25_cast_fp16 = add(x = var_19308_cast_fp16, y = var_19314_cast_fp16)[name = tensor("query_25_cast_fp16")]; tensor var_19323_pad_type_0 = const()[name = tensor("op_19323_pad_type_0"), val = tensor("valid")]; tensor var_19323_strides_0 = const()[name = tensor("op_19323_strides_0"), val = tensor([1, 1])]; tensor var_19323_pad_0 = const()[name = tensor("op_19323_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19323_dilations_0 = const()[name = tensor("op_19323_dilations_0"), val = tensor([1, 1])]; tensor var_19323_groups_0 = const()[name = tensor("op_19323_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171752192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172571456))), name = tensor("layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_19323_cast_fp16 = conv(dilations = var_19323_dilations_0, groups = var_19323_groups_0, pad = var_19323_pad_0, pad_type = var_19323_pad_type_0, strides = var_19323_strides_0, weight = layers_12_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = tensor("op_19323_cast_fp16")]; tensor var_19329_pad_type_0 = const()[name = tensor("op_19329_pad_type_0"), val = tensor("valid")]; tensor var_19329_strides_0 = const()[name = tensor("op_19329_strides_0"), val = tensor([1, 1])]; tensor var_19329_pad_0 = const()[name = tensor("op_19329_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19329_dilations_0 = const()[name = tensor("op_19329_dilations_0"), val = tensor([1, 1])]; tensor var_19329_groups_0 = const()[name = tensor("op_19329_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172602112))), name = tensor("layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172571584))), shape = tensor([1280, 1280, 1, 1])]; tensor var_19329_cast_fp16 = conv(dilations = var_19329_dilations_0, groups = var_19329_groups_0, pad = var_19329_pad_0, pad_type = var_19329_pad_type_0, strides = var_19329_strides_0, weight = layers_12_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = tensor("op_19329_cast_fp16")]; tensor key_25_cast_fp16 = add(x = var_19323_cast_fp16, y = var_19329_cast_fp16)[name = tensor("key_25_cast_fp16")]; tensor var_19339_pad_type_0 = const()[name = tensor("op_19339_pad_type_0"), val = tensor("valid")]; tensor var_19339_strides_0 = const()[name = tensor("op_19339_strides_0"), val = tensor([1, 1])]; tensor var_19339_pad_0 = const()[name = tensor("op_19339_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19339_dilations_0 = const()[name = tensor("op_19339_dilations_0"), val = tensor([1, 1])]; tensor var_19339_groups_0 = const()[name = tensor("op_19339_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(172806976))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173626240))), name = tensor("layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_12_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173626368)))]; tensor var_19339_cast_fp16 = conv(bias = layers_12_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_19339_dilations_0, groups = var_19339_groups_0, pad = var_19339_pad_0, pad_type = var_19339_pad_type_0, strides = var_19339_strides_0, weight = layers_12_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_49_cast_fp16)[name = tensor("op_19339_cast_fp16")]; tensor var_19345_pad_type_0 = const()[name = tensor("op_19345_pad_type_0"), val = tensor("valid")]; tensor var_19345_strides_0 = const()[name = tensor("op_19345_strides_0"), val = tensor([1, 1])]; tensor var_19345_pad_0 = const()[name = tensor("op_19345_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_19345_dilations_0 = const()[name = tensor("op_19345_dilations_0"), val = tensor([1, 1])]; tensor var_19345_groups_0 = const()[name = tensor("op_19345_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173648128))), name = tensor("layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173628992))), shape = tensor([1280, 1280, 1, 1])]; tensor var_19345_cast_fp16 = conv(dilations = var_19345_dilations_0, groups = var_19345_groups_0, pad = var_19345_pad_0, pad_type = var_19345_pad_type_0, strides = var_19345_strides_0, weight = layers_12_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_49_cast_fp16)[name = tensor("op_19345_cast_fp16")]; tensor value_25_cast_fp16 = add(x = var_19339_cast_fp16, y = var_19345_cast_fp16)[name = tensor("value_25_cast_fp16")]; tensor var_19351_begin_0 = const()[name = tensor("op_19351_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19351_end_0 = const()[name = tensor("op_19351_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19351_end_mask_0 = const()[name = tensor("op_19351_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19351_cast_fp16 = slice_by_index(begin = var_19351_begin_0, end = var_19351_end_0, end_mask = var_19351_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19351_cast_fp16")]; tensor var_19355_begin_0 = const()[name = tensor("op_19355_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_19355_end_0 = const()[name = tensor("op_19355_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_19355_end_mask_0 = const()[name = tensor("op_19355_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19355_cast_fp16 = slice_by_index(begin = var_19355_begin_0, end = var_19355_end_0, end_mask = var_19355_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19355_cast_fp16")]; tensor var_19359_begin_0 = const()[name = tensor("op_19359_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_19359_end_0 = const()[name = tensor("op_19359_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_19359_end_mask_0 = const()[name = tensor("op_19359_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19359_cast_fp16 = slice_by_index(begin = var_19359_begin_0, end = var_19359_end_0, end_mask = var_19359_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19359_cast_fp16")]; tensor var_19363_begin_0 = const()[name = tensor("op_19363_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_19363_end_0 = const()[name = tensor("op_19363_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_19363_end_mask_0 = const()[name = tensor("op_19363_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19363_cast_fp16 = slice_by_index(begin = var_19363_begin_0, end = var_19363_end_0, end_mask = var_19363_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19363_cast_fp16")]; tensor var_19367_begin_0 = const()[name = tensor("op_19367_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_19367_end_0 = const()[name = tensor("op_19367_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_19367_end_mask_0 = const()[name = tensor("op_19367_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19367_cast_fp16 = slice_by_index(begin = var_19367_begin_0, end = var_19367_end_0, end_mask = var_19367_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19367_cast_fp16")]; tensor var_19371_begin_0 = const()[name = tensor("op_19371_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_19371_end_0 = const()[name = tensor("op_19371_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_19371_end_mask_0 = const()[name = tensor("op_19371_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19371_cast_fp16 = slice_by_index(begin = var_19371_begin_0, end = var_19371_end_0, end_mask = var_19371_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19371_cast_fp16")]; tensor var_19375_begin_0 = const()[name = tensor("op_19375_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_19375_end_0 = const()[name = tensor("op_19375_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_19375_end_mask_0 = const()[name = tensor("op_19375_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19375_cast_fp16 = slice_by_index(begin = var_19375_begin_0, end = var_19375_end_0, end_mask = var_19375_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19375_cast_fp16")]; tensor var_19379_begin_0 = const()[name = tensor("op_19379_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_19379_end_0 = const()[name = tensor("op_19379_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_19379_end_mask_0 = const()[name = tensor("op_19379_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19379_cast_fp16 = slice_by_index(begin = var_19379_begin_0, end = var_19379_end_0, end_mask = var_19379_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19379_cast_fp16")]; tensor var_19383_begin_0 = const()[name = tensor("op_19383_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_19383_end_0 = const()[name = tensor("op_19383_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_19383_end_mask_0 = const()[name = tensor("op_19383_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19383_cast_fp16 = slice_by_index(begin = var_19383_begin_0, end = var_19383_end_0, end_mask = var_19383_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19383_cast_fp16")]; tensor var_19387_begin_0 = const()[name = tensor("op_19387_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_19387_end_0 = const()[name = tensor("op_19387_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_19387_end_mask_0 = const()[name = tensor("op_19387_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19387_cast_fp16 = slice_by_index(begin = var_19387_begin_0, end = var_19387_end_0, end_mask = var_19387_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19387_cast_fp16")]; tensor var_19391_begin_0 = const()[name = tensor("op_19391_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_19391_end_0 = const()[name = tensor("op_19391_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_19391_end_mask_0 = const()[name = tensor("op_19391_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19391_cast_fp16 = slice_by_index(begin = var_19391_begin_0, end = var_19391_end_0, end_mask = var_19391_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19391_cast_fp16")]; tensor var_19395_begin_0 = const()[name = tensor("op_19395_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_19395_end_0 = const()[name = tensor("op_19395_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_19395_end_mask_0 = const()[name = tensor("op_19395_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19395_cast_fp16 = slice_by_index(begin = var_19395_begin_0, end = var_19395_end_0, end_mask = var_19395_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19395_cast_fp16")]; tensor var_19399_begin_0 = const()[name = tensor("op_19399_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_19399_end_0 = const()[name = tensor("op_19399_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_19399_end_mask_0 = const()[name = tensor("op_19399_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19399_cast_fp16 = slice_by_index(begin = var_19399_begin_0, end = var_19399_end_0, end_mask = var_19399_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19399_cast_fp16")]; tensor var_19403_begin_0 = const()[name = tensor("op_19403_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_19403_end_0 = const()[name = tensor("op_19403_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_19403_end_mask_0 = const()[name = tensor("op_19403_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19403_cast_fp16 = slice_by_index(begin = var_19403_begin_0, end = var_19403_end_0, end_mask = var_19403_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19403_cast_fp16")]; tensor var_19407_begin_0 = const()[name = tensor("op_19407_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_19407_end_0 = const()[name = tensor("op_19407_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_19407_end_mask_0 = const()[name = tensor("op_19407_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19407_cast_fp16 = slice_by_index(begin = var_19407_begin_0, end = var_19407_end_0, end_mask = var_19407_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19407_cast_fp16")]; tensor var_19411_begin_0 = const()[name = tensor("op_19411_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_19411_end_0 = const()[name = tensor("op_19411_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_19411_end_mask_0 = const()[name = tensor("op_19411_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19411_cast_fp16 = slice_by_index(begin = var_19411_begin_0, end = var_19411_end_0, end_mask = var_19411_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19411_cast_fp16")]; tensor var_19415_begin_0 = const()[name = tensor("op_19415_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_19415_end_0 = const()[name = tensor("op_19415_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_19415_end_mask_0 = const()[name = tensor("op_19415_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19415_cast_fp16 = slice_by_index(begin = var_19415_begin_0, end = var_19415_end_0, end_mask = var_19415_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19415_cast_fp16")]; tensor var_19419_begin_0 = const()[name = tensor("op_19419_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_19419_end_0 = const()[name = tensor("op_19419_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_19419_end_mask_0 = const()[name = tensor("op_19419_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19419_cast_fp16 = slice_by_index(begin = var_19419_begin_0, end = var_19419_end_0, end_mask = var_19419_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19419_cast_fp16")]; tensor var_19423_begin_0 = const()[name = tensor("op_19423_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_19423_end_0 = const()[name = tensor("op_19423_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_19423_end_mask_0 = const()[name = tensor("op_19423_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19423_cast_fp16 = slice_by_index(begin = var_19423_begin_0, end = var_19423_end_0, end_mask = var_19423_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19423_cast_fp16")]; tensor var_19427_begin_0 = const()[name = tensor("op_19427_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_19427_end_0 = const()[name = tensor("op_19427_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_19427_end_mask_0 = const()[name = tensor("op_19427_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_19427_cast_fp16 = slice_by_index(begin = var_19427_begin_0, end = var_19427_end_0, end_mask = var_19427_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_19427_cast_fp16")]; tensor var_19436_begin_0 = const()[name = tensor("op_19436_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19436_end_0 = const()[name = tensor("op_19436_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19436_end_mask_0 = const()[name = tensor("op_19436_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19436_cast_fp16 = slice_by_index(begin = var_19436_begin_0, end = var_19436_end_0, end_mask = var_19436_end_mask_0, x = var_19351_cast_fp16)[name = tensor("op_19436_cast_fp16")]; tensor var_19443_begin_0 = const()[name = tensor("op_19443_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19443_end_0 = const()[name = tensor("op_19443_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19443_end_mask_0 = const()[name = tensor("op_19443_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19443_cast_fp16 = slice_by_index(begin = var_19443_begin_0, end = var_19443_end_0, end_mask = var_19443_end_mask_0, x = var_19351_cast_fp16)[name = tensor("op_19443_cast_fp16")]; tensor var_19450_begin_0 = const()[name = tensor("op_19450_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19450_end_0 = const()[name = tensor("op_19450_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19450_end_mask_0 = const()[name = tensor("op_19450_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19450_cast_fp16 = slice_by_index(begin = var_19450_begin_0, end = var_19450_end_0, end_mask = var_19450_end_mask_0, x = var_19351_cast_fp16)[name = tensor("op_19450_cast_fp16")]; tensor var_19457_begin_0 = const()[name = tensor("op_19457_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19457_end_0 = const()[name = tensor("op_19457_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19457_end_mask_0 = const()[name = tensor("op_19457_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19457_cast_fp16 = slice_by_index(begin = var_19457_begin_0, end = var_19457_end_0, end_mask = var_19457_end_mask_0, x = var_19351_cast_fp16)[name = tensor("op_19457_cast_fp16")]; tensor var_19464_begin_0 = const()[name = tensor("op_19464_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19464_end_0 = const()[name = tensor("op_19464_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19464_end_mask_0 = const()[name = tensor("op_19464_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19464_cast_fp16 = slice_by_index(begin = var_19464_begin_0, end = var_19464_end_0, end_mask = var_19464_end_mask_0, x = var_19355_cast_fp16)[name = tensor("op_19464_cast_fp16")]; tensor var_19471_begin_0 = const()[name = tensor("op_19471_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19471_end_0 = const()[name = tensor("op_19471_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19471_end_mask_0 = const()[name = tensor("op_19471_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19471_cast_fp16 = slice_by_index(begin = var_19471_begin_0, end = var_19471_end_0, end_mask = var_19471_end_mask_0, x = var_19355_cast_fp16)[name = tensor("op_19471_cast_fp16")]; tensor var_19478_begin_0 = const()[name = tensor("op_19478_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19478_end_0 = const()[name = tensor("op_19478_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19478_end_mask_0 = const()[name = tensor("op_19478_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19478_cast_fp16 = slice_by_index(begin = var_19478_begin_0, end = var_19478_end_0, end_mask = var_19478_end_mask_0, x = var_19355_cast_fp16)[name = tensor("op_19478_cast_fp16")]; tensor var_19485_begin_0 = const()[name = tensor("op_19485_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19485_end_0 = const()[name = tensor("op_19485_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19485_end_mask_0 = const()[name = tensor("op_19485_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19485_cast_fp16 = slice_by_index(begin = var_19485_begin_0, end = var_19485_end_0, end_mask = var_19485_end_mask_0, x = var_19355_cast_fp16)[name = tensor("op_19485_cast_fp16")]; tensor var_19492_begin_0 = const()[name = tensor("op_19492_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19492_end_0 = const()[name = tensor("op_19492_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19492_end_mask_0 = const()[name = tensor("op_19492_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19492_cast_fp16 = slice_by_index(begin = var_19492_begin_0, end = var_19492_end_0, end_mask = var_19492_end_mask_0, x = var_19359_cast_fp16)[name = tensor("op_19492_cast_fp16")]; tensor var_19499_begin_0 = const()[name = tensor("op_19499_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19499_end_0 = const()[name = tensor("op_19499_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19499_end_mask_0 = const()[name = tensor("op_19499_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19499_cast_fp16 = slice_by_index(begin = var_19499_begin_0, end = var_19499_end_0, end_mask = var_19499_end_mask_0, x = var_19359_cast_fp16)[name = tensor("op_19499_cast_fp16")]; tensor var_19506_begin_0 = const()[name = tensor("op_19506_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19506_end_0 = const()[name = tensor("op_19506_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19506_end_mask_0 = const()[name = tensor("op_19506_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19506_cast_fp16 = slice_by_index(begin = var_19506_begin_0, end = var_19506_end_0, end_mask = var_19506_end_mask_0, x = var_19359_cast_fp16)[name = tensor("op_19506_cast_fp16")]; tensor var_19513_begin_0 = const()[name = tensor("op_19513_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19513_end_0 = const()[name = tensor("op_19513_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19513_end_mask_0 = const()[name = tensor("op_19513_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19513_cast_fp16 = slice_by_index(begin = var_19513_begin_0, end = var_19513_end_0, end_mask = var_19513_end_mask_0, x = var_19359_cast_fp16)[name = tensor("op_19513_cast_fp16")]; tensor var_19520_begin_0 = const()[name = tensor("op_19520_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19520_end_0 = const()[name = tensor("op_19520_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19520_end_mask_0 = const()[name = tensor("op_19520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19520_cast_fp16 = slice_by_index(begin = var_19520_begin_0, end = var_19520_end_0, end_mask = var_19520_end_mask_0, x = var_19363_cast_fp16)[name = tensor("op_19520_cast_fp16")]; tensor var_19527_begin_0 = const()[name = tensor("op_19527_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19527_end_0 = const()[name = tensor("op_19527_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19527_end_mask_0 = const()[name = tensor("op_19527_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19527_cast_fp16 = slice_by_index(begin = var_19527_begin_0, end = var_19527_end_0, end_mask = var_19527_end_mask_0, x = var_19363_cast_fp16)[name = tensor("op_19527_cast_fp16")]; tensor var_19534_begin_0 = const()[name = tensor("op_19534_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19534_end_0 = const()[name = tensor("op_19534_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19534_end_mask_0 = const()[name = tensor("op_19534_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19534_cast_fp16 = slice_by_index(begin = var_19534_begin_0, end = var_19534_end_0, end_mask = var_19534_end_mask_0, x = var_19363_cast_fp16)[name = tensor("op_19534_cast_fp16")]; tensor var_19541_begin_0 = const()[name = tensor("op_19541_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19541_end_0 = const()[name = tensor("op_19541_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19541_end_mask_0 = const()[name = tensor("op_19541_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19541_cast_fp16 = slice_by_index(begin = var_19541_begin_0, end = var_19541_end_0, end_mask = var_19541_end_mask_0, x = var_19363_cast_fp16)[name = tensor("op_19541_cast_fp16")]; tensor var_19548_begin_0 = const()[name = tensor("op_19548_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19548_end_0 = const()[name = tensor("op_19548_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19548_end_mask_0 = const()[name = tensor("op_19548_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19548_cast_fp16 = slice_by_index(begin = var_19548_begin_0, end = var_19548_end_0, end_mask = var_19548_end_mask_0, x = var_19367_cast_fp16)[name = tensor("op_19548_cast_fp16")]; tensor var_19555_begin_0 = const()[name = tensor("op_19555_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19555_end_0 = const()[name = tensor("op_19555_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19555_end_mask_0 = const()[name = tensor("op_19555_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19555_cast_fp16 = slice_by_index(begin = var_19555_begin_0, end = var_19555_end_0, end_mask = var_19555_end_mask_0, x = var_19367_cast_fp16)[name = tensor("op_19555_cast_fp16")]; tensor var_19562_begin_0 = const()[name = tensor("op_19562_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19562_end_0 = const()[name = tensor("op_19562_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19562_end_mask_0 = const()[name = tensor("op_19562_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19562_cast_fp16 = slice_by_index(begin = var_19562_begin_0, end = var_19562_end_0, end_mask = var_19562_end_mask_0, x = var_19367_cast_fp16)[name = tensor("op_19562_cast_fp16")]; tensor var_19569_begin_0 = const()[name = tensor("op_19569_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19569_end_0 = const()[name = tensor("op_19569_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19569_end_mask_0 = const()[name = tensor("op_19569_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19569_cast_fp16 = slice_by_index(begin = var_19569_begin_0, end = var_19569_end_0, end_mask = var_19569_end_mask_0, x = var_19367_cast_fp16)[name = tensor("op_19569_cast_fp16")]; tensor var_19576_begin_0 = const()[name = tensor("op_19576_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19576_end_0 = const()[name = tensor("op_19576_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19576_end_mask_0 = const()[name = tensor("op_19576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19576_cast_fp16 = slice_by_index(begin = var_19576_begin_0, end = var_19576_end_0, end_mask = var_19576_end_mask_0, x = var_19371_cast_fp16)[name = tensor("op_19576_cast_fp16")]; tensor var_19583_begin_0 = const()[name = tensor("op_19583_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19583_end_0 = const()[name = tensor("op_19583_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19583_end_mask_0 = const()[name = tensor("op_19583_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19583_cast_fp16 = slice_by_index(begin = var_19583_begin_0, end = var_19583_end_0, end_mask = var_19583_end_mask_0, x = var_19371_cast_fp16)[name = tensor("op_19583_cast_fp16")]; tensor var_19590_begin_0 = const()[name = tensor("op_19590_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19590_end_0 = const()[name = tensor("op_19590_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19590_end_mask_0 = const()[name = tensor("op_19590_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19590_cast_fp16 = slice_by_index(begin = var_19590_begin_0, end = var_19590_end_0, end_mask = var_19590_end_mask_0, x = var_19371_cast_fp16)[name = tensor("op_19590_cast_fp16")]; tensor var_19597_begin_0 = const()[name = tensor("op_19597_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19597_end_0 = const()[name = tensor("op_19597_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19597_end_mask_0 = const()[name = tensor("op_19597_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19597_cast_fp16 = slice_by_index(begin = var_19597_begin_0, end = var_19597_end_0, end_mask = var_19597_end_mask_0, x = var_19371_cast_fp16)[name = tensor("op_19597_cast_fp16")]; tensor var_19604_begin_0 = const()[name = tensor("op_19604_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19604_end_0 = const()[name = tensor("op_19604_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19604_end_mask_0 = const()[name = tensor("op_19604_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19604_cast_fp16 = slice_by_index(begin = var_19604_begin_0, end = var_19604_end_0, end_mask = var_19604_end_mask_0, x = var_19375_cast_fp16)[name = tensor("op_19604_cast_fp16")]; tensor var_19611_begin_0 = const()[name = tensor("op_19611_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19611_end_0 = const()[name = tensor("op_19611_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19611_end_mask_0 = const()[name = tensor("op_19611_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19611_cast_fp16 = slice_by_index(begin = var_19611_begin_0, end = var_19611_end_0, end_mask = var_19611_end_mask_0, x = var_19375_cast_fp16)[name = tensor("op_19611_cast_fp16")]; tensor var_19618_begin_0 = const()[name = tensor("op_19618_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19618_end_0 = const()[name = tensor("op_19618_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19618_end_mask_0 = const()[name = tensor("op_19618_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19618_cast_fp16 = slice_by_index(begin = var_19618_begin_0, end = var_19618_end_0, end_mask = var_19618_end_mask_0, x = var_19375_cast_fp16)[name = tensor("op_19618_cast_fp16")]; tensor var_19625_begin_0 = const()[name = tensor("op_19625_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19625_end_0 = const()[name = tensor("op_19625_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19625_end_mask_0 = const()[name = tensor("op_19625_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19625_cast_fp16 = slice_by_index(begin = var_19625_begin_0, end = var_19625_end_0, end_mask = var_19625_end_mask_0, x = var_19375_cast_fp16)[name = tensor("op_19625_cast_fp16")]; tensor var_19632_begin_0 = const()[name = tensor("op_19632_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19632_end_0 = const()[name = tensor("op_19632_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19632_end_mask_0 = const()[name = tensor("op_19632_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19632_cast_fp16 = slice_by_index(begin = var_19632_begin_0, end = var_19632_end_0, end_mask = var_19632_end_mask_0, x = var_19379_cast_fp16)[name = tensor("op_19632_cast_fp16")]; tensor var_19639_begin_0 = const()[name = tensor("op_19639_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19639_end_0 = const()[name = tensor("op_19639_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19639_end_mask_0 = const()[name = tensor("op_19639_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19639_cast_fp16 = slice_by_index(begin = var_19639_begin_0, end = var_19639_end_0, end_mask = var_19639_end_mask_0, x = var_19379_cast_fp16)[name = tensor("op_19639_cast_fp16")]; tensor var_19646_begin_0 = const()[name = tensor("op_19646_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19646_end_0 = const()[name = tensor("op_19646_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19646_end_mask_0 = const()[name = tensor("op_19646_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19646_cast_fp16 = slice_by_index(begin = var_19646_begin_0, end = var_19646_end_0, end_mask = var_19646_end_mask_0, x = var_19379_cast_fp16)[name = tensor("op_19646_cast_fp16")]; tensor var_19653_begin_0 = const()[name = tensor("op_19653_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19653_end_0 = const()[name = tensor("op_19653_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19653_end_mask_0 = const()[name = tensor("op_19653_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19653_cast_fp16 = slice_by_index(begin = var_19653_begin_0, end = var_19653_end_0, end_mask = var_19653_end_mask_0, x = var_19379_cast_fp16)[name = tensor("op_19653_cast_fp16")]; tensor var_19660_begin_0 = const()[name = tensor("op_19660_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19660_end_0 = const()[name = tensor("op_19660_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19660_end_mask_0 = const()[name = tensor("op_19660_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19660_cast_fp16 = slice_by_index(begin = var_19660_begin_0, end = var_19660_end_0, end_mask = var_19660_end_mask_0, x = var_19383_cast_fp16)[name = tensor("op_19660_cast_fp16")]; tensor var_19667_begin_0 = const()[name = tensor("op_19667_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19667_end_0 = const()[name = tensor("op_19667_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19667_end_mask_0 = const()[name = tensor("op_19667_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19667_cast_fp16 = slice_by_index(begin = var_19667_begin_0, end = var_19667_end_0, end_mask = var_19667_end_mask_0, x = var_19383_cast_fp16)[name = tensor("op_19667_cast_fp16")]; tensor var_19674_begin_0 = const()[name = tensor("op_19674_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19674_end_0 = const()[name = tensor("op_19674_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19674_end_mask_0 = const()[name = tensor("op_19674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19674_cast_fp16 = slice_by_index(begin = var_19674_begin_0, end = var_19674_end_0, end_mask = var_19674_end_mask_0, x = var_19383_cast_fp16)[name = tensor("op_19674_cast_fp16")]; tensor var_19681_begin_0 = const()[name = tensor("op_19681_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19681_end_0 = const()[name = tensor("op_19681_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19681_end_mask_0 = const()[name = tensor("op_19681_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19681_cast_fp16 = slice_by_index(begin = var_19681_begin_0, end = var_19681_end_0, end_mask = var_19681_end_mask_0, x = var_19383_cast_fp16)[name = tensor("op_19681_cast_fp16")]; tensor var_19688_begin_0 = const()[name = tensor("op_19688_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19688_end_0 = const()[name = tensor("op_19688_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19688_end_mask_0 = const()[name = tensor("op_19688_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19688_cast_fp16 = slice_by_index(begin = var_19688_begin_0, end = var_19688_end_0, end_mask = var_19688_end_mask_0, x = var_19387_cast_fp16)[name = tensor("op_19688_cast_fp16")]; tensor var_19695_begin_0 = const()[name = tensor("op_19695_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19695_end_0 = const()[name = tensor("op_19695_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19695_end_mask_0 = const()[name = tensor("op_19695_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19695_cast_fp16 = slice_by_index(begin = var_19695_begin_0, end = var_19695_end_0, end_mask = var_19695_end_mask_0, x = var_19387_cast_fp16)[name = tensor("op_19695_cast_fp16")]; tensor var_19702_begin_0 = const()[name = tensor("op_19702_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19702_end_0 = const()[name = tensor("op_19702_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19702_end_mask_0 = const()[name = tensor("op_19702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19702_cast_fp16 = slice_by_index(begin = var_19702_begin_0, end = var_19702_end_0, end_mask = var_19702_end_mask_0, x = var_19387_cast_fp16)[name = tensor("op_19702_cast_fp16")]; tensor var_19709_begin_0 = const()[name = tensor("op_19709_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19709_end_0 = const()[name = tensor("op_19709_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19709_end_mask_0 = const()[name = tensor("op_19709_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19709_cast_fp16 = slice_by_index(begin = var_19709_begin_0, end = var_19709_end_0, end_mask = var_19709_end_mask_0, x = var_19387_cast_fp16)[name = tensor("op_19709_cast_fp16")]; tensor var_19716_begin_0 = const()[name = tensor("op_19716_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19716_end_0 = const()[name = tensor("op_19716_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19716_end_mask_0 = const()[name = tensor("op_19716_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19716_cast_fp16 = slice_by_index(begin = var_19716_begin_0, end = var_19716_end_0, end_mask = var_19716_end_mask_0, x = var_19391_cast_fp16)[name = tensor("op_19716_cast_fp16")]; tensor var_19723_begin_0 = const()[name = tensor("op_19723_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19723_end_0 = const()[name = tensor("op_19723_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19723_end_mask_0 = const()[name = tensor("op_19723_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19723_cast_fp16 = slice_by_index(begin = var_19723_begin_0, end = var_19723_end_0, end_mask = var_19723_end_mask_0, x = var_19391_cast_fp16)[name = tensor("op_19723_cast_fp16")]; tensor var_19730_begin_0 = const()[name = tensor("op_19730_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19730_end_0 = const()[name = tensor("op_19730_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19730_end_mask_0 = const()[name = tensor("op_19730_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19730_cast_fp16 = slice_by_index(begin = var_19730_begin_0, end = var_19730_end_0, end_mask = var_19730_end_mask_0, x = var_19391_cast_fp16)[name = tensor("op_19730_cast_fp16")]; tensor var_19737_begin_0 = const()[name = tensor("op_19737_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19737_end_0 = const()[name = tensor("op_19737_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19737_end_mask_0 = const()[name = tensor("op_19737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19737_cast_fp16 = slice_by_index(begin = var_19737_begin_0, end = var_19737_end_0, end_mask = var_19737_end_mask_0, x = var_19391_cast_fp16)[name = tensor("op_19737_cast_fp16")]; tensor var_19744_begin_0 = const()[name = tensor("op_19744_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19744_end_0 = const()[name = tensor("op_19744_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19744_end_mask_0 = const()[name = tensor("op_19744_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19744_cast_fp16 = slice_by_index(begin = var_19744_begin_0, end = var_19744_end_0, end_mask = var_19744_end_mask_0, x = var_19395_cast_fp16)[name = tensor("op_19744_cast_fp16")]; tensor var_19751_begin_0 = const()[name = tensor("op_19751_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19751_end_0 = const()[name = tensor("op_19751_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19751_end_mask_0 = const()[name = tensor("op_19751_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19751_cast_fp16 = slice_by_index(begin = var_19751_begin_0, end = var_19751_end_0, end_mask = var_19751_end_mask_0, x = var_19395_cast_fp16)[name = tensor("op_19751_cast_fp16")]; tensor var_19758_begin_0 = const()[name = tensor("op_19758_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19758_end_0 = const()[name = tensor("op_19758_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19758_end_mask_0 = const()[name = tensor("op_19758_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19758_cast_fp16 = slice_by_index(begin = var_19758_begin_0, end = var_19758_end_0, end_mask = var_19758_end_mask_0, x = var_19395_cast_fp16)[name = tensor("op_19758_cast_fp16")]; tensor var_19765_begin_0 = const()[name = tensor("op_19765_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19765_end_0 = const()[name = tensor("op_19765_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19765_end_mask_0 = const()[name = tensor("op_19765_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19765_cast_fp16 = slice_by_index(begin = var_19765_begin_0, end = var_19765_end_0, end_mask = var_19765_end_mask_0, x = var_19395_cast_fp16)[name = tensor("op_19765_cast_fp16")]; tensor var_19772_begin_0 = const()[name = tensor("op_19772_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19772_end_0 = const()[name = tensor("op_19772_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19772_end_mask_0 = const()[name = tensor("op_19772_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19772_cast_fp16 = slice_by_index(begin = var_19772_begin_0, end = var_19772_end_0, end_mask = var_19772_end_mask_0, x = var_19399_cast_fp16)[name = tensor("op_19772_cast_fp16")]; tensor var_19779_begin_0 = const()[name = tensor("op_19779_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19779_end_0 = const()[name = tensor("op_19779_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19779_end_mask_0 = const()[name = tensor("op_19779_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19779_cast_fp16 = slice_by_index(begin = var_19779_begin_0, end = var_19779_end_0, end_mask = var_19779_end_mask_0, x = var_19399_cast_fp16)[name = tensor("op_19779_cast_fp16")]; tensor var_19786_begin_0 = const()[name = tensor("op_19786_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19786_end_0 = const()[name = tensor("op_19786_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19786_end_mask_0 = const()[name = tensor("op_19786_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19786_cast_fp16 = slice_by_index(begin = var_19786_begin_0, end = var_19786_end_0, end_mask = var_19786_end_mask_0, x = var_19399_cast_fp16)[name = tensor("op_19786_cast_fp16")]; tensor var_19793_begin_0 = const()[name = tensor("op_19793_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19793_end_0 = const()[name = tensor("op_19793_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19793_end_mask_0 = const()[name = tensor("op_19793_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19793_cast_fp16 = slice_by_index(begin = var_19793_begin_0, end = var_19793_end_0, end_mask = var_19793_end_mask_0, x = var_19399_cast_fp16)[name = tensor("op_19793_cast_fp16")]; tensor var_19800_begin_0 = const()[name = tensor("op_19800_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19800_end_0 = const()[name = tensor("op_19800_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19800_end_mask_0 = const()[name = tensor("op_19800_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19800_cast_fp16 = slice_by_index(begin = var_19800_begin_0, end = var_19800_end_0, end_mask = var_19800_end_mask_0, x = var_19403_cast_fp16)[name = tensor("op_19800_cast_fp16")]; tensor var_19807_begin_0 = const()[name = tensor("op_19807_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19807_end_0 = const()[name = tensor("op_19807_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19807_end_mask_0 = const()[name = tensor("op_19807_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19807_cast_fp16 = slice_by_index(begin = var_19807_begin_0, end = var_19807_end_0, end_mask = var_19807_end_mask_0, x = var_19403_cast_fp16)[name = tensor("op_19807_cast_fp16")]; tensor var_19814_begin_0 = const()[name = tensor("op_19814_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19814_end_0 = const()[name = tensor("op_19814_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19814_end_mask_0 = const()[name = tensor("op_19814_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19814_cast_fp16 = slice_by_index(begin = var_19814_begin_0, end = var_19814_end_0, end_mask = var_19814_end_mask_0, x = var_19403_cast_fp16)[name = tensor("op_19814_cast_fp16")]; tensor var_19821_begin_0 = const()[name = tensor("op_19821_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19821_end_0 = const()[name = tensor("op_19821_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19821_end_mask_0 = const()[name = tensor("op_19821_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19821_cast_fp16 = slice_by_index(begin = var_19821_begin_0, end = var_19821_end_0, end_mask = var_19821_end_mask_0, x = var_19403_cast_fp16)[name = tensor("op_19821_cast_fp16")]; tensor var_19828_begin_0 = const()[name = tensor("op_19828_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19828_end_0 = const()[name = tensor("op_19828_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19828_end_mask_0 = const()[name = tensor("op_19828_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19828_cast_fp16 = slice_by_index(begin = var_19828_begin_0, end = var_19828_end_0, end_mask = var_19828_end_mask_0, x = var_19407_cast_fp16)[name = tensor("op_19828_cast_fp16")]; tensor var_19835_begin_0 = const()[name = tensor("op_19835_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19835_end_0 = const()[name = tensor("op_19835_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19835_end_mask_0 = const()[name = tensor("op_19835_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19835_cast_fp16 = slice_by_index(begin = var_19835_begin_0, end = var_19835_end_0, end_mask = var_19835_end_mask_0, x = var_19407_cast_fp16)[name = tensor("op_19835_cast_fp16")]; tensor var_19842_begin_0 = const()[name = tensor("op_19842_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19842_end_0 = const()[name = tensor("op_19842_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19842_end_mask_0 = const()[name = tensor("op_19842_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19842_cast_fp16 = slice_by_index(begin = var_19842_begin_0, end = var_19842_end_0, end_mask = var_19842_end_mask_0, x = var_19407_cast_fp16)[name = tensor("op_19842_cast_fp16")]; tensor var_19849_begin_0 = const()[name = tensor("op_19849_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19849_end_0 = const()[name = tensor("op_19849_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19849_end_mask_0 = const()[name = tensor("op_19849_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19849_cast_fp16 = slice_by_index(begin = var_19849_begin_0, end = var_19849_end_0, end_mask = var_19849_end_mask_0, x = var_19407_cast_fp16)[name = tensor("op_19849_cast_fp16")]; tensor var_19856_begin_0 = const()[name = tensor("op_19856_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19856_end_0 = const()[name = tensor("op_19856_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19856_end_mask_0 = const()[name = tensor("op_19856_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19856_cast_fp16 = slice_by_index(begin = var_19856_begin_0, end = var_19856_end_0, end_mask = var_19856_end_mask_0, x = var_19411_cast_fp16)[name = tensor("op_19856_cast_fp16")]; tensor var_19863_begin_0 = const()[name = tensor("op_19863_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19863_end_0 = const()[name = tensor("op_19863_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19863_end_mask_0 = const()[name = tensor("op_19863_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19863_cast_fp16 = slice_by_index(begin = var_19863_begin_0, end = var_19863_end_0, end_mask = var_19863_end_mask_0, x = var_19411_cast_fp16)[name = tensor("op_19863_cast_fp16")]; tensor var_19870_begin_0 = const()[name = tensor("op_19870_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19870_end_0 = const()[name = tensor("op_19870_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19870_end_mask_0 = const()[name = tensor("op_19870_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19870_cast_fp16 = slice_by_index(begin = var_19870_begin_0, end = var_19870_end_0, end_mask = var_19870_end_mask_0, x = var_19411_cast_fp16)[name = tensor("op_19870_cast_fp16")]; tensor var_19877_begin_0 = const()[name = tensor("op_19877_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19877_end_0 = const()[name = tensor("op_19877_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19877_end_mask_0 = const()[name = tensor("op_19877_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19877_cast_fp16 = slice_by_index(begin = var_19877_begin_0, end = var_19877_end_0, end_mask = var_19877_end_mask_0, x = var_19411_cast_fp16)[name = tensor("op_19877_cast_fp16")]; tensor var_19884_begin_0 = const()[name = tensor("op_19884_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19884_end_0 = const()[name = tensor("op_19884_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19884_end_mask_0 = const()[name = tensor("op_19884_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19884_cast_fp16 = slice_by_index(begin = var_19884_begin_0, end = var_19884_end_0, end_mask = var_19884_end_mask_0, x = var_19415_cast_fp16)[name = tensor("op_19884_cast_fp16")]; tensor var_19891_begin_0 = const()[name = tensor("op_19891_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19891_end_0 = const()[name = tensor("op_19891_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19891_end_mask_0 = const()[name = tensor("op_19891_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19891_cast_fp16 = slice_by_index(begin = var_19891_begin_0, end = var_19891_end_0, end_mask = var_19891_end_mask_0, x = var_19415_cast_fp16)[name = tensor("op_19891_cast_fp16")]; tensor var_19898_begin_0 = const()[name = tensor("op_19898_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19898_end_0 = const()[name = tensor("op_19898_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19898_end_mask_0 = const()[name = tensor("op_19898_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19898_cast_fp16 = slice_by_index(begin = var_19898_begin_0, end = var_19898_end_0, end_mask = var_19898_end_mask_0, x = var_19415_cast_fp16)[name = tensor("op_19898_cast_fp16")]; tensor var_19905_begin_0 = const()[name = tensor("op_19905_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19905_end_0 = const()[name = tensor("op_19905_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19905_end_mask_0 = const()[name = tensor("op_19905_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19905_cast_fp16 = slice_by_index(begin = var_19905_begin_0, end = var_19905_end_0, end_mask = var_19905_end_mask_0, x = var_19415_cast_fp16)[name = tensor("op_19905_cast_fp16")]; tensor var_19912_begin_0 = const()[name = tensor("op_19912_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19912_end_0 = const()[name = tensor("op_19912_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19912_end_mask_0 = const()[name = tensor("op_19912_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19912_cast_fp16 = slice_by_index(begin = var_19912_begin_0, end = var_19912_end_0, end_mask = var_19912_end_mask_0, x = var_19419_cast_fp16)[name = tensor("op_19912_cast_fp16")]; tensor var_19919_begin_0 = const()[name = tensor("op_19919_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19919_end_0 = const()[name = tensor("op_19919_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19919_end_mask_0 = const()[name = tensor("op_19919_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19919_cast_fp16 = slice_by_index(begin = var_19919_begin_0, end = var_19919_end_0, end_mask = var_19919_end_mask_0, x = var_19419_cast_fp16)[name = tensor("op_19919_cast_fp16")]; tensor var_19926_begin_0 = const()[name = tensor("op_19926_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19926_end_0 = const()[name = tensor("op_19926_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19926_end_mask_0 = const()[name = tensor("op_19926_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19926_cast_fp16 = slice_by_index(begin = var_19926_begin_0, end = var_19926_end_0, end_mask = var_19926_end_mask_0, x = var_19419_cast_fp16)[name = tensor("op_19926_cast_fp16")]; tensor var_19933_begin_0 = const()[name = tensor("op_19933_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19933_end_0 = const()[name = tensor("op_19933_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19933_end_mask_0 = const()[name = tensor("op_19933_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19933_cast_fp16 = slice_by_index(begin = var_19933_begin_0, end = var_19933_end_0, end_mask = var_19933_end_mask_0, x = var_19419_cast_fp16)[name = tensor("op_19933_cast_fp16")]; tensor var_19940_begin_0 = const()[name = tensor("op_19940_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19940_end_0 = const()[name = tensor("op_19940_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19940_end_mask_0 = const()[name = tensor("op_19940_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19940_cast_fp16 = slice_by_index(begin = var_19940_begin_0, end = var_19940_end_0, end_mask = var_19940_end_mask_0, x = var_19423_cast_fp16)[name = tensor("op_19940_cast_fp16")]; tensor var_19947_begin_0 = const()[name = tensor("op_19947_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19947_end_0 = const()[name = tensor("op_19947_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19947_end_mask_0 = const()[name = tensor("op_19947_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19947_cast_fp16 = slice_by_index(begin = var_19947_begin_0, end = var_19947_end_0, end_mask = var_19947_end_mask_0, x = var_19423_cast_fp16)[name = tensor("op_19947_cast_fp16")]; tensor var_19954_begin_0 = const()[name = tensor("op_19954_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19954_end_0 = const()[name = tensor("op_19954_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19954_end_mask_0 = const()[name = tensor("op_19954_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19954_cast_fp16 = slice_by_index(begin = var_19954_begin_0, end = var_19954_end_0, end_mask = var_19954_end_mask_0, x = var_19423_cast_fp16)[name = tensor("op_19954_cast_fp16")]; tensor var_19961_begin_0 = const()[name = tensor("op_19961_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19961_end_0 = const()[name = tensor("op_19961_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19961_end_mask_0 = const()[name = tensor("op_19961_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19961_cast_fp16 = slice_by_index(begin = var_19961_begin_0, end = var_19961_end_0, end_mask = var_19961_end_mask_0, x = var_19423_cast_fp16)[name = tensor("op_19961_cast_fp16")]; tensor var_19968_begin_0 = const()[name = tensor("op_19968_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19968_end_0 = const()[name = tensor("op_19968_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_19968_end_mask_0 = const()[name = tensor("op_19968_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19968_cast_fp16 = slice_by_index(begin = var_19968_begin_0, end = var_19968_end_0, end_mask = var_19968_end_mask_0, x = var_19427_cast_fp16)[name = tensor("op_19968_cast_fp16")]; tensor var_19975_begin_0 = const()[name = tensor("op_19975_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_19975_end_0 = const()[name = tensor("op_19975_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_19975_end_mask_0 = const()[name = tensor("op_19975_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19975_cast_fp16 = slice_by_index(begin = var_19975_begin_0, end = var_19975_end_0, end_mask = var_19975_end_mask_0, x = var_19427_cast_fp16)[name = tensor("op_19975_cast_fp16")]; tensor var_19982_begin_0 = const()[name = tensor("op_19982_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_19982_end_0 = const()[name = tensor("op_19982_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_19982_end_mask_0 = const()[name = tensor("op_19982_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19982_cast_fp16 = slice_by_index(begin = var_19982_begin_0, end = var_19982_end_0, end_mask = var_19982_end_mask_0, x = var_19427_cast_fp16)[name = tensor("op_19982_cast_fp16")]; tensor var_19989_begin_0 = const()[name = tensor("op_19989_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_19989_end_0 = const()[name = tensor("op_19989_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_19989_end_mask_0 = const()[name = tensor("op_19989_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19989_cast_fp16 = slice_by_index(begin = var_19989_begin_0, end = var_19989_end_0, end_mask = var_19989_end_mask_0, x = var_19427_cast_fp16)[name = tensor("op_19989_cast_fp16")]; tensor k_25_perm_0 = const()[name = tensor("k_25_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_19994_begin_0 = const()[name = tensor("op_19994_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_19994_end_0 = const()[name = tensor("op_19994_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_19994_end_mask_0 = const()[name = tensor("op_19994_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = key_25_cast_fp16)[name = tensor("transpose_19")]; tensor var_19994_cast_fp16 = slice_by_index(begin = var_19994_begin_0, end = var_19994_end_0, end_mask = var_19994_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_19994_cast_fp16")]; tensor var_19998_begin_0 = const()[name = tensor("op_19998_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_19998_end_0 = const()[name = tensor("op_19998_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_19998_end_mask_0 = const()[name = tensor("op_19998_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_19998_cast_fp16 = slice_by_index(begin = var_19998_begin_0, end = var_19998_end_0, end_mask = var_19998_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_19998_cast_fp16")]; tensor var_20002_begin_0 = const()[name = tensor("op_20002_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_20002_end_0 = const()[name = tensor("op_20002_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_20002_end_mask_0 = const()[name = tensor("op_20002_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20002_cast_fp16 = slice_by_index(begin = var_20002_begin_0, end = var_20002_end_0, end_mask = var_20002_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20002_cast_fp16")]; tensor var_20006_begin_0 = const()[name = tensor("op_20006_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_20006_end_0 = const()[name = tensor("op_20006_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_20006_end_mask_0 = const()[name = tensor("op_20006_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20006_cast_fp16 = slice_by_index(begin = var_20006_begin_0, end = var_20006_end_0, end_mask = var_20006_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20006_cast_fp16")]; tensor var_20010_begin_0 = const()[name = tensor("op_20010_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_20010_end_0 = const()[name = tensor("op_20010_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_20010_end_mask_0 = const()[name = tensor("op_20010_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20010_cast_fp16 = slice_by_index(begin = var_20010_begin_0, end = var_20010_end_0, end_mask = var_20010_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20010_cast_fp16")]; tensor var_20014_begin_0 = const()[name = tensor("op_20014_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_20014_end_0 = const()[name = tensor("op_20014_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_20014_end_mask_0 = const()[name = tensor("op_20014_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20014_cast_fp16 = slice_by_index(begin = var_20014_begin_0, end = var_20014_end_0, end_mask = var_20014_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20014_cast_fp16")]; tensor var_20018_begin_0 = const()[name = tensor("op_20018_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_20018_end_0 = const()[name = tensor("op_20018_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_20018_end_mask_0 = const()[name = tensor("op_20018_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20018_cast_fp16 = slice_by_index(begin = var_20018_begin_0, end = var_20018_end_0, end_mask = var_20018_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20018_cast_fp16")]; tensor var_20022_begin_0 = const()[name = tensor("op_20022_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_20022_end_0 = const()[name = tensor("op_20022_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_20022_end_mask_0 = const()[name = tensor("op_20022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20022_cast_fp16 = slice_by_index(begin = var_20022_begin_0, end = var_20022_end_0, end_mask = var_20022_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20022_cast_fp16")]; tensor var_20026_begin_0 = const()[name = tensor("op_20026_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_20026_end_0 = const()[name = tensor("op_20026_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_20026_end_mask_0 = const()[name = tensor("op_20026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20026_cast_fp16 = slice_by_index(begin = var_20026_begin_0, end = var_20026_end_0, end_mask = var_20026_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20026_cast_fp16")]; tensor var_20030_begin_0 = const()[name = tensor("op_20030_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_20030_end_0 = const()[name = tensor("op_20030_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_20030_end_mask_0 = const()[name = tensor("op_20030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20030_cast_fp16 = slice_by_index(begin = var_20030_begin_0, end = var_20030_end_0, end_mask = var_20030_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20030_cast_fp16")]; tensor var_20034_begin_0 = const()[name = tensor("op_20034_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_20034_end_0 = const()[name = tensor("op_20034_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_20034_end_mask_0 = const()[name = tensor("op_20034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20034_cast_fp16 = slice_by_index(begin = var_20034_begin_0, end = var_20034_end_0, end_mask = var_20034_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20034_cast_fp16")]; tensor var_20038_begin_0 = const()[name = tensor("op_20038_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_20038_end_0 = const()[name = tensor("op_20038_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_20038_end_mask_0 = const()[name = tensor("op_20038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20038_cast_fp16 = slice_by_index(begin = var_20038_begin_0, end = var_20038_end_0, end_mask = var_20038_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20038_cast_fp16")]; tensor var_20042_begin_0 = const()[name = tensor("op_20042_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_20042_end_0 = const()[name = tensor("op_20042_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_20042_end_mask_0 = const()[name = tensor("op_20042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20042_cast_fp16 = slice_by_index(begin = var_20042_begin_0, end = var_20042_end_0, end_mask = var_20042_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20042_cast_fp16")]; tensor var_20046_begin_0 = const()[name = tensor("op_20046_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_20046_end_0 = const()[name = tensor("op_20046_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_20046_end_mask_0 = const()[name = tensor("op_20046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20046_cast_fp16 = slice_by_index(begin = var_20046_begin_0, end = var_20046_end_0, end_mask = var_20046_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20046_cast_fp16")]; tensor var_20050_begin_0 = const()[name = tensor("op_20050_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_20050_end_0 = const()[name = tensor("op_20050_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_20050_end_mask_0 = const()[name = tensor("op_20050_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20050_cast_fp16 = slice_by_index(begin = var_20050_begin_0, end = var_20050_end_0, end_mask = var_20050_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20050_cast_fp16")]; tensor var_20054_begin_0 = const()[name = tensor("op_20054_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_20054_end_0 = const()[name = tensor("op_20054_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_20054_end_mask_0 = const()[name = tensor("op_20054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20054_cast_fp16 = slice_by_index(begin = var_20054_begin_0, end = var_20054_end_0, end_mask = var_20054_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20054_cast_fp16")]; tensor var_20058_begin_0 = const()[name = tensor("op_20058_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_20058_end_0 = const()[name = tensor("op_20058_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_20058_end_mask_0 = const()[name = tensor("op_20058_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20058_cast_fp16 = slice_by_index(begin = var_20058_begin_0, end = var_20058_end_0, end_mask = var_20058_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20058_cast_fp16")]; tensor var_20062_begin_0 = const()[name = tensor("op_20062_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_20062_end_0 = const()[name = tensor("op_20062_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_20062_end_mask_0 = const()[name = tensor("op_20062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20062_cast_fp16 = slice_by_index(begin = var_20062_begin_0, end = var_20062_end_0, end_mask = var_20062_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20062_cast_fp16")]; tensor var_20066_begin_0 = const()[name = tensor("op_20066_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_20066_end_0 = const()[name = tensor("op_20066_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_20066_end_mask_0 = const()[name = tensor("op_20066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20066_cast_fp16 = slice_by_index(begin = var_20066_begin_0, end = var_20066_end_0, end_mask = var_20066_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20066_cast_fp16")]; tensor var_20070_begin_0 = const()[name = tensor("op_20070_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_20070_end_0 = const()[name = tensor("op_20070_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_20070_end_mask_0 = const()[name = tensor("op_20070_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_20070_cast_fp16 = slice_by_index(begin = var_20070_begin_0, end = var_20070_end_0, end_mask = var_20070_end_mask_0, x = k_25_cast_fp16)[name = tensor("op_20070_cast_fp16")]; tensor var_20072_begin_0 = const()[name = tensor("op_20072_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_20072_end_0 = const()[name = tensor("op_20072_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_20072_end_mask_0 = const()[name = tensor("op_20072_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20072_cast_fp16 = slice_by_index(begin = var_20072_begin_0, end = var_20072_end_0, end_mask = var_20072_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20072_cast_fp16")]; tensor var_20076_begin_0 = const()[name = tensor("op_20076_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_20076_end_0 = const()[name = tensor("op_20076_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_20076_end_mask_0 = const()[name = tensor("op_20076_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20076_cast_fp16 = slice_by_index(begin = var_20076_begin_0, end = var_20076_end_0, end_mask = var_20076_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20076_cast_fp16")]; tensor var_20080_begin_0 = const()[name = tensor("op_20080_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_20080_end_0 = const()[name = tensor("op_20080_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_20080_end_mask_0 = const()[name = tensor("op_20080_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20080_cast_fp16 = slice_by_index(begin = var_20080_begin_0, end = var_20080_end_0, end_mask = var_20080_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20080_cast_fp16")]; tensor var_20084_begin_0 = const()[name = tensor("op_20084_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_20084_end_0 = const()[name = tensor("op_20084_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_20084_end_mask_0 = const()[name = tensor("op_20084_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20084_cast_fp16 = slice_by_index(begin = var_20084_begin_0, end = var_20084_end_0, end_mask = var_20084_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20084_cast_fp16")]; tensor var_20088_begin_0 = const()[name = tensor("op_20088_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_20088_end_0 = const()[name = tensor("op_20088_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_20088_end_mask_0 = const()[name = tensor("op_20088_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20088_cast_fp16 = slice_by_index(begin = var_20088_begin_0, end = var_20088_end_0, end_mask = var_20088_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20088_cast_fp16")]; tensor var_20092_begin_0 = const()[name = tensor("op_20092_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_20092_end_0 = const()[name = tensor("op_20092_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_20092_end_mask_0 = const()[name = tensor("op_20092_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20092_cast_fp16 = slice_by_index(begin = var_20092_begin_0, end = var_20092_end_0, end_mask = var_20092_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20092_cast_fp16")]; tensor var_20096_begin_0 = const()[name = tensor("op_20096_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_20096_end_0 = const()[name = tensor("op_20096_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_20096_end_mask_0 = const()[name = tensor("op_20096_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20096_cast_fp16 = slice_by_index(begin = var_20096_begin_0, end = var_20096_end_0, end_mask = var_20096_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20096_cast_fp16")]; tensor var_20100_begin_0 = const()[name = tensor("op_20100_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_20100_end_0 = const()[name = tensor("op_20100_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_20100_end_mask_0 = const()[name = tensor("op_20100_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20100_cast_fp16 = slice_by_index(begin = var_20100_begin_0, end = var_20100_end_0, end_mask = var_20100_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20100_cast_fp16")]; tensor var_20104_begin_0 = const()[name = tensor("op_20104_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_20104_end_0 = const()[name = tensor("op_20104_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_20104_end_mask_0 = const()[name = tensor("op_20104_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20104_cast_fp16 = slice_by_index(begin = var_20104_begin_0, end = var_20104_end_0, end_mask = var_20104_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20104_cast_fp16")]; tensor var_20108_begin_0 = const()[name = tensor("op_20108_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_20108_end_0 = const()[name = tensor("op_20108_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_20108_end_mask_0 = const()[name = tensor("op_20108_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20108_cast_fp16 = slice_by_index(begin = var_20108_begin_0, end = var_20108_end_0, end_mask = var_20108_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20108_cast_fp16")]; tensor var_20112_begin_0 = const()[name = tensor("op_20112_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_20112_end_0 = const()[name = tensor("op_20112_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_20112_end_mask_0 = const()[name = tensor("op_20112_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20112_cast_fp16 = slice_by_index(begin = var_20112_begin_0, end = var_20112_end_0, end_mask = var_20112_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20112_cast_fp16")]; tensor var_20116_begin_0 = const()[name = tensor("op_20116_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_20116_end_0 = const()[name = tensor("op_20116_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_20116_end_mask_0 = const()[name = tensor("op_20116_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20116_cast_fp16 = slice_by_index(begin = var_20116_begin_0, end = var_20116_end_0, end_mask = var_20116_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20116_cast_fp16")]; tensor var_20120_begin_0 = const()[name = tensor("op_20120_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_20120_end_0 = const()[name = tensor("op_20120_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_20120_end_mask_0 = const()[name = tensor("op_20120_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20120_cast_fp16 = slice_by_index(begin = var_20120_begin_0, end = var_20120_end_0, end_mask = var_20120_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20120_cast_fp16")]; tensor var_20124_begin_0 = const()[name = tensor("op_20124_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_20124_end_0 = const()[name = tensor("op_20124_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_20124_end_mask_0 = const()[name = tensor("op_20124_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20124_cast_fp16 = slice_by_index(begin = var_20124_begin_0, end = var_20124_end_0, end_mask = var_20124_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20124_cast_fp16")]; tensor var_20128_begin_0 = const()[name = tensor("op_20128_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_20128_end_0 = const()[name = tensor("op_20128_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_20128_end_mask_0 = const()[name = tensor("op_20128_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20128_cast_fp16 = slice_by_index(begin = var_20128_begin_0, end = var_20128_end_0, end_mask = var_20128_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20128_cast_fp16")]; tensor var_20132_begin_0 = const()[name = tensor("op_20132_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_20132_end_0 = const()[name = tensor("op_20132_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_20132_end_mask_0 = const()[name = tensor("op_20132_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20132_cast_fp16 = slice_by_index(begin = var_20132_begin_0, end = var_20132_end_0, end_mask = var_20132_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20132_cast_fp16")]; tensor var_20136_begin_0 = const()[name = tensor("op_20136_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_20136_end_0 = const()[name = tensor("op_20136_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_20136_end_mask_0 = const()[name = tensor("op_20136_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20136_cast_fp16 = slice_by_index(begin = var_20136_begin_0, end = var_20136_end_0, end_mask = var_20136_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20136_cast_fp16")]; tensor var_20140_begin_0 = const()[name = tensor("op_20140_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_20140_end_0 = const()[name = tensor("op_20140_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_20140_end_mask_0 = const()[name = tensor("op_20140_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20140_cast_fp16 = slice_by_index(begin = var_20140_begin_0, end = var_20140_end_0, end_mask = var_20140_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20140_cast_fp16")]; tensor var_20144_begin_0 = const()[name = tensor("op_20144_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_20144_end_0 = const()[name = tensor("op_20144_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_20144_end_mask_0 = const()[name = tensor("op_20144_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20144_cast_fp16 = slice_by_index(begin = var_20144_begin_0, end = var_20144_end_0, end_mask = var_20144_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20144_cast_fp16")]; tensor var_20148_begin_0 = const()[name = tensor("op_20148_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_20148_end_0 = const()[name = tensor("op_20148_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_20148_end_mask_0 = const()[name = tensor("op_20148_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20148_cast_fp16 = slice_by_index(begin = var_20148_begin_0, end = var_20148_end_0, end_mask = var_20148_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_20148_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1921_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1921_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1921_equation_0, values = (var_19994_cast_fp16, var_19436_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1921_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1923_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1923_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1923_equation_0, values = (var_19994_cast_fp16, var_19443_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1923_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1925_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1925_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1925_equation_0, values = (var_19994_cast_fp16, var_19450_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1925_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1927_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1927_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1927_equation_0, values = (var_19994_cast_fp16, var_19457_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1927_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1929_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1929_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1929_equation_0, values = (var_19998_cast_fp16, var_19464_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1929_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1931_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1931_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1931_equation_0, values = (var_19998_cast_fp16, var_19471_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1931_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1933_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1933_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1933_equation_0, values = (var_19998_cast_fp16, var_19478_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1933_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1935_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1935_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1935_equation_0, values = (var_19998_cast_fp16, var_19485_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1935_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1937_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1937_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1937_equation_0, values = (var_20002_cast_fp16, var_19492_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1937_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1939_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1939_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1939_equation_0, values = (var_20002_cast_fp16, var_19499_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1939_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1941_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1941_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1941_equation_0, values = (var_20002_cast_fp16, var_19506_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1941_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1943_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1943_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1943_equation_0, values = (var_20002_cast_fp16, var_19513_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1943_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1945_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1945_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1945_equation_0, values = (var_20006_cast_fp16, var_19520_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1945_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1947_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1947_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1947_equation_0, values = (var_20006_cast_fp16, var_19527_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1947_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1949_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1949_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1949_equation_0, values = (var_20006_cast_fp16, var_19534_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1949_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1951_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1951_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1951_equation_0, values = (var_20006_cast_fp16, var_19541_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1951_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1953_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1953_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1953_equation_0, values = (var_20010_cast_fp16, var_19548_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1953_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1955_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1955_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1955_equation_0, values = (var_20010_cast_fp16, var_19555_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1955_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1957_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1957_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1957_equation_0, values = (var_20010_cast_fp16, var_19562_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1957_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1959_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1959_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1959_equation_0, values = (var_20010_cast_fp16, var_19569_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1959_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1961_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1961_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1961_equation_0, values = (var_20014_cast_fp16, var_19576_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1961_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1963_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1963_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1963_equation_0, values = (var_20014_cast_fp16, var_19583_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1963_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1965_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1965_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1965_equation_0, values = (var_20014_cast_fp16, var_19590_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1965_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1967_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1967_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1967_equation_0, values = (var_20014_cast_fp16, var_19597_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1967_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1969_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1969_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1969_equation_0, values = (var_20018_cast_fp16, var_19604_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1969_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1971_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1971_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1971_equation_0, values = (var_20018_cast_fp16, var_19611_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1971_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1973_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1973_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1973_equation_0, values = (var_20018_cast_fp16, var_19618_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1973_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1975_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1975_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1975_equation_0, values = (var_20018_cast_fp16, var_19625_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1975_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1977_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1977_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1977_equation_0, values = (var_20022_cast_fp16, var_19632_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1977_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1979_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1979_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1979_equation_0, values = (var_20022_cast_fp16, var_19639_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1979_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1981_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1981_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1981_equation_0, values = (var_20022_cast_fp16, var_19646_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1981_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1983_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1983_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1983_equation_0, values = (var_20022_cast_fp16, var_19653_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1983_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1985_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1985_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1985_equation_0, values = (var_20026_cast_fp16, var_19660_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1985_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1987_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1987_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1987_equation_0, values = (var_20026_cast_fp16, var_19667_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1987_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1989_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1989_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1989_equation_0, values = (var_20026_cast_fp16, var_19674_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1989_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1991_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1991_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1991_equation_0, values = (var_20026_cast_fp16, var_19681_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1991_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1993_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1993_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1993_equation_0, values = (var_20030_cast_fp16, var_19688_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1993_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1995_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1995_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1995_equation_0, values = (var_20030_cast_fp16, var_19695_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1995_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1997_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1997_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1997_equation_0, values = (var_20030_cast_fp16, var_19702_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1997_cast_fp16")]; tensor _SplitHeadsQ__mh_w_1999_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_1999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_1999_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_1999_equation_0, values = (var_20030_cast_fp16, var_19709_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_1999_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2001_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2001_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2001_equation_0, values = (var_20034_cast_fp16, var_19716_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2001_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2003_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2003_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2003_equation_0, values = (var_20034_cast_fp16, var_19723_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2003_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2005_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2005_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2005_equation_0, values = (var_20034_cast_fp16, var_19730_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2005_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2007_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2007_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2007_equation_0, values = (var_20034_cast_fp16, var_19737_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2007_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2009_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2009_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2009_equation_0, values = (var_20038_cast_fp16, var_19744_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2009_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2011_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2011_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2011_equation_0, values = (var_20038_cast_fp16, var_19751_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2011_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2013_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2013_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2013_equation_0, values = (var_20038_cast_fp16, var_19758_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2013_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2015_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2015_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2015_equation_0, values = (var_20038_cast_fp16, var_19765_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2015_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2017_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2017_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2017_equation_0, values = (var_20042_cast_fp16, var_19772_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2017_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2019_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2019_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2019_equation_0, values = (var_20042_cast_fp16, var_19779_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2019_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2021_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2021_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2021_equation_0, values = (var_20042_cast_fp16, var_19786_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2021_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2023_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2023_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2023_equation_0, values = (var_20042_cast_fp16, var_19793_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2023_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2025_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2025_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2025_equation_0, values = (var_20046_cast_fp16, var_19800_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2025_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2027_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2027_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2027_equation_0, values = (var_20046_cast_fp16, var_19807_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2027_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2029_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2029_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2029_equation_0, values = (var_20046_cast_fp16, var_19814_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2029_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2031_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2031_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2031_equation_0, values = (var_20046_cast_fp16, var_19821_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2031_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2033_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2033_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2033_equation_0, values = (var_20050_cast_fp16, var_19828_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2033_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2035_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2035_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2035_equation_0, values = (var_20050_cast_fp16, var_19835_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2035_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2037_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2037_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2037_equation_0, values = (var_20050_cast_fp16, var_19842_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2037_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2039_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2039_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2039_equation_0, values = (var_20050_cast_fp16, var_19849_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2039_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2041_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2041_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2041_equation_0, values = (var_20054_cast_fp16, var_19856_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2041_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2043_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2043_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2043_equation_0, values = (var_20054_cast_fp16, var_19863_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2043_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2045_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2045_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2045_equation_0, values = (var_20054_cast_fp16, var_19870_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2045_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2047_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2047_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2047_equation_0, values = (var_20054_cast_fp16, var_19877_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2047_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2049_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2049_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2049_equation_0, values = (var_20058_cast_fp16, var_19884_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2049_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2051_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2051_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2051_equation_0, values = (var_20058_cast_fp16, var_19891_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2051_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2053_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2053_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2053_equation_0, values = (var_20058_cast_fp16, var_19898_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2053_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2055_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2055_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2055_equation_0, values = (var_20058_cast_fp16, var_19905_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2055_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2057_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2057_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2057_equation_0, values = (var_20062_cast_fp16, var_19912_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2057_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2059_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2059_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2059_equation_0, values = (var_20062_cast_fp16, var_19919_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2059_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2061_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2061_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2061_equation_0, values = (var_20062_cast_fp16, var_19926_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2061_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2063_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2063_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2063_equation_0, values = (var_20062_cast_fp16, var_19933_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2063_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2065_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2065_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2065_equation_0, values = (var_20066_cast_fp16, var_19940_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2065_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2067_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2067_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2067_equation_0, values = (var_20066_cast_fp16, var_19947_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2067_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2069_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2069_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2069_equation_0, values = (var_20066_cast_fp16, var_19954_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2069_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2071_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2071_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2071_equation_0, values = (var_20066_cast_fp16, var_19961_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2071_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2073_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2073_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2073_equation_0, values = (var_20070_cast_fp16, var_19968_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2073_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2075_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2075_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2075_equation_0, values = (var_20070_cast_fp16, var_19975_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2075_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2077_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2077_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2077_equation_0, values = (var_20070_cast_fp16, var_19982_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2077_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2079_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2079_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2079_equation_0, values = (var_20070_cast_fp16, var_19989_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2079_cast_fp16")]; tensor var_20311_to_fp16 = const()[name = tensor("op_20311_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1921_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1921_cast_fp16, y = var_20311_to_fp16)[name = tensor("aw_chunk_1921_cast_fp16")]; tensor var_20313_to_fp16 = const()[name = tensor("op_20313_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1923_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1923_cast_fp16, y = var_20313_to_fp16)[name = tensor("aw_chunk_1923_cast_fp16")]; tensor var_20315_to_fp16 = const()[name = tensor("op_20315_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1925_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1925_cast_fp16, y = var_20315_to_fp16)[name = tensor("aw_chunk_1925_cast_fp16")]; tensor var_20317_to_fp16 = const()[name = tensor("op_20317_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1927_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1927_cast_fp16, y = var_20317_to_fp16)[name = tensor("aw_chunk_1927_cast_fp16")]; tensor var_20319_to_fp16 = const()[name = tensor("op_20319_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1929_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1929_cast_fp16, y = var_20319_to_fp16)[name = tensor("aw_chunk_1929_cast_fp16")]; tensor var_20321_to_fp16 = const()[name = tensor("op_20321_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1931_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1931_cast_fp16, y = var_20321_to_fp16)[name = tensor("aw_chunk_1931_cast_fp16")]; tensor var_20323_to_fp16 = const()[name = tensor("op_20323_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1933_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1933_cast_fp16, y = var_20323_to_fp16)[name = tensor("aw_chunk_1933_cast_fp16")]; tensor var_20325_to_fp16 = const()[name = tensor("op_20325_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1935_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1935_cast_fp16, y = var_20325_to_fp16)[name = tensor("aw_chunk_1935_cast_fp16")]; tensor var_20327_to_fp16 = const()[name = tensor("op_20327_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1937_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1937_cast_fp16, y = var_20327_to_fp16)[name = tensor("aw_chunk_1937_cast_fp16")]; tensor var_20329_to_fp16 = const()[name = tensor("op_20329_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1939_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1939_cast_fp16, y = var_20329_to_fp16)[name = tensor("aw_chunk_1939_cast_fp16")]; tensor var_20331_to_fp16 = const()[name = tensor("op_20331_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1941_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1941_cast_fp16, y = var_20331_to_fp16)[name = tensor("aw_chunk_1941_cast_fp16")]; tensor var_20333_to_fp16 = const()[name = tensor("op_20333_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1943_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1943_cast_fp16, y = var_20333_to_fp16)[name = tensor("aw_chunk_1943_cast_fp16")]; tensor var_20335_to_fp16 = const()[name = tensor("op_20335_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1945_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1945_cast_fp16, y = var_20335_to_fp16)[name = tensor("aw_chunk_1945_cast_fp16")]; tensor var_20337_to_fp16 = const()[name = tensor("op_20337_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1947_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1947_cast_fp16, y = var_20337_to_fp16)[name = tensor("aw_chunk_1947_cast_fp16")]; tensor var_20339_to_fp16 = const()[name = tensor("op_20339_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1949_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1949_cast_fp16, y = var_20339_to_fp16)[name = tensor("aw_chunk_1949_cast_fp16")]; tensor var_20341_to_fp16 = const()[name = tensor("op_20341_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1951_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1951_cast_fp16, y = var_20341_to_fp16)[name = tensor("aw_chunk_1951_cast_fp16")]; tensor var_20343_to_fp16 = const()[name = tensor("op_20343_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1953_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1953_cast_fp16, y = var_20343_to_fp16)[name = tensor("aw_chunk_1953_cast_fp16")]; tensor var_20345_to_fp16 = const()[name = tensor("op_20345_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1955_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1955_cast_fp16, y = var_20345_to_fp16)[name = tensor("aw_chunk_1955_cast_fp16")]; tensor var_20347_to_fp16 = const()[name = tensor("op_20347_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1957_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1957_cast_fp16, y = var_20347_to_fp16)[name = tensor("aw_chunk_1957_cast_fp16")]; tensor var_20349_to_fp16 = const()[name = tensor("op_20349_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1959_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1959_cast_fp16, y = var_20349_to_fp16)[name = tensor("aw_chunk_1959_cast_fp16")]; tensor var_20351_to_fp16 = const()[name = tensor("op_20351_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1961_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1961_cast_fp16, y = var_20351_to_fp16)[name = tensor("aw_chunk_1961_cast_fp16")]; tensor var_20353_to_fp16 = const()[name = tensor("op_20353_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1963_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1963_cast_fp16, y = var_20353_to_fp16)[name = tensor("aw_chunk_1963_cast_fp16")]; tensor var_20355_to_fp16 = const()[name = tensor("op_20355_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1965_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1965_cast_fp16, y = var_20355_to_fp16)[name = tensor("aw_chunk_1965_cast_fp16")]; tensor var_20357_to_fp16 = const()[name = tensor("op_20357_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1967_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1967_cast_fp16, y = var_20357_to_fp16)[name = tensor("aw_chunk_1967_cast_fp16")]; tensor var_20359_to_fp16 = const()[name = tensor("op_20359_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1969_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1969_cast_fp16, y = var_20359_to_fp16)[name = tensor("aw_chunk_1969_cast_fp16")]; tensor var_20361_to_fp16 = const()[name = tensor("op_20361_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1971_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1971_cast_fp16, y = var_20361_to_fp16)[name = tensor("aw_chunk_1971_cast_fp16")]; tensor var_20363_to_fp16 = const()[name = tensor("op_20363_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1973_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1973_cast_fp16, y = var_20363_to_fp16)[name = tensor("aw_chunk_1973_cast_fp16")]; tensor var_20365_to_fp16 = const()[name = tensor("op_20365_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1975_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1975_cast_fp16, y = var_20365_to_fp16)[name = tensor("aw_chunk_1975_cast_fp16")]; tensor var_20367_to_fp16 = const()[name = tensor("op_20367_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1977_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1977_cast_fp16, y = var_20367_to_fp16)[name = tensor("aw_chunk_1977_cast_fp16")]; tensor var_20369_to_fp16 = const()[name = tensor("op_20369_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1979_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1979_cast_fp16, y = var_20369_to_fp16)[name = tensor("aw_chunk_1979_cast_fp16")]; tensor var_20371_to_fp16 = const()[name = tensor("op_20371_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1981_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1981_cast_fp16, y = var_20371_to_fp16)[name = tensor("aw_chunk_1981_cast_fp16")]; tensor var_20373_to_fp16 = const()[name = tensor("op_20373_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1983_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1983_cast_fp16, y = var_20373_to_fp16)[name = tensor("aw_chunk_1983_cast_fp16")]; tensor var_20375_to_fp16 = const()[name = tensor("op_20375_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1985_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1985_cast_fp16, y = var_20375_to_fp16)[name = tensor("aw_chunk_1985_cast_fp16")]; tensor var_20377_to_fp16 = const()[name = tensor("op_20377_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1987_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1987_cast_fp16, y = var_20377_to_fp16)[name = tensor("aw_chunk_1987_cast_fp16")]; tensor var_20379_to_fp16 = const()[name = tensor("op_20379_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1989_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1989_cast_fp16, y = var_20379_to_fp16)[name = tensor("aw_chunk_1989_cast_fp16")]; tensor var_20381_to_fp16 = const()[name = tensor("op_20381_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1991_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1991_cast_fp16, y = var_20381_to_fp16)[name = tensor("aw_chunk_1991_cast_fp16")]; tensor var_20383_to_fp16 = const()[name = tensor("op_20383_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1993_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1993_cast_fp16, y = var_20383_to_fp16)[name = tensor("aw_chunk_1993_cast_fp16")]; tensor var_20385_to_fp16 = const()[name = tensor("op_20385_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1995_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1995_cast_fp16, y = var_20385_to_fp16)[name = tensor("aw_chunk_1995_cast_fp16")]; tensor var_20387_to_fp16 = const()[name = tensor("op_20387_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1997_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1997_cast_fp16, y = var_20387_to_fp16)[name = tensor("aw_chunk_1997_cast_fp16")]; tensor var_20389_to_fp16 = const()[name = tensor("op_20389_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_1999_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_1999_cast_fp16, y = var_20389_to_fp16)[name = tensor("aw_chunk_1999_cast_fp16")]; tensor var_20391_to_fp16 = const()[name = tensor("op_20391_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2001_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2001_cast_fp16, y = var_20391_to_fp16)[name = tensor("aw_chunk_2001_cast_fp16")]; tensor var_20393_to_fp16 = const()[name = tensor("op_20393_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2003_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2003_cast_fp16, y = var_20393_to_fp16)[name = tensor("aw_chunk_2003_cast_fp16")]; tensor var_20395_to_fp16 = const()[name = tensor("op_20395_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2005_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2005_cast_fp16, y = var_20395_to_fp16)[name = tensor("aw_chunk_2005_cast_fp16")]; tensor var_20397_to_fp16 = const()[name = tensor("op_20397_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2007_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2007_cast_fp16, y = var_20397_to_fp16)[name = tensor("aw_chunk_2007_cast_fp16")]; tensor var_20399_to_fp16 = const()[name = tensor("op_20399_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2009_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2009_cast_fp16, y = var_20399_to_fp16)[name = tensor("aw_chunk_2009_cast_fp16")]; tensor var_20401_to_fp16 = const()[name = tensor("op_20401_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2011_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2011_cast_fp16, y = var_20401_to_fp16)[name = tensor("aw_chunk_2011_cast_fp16")]; tensor var_20403_to_fp16 = const()[name = tensor("op_20403_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2013_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2013_cast_fp16, y = var_20403_to_fp16)[name = tensor("aw_chunk_2013_cast_fp16")]; tensor var_20405_to_fp16 = const()[name = tensor("op_20405_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2015_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2015_cast_fp16, y = var_20405_to_fp16)[name = tensor("aw_chunk_2015_cast_fp16")]; tensor var_20407_to_fp16 = const()[name = tensor("op_20407_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2017_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2017_cast_fp16, y = var_20407_to_fp16)[name = tensor("aw_chunk_2017_cast_fp16")]; tensor var_20409_to_fp16 = const()[name = tensor("op_20409_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2019_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2019_cast_fp16, y = var_20409_to_fp16)[name = tensor("aw_chunk_2019_cast_fp16")]; tensor var_20411_to_fp16 = const()[name = tensor("op_20411_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2021_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2021_cast_fp16, y = var_20411_to_fp16)[name = tensor("aw_chunk_2021_cast_fp16")]; tensor var_20413_to_fp16 = const()[name = tensor("op_20413_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2023_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2023_cast_fp16, y = var_20413_to_fp16)[name = tensor("aw_chunk_2023_cast_fp16")]; tensor var_20415_to_fp16 = const()[name = tensor("op_20415_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2025_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2025_cast_fp16, y = var_20415_to_fp16)[name = tensor("aw_chunk_2025_cast_fp16")]; tensor var_20417_to_fp16 = const()[name = tensor("op_20417_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2027_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2027_cast_fp16, y = var_20417_to_fp16)[name = tensor("aw_chunk_2027_cast_fp16")]; tensor var_20419_to_fp16 = const()[name = tensor("op_20419_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2029_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2029_cast_fp16, y = var_20419_to_fp16)[name = tensor("aw_chunk_2029_cast_fp16")]; tensor var_20421_to_fp16 = const()[name = tensor("op_20421_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2031_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2031_cast_fp16, y = var_20421_to_fp16)[name = tensor("aw_chunk_2031_cast_fp16")]; tensor var_20423_to_fp16 = const()[name = tensor("op_20423_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2033_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2033_cast_fp16, y = var_20423_to_fp16)[name = tensor("aw_chunk_2033_cast_fp16")]; tensor var_20425_to_fp16 = const()[name = tensor("op_20425_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2035_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2035_cast_fp16, y = var_20425_to_fp16)[name = tensor("aw_chunk_2035_cast_fp16")]; tensor var_20427_to_fp16 = const()[name = tensor("op_20427_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2037_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2037_cast_fp16, y = var_20427_to_fp16)[name = tensor("aw_chunk_2037_cast_fp16")]; tensor var_20429_to_fp16 = const()[name = tensor("op_20429_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2039_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2039_cast_fp16, y = var_20429_to_fp16)[name = tensor("aw_chunk_2039_cast_fp16")]; tensor var_20431_to_fp16 = const()[name = tensor("op_20431_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2041_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2041_cast_fp16, y = var_20431_to_fp16)[name = tensor("aw_chunk_2041_cast_fp16")]; tensor var_20433_to_fp16 = const()[name = tensor("op_20433_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2043_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2043_cast_fp16, y = var_20433_to_fp16)[name = tensor("aw_chunk_2043_cast_fp16")]; tensor var_20435_to_fp16 = const()[name = tensor("op_20435_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2045_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2045_cast_fp16, y = var_20435_to_fp16)[name = tensor("aw_chunk_2045_cast_fp16")]; tensor var_20437_to_fp16 = const()[name = tensor("op_20437_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2047_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2047_cast_fp16, y = var_20437_to_fp16)[name = tensor("aw_chunk_2047_cast_fp16")]; tensor var_20439_to_fp16 = const()[name = tensor("op_20439_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2049_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2049_cast_fp16, y = var_20439_to_fp16)[name = tensor("aw_chunk_2049_cast_fp16")]; tensor var_20441_to_fp16 = const()[name = tensor("op_20441_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2051_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2051_cast_fp16, y = var_20441_to_fp16)[name = tensor("aw_chunk_2051_cast_fp16")]; tensor var_20443_to_fp16 = const()[name = tensor("op_20443_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2053_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2053_cast_fp16, y = var_20443_to_fp16)[name = tensor("aw_chunk_2053_cast_fp16")]; tensor var_20445_to_fp16 = const()[name = tensor("op_20445_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2055_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2055_cast_fp16, y = var_20445_to_fp16)[name = tensor("aw_chunk_2055_cast_fp16")]; tensor var_20447_to_fp16 = const()[name = tensor("op_20447_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2057_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2057_cast_fp16, y = var_20447_to_fp16)[name = tensor("aw_chunk_2057_cast_fp16")]; tensor var_20449_to_fp16 = const()[name = tensor("op_20449_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2059_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2059_cast_fp16, y = var_20449_to_fp16)[name = tensor("aw_chunk_2059_cast_fp16")]; tensor var_20451_to_fp16 = const()[name = tensor("op_20451_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2061_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2061_cast_fp16, y = var_20451_to_fp16)[name = tensor("aw_chunk_2061_cast_fp16")]; tensor var_20453_to_fp16 = const()[name = tensor("op_20453_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2063_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2063_cast_fp16, y = var_20453_to_fp16)[name = tensor("aw_chunk_2063_cast_fp16")]; tensor var_20455_to_fp16 = const()[name = tensor("op_20455_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2065_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2065_cast_fp16, y = var_20455_to_fp16)[name = tensor("aw_chunk_2065_cast_fp16")]; tensor var_20457_to_fp16 = const()[name = tensor("op_20457_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2067_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2067_cast_fp16, y = var_20457_to_fp16)[name = tensor("aw_chunk_2067_cast_fp16")]; tensor var_20459_to_fp16 = const()[name = tensor("op_20459_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2069_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2069_cast_fp16, y = var_20459_to_fp16)[name = tensor("aw_chunk_2069_cast_fp16")]; tensor var_20461_to_fp16 = const()[name = tensor("op_20461_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2071_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2071_cast_fp16, y = var_20461_to_fp16)[name = tensor("aw_chunk_2071_cast_fp16")]; tensor var_20463_to_fp16 = const()[name = tensor("op_20463_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2073_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2073_cast_fp16, y = var_20463_to_fp16)[name = tensor("aw_chunk_2073_cast_fp16")]; tensor var_20465_to_fp16 = const()[name = tensor("op_20465_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2075_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2075_cast_fp16, y = var_20465_to_fp16)[name = tensor("aw_chunk_2075_cast_fp16")]; tensor var_20467_to_fp16 = const()[name = tensor("op_20467_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2077_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2077_cast_fp16, y = var_20467_to_fp16)[name = tensor("aw_chunk_2077_cast_fp16")]; tensor var_20469_to_fp16 = const()[name = tensor("op_20469_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2079_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2079_cast_fp16, y = var_20469_to_fp16)[name = tensor("aw_chunk_2079_cast_fp16")]; tensor var_20471_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1921_cast_fp16)[name = tensor("op_20471_cast_fp16")]; tensor var_20472_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1923_cast_fp16)[name = tensor("op_20472_cast_fp16")]; tensor var_20473_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1925_cast_fp16)[name = tensor("op_20473_cast_fp16")]; tensor var_20474_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1927_cast_fp16)[name = tensor("op_20474_cast_fp16")]; tensor var_20475_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1929_cast_fp16)[name = tensor("op_20475_cast_fp16")]; tensor var_20476_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1931_cast_fp16)[name = tensor("op_20476_cast_fp16")]; tensor var_20477_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1933_cast_fp16)[name = tensor("op_20477_cast_fp16")]; tensor var_20478_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1935_cast_fp16)[name = tensor("op_20478_cast_fp16")]; tensor var_20479_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1937_cast_fp16)[name = tensor("op_20479_cast_fp16")]; tensor var_20480_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1939_cast_fp16)[name = tensor("op_20480_cast_fp16")]; tensor var_20481_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1941_cast_fp16)[name = tensor("op_20481_cast_fp16")]; tensor var_20482_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1943_cast_fp16)[name = tensor("op_20482_cast_fp16")]; tensor var_20483_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1945_cast_fp16)[name = tensor("op_20483_cast_fp16")]; tensor var_20484_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1947_cast_fp16)[name = tensor("op_20484_cast_fp16")]; tensor var_20485_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1949_cast_fp16)[name = tensor("op_20485_cast_fp16")]; tensor var_20486_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1951_cast_fp16)[name = tensor("op_20486_cast_fp16")]; tensor var_20487_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1953_cast_fp16)[name = tensor("op_20487_cast_fp16")]; tensor var_20488_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1955_cast_fp16)[name = tensor("op_20488_cast_fp16")]; tensor var_20489_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1957_cast_fp16)[name = tensor("op_20489_cast_fp16")]; tensor var_20490_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1959_cast_fp16)[name = tensor("op_20490_cast_fp16")]; tensor var_20491_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1961_cast_fp16)[name = tensor("op_20491_cast_fp16")]; tensor var_20492_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1963_cast_fp16)[name = tensor("op_20492_cast_fp16")]; tensor var_20493_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1965_cast_fp16)[name = tensor("op_20493_cast_fp16")]; tensor var_20494_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1967_cast_fp16)[name = tensor("op_20494_cast_fp16")]; tensor var_20495_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1969_cast_fp16)[name = tensor("op_20495_cast_fp16")]; tensor var_20496_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1971_cast_fp16)[name = tensor("op_20496_cast_fp16")]; tensor var_20497_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1973_cast_fp16)[name = tensor("op_20497_cast_fp16")]; tensor var_20498_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1975_cast_fp16)[name = tensor("op_20498_cast_fp16")]; tensor var_20499_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1977_cast_fp16)[name = tensor("op_20499_cast_fp16")]; tensor var_20500_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1979_cast_fp16)[name = tensor("op_20500_cast_fp16")]; tensor var_20501_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1981_cast_fp16)[name = tensor("op_20501_cast_fp16")]; tensor var_20502_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1983_cast_fp16)[name = tensor("op_20502_cast_fp16")]; tensor var_20503_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1985_cast_fp16)[name = tensor("op_20503_cast_fp16")]; tensor var_20504_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1987_cast_fp16)[name = tensor("op_20504_cast_fp16")]; tensor var_20505_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1989_cast_fp16)[name = tensor("op_20505_cast_fp16")]; tensor var_20506_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1991_cast_fp16)[name = tensor("op_20506_cast_fp16")]; tensor var_20507_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1993_cast_fp16)[name = tensor("op_20507_cast_fp16")]; tensor var_20508_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1995_cast_fp16)[name = tensor("op_20508_cast_fp16")]; tensor var_20509_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1997_cast_fp16)[name = tensor("op_20509_cast_fp16")]; tensor var_20510_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_1999_cast_fp16)[name = tensor("op_20510_cast_fp16")]; tensor var_20511_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2001_cast_fp16)[name = tensor("op_20511_cast_fp16")]; tensor var_20512_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2003_cast_fp16)[name = tensor("op_20512_cast_fp16")]; tensor var_20513_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2005_cast_fp16)[name = tensor("op_20513_cast_fp16")]; tensor var_20514_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2007_cast_fp16)[name = tensor("op_20514_cast_fp16")]; tensor var_20515_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2009_cast_fp16)[name = tensor("op_20515_cast_fp16")]; tensor var_20516_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2011_cast_fp16)[name = tensor("op_20516_cast_fp16")]; tensor var_20517_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2013_cast_fp16)[name = tensor("op_20517_cast_fp16")]; tensor var_20518_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2015_cast_fp16)[name = tensor("op_20518_cast_fp16")]; tensor var_20519_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2017_cast_fp16)[name = tensor("op_20519_cast_fp16")]; tensor var_20520_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2019_cast_fp16)[name = tensor("op_20520_cast_fp16")]; tensor var_20521_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2021_cast_fp16)[name = tensor("op_20521_cast_fp16")]; tensor var_20522_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2023_cast_fp16)[name = tensor("op_20522_cast_fp16")]; tensor var_20523_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2025_cast_fp16)[name = tensor("op_20523_cast_fp16")]; tensor var_20524_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2027_cast_fp16)[name = tensor("op_20524_cast_fp16")]; tensor var_20525_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2029_cast_fp16)[name = tensor("op_20525_cast_fp16")]; tensor var_20526_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2031_cast_fp16)[name = tensor("op_20526_cast_fp16")]; tensor var_20527_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2033_cast_fp16)[name = tensor("op_20527_cast_fp16")]; tensor var_20528_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2035_cast_fp16)[name = tensor("op_20528_cast_fp16")]; tensor var_20529_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2037_cast_fp16)[name = tensor("op_20529_cast_fp16")]; tensor var_20530_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2039_cast_fp16)[name = tensor("op_20530_cast_fp16")]; tensor var_20531_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2041_cast_fp16)[name = tensor("op_20531_cast_fp16")]; tensor var_20532_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2043_cast_fp16)[name = tensor("op_20532_cast_fp16")]; tensor var_20533_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2045_cast_fp16)[name = tensor("op_20533_cast_fp16")]; tensor var_20534_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2047_cast_fp16)[name = tensor("op_20534_cast_fp16")]; tensor var_20535_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2049_cast_fp16)[name = tensor("op_20535_cast_fp16")]; tensor var_20536_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2051_cast_fp16)[name = tensor("op_20536_cast_fp16")]; tensor var_20537_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2053_cast_fp16)[name = tensor("op_20537_cast_fp16")]; tensor var_20538_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2055_cast_fp16)[name = tensor("op_20538_cast_fp16")]; tensor var_20539_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2057_cast_fp16)[name = tensor("op_20539_cast_fp16")]; tensor var_20540_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2059_cast_fp16)[name = tensor("op_20540_cast_fp16")]; tensor var_20541_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2061_cast_fp16)[name = tensor("op_20541_cast_fp16")]; tensor var_20542_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2063_cast_fp16)[name = tensor("op_20542_cast_fp16")]; tensor var_20543_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2065_cast_fp16)[name = tensor("op_20543_cast_fp16")]; tensor var_20544_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2067_cast_fp16)[name = tensor("op_20544_cast_fp16")]; tensor var_20545_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2069_cast_fp16)[name = tensor("op_20545_cast_fp16")]; tensor var_20546_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2071_cast_fp16)[name = tensor("op_20546_cast_fp16")]; tensor var_20547_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2073_cast_fp16)[name = tensor("op_20547_cast_fp16")]; tensor var_20548_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2075_cast_fp16)[name = tensor("op_20548_cast_fp16")]; tensor var_20549_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2077_cast_fp16)[name = tensor("op_20549_cast_fp16")]; tensor var_20550_cast_fp16 = softmax(axis = var_19269, x = aw_chunk_2079_cast_fp16)[name = tensor("op_20550_cast_fp16")]; tensor var_20552_equation_0 = const()[name = tensor("op_20552_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20552_cast_fp16 = einsum(equation = var_20552_equation_0, values = (var_20072_cast_fp16, var_20471_cast_fp16))[name = tensor("op_20552_cast_fp16")]; tensor var_20554_equation_0 = const()[name = tensor("op_20554_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20554_cast_fp16 = einsum(equation = var_20554_equation_0, values = (var_20072_cast_fp16, var_20472_cast_fp16))[name = tensor("op_20554_cast_fp16")]; tensor var_20556_equation_0 = const()[name = tensor("op_20556_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20556_cast_fp16 = einsum(equation = var_20556_equation_0, values = (var_20072_cast_fp16, var_20473_cast_fp16))[name = tensor("op_20556_cast_fp16")]; tensor var_20558_equation_0 = const()[name = tensor("op_20558_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20558_cast_fp16 = einsum(equation = var_20558_equation_0, values = (var_20072_cast_fp16, var_20474_cast_fp16))[name = tensor("op_20558_cast_fp16")]; tensor var_20560_equation_0 = const()[name = tensor("op_20560_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20560_cast_fp16 = einsum(equation = var_20560_equation_0, values = (var_20076_cast_fp16, var_20475_cast_fp16))[name = tensor("op_20560_cast_fp16")]; tensor var_20562_equation_0 = const()[name = tensor("op_20562_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20562_cast_fp16 = einsum(equation = var_20562_equation_0, values = (var_20076_cast_fp16, var_20476_cast_fp16))[name = tensor("op_20562_cast_fp16")]; tensor var_20564_equation_0 = const()[name = tensor("op_20564_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20564_cast_fp16 = einsum(equation = var_20564_equation_0, values = (var_20076_cast_fp16, var_20477_cast_fp16))[name = tensor("op_20564_cast_fp16")]; tensor var_20566_equation_0 = const()[name = tensor("op_20566_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20566_cast_fp16 = einsum(equation = var_20566_equation_0, values = (var_20076_cast_fp16, var_20478_cast_fp16))[name = tensor("op_20566_cast_fp16")]; tensor var_20568_equation_0 = const()[name = tensor("op_20568_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20568_cast_fp16 = einsum(equation = var_20568_equation_0, values = (var_20080_cast_fp16, var_20479_cast_fp16))[name = tensor("op_20568_cast_fp16")]; tensor var_20570_equation_0 = const()[name = tensor("op_20570_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20570_cast_fp16 = einsum(equation = var_20570_equation_0, values = (var_20080_cast_fp16, var_20480_cast_fp16))[name = tensor("op_20570_cast_fp16")]; tensor var_20572_equation_0 = const()[name = tensor("op_20572_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20572_cast_fp16 = einsum(equation = var_20572_equation_0, values = (var_20080_cast_fp16, var_20481_cast_fp16))[name = tensor("op_20572_cast_fp16")]; tensor var_20574_equation_0 = const()[name = tensor("op_20574_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20574_cast_fp16 = einsum(equation = var_20574_equation_0, values = (var_20080_cast_fp16, var_20482_cast_fp16))[name = tensor("op_20574_cast_fp16")]; tensor var_20576_equation_0 = const()[name = tensor("op_20576_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20576_cast_fp16 = einsum(equation = var_20576_equation_0, values = (var_20084_cast_fp16, var_20483_cast_fp16))[name = tensor("op_20576_cast_fp16")]; tensor var_20578_equation_0 = const()[name = tensor("op_20578_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20578_cast_fp16 = einsum(equation = var_20578_equation_0, values = (var_20084_cast_fp16, var_20484_cast_fp16))[name = tensor("op_20578_cast_fp16")]; tensor var_20580_equation_0 = const()[name = tensor("op_20580_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20580_cast_fp16 = einsum(equation = var_20580_equation_0, values = (var_20084_cast_fp16, var_20485_cast_fp16))[name = tensor("op_20580_cast_fp16")]; tensor var_20582_equation_0 = const()[name = tensor("op_20582_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20582_cast_fp16 = einsum(equation = var_20582_equation_0, values = (var_20084_cast_fp16, var_20486_cast_fp16))[name = tensor("op_20582_cast_fp16")]; tensor var_20584_equation_0 = const()[name = tensor("op_20584_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20584_cast_fp16 = einsum(equation = var_20584_equation_0, values = (var_20088_cast_fp16, var_20487_cast_fp16))[name = tensor("op_20584_cast_fp16")]; tensor var_20586_equation_0 = const()[name = tensor("op_20586_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20586_cast_fp16 = einsum(equation = var_20586_equation_0, values = (var_20088_cast_fp16, var_20488_cast_fp16))[name = tensor("op_20586_cast_fp16")]; tensor var_20588_equation_0 = const()[name = tensor("op_20588_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20588_cast_fp16 = einsum(equation = var_20588_equation_0, values = (var_20088_cast_fp16, var_20489_cast_fp16))[name = tensor("op_20588_cast_fp16")]; tensor var_20590_equation_0 = const()[name = tensor("op_20590_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20590_cast_fp16 = einsum(equation = var_20590_equation_0, values = (var_20088_cast_fp16, var_20490_cast_fp16))[name = tensor("op_20590_cast_fp16")]; tensor var_20592_equation_0 = const()[name = tensor("op_20592_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20592_cast_fp16 = einsum(equation = var_20592_equation_0, values = (var_20092_cast_fp16, var_20491_cast_fp16))[name = tensor("op_20592_cast_fp16")]; tensor var_20594_equation_0 = const()[name = tensor("op_20594_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20594_cast_fp16 = einsum(equation = var_20594_equation_0, values = (var_20092_cast_fp16, var_20492_cast_fp16))[name = tensor("op_20594_cast_fp16")]; tensor var_20596_equation_0 = const()[name = tensor("op_20596_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20596_cast_fp16 = einsum(equation = var_20596_equation_0, values = (var_20092_cast_fp16, var_20493_cast_fp16))[name = tensor("op_20596_cast_fp16")]; tensor var_20598_equation_0 = const()[name = tensor("op_20598_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20598_cast_fp16 = einsum(equation = var_20598_equation_0, values = (var_20092_cast_fp16, var_20494_cast_fp16))[name = tensor("op_20598_cast_fp16")]; tensor var_20600_equation_0 = const()[name = tensor("op_20600_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20600_cast_fp16 = einsum(equation = var_20600_equation_0, values = (var_20096_cast_fp16, var_20495_cast_fp16))[name = tensor("op_20600_cast_fp16")]; tensor var_20602_equation_0 = const()[name = tensor("op_20602_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20602_cast_fp16 = einsum(equation = var_20602_equation_0, values = (var_20096_cast_fp16, var_20496_cast_fp16))[name = tensor("op_20602_cast_fp16")]; tensor var_20604_equation_0 = const()[name = tensor("op_20604_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20604_cast_fp16 = einsum(equation = var_20604_equation_0, values = (var_20096_cast_fp16, var_20497_cast_fp16))[name = tensor("op_20604_cast_fp16")]; tensor var_20606_equation_0 = const()[name = tensor("op_20606_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20606_cast_fp16 = einsum(equation = var_20606_equation_0, values = (var_20096_cast_fp16, var_20498_cast_fp16))[name = tensor("op_20606_cast_fp16")]; tensor var_20608_equation_0 = const()[name = tensor("op_20608_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20608_cast_fp16 = einsum(equation = var_20608_equation_0, values = (var_20100_cast_fp16, var_20499_cast_fp16))[name = tensor("op_20608_cast_fp16")]; tensor var_20610_equation_0 = const()[name = tensor("op_20610_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20610_cast_fp16 = einsum(equation = var_20610_equation_0, values = (var_20100_cast_fp16, var_20500_cast_fp16))[name = tensor("op_20610_cast_fp16")]; tensor var_20612_equation_0 = const()[name = tensor("op_20612_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20612_cast_fp16 = einsum(equation = var_20612_equation_0, values = (var_20100_cast_fp16, var_20501_cast_fp16))[name = tensor("op_20612_cast_fp16")]; tensor var_20614_equation_0 = const()[name = tensor("op_20614_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20614_cast_fp16 = einsum(equation = var_20614_equation_0, values = (var_20100_cast_fp16, var_20502_cast_fp16))[name = tensor("op_20614_cast_fp16")]; tensor var_20616_equation_0 = const()[name = tensor("op_20616_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20616_cast_fp16 = einsum(equation = var_20616_equation_0, values = (var_20104_cast_fp16, var_20503_cast_fp16))[name = tensor("op_20616_cast_fp16")]; tensor var_20618_equation_0 = const()[name = tensor("op_20618_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20618_cast_fp16 = einsum(equation = var_20618_equation_0, values = (var_20104_cast_fp16, var_20504_cast_fp16))[name = tensor("op_20618_cast_fp16")]; tensor var_20620_equation_0 = const()[name = tensor("op_20620_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20620_cast_fp16 = einsum(equation = var_20620_equation_0, values = (var_20104_cast_fp16, var_20505_cast_fp16))[name = tensor("op_20620_cast_fp16")]; tensor var_20622_equation_0 = const()[name = tensor("op_20622_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20622_cast_fp16 = einsum(equation = var_20622_equation_0, values = (var_20104_cast_fp16, var_20506_cast_fp16))[name = tensor("op_20622_cast_fp16")]; tensor var_20624_equation_0 = const()[name = tensor("op_20624_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20624_cast_fp16 = einsum(equation = var_20624_equation_0, values = (var_20108_cast_fp16, var_20507_cast_fp16))[name = tensor("op_20624_cast_fp16")]; tensor var_20626_equation_0 = const()[name = tensor("op_20626_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20626_cast_fp16 = einsum(equation = var_20626_equation_0, values = (var_20108_cast_fp16, var_20508_cast_fp16))[name = tensor("op_20626_cast_fp16")]; tensor var_20628_equation_0 = const()[name = tensor("op_20628_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20628_cast_fp16 = einsum(equation = var_20628_equation_0, values = (var_20108_cast_fp16, var_20509_cast_fp16))[name = tensor("op_20628_cast_fp16")]; tensor var_20630_equation_0 = const()[name = tensor("op_20630_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20630_cast_fp16 = einsum(equation = var_20630_equation_0, values = (var_20108_cast_fp16, var_20510_cast_fp16))[name = tensor("op_20630_cast_fp16")]; tensor var_20632_equation_0 = const()[name = tensor("op_20632_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20632_cast_fp16 = einsum(equation = var_20632_equation_0, values = (var_20112_cast_fp16, var_20511_cast_fp16))[name = tensor("op_20632_cast_fp16")]; tensor var_20634_equation_0 = const()[name = tensor("op_20634_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20634_cast_fp16 = einsum(equation = var_20634_equation_0, values = (var_20112_cast_fp16, var_20512_cast_fp16))[name = tensor("op_20634_cast_fp16")]; tensor var_20636_equation_0 = const()[name = tensor("op_20636_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20636_cast_fp16 = einsum(equation = var_20636_equation_0, values = (var_20112_cast_fp16, var_20513_cast_fp16))[name = tensor("op_20636_cast_fp16")]; tensor var_20638_equation_0 = const()[name = tensor("op_20638_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20638_cast_fp16 = einsum(equation = var_20638_equation_0, values = (var_20112_cast_fp16, var_20514_cast_fp16))[name = tensor("op_20638_cast_fp16")]; tensor var_20640_equation_0 = const()[name = tensor("op_20640_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20640_cast_fp16 = einsum(equation = var_20640_equation_0, values = (var_20116_cast_fp16, var_20515_cast_fp16))[name = tensor("op_20640_cast_fp16")]; tensor var_20642_equation_0 = const()[name = tensor("op_20642_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20642_cast_fp16 = einsum(equation = var_20642_equation_0, values = (var_20116_cast_fp16, var_20516_cast_fp16))[name = tensor("op_20642_cast_fp16")]; tensor var_20644_equation_0 = const()[name = tensor("op_20644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20644_cast_fp16 = einsum(equation = var_20644_equation_0, values = (var_20116_cast_fp16, var_20517_cast_fp16))[name = tensor("op_20644_cast_fp16")]; tensor var_20646_equation_0 = const()[name = tensor("op_20646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20646_cast_fp16 = einsum(equation = var_20646_equation_0, values = (var_20116_cast_fp16, var_20518_cast_fp16))[name = tensor("op_20646_cast_fp16")]; tensor var_20648_equation_0 = const()[name = tensor("op_20648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20648_cast_fp16 = einsum(equation = var_20648_equation_0, values = (var_20120_cast_fp16, var_20519_cast_fp16))[name = tensor("op_20648_cast_fp16")]; tensor var_20650_equation_0 = const()[name = tensor("op_20650_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20650_cast_fp16 = einsum(equation = var_20650_equation_0, values = (var_20120_cast_fp16, var_20520_cast_fp16))[name = tensor("op_20650_cast_fp16")]; tensor var_20652_equation_0 = const()[name = tensor("op_20652_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20652_cast_fp16 = einsum(equation = var_20652_equation_0, values = (var_20120_cast_fp16, var_20521_cast_fp16))[name = tensor("op_20652_cast_fp16")]; tensor var_20654_equation_0 = const()[name = tensor("op_20654_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20654_cast_fp16 = einsum(equation = var_20654_equation_0, values = (var_20120_cast_fp16, var_20522_cast_fp16))[name = tensor("op_20654_cast_fp16")]; tensor var_20656_equation_0 = const()[name = tensor("op_20656_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20656_cast_fp16 = einsum(equation = var_20656_equation_0, values = (var_20124_cast_fp16, var_20523_cast_fp16))[name = tensor("op_20656_cast_fp16")]; tensor var_20658_equation_0 = const()[name = tensor("op_20658_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20658_cast_fp16 = einsum(equation = var_20658_equation_0, values = (var_20124_cast_fp16, var_20524_cast_fp16))[name = tensor("op_20658_cast_fp16")]; tensor var_20660_equation_0 = const()[name = tensor("op_20660_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20660_cast_fp16 = einsum(equation = var_20660_equation_0, values = (var_20124_cast_fp16, var_20525_cast_fp16))[name = tensor("op_20660_cast_fp16")]; tensor var_20662_equation_0 = const()[name = tensor("op_20662_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20662_cast_fp16 = einsum(equation = var_20662_equation_0, values = (var_20124_cast_fp16, var_20526_cast_fp16))[name = tensor("op_20662_cast_fp16")]; tensor var_20664_equation_0 = const()[name = tensor("op_20664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20664_cast_fp16 = einsum(equation = var_20664_equation_0, values = (var_20128_cast_fp16, var_20527_cast_fp16))[name = tensor("op_20664_cast_fp16")]; tensor var_20666_equation_0 = const()[name = tensor("op_20666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20666_cast_fp16 = einsum(equation = var_20666_equation_0, values = (var_20128_cast_fp16, var_20528_cast_fp16))[name = tensor("op_20666_cast_fp16")]; tensor var_20668_equation_0 = const()[name = tensor("op_20668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20668_cast_fp16 = einsum(equation = var_20668_equation_0, values = (var_20128_cast_fp16, var_20529_cast_fp16))[name = tensor("op_20668_cast_fp16")]; tensor var_20670_equation_0 = const()[name = tensor("op_20670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20670_cast_fp16 = einsum(equation = var_20670_equation_0, values = (var_20128_cast_fp16, var_20530_cast_fp16))[name = tensor("op_20670_cast_fp16")]; tensor var_20672_equation_0 = const()[name = tensor("op_20672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20672_cast_fp16 = einsum(equation = var_20672_equation_0, values = (var_20132_cast_fp16, var_20531_cast_fp16))[name = tensor("op_20672_cast_fp16")]; tensor var_20674_equation_0 = const()[name = tensor("op_20674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20674_cast_fp16 = einsum(equation = var_20674_equation_0, values = (var_20132_cast_fp16, var_20532_cast_fp16))[name = tensor("op_20674_cast_fp16")]; tensor var_20676_equation_0 = const()[name = tensor("op_20676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20676_cast_fp16 = einsum(equation = var_20676_equation_0, values = (var_20132_cast_fp16, var_20533_cast_fp16))[name = tensor("op_20676_cast_fp16")]; tensor var_20678_equation_0 = const()[name = tensor("op_20678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20678_cast_fp16 = einsum(equation = var_20678_equation_0, values = (var_20132_cast_fp16, var_20534_cast_fp16))[name = tensor("op_20678_cast_fp16")]; tensor var_20680_equation_0 = const()[name = tensor("op_20680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20680_cast_fp16 = einsum(equation = var_20680_equation_0, values = (var_20136_cast_fp16, var_20535_cast_fp16))[name = tensor("op_20680_cast_fp16")]; tensor var_20682_equation_0 = const()[name = tensor("op_20682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20682_cast_fp16 = einsum(equation = var_20682_equation_0, values = (var_20136_cast_fp16, var_20536_cast_fp16))[name = tensor("op_20682_cast_fp16")]; tensor var_20684_equation_0 = const()[name = tensor("op_20684_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20684_cast_fp16 = einsum(equation = var_20684_equation_0, values = (var_20136_cast_fp16, var_20537_cast_fp16))[name = tensor("op_20684_cast_fp16")]; tensor var_20686_equation_0 = const()[name = tensor("op_20686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20686_cast_fp16 = einsum(equation = var_20686_equation_0, values = (var_20136_cast_fp16, var_20538_cast_fp16))[name = tensor("op_20686_cast_fp16")]; tensor var_20688_equation_0 = const()[name = tensor("op_20688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20688_cast_fp16 = einsum(equation = var_20688_equation_0, values = (var_20140_cast_fp16, var_20539_cast_fp16))[name = tensor("op_20688_cast_fp16")]; tensor var_20690_equation_0 = const()[name = tensor("op_20690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20690_cast_fp16 = einsum(equation = var_20690_equation_0, values = (var_20140_cast_fp16, var_20540_cast_fp16))[name = tensor("op_20690_cast_fp16")]; tensor var_20692_equation_0 = const()[name = tensor("op_20692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20692_cast_fp16 = einsum(equation = var_20692_equation_0, values = (var_20140_cast_fp16, var_20541_cast_fp16))[name = tensor("op_20692_cast_fp16")]; tensor var_20694_equation_0 = const()[name = tensor("op_20694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20694_cast_fp16 = einsum(equation = var_20694_equation_0, values = (var_20140_cast_fp16, var_20542_cast_fp16))[name = tensor("op_20694_cast_fp16")]; tensor var_20696_equation_0 = const()[name = tensor("op_20696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20696_cast_fp16 = einsum(equation = var_20696_equation_0, values = (var_20144_cast_fp16, var_20543_cast_fp16))[name = tensor("op_20696_cast_fp16")]; tensor var_20698_equation_0 = const()[name = tensor("op_20698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20698_cast_fp16 = einsum(equation = var_20698_equation_0, values = (var_20144_cast_fp16, var_20544_cast_fp16))[name = tensor("op_20698_cast_fp16")]; tensor var_20700_equation_0 = const()[name = tensor("op_20700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20700_cast_fp16 = einsum(equation = var_20700_equation_0, values = (var_20144_cast_fp16, var_20545_cast_fp16))[name = tensor("op_20700_cast_fp16")]; tensor var_20702_equation_0 = const()[name = tensor("op_20702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20702_cast_fp16 = einsum(equation = var_20702_equation_0, values = (var_20144_cast_fp16, var_20546_cast_fp16))[name = tensor("op_20702_cast_fp16")]; tensor var_20704_equation_0 = const()[name = tensor("op_20704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20704_cast_fp16 = einsum(equation = var_20704_equation_0, values = (var_20148_cast_fp16, var_20547_cast_fp16))[name = tensor("op_20704_cast_fp16")]; tensor var_20706_equation_0 = const()[name = tensor("op_20706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20706_cast_fp16 = einsum(equation = var_20706_equation_0, values = (var_20148_cast_fp16, var_20548_cast_fp16))[name = tensor("op_20706_cast_fp16")]; tensor var_20708_equation_0 = const()[name = tensor("op_20708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20708_cast_fp16 = einsum(equation = var_20708_equation_0, values = (var_20148_cast_fp16, var_20549_cast_fp16))[name = tensor("op_20708_cast_fp16")]; tensor var_20710_equation_0 = const()[name = tensor("op_20710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_20710_cast_fp16 = einsum(equation = var_20710_equation_0, values = (var_20148_cast_fp16, var_20550_cast_fp16))[name = tensor("op_20710_cast_fp16")]; tensor var_20712_interleave_0 = const()[name = tensor("op_20712_interleave_0"), val = tensor(false)]; tensor var_20712_cast_fp16 = concat(axis = var_19244, interleave = var_20712_interleave_0, values = (var_20552_cast_fp16, var_20554_cast_fp16, var_20556_cast_fp16, var_20558_cast_fp16))[name = tensor("op_20712_cast_fp16")]; tensor var_20714_interleave_0 = const()[name = tensor("op_20714_interleave_0"), val = tensor(false)]; tensor var_20714_cast_fp16 = concat(axis = var_19244, interleave = var_20714_interleave_0, values = (var_20560_cast_fp16, var_20562_cast_fp16, var_20564_cast_fp16, var_20566_cast_fp16))[name = tensor("op_20714_cast_fp16")]; tensor var_20716_interleave_0 = const()[name = tensor("op_20716_interleave_0"), val = tensor(false)]; tensor var_20716_cast_fp16 = concat(axis = var_19244, interleave = var_20716_interleave_0, values = (var_20568_cast_fp16, var_20570_cast_fp16, var_20572_cast_fp16, var_20574_cast_fp16))[name = tensor("op_20716_cast_fp16")]; tensor var_20718_interleave_0 = const()[name = tensor("op_20718_interleave_0"), val = tensor(false)]; tensor var_20718_cast_fp16 = concat(axis = var_19244, interleave = var_20718_interleave_0, values = (var_20576_cast_fp16, var_20578_cast_fp16, var_20580_cast_fp16, var_20582_cast_fp16))[name = tensor("op_20718_cast_fp16")]; tensor var_20720_interleave_0 = const()[name = tensor("op_20720_interleave_0"), val = tensor(false)]; tensor var_20720_cast_fp16 = concat(axis = var_19244, interleave = var_20720_interleave_0, values = (var_20584_cast_fp16, var_20586_cast_fp16, var_20588_cast_fp16, var_20590_cast_fp16))[name = tensor("op_20720_cast_fp16")]; tensor var_20722_interleave_0 = const()[name = tensor("op_20722_interleave_0"), val = tensor(false)]; tensor var_20722_cast_fp16 = concat(axis = var_19244, interleave = var_20722_interleave_0, values = (var_20592_cast_fp16, var_20594_cast_fp16, var_20596_cast_fp16, var_20598_cast_fp16))[name = tensor("op_20722_cast_fp16")]; tensor var_20724_interleave_0 = const()[name = tensor("op_20724_interleave_0"), val = tensor(false)]; tensor var_20724_cast_fp16 = concat(axis = var_19244, interleave = var_20724_interleave_0, values = (var_20600_cast_fp16, var_20602_cast_fp16, var_20604_cast_fp16, var_20606_cast_fp16))[name = tensor("op_20724_cast_fp16")]; tensor var_20726_interleave_0 = const()[name = tensor("op_20726_interleave_0"), val = tensor(false)]; tensor var_20726_cast_fp16 = concat(axis = var_19244, interleave = var_20726_interleave_0, values = (var_20608_cast_fp16, var_20610_cast_fp16, var_20612_cast_fp16, var_20614_cast_fp16))[name = tensor("op_20726_cast_fp16")]; tensor var_20728_interleave_0 = const()[name = tensor("op_20728_interleave_0"), val = tensor(false)]; tensor var_20728_cast_fp16 = concat(axis = var_19244, interleave = var_20728_interleave_0, values = (var_20616_cast_fp16, var_20618_cast_fp16, var_20620_cast_fp16, var_20622_cast_fp16))[name = tensor("op_20728_cast_fp16")]; tensor var_20730_interleave_0 = const()[name = tensor("op_20730_interleave_0"), val = tensor(false)]; tensor var_20730_cast_fp16 = concat(axis = var_19244, interleave = var_20730_interleave_0, values = (var_20624_cast_fp16, var_20626_cast_fp16, var_20628_cast_fp16, var_20630_cast_fp16))[name = tensor("op_20730_cast_fp16")]; tensor var_20732_interleave_0 = const()[name = tensor("op_20732_interleave_0"), val = tensor(false)]; tensor var_20732_cast_fp16 = concat(axis = var_19244, interleave = var_20732_interleave_0, values = (var_20632_cast_fp16, var_20634_cast_fp16, var_20636_cast_fp16, var_20638_cast_fp16))[name = tensor("op_20732_cast_fp16")]; tensor var_20734_interleave_0 = const()[name = tensor("op_20734_interleave_0"), val = tensor(false)]; tensor var_20734_cast_fp16 = concat(axis = var_19244, interleave = var_20734_interleave_0, values = (var_20640_cast_fp16, var_20642_cast_fp16, var_20644_cast_fp16, var_20646_cast_fp16))[name = tensor("op_20734_cast_fp16")]; tensor var_20736_interleave_0 = const()[name = tensor("op_20736_interleave_0"), val = tensor(false)]; tensor var_20736_cast_fp16 = concat(axis = var_19244, interleave = var_20736_interleave_0, values = (var_20648_cast_fp16, var_20650_cast_fp16, var_20652_cast_fp16, var_20654_cast_fp16))[name = tensor("op_20736_cast_fp16")]; tensor var_20738_interleave_0 = const()[name = tensor("op_20738_interleave_0"), val = tensor(false)]; tensor var_20738_cast_fp16 = concat(axis = var_19244, interleave = var_20738_interleave_0, values = (var_20656_cast_fp16, var_20658_cast_fp16, var_20660_cast_fp16, var_20662_cast_fp16))[name = tensor("op_20738_cast_fp16")]; tensor var_20740_interleave_0 = const()[name = tensor("op_20740_interleave_0"), val = tensor(false)]; tensor var_20740_cast_fp16 = concat(axis = var_19244, interleave = var_20740_interleave_0, values = (var_20664_cast_fp16, var_20666_cast_fp16, var_20668_cast_fp16, var_20670_cast_fp16))[name = tensor("op_20740_cast_fp16")]; tensor var_20742_interleave_0 = const()[name = tensor("op_20742_interleave_0"), val = tensor(false)]; tensor var_20742_cast_fp16 = concat(axis = var_19244, interleave = var_20742_interleave_0, values = (var_20672_cast_fp16, var_20674_cast_fp16, var_20676_cast_fp16, var_20678_cast_fp16))[name = tensor("op_20742_cast_fp16")]; tensor var_20744_interleave_0 = const()[name = tensor("op_20744_interleave_0"), val = tensor(false)]; tensor var_20744_cast_fp16 = concat(axis = var_19244, interleave = var_20744_interleave_0, values = (var_20680_cast_fp16, var_20682_cast_fp16, var_20684_cast_fp16, var_20686_cast_fp16))[name = tensor("op_20744_cast_fp16")]; tensor var_20746_interleave_0 = const()[name = tensor("op_20746_interleave_0"), val = tensor(false)]; tensor var_20746_cast_fp16 = concat(axis = var_19244, interleave = var_20746_interleave_0, values = (var_20688_cast_fp16, var_20690_cast_fp16, var_20692_cast_fp16, var_20694_cast_fp16))[name = tensor("op_20746_cast_fp16")]; tensor var_20748_interleave_0 = const()[name = tensor("op_20748_interleave_0"), val = tensor(false)]; tensor var_20748_cast_fp16 = concat(axis = var_19244, interleave = var_20748_interleave_0, values = (var_20696_cast_fp16, var_20698_cast_fp16, var_20700_cast_fp16, var_20702_cast_fp16))[name = tensor("op_20748_cast_fp16")]; tensor var_20750_interleave_0 = const()[name = tensor("op_20750_interleave_0"), val = tensor(false)]; tensor var_20750_cast_fp16 = concat(axis = var_19244, interleave = var_20750_interleave_0, values = (var_20704_cast_fp16, var_20706_cast_fp16, var_20708_cast_fp16, var_20710_cast_fp16))[name = tensor("op_20750_cast_fp16")]; tensor input_97_interleave_0 = const()[name = tensor("input_97_interleave_0"), val = tensor(false)]; tensor input_97_cast_fp16 = concat(axis = var_19269, interleave = input_97_interleave_0, values = (var_20712_cast_fp16, var_20714_cast_fp16, var_20716_cast_fp16, var_20718_cast_fp16, var_20720_cast_fp16, var_20722_cast_fp16, var_20724_cast_fp16, var_20726_cast_fp16, var_20728_cast_fp16, var_20730_cast_fp16, var_20732_cast_fp16, var_20734_cast_fp16, var_20736_cast_fp16, var_20738_cast_fp16, var_20740_cast_fp16, var_20742_cast_fp16, var_20744_cast_fp16, var_20746_cast_fp16, var_20748_cast_fp16, var_20750_cast_fp16))[name = tensor("input_97_cast_fp16")]; tensor var_20761_pad_type_0 = const()[name = tensor("op_20761_pad_type_0"), val = tensor("valid")]; tensor var_20761_strides_0 = const()[name = tensor("op_20761_strides_0"), val = tensor([1, 1])]; tensor var_20761_pad_0 = const()[name = tensor("op_20761_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20761_dilations_0 = const()[name = tensor("op_20761_dilations_0"), val = tensor([1, 1])]; tensor var_20761_groups_0 = const()[name = tensor("op_20761_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(173852992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174672256))), name = tensor("layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_12_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174672384)))]; tensor var_20761_cast_fp16 = conv(bias = layers_12_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_20761_dilations_0, groups = var_20761_groups_0, pad = var_20761_pad_0, pad_type = var_20761_pad_type_0, strides = var_20761_strides_0, weight = layers_12_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_97_cast_fp16)[name = tensor("op_20761_cast_fp16")]; tensor var_20767_pad_type_0 = const()[name = tensor("op_20767_pad_type_0"), val = tensor("valid")]; tensor var_20767_strides_0 = const()[name = tensor("op_20767_strides_0"), val = tensor([1, 1])]; tensor var_20767_pad_0 = const()[name = tensor("op_20767_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20767_dilations_0 = const()[name = tensor("op_20767_dilations_0"), val = tensor([1, 1])]; tensor var_20767_groups_0 = const()[name = tensor("op_20767_groups_0"), val = tensor(1)]; tensor layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174690880))), name = tensor("layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174675008))), shape = tensor([1280, 1280, 1, 1])]; tensor var_20767_cast_fp16 = conv(dilations = var_20767_dilations_0, groups = var_20767_groups_0, pad = var_20767_pad_0, pad_type = var_20767_pad_type_0, strides = var_20767_strides_0, weight = layers_12_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_97_cast_fp16)[name = tensor("op_20767_cast_fp16")]; tensor obj_51_cast_fp16 = add(x = var_20761_cast_fp16, y = var_20767_cast_fp16)[name = tensor("obj_51_cast_fp16")]; tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_51_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; tensor out_51_axes_0 = const()[name = tensor("out_51_axes_0"), val = tensor([1])]; tensor var_20778_to_fp16 = const()[name = tensor("op_20778_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_51_cast_fp16 = layer_norm(axes = out_51_axes_0, epsilon = var_20778_to_fp16, x = inputs_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; tensor input_99_gamma_0_to_fp16 = const()[name = tensor("input_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174895744)))]; tensor input_99_beta_0_to_fp16 = const()[name = tensor("input_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174898368)))]; tensor input_99_epsilon_0_to_fp16 = const()[name = tensor("input_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_99_cast_fp16 = batch_norm(beta = input_99_beta_0_to_fp16, epsilon = input_99_epsilon_0_to_fp16, gamma = input_99_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor("input_99_cast_fp16")]; tensor var_20796_pad_type_0 = const()[name = tensor("op_20796_pad_type_0"), val = tensor("valid")]; tensor var_20796_strides_0 = const()[name = tensor("op_20796_strides_0"), val = tensor([1, 1])]; tensor var_20796_pad_0 = const()[name = tensor("op_20796_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20796_dilations_0 = const()[name = tensor("op_20796_dilations_0"), val = tensor([1, 1])]; tensor var_20796_groups_0 = const()[name = tensor("op_20796_groups_0"), val = tensor(1)]; tensor layers_12_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174900992))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178177856))), name = tensor("layers_12_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_12_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178177984)))]; tensor var_20796_cast_fp16 = conv(bias = layers_12_fc1_inlier_module_bias_to_fp16, dilations = var_20796_dilations_0, groups = var_20796_groups_0, pad = var_20796_pad_0, pad_type = var_20796_pad_type_0, strides = var_20796_strides_0, weight = layers_12_fc1_inlier_module_weight_to_fp16_palettized, x = input_99_cast_fp16)[name = tensor("op_20796_cast_fp16")]; tensor var_20802_pad_type_0 = const()[name = tensor("op_20802_pad_type_0"), val = tensor("valid")]; tensor var_20802_strides_0 = const()[name = tensor("op_20802_strides_0"), val = tensor([1, 1])]; tensor var_20802_pad_0 = const()[name = tensor("op_20802_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20802_dilations_0 = const()[name = tensor("op_20802_dilations_0"), val = tensor([1, 1])]; tensor var_20802_groups_0 = const()[name = tensor("op_20802_groups_0"), val = tensor(1)]; tensor layers_12_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178232512))), name = tensor("layers_12_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178188288))), shape = tensor([5120, 1280, 1, 1])]; tensor var_20802_cast_fp16 = conv(dilations = var_20802_dilations_0, groups = var_20802_groups_0, pad = var_20802_pad_0, pad_type = var_20802_pad_type_0, strides = var_20802_strides_0, weight = layers_12_fc1_outlier_module_weight_to_fp16_sparsified, x = input_99_cast_fp16)[name = tensor("op_20802_cast_fp16")]; tensor input_101_cast_fp16 = add(x = var_20796_cast_fp16, y = var_20802_cast_fp16)[name = tensor("input_101_cast_fp16")]; tensor input_103_mode_0 = const()[name = tensor("input_103_mode_0"), val = tensor("EXACT")]; tensor input_103_cast_fp16 = gelu(mode = input_103_mode_0, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; tensor var_20813_pad_type_0 = const()[name = tensor("op_20813_pad_type_0"), val = tensor("valid")]; tensor var_20813_strides_0 = const()[name = tensor("op_20813_strides_0"), val = tensor([1, 1])]; tensor var_20813_pad_0 = const()[name = tensor("op_20813_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20813_dilations_0 = const()[name = tensor("op_20813_dilations_0"), val = tensor([1, 1])]; tensor var_20813_groups_0 = const()[name = tensor("op_20813_groups_0"), val = tensor(1)]; tensor layers_12_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(179051776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182328640))), name = tensor("layers_12_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_12_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_12_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182328768)))]; tensor var_20813_cast_fp16 = conv(bias = layers_12_fc2_inlier_module_bias_to_fp16, dilations = var_20813_dilations_0, groups = var_20813_groups_0, pad = var_20813_pad_0, pad_type = var_20813_pad_type_0, strides = var_20813_strides_0, weight = layers_12_fc2_inlier_module_weight_to_fp16_palettized, x = input_103_cast_fp16)[name = tensor("op_20813_cast_fp16")]; tensor var_20819_pad_type_0 = const()[name = tensor("op_20819_pad_type_0"), val = tensor("valid")]; tensor var_20819_strides_0 = const()[name = tensor("op_20819_strides_0"), val = tensor([1, 1])]; tensor var_20819_pad_0 = const()[name = tensor("op_20819_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20819_dilations_0 = const()[name = tensor("op_20819_dilations_0"), val = tensor([1, 1])]; tensor var_20819_groups_0 = const()[name = tensor("op_20819_groups_0"), val = tensor(1)]; tensor layers_12_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182509056))), name = tensor("layers_12_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(182331392))), shape = tensor([1280, 5120, 1, 1])]; tensor var_20819_cast_fp16 = conv(dilations = var_20819_dilations_0, groups = var_20819_groups_0, pad = var_20819_pad_0, pad_type = var_20819_pad_type_0, strides = var_20819_strides_0, weight = layers_12_fc2_outlier_module_weight_to_fp16_sparsified, x = input_103_cast_fp16)[name = tensor("op_20819_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = var_20813_cast_fp16, y = var_20819_cast_fp16)[name = tensor("hidden_states_29_cast_fp16")]; tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = hidden_states_29_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; tensor var_20825 = const()[name = tensor("op_20825"), val = tensor(3)]; tensor var_20850 = const()[name = tensor("op_20850"), val = tensor(1)]; tensor out_53_axes_0 = const()[name = tensor("out_53_axes_0"), val = tensor([1])]; tensor var_20867_to_fp16 = const()[name = tensor("op_20867_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_53_cast_fp16 = layer_norm(axes = out_53_axes_0, epsilon = var_20867_to_fp16, x = inputs_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; tensor obj_53_gamma_0_to_fp16 = const()[name = tensor("obj_53_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183328320)))]; tensor obj_53_beta_0_to_fp16 = const()[name = tensor("obj_53_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183330944)))]; tensor obj_53_epsilon_0_to_fp16 = const()[name = tensor("obj_53_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_53_cast_fp16 = batch_norm(beta = obj_53_beta_0_to_fp16, epsilon = obj_53_epsilon_0_to_fp16, gamma = obj_53_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("obj_53_cast_fp16")]; tensor var_20889_pad_type_0 = const()[name = tensor("op_20889_pad_type_0"), val = tensor("valid")]; tensor var_20889_strides_0 = const()[name = tensor("op_20889_strides_0"), val = tensor([1, 1])]; tensor var_20889_pad_0 = const()[name = tensor("op_20889_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20889_dilations_0 = const()[name = tensor("op_20889_dilations_0"), val = tensor([1, 1])]; tensor var_20889_groups_0 = const()[name = tensor("op_20889_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(183333568))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184152832))), name = tensor("layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_13_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184152960)))]; tensor var_20889_cast_fp16 = conv(bias = layers_13_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_20889_dilations_0, groups = var_20889_groups_0, pad = var_20889_pad_0, pad_type = var_20889_pad_type_0, strides = var_20889_strides_0, weight = layers_13_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = tensor("op_20889_cast_fp16")]; tensor var_20895_pad_type_0 = const()[name = tensor("op_20895_pad_type_0"), val = tensor("valid")]; tensor var_20895_strides_0 = const()[name = tensor("op_20895_strides_0"), val = tensor([1, 1])]; tensor var_20895_pad_0 = const()[name = tensor("op_20895_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20895_dilations_0 = const()[name = tensor("op_20895_dilations_0"), val = tensor([1, 1])]; tensor var_20895_groups_0 = const()[name = tensor("op_20895_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184209280))), name = tensor("layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184155584))), shape = tensor([1280, 1280, 1, 1])]; tensor var_20895_cast_fp16 = conv(dilations = var_20895_dilations_0, groups = var_20895_groups_0, pad = var_20895_pad_0, pad_type = var_20895_pad_type_0, strides = var_20895_strides_0, weight = layers_13_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = tensor("op_20895_cast_fp16")]; tensor query_27_cast_fp16 = add(x = var_20889_cast_fp16, y = var_20895_cast_fp16)[name = tensor("query_27_cast_fp16")]; tensor var_20904_pad_type_0 = const()[name = tensor("op_20904_pad_type_0"), val = tensor("valid")]; tensor var_20904_strides_0 = const()[name = tensor("op_20904_strides_0"), val = tensor([1, 1])]; tensor var_20904_pad_0 = const()[name = tensor("op_20904_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20904_dilations_0 = const()[name = tensor("op_20904_dilations_0"), val = tensor([1, 1])]; tensor var_20904_groups_0 = const()[name = tensor("op_20904_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184414144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185233408))), name = tensor("layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_20904_cast_fp16 = conv(dilations = var_20904_dilations_0, groups = var_20904_groups_0, pad = var_20904_pad_0, pad_type = var_20904_pad_type_0, strides = var_20904_strides_0, weight = layers_13_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = tensor("op_20904_cast_fp16")]; tensor var_20910_pad_type_0 = const()[name = tensor("op_20910_pad_type_0"), val = tensor("valid")]; tensor var_20910_strides_0 = const()[name = tensor("op_20910_strides_0"), val = tensor([1, 1])]; tensor var_20910_pad_0 = const()[name = tensor("op_20910_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20910_dilations_0 = const()[name = tensor("op_20910_dilations_0"), val = tensor([1, 1])]; tensor var_20910_groups_0 = const()[name = tensor("op_20910_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185265728))), name = tensor("layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185233536))), shape = tensor([1280, 1280, 1, 1])]; tensor var_20910_cast_fp16 = conv(dilations = var_20910_dilations_0, groups = var_20910_groups_0, pad = var_20910_pad_0, pad_type = var_20910_pad_type_0, strides = var_20910_strides_0, weight = layers_13_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = tensor("op_20910_cast_fp16")]; tensor key_27_cast_fp16 = add(x = var_20904_cast_fp16, y = var_20910_cast_fp16)[name = tensor("key_27_cast_fp16")]; tensor var_20920_pad_type_0 = const()[name = tensor("op_20920_pad_type_0"), val = tensor("valid")]; tensor var_20920_strides_0 = const()[name = tensor("op_20920_strides_0"), val = tensor([1, 1])]; tensor var_20920_pad_0 = const()[name = tensor("op_20920_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20920_dilations_0 = const()[name = tensor("op_20920_dilations_0"), val = tensor([1, 1])]; tensor var_20920_groups_0 = const()[name = tensor("op_20920_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(185470592))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186289856))), name = tensor("layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_13_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186289984)))]; tensor var_20920_cast_fp16 = conv(bias = layers_13_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_20920_dilations_0, groups = var_20920_groups_0, pad = var_20920_pad_0, pad_type = var_20920_pad_type_0, strides = var_20920_strides_0, weight = layers_13_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_53_cast_fp16)[name = tensor("op_20920_cast_fp16")]; tensor var_20926_pad_type_0 = const()[name = tensor("op_20926_pad_type_0"), val = tensor("valid")]; tensor var_20926_strides_0 = const()[name = tensor("op_20926_strides_0"), val = tensor([1, 1])]; tensor var_20926_pad_0 = const()[name = tensor("op_20926_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_20926_dilations_0 = const()[name = tensor("op_20926_dilations_0"), val = tensor([1, 1])]; tensor var_20926_groups_0 = const()[name = tensor("op_20926_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186311936))), name = tensor("layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186292608))), shape = tensor([1280, 1280, 1, 1])]; tensor var_20926_cast_fp16 = conv(dilations = var_20926_dilations_0, groups = var_20926_groups_0, pad = var_20926_pad_0, pad_type = var_20926_pad_type_0, strides = var_20926_strides_0, weight = layers_13_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_53_cast_fp16)[name = tensor("op_20926_cast_fp16")]; tensor value_27_cast_fp16 = add(x = var_20920_cast_fp16, y = var_20926_cast_fp16)[name = tensor("value_27_cast_fp16")]; tensor var_20932_begin_0 = const()[name = tensor("op_20932_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_20932_end_0 = const()[name = tensor("op_20932_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_20932_end_mask_0 = const()[name = tensor("op_20932_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20932_cast_fp16 = slice_by_index(begin = var_20932_begin_0, end = var_20932_end_0, end_mask = var_20932_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20932_cast_fp16")]; tensor var_20936_begin_0 = const()[name = tensor("op_20936_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_20936_end_0 = const()[name = tensor("op_20936_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_20936_end_mask_0 = const()[name = tensor("op_20936_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20936_cast_fp16 = slice_by_index(begin = var_20936_begin_0, end = var_20936_end_0, end_mask = var_20936_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20936_cast_fp16")]; tensor var_20940_begin_0 = const()[name = tensor("op_20940_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_20940_end_0 = const()[name = tensor("op_20940_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_20940_end_mask_0 = const()[name = tensor("op_20940_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20940_cast_fp16 = slice_by_index(begin = var_20940_begin_0, end = var_20940_end_0, end_mask = var_20940_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20940_cast_fp16")]; tensor var_20944_begin_0 = const()[name = tensor("op_20944_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_20944_end_0 = const()[name = tensor("op_20944_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_20944_end_mask_0 = const()[name = tensor("op_20944_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20944_cast_fp16 = slice_by_index(begin = var_20944_begin_0, end = var_20944_end_0, end_mask = var_20944_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20944_cast_fp16")]; tensor var_20948_begin_0 = const()[name = tensor("op_20948_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_20948_end_0 = const()[name = tensor("op_20948_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_20948_end_mask_0 = const()[name = tensor("op_20948_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20948_cast_fp16 = slice_by_index(begin = var_20948_begin_0, end = var_20948_end_0, end_mask = var_20948_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20948_cast_fp16")]; tensor var_20952_begin_0 = const()[name = tensor("op_20952_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_20952_end_0 = const()[name = tensor("op_20952_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_20952_end_mask_0 = const()[name = tensor("op_20952_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20952_cast_fp16 = slice_by_index(begin = var_20952_begin_0, end = var_20952_end_0, end_mask = var_20952_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20952_cast_fp16")]; tensor var_20956_begin_0 = const()[name = tensor("op_20956_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_20956_end_0 = const()[name = tensor("op_20956_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_20956_end_mask_0 = const()[name = tensor("op_20956_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20956_cast_fp16 = slice_by_index(begin = var_20956_begin_0, end = var_20956_end_0, end_mask = var_20956_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20956_cast_fp16")]; tensor var_20960_begin_0 = const()[name = tensor("op_20960_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_20960_end_0 = const()[name = tensor("op_20960_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_20960_end_mask_0 = const()[name = tensor("op_20960_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20960_cast_fp16 = slice_by_index(begin = var_20960_begin_0, end = var_20960_end_0, end_mask = var_20960_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20960_cast_fp16")]; tensor var_20964_begin_0 = const()[name = tensor("op_20964_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_20964_end_0 = const()[name = tensor("op_20964_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_20964_end_mask_0 = const()[name = tensor("op_20964_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20964_cast_fp16 = slice_by_index(begin = var_20964_begin_0, end = var_20964_end_0, end_mask = var_20964_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20964_cast_fp16")]; tensor var_20968_begin_0 = const()[name = tensor("op_20968_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_20968_end_0 = const()[name = tensor("op_20968_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_20968_end_mask_0 = const()[name = tensor("op_20968_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20968_cast_fp16 = slice_by_index(begin = var_20968_begin_0, end = var_20968_end_0, end_mask = var_20968_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20968_cast_fp16")]; tensor var_20972_begin_0 = const()[name = tensor("op_20972_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_20972_end_0 = const()[name = tensor("op_20972_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_20972_end_mask_0 = const()[name = tensor("op_20972_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20972_cast_fp16 = slice_by_index(begin = var_20972_begin_0, end = var_20972_end_0, end_mask = var_20972_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20972_cast_fp16")]; tensor var_20976_begin_0 = const()[name = tensor("op_20976_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_20976_end_0 = const()[name = tensor("op_20976_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_20976_end_mask_0 = const()[name = tensor("op_20976_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20976_cast_fp16 = slice_by_index(begin = var_20976_begin_0, end = var_20976_end_0, end_mask = var_20976_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20976_cast_fp16")]; tensor var_20980_begin_0 = const()[name = tensor("op_20980_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_20980_end_0 = const()[name = tensor("op_20980_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_20980_end_mask_0 = const()[name = tensor("op_20980_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20980_cast_fp16 = slice_by_index(begin = var_20980_begin_0, end = var_20980_end_0, end_mask = var_20980_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20980_cast_fp16")]; tensor var_20984_begin_0 = const()[name = tensor("op_20984_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_20984_end_0 = const()[name = tensor("op_20984_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_20984_end_mask_0 = const()[name = tensor("op_20984_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20984_cast_fp16 = slice_by_index(begin = var_20984_begin_0, end = var_20984_end_0, end_mask = var_20984_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20984_cast_fp16")]; tensor var_20988_begin_0 = const()[name = tensor("op_20988_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_20988_end_0 = const()[name = tensor("op_20988_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_20988_end_mask_0 = const()[name = tensor("op_20988_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20988_cast_fp16 = slice_by_index(begin = var_20988_begin_0, end = var_20988_end_0, end_mask = var_20988_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20988_cast_fp16")]; tensor var_20992_begin_0 = const()[name = tensor("op_20992_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_20992_end_0 = const()[name = tensor("op_20992_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_20992_end_mask_0 = const()[name = tensor("op_20992_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20992_cast_fp16 = slice_by_index(begin = var_20992_begin_0, end = var_20992_end_0, end_mask = var_20992_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20992_cast_fp16")]; tensor var_20996_begin_0 = const()[name = tensor("op_20996_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_20996_end_0 = const()[name = tensor("op_20996_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_20996_end_mask_0 = const()[name = tensor("op_20996_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_20996_cast_fp16 = slice_by_index(begin = var_20996_begin_0, end = var_20996_end_0, end_mask = var_20996_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20996_cast_fp16")]; tensor var_21000_begin_0 = const()[name = tensor("op_21000_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_21000_end_0 = const()[name = tensor("op_21000_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_21000_end_mask_0 = const()[name = tensor("op_21000_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21000_cast_fp16 = slice_by_index(begin = var_21000_begin_0, end = var_21000_end_0, end_mask = var_21000_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_21000_cast_fp16")]; tensor var_21004_begin_0 = const()[name = tensor("op_21004_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_21004_end_0 = const()[name = tensor("op_21004_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_21004_end_mask_0 = const()[name = tensor("op_21004_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21004_cast_fp16 = slice_by_index(begin = var_21004_begin_0, end = var_21004_end_0, end_mask = var_21004_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_21004_cast_fp16")]; tensor var_21008_begin_0 = const()[name = tensor("op_21008_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_21008_end_0 = const()[name = tensor("op_21008_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_21008_end_mask_0 = const()[name = tensor("op_21008_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21008_cast_fp16 = slice_by_index(begin = var_21008_begin_0, end = var_21008_end_0, end_mask = var_21008_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_21008_cast_fp16")]; tensor var_21017_begin_0 = const()[name = tensor("op_21017_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21017_end_0 = const()[name = tensor("op_21017_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21017_end_mask_0 = const()[name = tensor("op_21017_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21017_cast_fp16 = slice_by_index(begin = var_21017_begin_0, end = var_21017_end_0, end_mask = var_21017_end_mask_0, x = var_20932_cast_fp16)[name = tensor("op_21017_cast_fp16")]; tensor var_21024_begin_0 = const()[name = tensor("op_21024_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21024_end_0 = const()[name = tensor("op_21024_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21024_end_mask_0 = const()[name = tensor("op_21024_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21024_cast_fp16 = slice_by_index(begin = var_21024_begin_0, end = var_21024_end_0, end_mask = var_21024_end_mask_0, x = var_20932_cast_fp16)[name = tensor("op_21024_cast_fp16")]; tensor var_21031_begin_0 = const()[name = tensor("op_21031_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21031_end_0 = const()[name = tensor("op_21031_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21031_end_mask_0 = const()[name = tensor("op_21031_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21031_cast_fp16 = slice_by_index(begin = var_21031_begin_0, end = var_21031_end_0, end_mask = var_21031_end_mask_0, x = var_20932_cast_fp16)[name = tensor("op_21031_cast_fp16")]; tensor var_21038_begin_0 = const()[name = tensor("op_21038_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21038_end_0 = const()[name = tensor("op_21038_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21038_end_mask_0 = const()[name = tensor("op_21038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21038_cast_fp16 = slice_by_index(begin = var_21038_begin_0, end = var_21038_end_0, end_mask = var_21038_end_mask_0, x = var_20932_cast_fp16)[name = tensor("op_21038_cast_fp16")]; tensor var_21045_begin_0 = const()[name = tensor("op_21045_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21045_end_0 = const()[name = tensor("op_21045_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21045_end_mask_0 = const()[name = tensor("op_21045_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21045_cast_fp16 = slice_by_index(begin = var_21045_begin_0, end = var_21045_end_0, end_mask = var_21045_end_mask_0, x = var_20936_cast_fp16)[name = tensor("op_21045_cast_fp16")]; tensor var_21052_begin_0 = const()[name = tensor("op_21052_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21052_end_0 = const()[name = tensor("op_21052_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21052_end_mask_0 = const()[name = tensor("op_21052_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21052_cast_fp16 = slice_by_index(begin = var_21052_begin_0, end = var_21052_end_0, end_mask = var_21052_end_mask_0, x = var_20936_cast_fp16)[name = tensor("op_21052_cast_fp16")]; tensor var_21059_begin_0 = const()[name = tensor("op_21059_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21059_end_0 = const()[name = tensor("op_21059_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21059_end_mask_0 = const()[name = tensor("op_21059_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21059_cast_fp16 = slice_by_index(begin = var_21059_begin_0, end = var_21059_end_0, end_mask = var_21059_end_mask_0, x = var_20936_cast_fp16)[name = tensor("op_21059_cast_fp16")]; tensor var_21066_begin_0 = const()[name = tensor("op_21066_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21066_end_0 = const()[name = tensor("op_21066_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21066_end_mask_0 = const()[name = tensor("op_21066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21066_cast_fp16 = slice_by_index(begin = var_21066_begin_0, end = var_21066_end_0, end_mask = var_21066_end_mask_0, x = var_20936_cast_fp16)[name = tensor("op_21066_cast_fp16")]; tensor var_21073_begin_0 = const()[name = tensor("op_21073_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21073_end_0 = const()[name = tensor("op_21073_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21073_end_mask_0 = const()[name = tensor("op_21073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21073_cast_fp16 = slice_by_index(begin = var_21073_begin_0, end = var_21073_end_0, end_mask = var_21073_end_mask_0, x = var_20940_cast_fp16)[name = tensor("op_21073_cast_fp16")]; tensor var_21080_begin_0 = const()[name = tensor("op_21080_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21080_end_0 = const()[name = tensor("op_21080_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21080_end_mask_0 = const()[name = tensor("op_21080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21080_cast_fp16 = slice_by_index(begin = var_21080_begin_0, end = var_21080_end_0, end_mask = var_21080_end_mask_0, x = var_20940_cast_fp16)[name = tensor("op_21080_cast_fp16")]; tensor var_21087_begin_0 = const()[name = tensor("op_21087_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21087_end_0 = const()[name = tensor("op_21087_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21087_end_mask_0 = const()[name = tensor("op_21087_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21087_cast_fp16 = slice_by_index(begin = var_21087_begin_0, end = var_21087_end_0, end_mask = var_21087_end_mask_0, x = var_20940_cast_fp16)[name = tensor("op_21087_cast_fp16")]; tensor var_21094_begin_0 = const()[name = tensor("op_21094_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21094_end_0 = const()[name = tensor("op_21094_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21094_end_mask_0 = const()[name = tensor("op_21094_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21094_cast_fp16 = slice_by_index(begin = var_21094_begin_0, end = var_21094_end_0, end_mask = var_21094_end_mask_0, x = var_20940_cast_fp16)[name = tensor("op_21094_cast_fp16")]; tensor var_21101_begin_0 = const()[name = tensor("op_21101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21101_end_0 = const()[name = tensor("op_21101_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21101_end_mask_0 = const()[name = tensor("op_21101_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21101_cast_fp16 = slice_by_index(begin = var_21101_begin_0, end = var_21101_end_0, end_mask = var_21101_end_mask_0, x = var_20944_cast_fp16)[name = tensor("op_21101_cast_fp16")]; tensor var_21108_begin_0 = const()[name = tensor("op_21108_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21108_end_0 = const()[name = tensor("op_21108_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21108_end_mask_0 = const()[name = tensor("op_21108_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21108_cast_fp16 = slice_by_index(begin = var_21108_begin_0, end = var_21108_end_0, end_mask = var_21108_end_mask_0, x = var_20944_cast_fp16)[name = tensor("op_21108_cast_fp16")]; tensor var_21115_begin_0 = const()[name = tensor("op_21115_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21115_end_0 = const()[name = tensor("op_21115_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21115_end_mask_0 = const()[name = tensor("op_21115_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21115_cast_fp16 = slice_by_index(begin = var_21115_begin_0, end = var_21115_end_0, end_mask = var_21115_end_mask_0, x = var_20944_cast_fp16)[name = tensor("op_21115_cast_fp16")]; tensor var_21122_begin_0 = const()[name = tensor("op_21122_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21122_end_0 = const()[name = tensor("op_21122_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21122_end_mask_0 = const()[name = tensor("op_21122_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21122_cast_fp16 = slice_by_index(begin = var_21122_begin_0, end = var_21122_end_0, end_mask = var_21122_end_mask_0, x = var_20944_cast_fp16)[name = tensor("op_21122_cast_fp16")]; tensor var_21129_begin_0 = const()[name = tensor("op_21129_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21129_end_0 = const()[name = tensor("op_21129_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21129_end_mask_0 = const()[name = tensor("op_21129_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21129_cast_fp16 = slice_by_index(begin = var_21129_begin_0, end = var_21129_end_0, end_mask = var_21129_end_mask_0, x = var_20948_cast_fp16)[name = tensor("op_21129_cast_fp16")]; tensor var_21136_begin_0 = const()[name = tensor("op_21136_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21136_end_0 = const()[name = tensor("op_21136_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21136_end_mask_0 = const()[name = tensor("op_21136_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21136_cast_fp16 = slice_by_index(begin = var_21136_begin_0, end = var_21136_end_0, end_mask = var_21136_end_mask_0, x = var_20948_cast_fp16)[name = tensor("op_21136_cast_fp16")]; tensor var_21143_begin_0 = const()[name = tensor("op_21143_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21143_end_0 = const()[name = tensor("op_21143_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21143_end_mask_0 = const()[name = tensor("op_21143_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21143_cast_fp16 = slice_by_index(begin = var_21143_begin_0, end = var_21143_end_0, end_mask = var_21143_end_mask_0, x = var_20948_cast_fp16)[name = tensor("op_21143_cast_fp16")]; tensor var_21150_begin_0 = const()[name = tensor("op_21150_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21150_end_0 = const()[name = tensor("op_21150_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21150_end_mask_0 = const()[name = tensor("op_21150_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21150_cast_fp16 = slice_by_index(begin = var_21150_begin_0, end = var_21150_end_0, end_mask = var_21150_end_mask_0, x = var_20948_cast_fp16)[name = tensor("op_21150_cast_fp16")]; tensor var_21157_begin_0 = const()[name = tensor("op_21157_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21157_end_0 = const()[name = tensor("op_21157_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21157_end_mask_0 = const()[name = tensor("op_21157_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21157_cast_fp16 = slice_by_index(begin = var_21157_begin_0, end = var_21157_end_0, end_mask = var_21157_end_mask_0, x = var_20952_cast_fp16)[name = tensor("op_21157_cast_fp16")]; tensor var_21164_begin_0 = const()[name = tensor("op_21164_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21164_end_0 = const()[name = tensor("op_21164_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21164_end_mask_0 = const()[name = tensor("op_21164_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21164_cast_fp16 = slice_by_index(begin = var_21164_begin_0, end = var_21164_end_0, end_mask = var_21164_end_mask_0, x = var_20952_cast_fp16)[name = tensor("op_21164_cast_fp16")]; tensor var_21171_begin_0 = const()[name = tensor("op_21171_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21171_end_0 = const()[name = tensor("op_21171_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21171_end_mask_0 = const()[name = tensor("op_21171_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21171_cast_fp16 = slice_by_index(begin = var_21171_begin_0, end = var_21171_end_0, end_mask = var_21171_end_mask_0, x = var_20952_cast_fp16)[name = tensor("op_21171_cast_fp16")]; tensor var_21178_begin_0 = const()[name = tensor("op_21178_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21178_end_0 = const()[name = tensor("op_21178_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21178_end_mask_0 = const()[name = tensor("op_21178_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21178_cast_fp16 = slice_by_index(begin = var_21178_begin_0, end = var_21178_end_0, end_mask = var_21178_end_mask_0, x = var_20952_cast_fp16)[name = tensor("op_21178_cast_fp16")]; tensor var_21185_begin_0 = const()[name = tensor("op_21185_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21185_end_0 = const()[name = tensor("op_21185_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21185_end_mask_0 = const()[name = tensor("op_21185_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21185_cast_fp16 = slice_by_index(begin = var_21185_begin_0, end = var_21185_end_0, end_mask = var_21185_end_mask_0, x = var_20956_cast_fp16)[name = tensor("op_21185_cast_fp16")]; tensor var_21192_begin_0 = const()[name = tensor("op_21192_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21192_end_0 = const()[name = tensor("op_21192_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21192_end_mask_0 = const()[name = tensor("op_21192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21192_cast_fp16 = slice_by_index(begin = var_21192_begin_0, end = var_21192_end_0, end_mask = var_21192_end_mask_0, x = var_20956_cast_fp16)[name = tensor("op_21192_cast_fp16")]; tensor var_21199_begin_0 = const()[name = tensor("op_21199_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21199_end_0 = const()[name = tensor("op_21199_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21199_end_mask_0 = const()[name = tensor("op_21199_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21199_cast_fp16 = slice_by_index(begin = var_21199_begin_0, end = var_21199_end_0, end_mask = var_21199_end_mask_0, x = var_20956_cast_fp16)[name = tensor("op_21199_cast_fp16")]; tensor var_21206_begin_0 = const()[name = tensor("op_21206_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21206_end_0 = const()[name = tensor("op_21206_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21206_end_mask_0 = const()[name = tensor("op_21206_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21206_cast_fp16 = slice_by_index(begin = var_21206_begin_0, end = var_21206_end_0, end_mask = var_21206_end_mask_0, x = var_20956_cast_fp16)[name = tensor("op_21206_cast_fp16")]; tensor var_21213_begin_0 = const()[name = tensor("op_21213_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21213_end_0 = const()[name = tensor("op_21213_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21213_end_mask_0 = const()[name = tensor("op_21213_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21213_cast_fp16 = slice_by_index(begin = var_21213_begin_0, end = var_21213_end_0, end_mask = var_21213_end_mask_0, x = var_20960_cast_fp16)[name = tensor("op_21213_cast_fp16")]; tensor var_21220_begin_0 = const()[name = tensor("op_21220_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21220_end_0 = const()[name = tensor("op_21220_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21220_end_mask_0 = const()[name = tensor("op_21220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21220_cast_fp16 = slice_by_index(begin = var_21220_begin_0, end = var_21220_end_0, end_mask = var_21220_end_mask_0, x = var_20960_cast_fp16)[name = tensor("op_21220_cast_fp16")]; tensor var_21227_begin_0 = const()[name = tensor("op_21227_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21227_end_0 = const()[name = tensor("op_21227_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21227_end_mask_0 = const()[name = tensor("op_21227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21227_cast_fp16 = slice_by_index(begin = var_21227_begin_0, end = var_21227_end_0, end_mask = var_21227_end_mask_0, x = var_20960_cast_fp16)[name = tensor("op_21227_cast_fp16")]; tensor var_21234_begin_0 = const()[name = tensor("op_21234_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21234_end_0 = const()[name = tensor("op_21234_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21234_end_mask_0 = const()[name = tensor("op_21234_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21234_cast_fp16 = slice_by_index(begin = var_21234_begin_0, end = var_21234_end_0, end_mask = var_21234_end_mask_0, x = var_20960_cast_fp16)[name = tensor("op_21234_cast_fp16")]; tensor var_21241_begin_0 = const()[name = tensor("op_21241_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21241_end_0 = const()[name = tensor("op_21241_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21241_end_mask_0 = const()[name = tensor("op_21241_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21241_cast_fp16 = slice_by_index(begin = var_21241_begin_0, end = var_21241_end_0, end_mask = var_21241_end_mask_0, x = var_20964_cast_fp16)[name = tensor("op_21241_cast_fp16")]; tensor var_21248_begin_0 = const()[name = tensor("op_21248_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21248_end_0 = const()[name = tensor("op_21248_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21248_end_mask_0 = const()[name = tensor("op_21248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21248_cast_fp16 = slice_by_index(begin = var_21248_begin_0, end = var_21248_end_0, end_mask = var_21248_end_mask_0, x = var_20964_cast_fp16)[name = tensor("op_21248_cast_fp16")]; tensor var_21255_begin_0 = const()[name = tensor("op_21255_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21255_end_0 = const()[name = tensor("op_21255_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21255_end_mask_0 = const()[name = tensor("op_21255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21255_cast_fp16 = slice_by_index(begin = var_21255_begin_0, end = var_21255_end_0, end_mask = var_21255_end_mask_0, x = var_20964_cast_fp16)[name = tensor("op_21255_cast_fp16")]; tensor var_21262_begin_0 = const()[name = tensor("op_21262_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21262_end_0 = const()[name = tensor("op_21262_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21262_end_mask_0 = const()[name = tensor("op_21262_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21262_cast_fp16 = slice_by_index(begin = var_21262_begin_0, end = var_21262_end_0, end_mask = var_21262_end_mask_0, x = var_20964_cast_fp16)[name = tensor("op_21262_cast_fp16")]; tensor var_21269_begin_0 = const()[name = tensor("op_21269_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21269_end_0 = const()[name = tensor("op_21269_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21269_end_mask_0 = const()[name = tensor("op_21269_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21269_cast_fp16 = slice_by_index(begin = var_21269_begin_0, end = var_21269_end_0, end_mask = var_21269_end_mask_0, x = var_20968_cast_fp16)[name = tensor("op_21269_cast_fp16")]; tensor var_21276_begin_0 = const()[name = tensor("op_21276_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21276_end_0 = const()[name = tensor("op_21276_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21276_end_mask_0 = const()[name = tensor("op_21276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21276_cast_fp16 = slice_by_index(begin = var_21276_begin_0, end = var_21276_end_0, end_mask = var_21276_end_mask_0, x = var_20968_cast_fp16)[name = tensor("op_21276_cast_fp16")]; tensor var_21283_begin_0 = const()[name = tensor("op_21283_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21283_end_0 = const()[name = tensor("op_21283_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21283_end_mask_0 = const()[name = tensor("op_21283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21283_cast_fp16 = slice_by_index(begin = var_21283_begin_0, end = var_21283_end_0, end_mask = var_21283_end_mask_0, x = var_20968_cast_fp16)[name = tensor("op_21283_cast_fp16")]; tensor var_21290_begin_0 = const()[name = tensor("op_21290_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21290_end_0 = const()[name = tensor("op_21290_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21290_end_mask_0 = const()[name = tensor("op_21290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21290_cast_fp16 = slice_by_index(begin = var_21290_begin_0, end = var_21290_end_0, end_mask = var_21290_end_mask_0, x = var_20968_cast_fp16)[name = tensor("op_21290_cast_fp16")]; tensor var_21297_begin_0 = const()[name = tensor("op_21297_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21297_end_0 = const()[name = tensor("op_21297_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21297_end_mask_0 = const()[name = tensor("op_21297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21297_cast_fp16 = slice_by_index(begin = var_21297_begin_0, end = var_21297_end_0, end_mask = var_21297_end_mask_0, x = var_20972_cast_fp16)[name = tensor("op_21297_cast_fp16")]; tensor var_21304_begin_0 = const()[name = tensor("op_21304_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21304_end_0 = const()[name = tensor("op_21304_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21304_end_mask_0 = const()[name = tensor("op_21304_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21304_cast_fp16 = slice_by_index(begin = var_21304_begin_0, end = var_21304_end_0, end_mask = var_21304_end_mask_0, x = var_20972_cast_fp16)[name = tensor("op_21304_cast_fp16")]; tensor var_21311_begin_0 = const()[name = tensor("op_21311_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21311_end_0 = const()[name = tensor("op_21311_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21311_end_mask_0 = const()[name = tensor("op_21311_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21311_cast_fp16 = slice_by_index(begin = var_21311_begin_0, end = var_21311_end_0, end_mask = var_21311_end_mask_0, x = var_20972_cast_fp16)[name = tensor("op_21311_cast_fp16")]; tensor var_21318_begin_0 = const()[name = tensor("op_21318_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21318_end_0 = const()[name = tensor("op_21318_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21318_end_mask_0 = const()[name = tensor("op_21318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21318_cast_fp16 = slice_by_index(begin = var_21318_begin_0, end = var_21318_end_0, end_mask = var_21318_end_mask_0, x = var_20972_cast_fp16)[name = tensor("op_21318_cast_fp16")]; tensor var_21325_begin_0 = const()[name = tensor("op_21325_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21325_end_0 = const()[name = tensor("op_21325_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21325_end_mask_0 = const()[name = tensor("op_21325_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21325_cast_fp16 = slice_by_index(begin = var_21325_begin_0, end = var_21325_end_0, end_mask = var_21325_end_mask_0, x = var_20976_cast_fp16)[name = tensor("op_21325_cast_fp16")]; tensor var_21332_begin_0 = const()[name = tensor("op_21332_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21332_end_0 = const()[name = tensor("op_21332_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21332_end_mask_0 = const()[name = tensor("op_21332_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21332_cast_fp16 = slice_by_index(begin = var_21332_begin_0, end = var_21332_end_0, end_mask = var_21332_end_mask_0, x = var_20976_cast_fp16)[name = tensor("op_21332_cast_fp16")]; tensor var_21339_begin_0 = const()[name = tensor("op_21339_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21339_end_0 = const()[name = tensor("op_21339_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21339_end_mask_0 = const()[name = tensor("op_21339_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21339_cast_fp16 = slice_by_index(begin = var_21339_begin_0, end = var_21339_end_0, end_mask = var_21339_end_mask_0, x = var_20976_cast_fp16)[name = tensor("op_21339_cast_fp16")]; tensor var_21346_begin_0 = const()[name = tensor("op_21346_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21346_end_0 = const()[name = tensor("op_21346_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21346_end_mask_0 = const()[name = tensor("op_21346_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21346_cast_fp16 = slice_by_index(begin = var_21346_begin_0, end = var_21346_end_0, end_mask = var_21346_end_mask_0, x = var_20976_cast_fp16)[name = tensor("op_21346_cast_fp16")]; tensor var_21353_begin_0 = const()[name = tensor("op_21353_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21353_end_0 = const()[name = tensor("op_21353_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21353_end_mask_0 = const()[name = tensor("op_21353_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21353_cast_fp16 = slice_by_index(begin = var_21353_begin_0, end = var_21353_end_0, end_mask = var_21353_end_mask_0, x = var_20980_cast_fp16)[name = tensor("op_21353_cast_fp16")]; tensor var_21360_begin_0 = const()[name = tensor("op_21360_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21360_end_0 = const()[name = tensor("op_21360_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21360_end_mask_0 = const()[name = tensor("op_21360_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21360_cast_fp16 = slice_by_index(begin = var_21360_begin_0, end = var_21360_end_0, end_mask = var_21360_end_mask_0, x = var_20980_cast_fp16)[name = tensor("op_21360_cast_fp16")]; tensor var_21367_begin_0 = const()[name = tensor("op_21367_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21367_end_0 = const()[name = tensor("op_21367_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21367_end_mask_0 = const()[name = tensor("op_21367_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21367_cast_fp16 = slice_by_index(begin = var_21367_begin_0, end = var_21367_end_0, end_mask = var_21367_end_mask_0, x = var_20980_cast_fp16)[name = tensor("op_21367_cast_fp16")]; tensor var_21374_begin_0 = const()[name = tensor("op_21374_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21374_end_0 = const()[name = tensor("op_21374_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21374_end_mask_0 = const()[name = tensor("op_21374_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21374_cast_fp16 = slice_by_index(begin = var_21374_begin_0, end = var_21374_end_0, end_mask = var_21374_end_mask_0, x = var_20980_cast_fp16)[name = tensor("op_21374_cast_fp16")]; tensor var_21381_begin_0 = const()[name = tensor("op_21381_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21381_end_0 = const()[name = tensor("op_21381_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21381_end_mask_0 = const()[name = tensor("op_21381_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21381_cast_fp16 = slice_by_index(begin = var_21381_begin_0, end = var_21381_end_0, end_mask = var_21381_end_mask_0, x = var_20984_cast_fp16)[name = tensor("op_21381_cast_fp16")]; tensor var_21388_begin_0 = const()[name = tensor("op_21388_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21388_end_0 = const()[name = tensor("op_21388_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21388_end_mask_0 = const()[name = tensor("op_21388_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21388_cast_fp16 = slice_by_index(begin = var_21388_begin_0, end = var_21388_end_0, end_mask = var_21388_end_mask_0, x = var_20984_cast_fp16)[name = tensor("op_21388_cast_fp16")]; tensor var_21395_begin_0 = const()[name = tensor("op_21395_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21395_end_0 = const()[name = tensor("op_21395_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21395_end_mask_0 = const()[name = tensor("op_21395_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21395_cast_fp16 = slice_by_index(begin = var_21395_begin_0, end = var_21395_end_0, end_mask = var_21395_end_mask_0, x = var_20984_cast_fp16)[name = tensor("op_21395_cast_fp16")]; tensor var_21402_begin_0 = const()[name = tensor("op_21402_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21402_end_0 = const()[name = tensor("op_21402_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21402_end_mask_0 = const()[name = tensor("op_21402_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21402_cast_fp16 = slice_by_index(begin = var_21402_begin_0, end = var_21402_end_0, end_mask = var_21402_end_mask_0, x = var_20984_cast_fp16)[name = tensor("op_21402_cast_fp16")]; tensor var_21409_begin_0 = const()[name = tensor("op_21409_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21409_end_0 = const()[name = tensor("op_21409_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21409_end_mask_0 = const()[name = tensor("op_21409_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21409_cast_fp16 = slice_by_index(begin = var_21409_begin_0, end = var_21409_end_0, end_mask = var_21409_end_mask_0, x = var_20988_cast_fp16)[name = tensor("op_21409_cast_fp16")]; tensor var_21416_begin_0 = const()[name = tensor("op_21416_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21416_end_0 = const()[name = tensor("op_21416_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21416_end_mask_0 = const()[name = tensor("op_21416_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21416_cast_fp16 = slice_by_index(begin = var_21416_begin_0, end = var_21416_end_0, end_mask = var_21416_end_mask_0, x = var_20988_cast_fp16)[name = tensor("op_21416_cast_fp16")]; tensor var_21423_begin_0 = const()[name = tensor("op_21423_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21423_end_0 = const()[name = tensor("op_21423_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21423_end_mask_0 = const()[name = tensor("op_21423_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21423_cast_fp16 = slice_by_index(begin = var_21423_begin_0, end = var_21423_end_0, end_mask = var_21423_end_mask_0, x = var_20988_cast_fp16)[name = tensor("op_21423_cast_fp16")]; tensor var_21430_begin_0 = const()[name = tensor("op_21430_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21430_end_0 = const()[name = tensor("op_21430_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21430_end_mask_0 = const()[name = tensor("op_21430_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21430_cast_fp16 = slice_by_index(begin = var_21430_begin_0, end = var_21430_end_0, end_mask = var_21430_end_mask_0, x = var_20988_cast_fp16)[name = tensor("op_21430_cast_fp16")]; tensor var_21437_begin_0 = const()[name = tensor("op_21437_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21437_end_0 = const()[name = tensor("op_21437_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21437_end_mask_0 = const()[name = tensor("op_21437_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21437_cast_fp16 = slice_by_index(begin = var_21437_begin_0, end = var_21437_end_0, end_mask = var_21437_end_mask_0, x = var_20992_cast_fp16)[name = tensor("op_21437_cast_fp16")]; tensor var_21444_begin_0 = const()[name = tensor("op_21444_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21444_end_0 = const()[name = tensor("op_21444_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21444_end_mask_0 = const()[name = tensor("op_21444_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21444_cast_fp16 = slice_by_index(begin = var_21444_begin_0, end = var_21444_end_0, end_mask = var_21444_end_mask_0, x = var_20992_cast_fp16)[name = tensor("op_21444_cast_fp16")]; tensor var_21451_begin_0 = const()[name = tensor("op_21451_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21451_end_0 = const()[name = tensor("op_21451_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21451_end_mask_0 = const()[name = tensor("op_21451_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21451_cast_fp16 = slice_by_index(begin = var_21451_begin_0, end = var_21451_end_0, end_mask = var_21451_end_mask_0, x = var_20992_cast_fp16)[name = tensor("op_21451_cast_fp16")]; tensor var_21458_begin_0 = const()[name = tensor("op_21458_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21458_end_0 = const()[name = tensor("op_21458_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21458_end_mask_0 = const()[name = tensor("op_21458_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21458_cast_fp16 = slice_by_index(begin = var_21458_begin_0, end = var_21458_end_0, end_mask = var_21458_end_mask_0, x = var_20992_cast_fp16)[name = tensor("op_21458_cast_fp16")]; tensor var_21465_begin_0 = const()[name = tensor("op_21465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21465_end_0 = const()[name = tensor("op_21465_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21465_end_mask_0 = const()[name = tensor("op_21465_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21465_cast_fp16 = slice_by_index(begin = var_21465_begin_0, end = var_21465_end_0, end_mask = var_21465_end_mask_0, x = var_20996_cast_fp16)[name = tensor("op_21465_cast_fp16")]; tensor var_21472_begin_0 = const()[name = tensor("op_21472_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21472_end_0 = const()[name = tensor("op_21472_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21472_end_mask_0 = const()[name = tensor("op_21472_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21472_cast_fp16 = slice_by_index(begin = var_21472_begin_0, end = var_21472_end_0, end_mask = var_21472_end_mask_0, x = var_20996_cast_fp16)[name = tensor("op_21472_cast_fp16")]; tensor var_21479_begin_0 = const()[name = tensor("op_21479_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21479_end_0 = const()[name = tensor("op_21479_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21479_end_mask_0 = const()[name = tensor("op_21479_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21479_cast_fp16 = slice_by_index(begin = var_21479_begin_0, end = var_21479_end_0, end_mask = var_21479_end_mask_0, x = var_20996_cast_fp16)[name = tensor("op_21479_cast_fp16")]; tensor var_21486_begin_0 = const()[name = tensor("op_21486_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21486_end_0 = const()[name = tensor("op_21486_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21486_end_mask_0 = const()[name = tensor("op_21486_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21486_cast_fp16 = slice_by_index(begin = var_21486_begin_0, end = var_21486_end_0, end_mask = var_21486_end_mask_0, x = var_20996_cast_fp16)[name = tensor("op_21486_cast_fp16")]; tensor var_21493_begin_0 = const()[name = tensor("op_21493_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21493_end_0 = const()[name = tensor("op_21493_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21493_end_mask_0 = const()[name = tensor("op_21493_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21493_cast_fp16 = slice_by_index(begin = var_21493_begin_0, end = var_21493_end_0, end_mask = var_21493_end_mask_0, x = var_21000_cast_fp16)[name = tensor("op_21493_cast_fp16")]; tensor var_21500_begin_0 = const()[name = tensor("op_21500_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21500_end_0 = const()[name = tensor("op_21500_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21500_end_mask_0 = const()[name = tensor("op_21500_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21500_cast_fp16 = slice_by_index(begin = var_21500_begin_0, end = var_21500_end_0, end_mask = var_21500_end_mask_0, x = var_21000_cast_fp16)[name = tensor("op_21500_cast_fp16")]; tensor var_21507_begin_0 = const()[name = tensor("op_21507_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21507_end_0 = const()[name = tensor("op_21507_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21507_end_mask_0 = const()[name = tensor("op_21507_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21507_cast_fp16 = slice_by_index(begin = var_21507_begin_0, end = var_21507_end_0, end_mask = var_21507_end_mask_0, x = var_21000_cast_fp16)[name = tensor("op_21507_cast_fp16")]; tensor var_21514_begin_0 = const()[name = tensor("op_21514_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21514_end_0 = const()[name = tensor("op_21514_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21514_end_mask_0 = const()[name = tensor("op_21514_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21514_cast_fp16 = slice_by_index(begin = var_21514_begin_0, end = var_21514_end_0, end_mask = var_21514_end_mask_0, x = var_21000_cast_fp16)[name = tensor("op_21514_cast_fp16")]; tensor var_21521_begin_0 = const()[name = tensor("op_21521_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21521_end_0 = const()[name = tensor("op_21521_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21521_end_mask_0 = const()[name = tensor("op_21521_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21521_cast_fp16 = slice_by_index(begin = var_21521_begin_0, end = var_21521_end_0, end_mask = var_21521_end_mask_0, x = var_21004_cast_fp16)[name = tensor("op_21521_cast_fp16")]; tensor var_21528_begin_0 = const()[name = tensor("op_21528_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21528_end_0 = const()[name = tensor("op_21528_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21528_end_mask_0 = const()[name = tensor("op_21528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21528_cast_fp16 = slice_by_index(begin = var_21528_begin_0, end = var_21528_end_0, end_mask = var_21528_end_mask_0, x = var_21004_cast_fp16)[name = tensor("op_21528_cast_fp16")]; tensor var_21535_begin_0 = const()[name = tensor("op_21535_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21535_end_0 = const()[name = tensor("op_21535_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21535_end_mask_0 = const()[name = tensor("op_21535_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21535_cast_fp16 = slice_by_index(begin = var_21535_begin_0, end = var_21535_end_0, end_mask = var_21535_end_mask_0, x = var_21004_cast_fp16)[name = tensor("op_21535_cast_fp16")]; tensor var_21542_begin_0 = const()[name = tensor("op_21542_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21542_end_0 = const()[name = tensor("op_21542_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21542_end_mask_0 = const()[name = tensor("op_21542_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21542_cast_fp16 = slice_by_index(begin = var_21542_begin_0, end = var_21542_end_0, end_mask = var_21542_end_mask_0, x = var_21004_cast_fp16)[name = tensor("op_21542_cast_fp16")]; tensor var_21549_begin_0 = const()[name = tensor("op_21549_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21549_end_0 = const()[name = tensor("op_21549_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_21549_end_mask_0 = const()[name = tensor("op_21549_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21549_cast_fp16 = slice_by_index(begin = var_21549_begin_0, end = var_21549_end_0, end_mask = var_21549_end_mask_0, x = var_21008_cast_fp16)[name = tensor("op_21549_cast_fp16")]; tensor var_21556_begin_0 = const()[name = tensor("op_21556_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_21556_end_0 = const()[name = tensor("op_21556_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_21556_end_mask_0 = const()[name = tensor("op_21556_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21556_cast_fp16 = slice_by_index(begin = var_21556_begin_0, end = var_21556_end_0, end_mask = var_21556_end_mask_0, x = var_21008_cast_fp16)[name = tensor("op_21556_cast_fp16")]; tensor var_21563_begin_0 = const()[name = tensor("op_21563_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_21563_end_0 = const()[name = tensor("op_21563_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_21563_end_mask_0 = const()[name = tensor("op_21563_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21563_cast_fp16 = slice_by_index(begin = var_21563_begin_0, end = var_21563_end_0, end_mask = var_21563_end_mask_0, x = var_21008_cast_fp16)[name = tensor("op_21563_cast_fp16")]; tensor var_21570_begin_0 = const()[name = tensor("op_21570_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_21570_end_0 = const()[name = tensor("op_21570_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21570_end_mask_0 = const()[name = tensor("op_21570_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21570_cast_fp16 = slice_by_index(begin = var_21570_begin_0, end = var_21570_end_0, end_mask = var_21570_end_mask_0, x = var_21008_cast_fp16)[name = tensor("op_21570_cast_fp16")]; tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_21575_begin_0 = const()[name = tensor("op_21575_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21575_end_0 = const()[name = tensor("op_21575_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_21575_end_mask_0 = const()[name = tensor("op_21575_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = key_27_cast_fp16)[name = tensor("transpose_18")]; tensor var_21575_cast_fp16 = slice_by_index(begin = var_21575_begin_0, end = var_21575_end_0, end_mask = var_21575_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21575_cast_fp16")]; tensor var_21579_begin_0 = const()[name = tensor("op_21579_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_21579_end_0 = const()[name = tensor("op_21579_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_21579_end_mask_0 = const()[name = tensor("op_21579_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21579_cast_fp16 = slice_by_index(begin = var_21579_begin_0, end = var_21579_end_0, end_mask = var_21579_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21579_cast_fp16")]; tensor var_21583_begin_0 = const()[name = tensor("op_21583_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_21583_end_0 = const()[name = tensor("op_21583_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_21583_end_mask_0 = const()[name = tensor("op_21583_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21583_cast_fp16 = slice_by_index(begin = var_21583_begin_0, end = var_21583_end_0, end_mask = var_21583_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21583_cast_fp16")]; tensor var_21587_begin_0 = const()[name = tensor("op_21587_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_21587_end_0 = const()[name = tensor("op_21587_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_21587_end_mask_0 = const()[name = tensor("op_21587_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21587_cast_fp16 = slice_by_index(begin = var_21587_begin_0, end = var_21587_end_0, end_mask = var_21587_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21587_cast_fp16")]; tensor var_21591_begin_0 = const()[name = tensor("op_21591_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_21591_end_0 = const()[name = tensor("op_21591_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_21591_end_mask_0 = const()[name = tensor("op_21591_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21591_cast_fp16 = slice_by_index(begin = var_21591_begin_0, end = var_21591_end_0, end_mask = var_21591_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21591_cast_fp16")]; tensor var_21595_begin_0 = const()[name = tensor("op_21595_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_21595_end_0 = const()[name = tensor("op_21595_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_21595_end_mask_0 = const()[name = tensor("op_21595_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21595_cast_fp16 = slice_by_index(begin = var_21595_begin_0, end = var_21595_end_0, end_mask = var_21595_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21595_cast_fp16")]; tensor var_21599_begin_0 = const()[name = tensor("op_21599_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_21599_end_0 = const()[name = tensor("op_21599_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_21599_end_mask_0 = const()[name = tensor("op_21599_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21599_cast_fp16 = slice_by_index(begin = var_21599_begin_0, end = var_21599_end_0, end_mask = var_21599_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21599_cast_fp16")]; tensor var_21603_begin_0 = const()[name = tensor("op_21603_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_21603_end_0 = const()[name = tensor("op_21603_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_21603_end_mask_0 = const()[name = tensor("op_21603_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21603_cast_fp16 = slice_by_index(begin = var_21603_begin_0, end = var_21603_end_0, end_mask = var_21603_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21603_cast_fp16")]; tensor var_21607_begin_0 = const()[name = tensor("op_21607_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_21607_end_0 = const()[name = tensor("op_21607_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_21607_end_mask_0 = const()[name = tensor("op_21607_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21607_cast_fp16 = slice_by_index(begin = var_21607_begin_0, end = var_21607_end_0, end_mask = var_21607_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21607_cast_fp16")]; tensor var_21611_begin_0 = const()[name = tensor("op_21611_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_21611_end_0 = const()[name = tensor("op_21611_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_21611_end_mask_0 = const()[name = tensor("op_21611_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21611_cast_fp16 = slice_by_index(begin = var_21611_begin_0, end = var_21611_end_0, end_mask = var_21611_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21611_cast_fp16")]; tensor var_21615_begin_0 = const()[name = tensor("op_21615_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_21615_end_0 = const()[name = tensor("op_21615_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_21615_end_mask_0 = const()[name = tensor("op_21615_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21615_cast_fp16 = slice_by_index(begin = var_21615_begin_0, end = var_21615_end_0, end_mask = var_21615_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21615_cast_fp16")]; tensor var_21619_begin_0 = const()[name = tensor("op_21619_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_21619_end_0 = const()[name = tensor("op_21619_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_21619_end_mask_0 = const()[name = tensor("op_21619_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21619_cast_fp16 = slice_by_index(begin = var_21619_begin_0, end = var_21619_end_0, end_mask = var_21619_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21619_cast_fp16")]; tensor var_21623_begin_0 = const()[name = tensor("op_21623_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_21623_end_0 = const()[name = tensor("op_21623_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_21623_end_mask_0 = const()[name = tensor("op_21623_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21623_cast_fp16 = slice_by_index(begin = var_21623_begin_0, end = var_21623_end_0, end_mask = var_21623_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21623_cast_fp16")]; tensor var_21627_begin_0 = const()[name = tensor("op_21627_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_21627_end_0 = const()[name = tensor("op_21627_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_21627_end_mask_0 = const()[name = tensor("op_21627_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21627_cast_fp16 = slice_by_index(begin = var_21627_begin_0, end = var_21627_end_0, end_mask = var_21627_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21627_cast_fp16")]; tensor var_21631_begin_0 = const()[name = tensor("op_21631_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_21631_end_0 = const()[name = tensor("op_21631_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_21631_end_mask_0 = const()[name = tensor("op_21631_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21631_cast_fp16 = slice_by_index(begin = var_21631_begin_0, end = var_21631_end_0, end_mask = var_21631_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21631_cast_fp16")]; tensor var_21635_begin_0 = const()[name = tensor("op_21635_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_21635_end_0 = const()[name = tensor("op_21635_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_21635_end_mask_0 = const()[name = tensor("op_21635_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21635_cast_fp16 = slice_by_index(begin = var_21635_begin_0, end = var_21635_end_0, end_mask = var_21635_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21635_cast_fp16")]; tensor var_21639_begin_0 = const()[name = tensor("op_21639_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_21639_end_0 = const()[name = tensor("op_21639_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_21639_end_mask_0 = const()[name = tensor("op_21639_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21639_cast_fp16 = slice_by_index(begin = var_21639_begin_0, end = var_21639_end_0, end_mask = var_21639_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21639_cast_fp16")]; tensor var_21643_begin_0 = const()[name = tensor("op_21643_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_21643_end_0 = const()[name = tensor("op_21643_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_21643_end_mask_0 = const()[name = tensor("op_21643_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21643_cast_fp16 = slice_by_index(begin = var_21643_begin_0, end = var_21643_end_0, end_mask = var_21643_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21643_cast_fp16")]; tensor var_21647_begin_0 = const()[name = tensor("op_21647_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_21647_end_0 = const()[name = tensor("op_21647_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_21647_end_mask_0 = const()[name = tensor("op_21647_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21647_cast_fp16 = slice_by_index(begin = var_21647_begin_0, end = var_21647_end_0, end_mask = var_21647_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21647_cast_fp16")]; tensor var_21651_begin_0 = const()[name = tensor("op_21651_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_21651_end_0 = const()[name = tensor("op_21651_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_21651_end_mask_0 = const()[name = tensor("op_21651_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_21651_cast_fp16 = slice_by_index(begin = var_21651_begin_0, end = var_21651_end_0, end_mask = var_21651_end_mask_0, x = k_27_cast_fp16)[name = tensor("op_21651_cast_fp16")]; tensor var_21653_begin_0 = const()[name = tensor("op_21653_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_21653_end_0 = const()[name = tensor("op_21653_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_21653_end_mask_0 = const()[name = tensor("op_21653_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21653_cast_fp16 = slice_by_index(begin = var_21653_begin_0, end = var_21653_end_0, end_mask = var_21653_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21653_cast_fp16")]; tensor var_21657_begin_0 = const()[name = tensor("op_21657_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_21657_end_0 = const()[name = tensor("op_21657_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_21657_end_mask_0 = const()[name = tensor("op_21657_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21657_cast_fp16 = slice_by_index(begin = var_21657_begin_0, end = var_21657_end_0, end_mask = var_21657_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21657_cast_fp16")]; tensor var_21661_begin_0 = const()[name = tensor("op_21661_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_21661_end_0 = const()[name = tensor("op_21661_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_21661_end_mask_0 = const()[name = tensor("op_21661_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21661_cast_fp16 = slice_by_index(begin = var_21661_begin_0, end = var_21661_end_0, end_mask = var_21661_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21661_cast_fp16")]; tensor var_21665_begin_0 = const()[name = tensor("op_21665_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_21665_end_0 = const()[name = tensor("op_21665_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_21665_end_mask_0 = const()[name = tensor("op_21665_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21665_cast_fp16 = slice_by_index(begin = var_21665_begin_0, end = var_21665_end_0, end_mask = var_21665_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21665_cast_fp16")]; tensor var_21669_begin_0 = const()[name = tensor("op_21669_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_21669_end_0 = const()[name = tensor("op_21669_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_21669_end_mask_0 = const()[name = tensor("op_21669_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21669_cast_fp16 = slice_by_index(begin = var_21669_begin_0, end = var_21669_end_0, end_mask = var_21669_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21669_cast_fp16")]; tensor var_21673_begin_0 = const()[name = tensor("op_21673_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_21673_end_0 = const()[name = tensor("op_21673_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_21673_end_mask_0 = const()[name = tensor("op_21673_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21673_cast_fp16 = slice_by_index(begin = var_21673_begin_0, end = var_21673_end_0, end_mask = var_21673_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21673_cast_fp16")]; tensor var_21677_begin_0 = const()[name = tensor("op_21677_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_21677_end_0 = const()[name = tensor("op_21677_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_21677_end_mask_0 = const()[name = tensor("op_21677_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21677_cast_fp16 = slice_by_index(begin = var_21677_begin_0, end = var_21677_end_0, end_mask = var_21677_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21677_cast_fp16")]; tensor var_21681_begin_0 = const()[name = tensor("op_21681_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_21681_end_0 = const()[name = tensor("op_21681_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_21681_end_mask_0 = const()[name = tensor("op_21681_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21681_cast_fp16 = slice_by_index(begin = var_21681_begin_0, end = var_21681_end_0, end_mask = var_21681_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21681_cast_fp16")]; tensor var_21685_begin_0 = const()[name = tensor("op_21685_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_21685_end_0 = const()[name = tensor("op_21685_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_21685_end_mask_0 = const()[name = tensor("op_21685_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21685_cast_fp16 = slice_by_index(begin = var_21685_begin_0, end = var_21685_end_0, end_mask = var_21685_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21685_cast_fp16")]; tensor var_21689_begin_0 = const()[name = tensor("op_21689_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_21689_end_0 = const()[name = tensor("op_21689_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_21689_end_mask_0 = const()[name = tensor("op_21689_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21689_cast_fp16 = slice_by_index(begin = var_21689_begin_0, end = var_21689_end_0, end_mask = var_21689_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21689_cast_fp16")]; tensor var_21693_begin_0 = const()[name = tensor("op_21693_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_21693_end_0 = const()[name = tensor("op_21693_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_21693_end_mask_0 = const()[name = tensor("op_21693_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21693_cast_fp16 = slice_by_index(begin = var_21693_begin_0, end = var_21693_end_0, end_mask = var_21693_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21693_cast_fp16")]; tensor var_21697_begin_0 = const()[name = tensor("op_21697_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_21697_end_0 = const()[name = tensor("op_21697_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_21697_end_mask_0 = const()[name = tensor("op_21697_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21697_cast_fp16 = slice_by_index(begin = var_21697_begin_0, end = var_21697_end_0, end_mask = var_21697_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21697_cast_fp16")]; tensor var_21701_begin_0 = const()[name = tensor("op_21701_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_21701_end_0 = const()[name = tensor("op_21701_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_21701_end_mask_0 = const()[name = tensor("op_21701_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21701_cast_fp16 = slice_by_index(begin = var_21701_begin_0, end = var_21701_end_0, end_mask = var_21701_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21701_cast_fp16")]; tensor var_21705_begin_0 = const()[name = tensor("op_21705_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_21705_end_0 = const()[name = tensor("op_21705_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_21705_end_mask_0 = const()[name = tensor("op_21705_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21705_cast_fp16 = slice_by_index(begin = var_21705_begin_0, end = var_21705_end_0, end_mask = var_21705_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21705_cast_fp16")]; tensor var_21709_begin_0 = const()[name = tensor("op_21709_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_21709_end_0 = const()[name = tensor("op_21709_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_21709_end_mask_0 = const()[name = tensor("op_21709_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21709_cast_fp16 = slice_by_index(begin = var_21709_begin_0, end = var_21709_end_0, end_mask = var_21709_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21709_cast_fp16")]; tensor var_21713_begin_0 = const()[name = tensor("op_21713_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_21713_end_0 = const()[name = tensor("op_21713_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_21713_end_mask_0 = const()[name = tensor("op_21713_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21713_cast_fp16 = slice_by_index(begin = var_21713_begin_0, end = var_21713_end_0, end_mask = var_21713_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21713_cast_fp16")]; tensor var_21717_begin_0 = const()[name = tensor("op_21717_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_21717_end_0 = const()[name = tensor("op_21717_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_21717_end_mask_0 = const()[name = tensor("op_21717_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21717_cast_fp16 = slice_by_index(begin = var_21717_begin_0, end = var_21717_end_0, end_mask = var_21717_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21717_cast_fp16")]; tensor var_21721_begin_0 = const()[name = tensor("op_21721_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_21721_end_0 = const()[name = tensor("op_21721_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_21721_end_mask_0 = const()[name = tensor("op_21721_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21721_cast_fp16 = slice_by_index(begin = var_21721_begin_0, end = var_21721_end_0, end_mask = var_21721_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21721_cast_fp16")]; tensor var_21725_begin_0 = const()[name = tensor("op_21725_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_21725_end_0 = const()[name = tensor("op_21725_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_21725_end_mask_0 = const()[name = tensor("op_21725_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21725_cast_fp16 = slice_by_index(begin = var_21725_begin_0, end = var_21725_end_0, end_mask = var_21725_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21725_cast_fp16")]; tensor var_21729_begin_0 = const()[name = tensor("op_21729_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_21729_end_0 = const()[name = tensor("op_21729_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_21729_end_mask_0 = const()[name = tensor("op_21729_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_21729_cast_fp16 = slice_by_index(begin = var_21729_begin_0, end = var_21729_end_0, end_mask = var_21729_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_21729_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2081_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2081_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2081_equation_0, values = (var_21575_cast_fp16, var_21017_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2081_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2083_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2083_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2083_equation_0, values = (var_21575_cast_fp16, var_21024_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2083_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2085_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2085_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2085_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2085_equation_0, values = (var_21575_cast_fp16, var_21031_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2085_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2087_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2087_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2087_equation_0, values = (var_21575_cast_fp16, var_21038_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2087_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2089_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2089_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2089_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2089_equation_0, values = (var_21579_cast_fp16, var_21045_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2089_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2091_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2091_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2091_equation_0, values = (var_21579_cast_fp16, var_21052_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2091_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2093_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2093_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2093_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2093_equation_0, values = (var_21579_cast_fp16, var_21059_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2093_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2095_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2095_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2095_equation_0, values = (var_21579_cast_fp16, var_21066_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2095_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2097_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2097_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2097_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2097_equation_0, values = (var_21583_cast_fp16, var_21073_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2097_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2099_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2099_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2099_equation_0, values = (var_21583_cast_fp16, var_21080_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2099_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2101_equation_0, values = (var_21583_cast_fp16, var_21087_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2103_equation_0, values = (var_21583_cast_fp16, var_21094_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2105_equation_0, values = (var_21587_cast_fp16, var_21101_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2107_equation_0, values = (var_21587_cast_fp16, var_21108_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2109_equation_0, values = (var_21587_cast_fp16, var_21115_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2111_equation_0, values = (var_21587_cast_fp16, var_21122_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2113_equation_0, values = (var_21591_cast_fp16, var_21129_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2115_equation_0, values = (var_21591_cast_fp16, var_21136_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2117_equation_0, values = (var_21591_cast_fp16, var_21143_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2119_equation_0, values = (var_21591_cast_fp16, var_21150_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2121_equation_0, values = (var_21595_cast_fp16, var_21157_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2123_equation_0, values = (var_21595_cast_fp16, var_21164_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2125_equation_0, values = (var_21595_cast_fp16, var_21171_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2127_equation_0, values = (var_21595_cast_fp16, var_21178_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2129_equation_0, values = (var_21599_cast_fp16, var_21185_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2131_equation_0, values = (var_21599_cast_fp16, var_21192_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2133_equation_0, values = (var_21599_cast_fp16, var_21199_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2135_equation_0, values = (var_21599_cast_fp16, var_21206_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2137_equation_0, values = (var_21603_cast_fp16, var_21213_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2139_equation_0, values = (var_21603_cast_fp16, var_21220_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2141_equation_0, values = (var_21603_cast_fp16, var_21227_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2143_equation_0, values = (var_21603_cast_fp16, var_21234_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2143_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2145_equation_0, values = (var_21607_cast_fp16, var_21241_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2147_equation_0, values = (var_21607_cast_fp16, var_21248_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2149_equation_0, values = (var_21607_cast_fp16, var_21255_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2151_equation_0, values = (var_21607_cast_fp16, var_21262_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2153_equation_0, values = (var_21611_cast_fp16, var_21269_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2155_equation_0, values = (var_21611_cast_fp16, var_21276_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2157_equation_0, values = (var_21611_cast_fp16, var_21283_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2159_equation_0, values = (var_21611_cast_fp16, var_21290_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2159_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2161_equation_0, values = (var_21615_cast_fp16, var_21297_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2163_equation_0, values = (var_21615_cast_fp16, var_21304_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2165_equation_0, values = (var_21615_cast_fp16, var_21311_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2167_equation_0, values = (var_21615_cast_fp16, var_21318_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2169_equation_0, values = (var_21619_cast_fp16, var_21325_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2171_equation_0, values = (var_21619_cast_fp16, var_21332_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2173_equation_0, values = (var_21619_cast_fp16, var_21339_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2175_equation_0, values = (var_21619_cast_fp16, var_21346_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2177_equation_0, values = (var_21623_cast_fp16, var_21353_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2179_equation_0, values = (var_21623_cast_fp16, var_21360_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2181_equation_0, values = (var_21623_cast_fp16, var_21367_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2183_equation_0, values = (var_21623_cast_fp16, var_21374_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2185_equation_0, values = (var_21627_cast_fp16, var_21381_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2187_equation_0, values = (var_21627_cast_fp16, var_21388_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2189_equation_0, values = (var_21627_cast_fp16, var_21395_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2191_equation_0, values = (var_21627_cast_fp16, var_21402_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2193_equation_0, values = (var_21631_cast_fp16, var_21409_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2195_equation_0, values = (var_21631_cast_fp16, var_21416_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2197_equation_0, values = (var_21631_cast_fp16, var_21423_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2199_equation_0, values = (var_21631_cast_fp16, var_21430_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2201_equation_0, values = (var_21635_cast_fp16, var_21437_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2203_equation_0, values = (var_21635_cast_fp16, var_21444_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2205_equation_0, values = (var_21635_cast_fp16, var_21451_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2207_equation_0, values = (var_21635_cast_fp16, var_21458_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2209_equation_0, values = (var_21639_cast_fp16, var_21465_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2211_equation_0, values = (var_21639_cast_fp16, var_21472_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2213_equation_0, values = (var_21639_cast_fp16, var_21479_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2215_equation_0, values = (var_21639_cast_fp16, var_21486_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2217_equation_0, values = (var_21643_cast_fp16, var_21493_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2219_equation_0, values = (var_21643_cast_fp16, var_21500_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2221_equation_0, values = (var_21643_cast_fp16, var_21507_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2223_equation_0, values = (var_21643_cast_fp16, var_21514_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2225_equation_0, values = (var_21647_cast_fp16, var_21521_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2227_equation_0, values = (var_21647_cast_fp16, var_21528_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2229_equation_0, values = (var_21647_cast_fp16, var_21535_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2231_equation_0, values = (var_21647_cast_fp16, var_21542_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2233_equation_0, values = (var_21651_cast_fp16, var_21549_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2235_equation_0, values = (var_21651_cast_fp16, var_21556_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2237_equation_0, values = (var_21651_cast_fp16, var_21563_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2239_equation_0, values = (var_21651_cast_fp16, var_21570_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2239_cast_fp16")]; tensor var_21892_to_fp16 = const()[name = tensor("op_21892_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2081_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2081_cast_fp16, y = var_21892_to_fp16)[name = tensor("aw_chunk_2081_cast_fp16")]; tensor var_21894_to_fp16 = const()[name = tensor("op_21894_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2083_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2083_cast_fp16, y = var_21894_to_fp16)[name = tensor("aw_chunk_2083_cast_fp16")]; tensor var_21896_to_fp16 = const()[name = tensor("op_21896_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2085_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2085_cast_fp16, y = var_21896_to_fp16)[name = tensor("aw_chunk_2085_cast_fp16")]; tensor var_21898_to_fp16 = const()[name = tensor("op_21898_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2087_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2087_cast_fp16, y = var_21898_to_fp16)[name = tensor("aw_chunk_2087_cast_fp16")]; tensor var_21900_to_fp16 = const()[name = tensor("op_21900_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2089_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2089_cast_fp16, y = var_21900_to_fp16)[name = tensor("aw_chunk_2089_cast_fp16")]; tensor var_21902_to_fp16 = const()[name = tensor("op_21902_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2091_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2091_cast_fp16, y = var_21902_to_fp16)[name = tensor("aw_chunk_2091_cast_fp16")]; tensor var_21904_to_fp16 = const()[name = tensor("op_21904_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2093_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2093_cast_fp16, y = var_21904_to_fp16)[name = tensor("aw_chunk_2093_cast_fp16")]; tensor var_21906_to_fp16 = const()[name = tensor("op_21906_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2095_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2095_cast_fp16, y = var_21906_to_fp16)[name = tensor("aw_chunk_2095_cast_fp16")]; tensor var_21908_to_fp16 = const()[name = tensor("op_21908_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2097_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2097_cast_fp16, y = var_21908_to_fp16)[name = tensor("aw_chunk_2097_cast_fp16")]; tensor var_21910_to_fp16 = const()[name = tensor("op_21910_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2099_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2099_cast_fp16, y = var_21910_to_fp16)[name = tensor("aw_chunk_2099_cast_fp16")]; tensor var_21912_to_fp16 = const()[name = tensor("op_21912_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2101_cast_fp16, y = var_21912_to_fp16)[name = tensor("aw_chunk_2101_cast_fp16")]; tensor var_21914_to_fp16 = const()[name = tensor("op_21914_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2103_cast_fp16, y = var_21914_to_fp16)[name = tensor("aw_chunk_2103_cast_fp16")]; tensor var_21916_to_fp16 = const()[name = tensor("op_21916_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2105_cast_fp16, y = var_21916_to_fp16)[name = tensor("aw_chunk_2105_cast_fp16")]; tensor var_21918_to_fp16 = const()[name = tensor("op_21918_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2107_cast_fp16, y = var_21918_to_fp16)[name = tensor("aw_chunk_2107_cast_fp16")]; tensor var_21920_to_fp16 = const()[name = tensor("op_21920_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2109_cast_fp16, y = var_21920_to_fp16)[name = tensor("aw_chunk_2109_cast_fp16")]; tensor var_21922_to_fp16 = const()[name = tensor("op_21922_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2111_cast_fp16, y = var_21922_to_fp16)[name = tensor("aw_chunk_2111_cast_fp16")]; tensor var_21924_to_fp16 = const()[name = tensor("op_21924_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2113_cast_fp16, y = var_21924_to_fp16)[name = tensor("aw_chunk_2113_cast_fp16")]; tensor var_21926_to_fp16 = const()[name = tensor("op_21926_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2115_cast_fp16, y = var_21926_to_fp16)[name = tensor("aw_chunk_2115_cast_fp16")]; tensor var_21928_to_fp16 = const()[name = tensor("op_21928_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2117_cast_fp16, y = var_21928_to_fp16)[name = tensor("aw_chunk_2117_cast_fp16")]; tensor var_21930_to_fp16 = const()[name = tensor("op_21930_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2119_cast_fp16, y = var_21930_to_fp16)[name = tensor("aw_chunk_2119_cast_fp16")]; tensor var_21932_to_fp16 = const()[name = tensor("op_21932_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2121_cast_fp16, y = var_21932_to_fp16)[name = tensor("aw_chunk_2121_cast_fp16")]; tensor var_21934_to_fp16 = const()[name = tensor("op_21934_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2123_cast_fp16, y = var_21934_to_fp16)[name = tensor("aw_chunk_2123_cast_fp16")]; tensor var_21936_to_fp16 = const()[name = tensor("op_21936_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2125_cast_fp16, y = var_21936_to_fp16)[name = tensor("aw_chunk_2125_cast_fp16")]; tensor var_21938_to_fp16 = const()[name = tensor("op_21938_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2127_cast_fp16, y = var_21938_to_fp16)[name = tensor("aw_chunk_2127_cast_fp16")]; tensor var_21940_to_fp16 = const()[name = tensor("op_21940_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2129_cast_fp16, y = var_21940_to_fp16)[name = tensor("aw_chunk_2129_cast_fp16")]; tensor var_21942_to_fp16 = const()[name = tensor("op_21942_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2131_cast_fp16, y = var_21942_to_fp16)[name = tensor("aw_chunk_2131_cast_fp16")]; tensor var_21944_to_fp16 = const()[name = tensor("op_21944_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2133_cast_fp16, y = var_21944_to_fp16)[name = tensor("aw_chunk_2133_cast_fp16")]; tensor var_21946_to_fp16 = const()[name = tensor("op_21946_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2135_cast_fp16, y = var_21946_to_fp16)[name = tensor("aw_chunk_2135_cast_fp16")]; tensor var_21948_to_fp16 = const()[name = tensor("op_21948_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2137_cast_fp16, y = var_21948_to_fp16)[name = tensor("aw_chunk_2137_cast_fp16")]; tensor var_21950_to_fp16 = const()[name = tensor("op_21950_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2139_cast_fp16, y = var_21950_to_fp16)[name = tensor("aw_chunk_2139_cast_fp16")]; tensor var_21952_to_fp16 = const()[name = tensor("op_21952_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2141_cast_fp16, y = var_21952_to_fp16)[name = tensor("aw_chunk_2141_cast_fp16")]; tensor var_21954_to_fp16 = const()[name = tensor("op_21954_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2143_cast_fp16, y = var_21954_to_fp16)[name = tensor("aw_chunk_2143_cast_fp16")]; tensor var_21956_to_fp16 = const()[name = tensor("op_21956_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2145_cast_fp16, y = var_21956_to_fp16)[name = tensor("aw_chunk_2145_cast_fp16")]; tensor var_21958_to_fp16 = const()[name = tensor("op_21958_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2147_cast_fp16, y = var_21958_to_fp16)[name = tensor("aw_chunk_2147_cast_fp16")]; tensor var_21960_to_fp16 = const()[name = tensor("op_21960_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2149_cast_fp16, y = var_21960_to_fp16)[name = tensor("aw_chunk_2149_cast_fp16")]; tensor var_21962_to_fp16 = const()[name = tensor("op_21962_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2151_cast_fp16, y = var_21962_to_fp16)[name = tensor("aw_chunk_2151_cast_fp16")]; tensor var_21964_to_fp16 = const()[name = tensor("op_21964_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2153_cast_fp16, y = var_21964_to_fp16)[name = tensor("aw_chunk_2153_cast_fp16")]; tensor var_21966_to_fp16 = const()[name = tensor("op_21966_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2155_cast_fp16, y = var_21966_to_fp16)[name = tensor("aw_chunk_2155_cast_fp16")]; tensor var_21968_to_fp16 = const()[name = tensor("op_21968_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2157_cast_fp16, y = var_21968_to_fp16)[name = tensor("aw_chunk_2157_cast_fp16")]; tensor var_21970_to_fp16 = const()[name = tensor("op_21970_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2159_cast_fp16, y = var_21970_to_fp16)[name = tensor("aw_chunk_2159_cast_fp16")]; tensor var_21972_to_fp16 = const()[name = tensor("op_21972_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2161_cast_fp16, y = var_21972_to_fp16)[name = tensor("aw_chunk_2161_cast_fp16")]; tensor var_21974_to_fp16 = const()[name = tensor("op_21974_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2163_cast_fp16, y = var_21974_to_fp16)[name = tensor("aw_chunk_2163_cast_fp16")]; tensor var_21976_to_fp16 = const()[name = tensor("op_21976_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2165_cast_fp16, y = var_21976_to_fp16)[name = tensor("aw_chunk_2165_cast_fp16")]; tensor var_21978_to_fp16 = const()[name = tensor("op_21978_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2167_cast_fp16, y = var_21978_to_fp16)[name = tensor("aw_chunk_2167_cast_fp16")]; tensor var_21980_to_fp16 = const()[name = tensor("op_21980_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2169_cast_fp16, y = var_21980_to_fp16)[name = tensor("aw_chunk_2169_cast_fp16")]; tensor var_21982_to_fp16 = const()[name = tensor("op_21982_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2171_cast_fp16, y = var_21982_to_fp16)[name = tensor("aw_chunk_2171_cast_fp16")]; tensor var_21984_to_fp16 = const()[name = tensor("op_21984_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2173_cast_fp16, y = var_21984_to_fp16)[name = tensor("aw_chunk_2173_cast_fp16")]; tensor var_21986_to_fp16 = const()[name = tensor("op_21986_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2175_cast_fp16, y = var_21986_to_fp16)[name = tensor("aw_chunk_2175_cast_fp16")]; tensor var_21988_to_fp16 = const()[name = tensor("op_21988_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2177_cast_fp16, y = var_21988_to_fp16)[name = tensor("aw_chunk_2177_cast_fp16")]; tensor var_21990_to_fp16 = const()[name = tensor("op_21990_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2179_cast_fp16, y = var_21990_to_fp16)[name = tensor("aw_chunk_2179_cast_fp16")]; tensor var_21992_to_fp16 = const()[name = tensor("op_21992_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2181_cast_fp16, y = var_21992_to_fp16)[name = tensor("aw_chunk_2181_cast_fp16")]; tensor var_21994_to_fp16 = const()[name = tensor("op_21994_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2183_cast_fp16, y = var_21994_to_fp16)[name = tensor("aw_chunk_2183_cast_fp16")]; tensor var_21996_to_fp16 = const()[name = tensor("op_21996_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2185_cast_fp16, y = var_21996_to_fp16)[name = tensor("aw_chunk_2185_cast_fp16")]; tensor var_21998_to_fp16 = const()[name = tensor("op_21998_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2187_cast_fp16, y = var_21998_to_fp16)[name = tensor("aw_chunk_2187_cast_fp16")]; tensor var_22000_to_fp16 = const()[name = tensor("op_22000_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2189_cast_fp16, y = var_22000_to_fp16)[name = tensor("aw_chunk_2189_cast_fp16")]; tensor var_22002_to_fp16 = const()[name = tensor("op_22002_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2191_cast_fp16, y = var_22002_to_fp16)[name = tensor("aw_chunk_2191_cast_fp16")]; tensor var_22004_to_fp16 = const()[name = tensor("op_22004_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2193_cast_fp16, y = var_22004_to_fp16)[name = tensor("aw_chunk_2193_cast_fp16")]; tensor var_22006_to_fp16 = const()[name = tensor("op_22006_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2195_cast_fp16, y = var_22006_to_fp16)[name = tensor("aw_chunk_2195_cast_fp16")]; tensor var_22008_to_fp16 = const()[name = tensor("op_22008_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2197_cast_fp16, y = var_22008_to_fp16)[name = tensor("aw_chunk_2197_cast_fp16")]; tensor var_22010_to_fp16 = const()[name = tensor("op_22010_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2199_cast_fp16, y = var_22010_to_fp16)[name = tensor("aw_chunk_2199_cast_fp16")]; tensor var_22012_to_fp16 = const()[name = tensor("op_22012_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2201_cast_fp16, y = var_22012_to_fp16)[name = tensor("aw_chunk_2201_cast_fp16")]; tensor var_22014_to_fp16 = const()[name = tensor("op_22014_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2203_cast_fp16, y = var_22014_to_fp16)[name = tensor("aw_chunk_2203_cast_fp16")]; tensor var_22016_to_fp16 = const()[name = tensor("op_22016_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2205_cast_fp16, y = var_22016_to_fp16)[name = tensor("aw_chunk_2205_cast_fp16")]; tensor var_22018_to_fp16 = const()[name = tensor("op_22018_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2207_cast_fp16, y = var_22018_to_fp16)[name = tensor("aw_chunk_2207_cast_fp16")]; tensor var_22020_to_fp16 = const()[name = tensor("op_22020_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2209_cast_fp16, y = var_22020_to_fp16)[name = tensor("aw_chunk_2209_cast_fp16")]; tensor var_22022_to_fp16 = const()[name = tensor("op_22022_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2211_cast_fp16, y = var_22022_to_fp16)[name = tensor("aw_chunk_2211_cast_fp16")]; tensor var_22024_to_fp16 = const()[name = tensor("op_22024_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2213_cast_fp16, y = var_22024_to_fp16)[name = tensor("aw_chunk_2213_cast_fp16")]; tensor var_22026_to_fp16 = const()[name = tensor("op_22026_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2215_cast_fp16, y = var_22026_to_fp16)[name = tensor("aw_chunk_2215_cast_fp16")]; tensor var_22028_to_fp16 = const()[name = tensor("op_22028_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2217_cast_fp16, y = var_22028_to_fp16)[name = tensor("aw_chunk_2217_cast_fp16")]; tensor var_22030_to_fp16 = const()[name = tensor("op_22030_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2219_cast_fp16, y = var_22030_to_fp16)[name = tensor("aw_chunk_2219_cast_fp16")]; tensor var_22032_to_fp16 = const()[name = tensor("op_22032_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2221_cast_fp16, y = var_22032_to_fp16)[name = tensor("aw_chunk_2221_cast_fp16")]; tensor var_22034_to_fp16 = const()[name = tensor("op_22034_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2223_cast_fp16, y = var_22034_to_fp16)[name = tensor("aw_chunk_2223_cast_fp16")]; tensor var_22036_to_fp16 = const()[name = tensor("op_22036_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2225_cast_fp16, y = var_22036_to_fp16)[name = tensor("aw_chunk_2225_cast_fp16")]; tensor var_22038_to_fp16 = const()[name = tensor("op_22038_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2227_cast_fp16, y = var_22038_to_fp16)[name = tensor("aw_chunk_2227_cast_fp16")]; tensor var_22040_to_fp16 = const()[name = tensor("op_22040_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2229_cast_fp16, y = var_22040_to_fp16)[name = tensor("aw_chunk_2229_cast_fp16")]; tensor var_22042_to_fp16 = const()[name = tensor("op_22042_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2231_cast_fp16, y = var_22042_to_fp16)[name = tensor("aw_chunk_2231_cast_fp16")]; tensor var_22044_to_fp16 = const()[name = tensor("op_22044_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2233_cast_fp16, y = var_22044_to_fp16)[name = tensor("aw_chunk_2233_cast_fp16")]; tensor var_22046_to_fp16 = const()[name = tensor("op_22046_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2235_cast_fp16, y = var_22046_to_fp16)[name = tensor("aw_chunk_2235_cast_fp16")]; tensor var_22048_to_fp16 = const()[name = tensor("op_22048_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2237_cast_fp16, y = var_22048_to_fp16)[name = tensor("aw_chunk_2237_cast_fp16")]; tensor var_22050_to_fp16 = const()[name = tensor("op_22050_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2239_cast_fp16, y = var_22050_to_fp16)[name = tensor("aw_chunk_2239_cast_fp16")]; tensor var_22052_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2081_cast_fp16)[name = tensor("op_22052_cast_fp16")]; tensor var_22053_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2083_cast_fp16)[name = tensor("op_22053_cast_fp16")]; tensor var_22054_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2085_cast_fp16)[name = tensor("op_22054_cast_fp16")]; tensor var_22055_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2087_cast_fp16)[name = tensor("op_22055_cast_fp16")]; tensor var_22056_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2089_cast_fp16)[name = tensor("op_22056_cast_fp16")]; tensor var_22057_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2091_cast_fp16)[name = tensor("op_22057_cast_fp16")]; tensor var_22058_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2093_cast_fp16)[name = tensor("op_22058_cast_fp16")]; tensor var_22059_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2095_cast_fp16)[name = tensor("op_22059_cast_fp16")]; tensor var_22060_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2097_cast_fp16)[name = tensor("op_22060_cast_fp16")]; tensor var_22061_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2099_cast_fp16)[name = tensor("op_22061_cast_fp16")]; tensor var_22062_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2101_cast_fp16)[name = tensor("op_22062_cast_fp16")]; tensor var_22063_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2103_cast_fp16)[name = tensor("op_22063_cast_fp16")]; tensor var_22064_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2105_cast_fp16)[name = tensor("op_22064_cast_fp16")]; tensor var_22065_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2107_cast_fp16)[name = tensor("op_22065_cast_fp16")]; tensor var_22066_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2109_cast_fp16)[name = tensor("op_22066_cast_fp16")]; tensor var_22067_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2111_cast_fp16)[name = tensor("op_22067_cast_fp16")]; tensor var_22068_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2113_cast_fp16)[name = tensor("op_22068_cast_fp16")]; tensor var_22069_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2115_cast_fp16)[name = tensor("op_22069_cast_fp16")]; tensor var_22070_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2117_cast_fp16)[name = tensor("op_22070_cast_fp16")]; tensor var_22071_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2119_cast_fp16)[name = tensor("op_22071_cast_fp16")]; tensor var_22072_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2121_cast_fp16)[name = tensor("op_22072_cast_fp16")]; tensor var_22073_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2123_cast_fp16)[name = tensor("op_22073_cast_fp16")]; tensor var_22074_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2125_cast_fp16)[name = tensor("op_22074_cast_fp16")]; tensor var_22075_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2127_cast_fp16)[name = tensor("op_22075_cast_fp16")]; tensor var_22076_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2129_cast_fp16)[name = tensor("op_22076_cast_fp16")]; tensor var_22077_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2131_cast_fp16)[name = tensor("op_22077_cast_fp16")]; tensor var_22078_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2133_cast_fp16)[name = tensor("op_22078_cast_fp16")]; tensor var_22079_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2135_cast_fp16)[name = tensor("op_22079_cast_fp16")]; tensor var_22080_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2137_cast_fp16)[name = tensor("op_22080_cast_fp16")]; tensor var_22081_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2139_cast_fp16)[name = tensor("op_22081_cast_fp16")]; tensor var_22082_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2141_cast_fp16)[name = tensor("op_22082_cast_fp16")]; tensor var_22083_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2143_cast_fp16)[name = tensor("op_22083_cast_fp16")]; tensor var_22084_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2145_cast_fp16)[name = tensor("op_22084_cast_fp16")]; tensor var_22085_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2147_cast_fp16)[name = tensor("op_22085_cast_fp16")]; tensor var_22086_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2149_cast_fp16)[name = tensor("op_22086_cast_fp16")]; tensor var_22087_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2151_cast_fp16)[name = tensor("op_22087_cast_fp16")]; tensor var_22088_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2153_cast_fp16)[name = tensor("op_22088_cast_fp16")]; tensor var_22089_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2155_cast_fp16)[name = tensor("op_22089_cast_fp16")]; tensor var_22090_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2157_cast_fp16)[name = tensor("op_22090_cast_fp16")]; tensor var_22091_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2159_cast_fp16)[name = tensor("op_22091_cast_fp16")]; tensor var_22092_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2161_cast_fp16)[name = tensor("op_22092_cast_fp16")]; tensor var_22093_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2163_cast_fp16)[name = tensor("op_22093_cast_fp16")]; tensor var_22094_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2165_cast_fp16)[name = tensor("op_22094_cast_fp16")]; tensor var_22095_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2167_cast_fp16)[name = tensor("op_22095_cast_fp16")]; tensor var_22096_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2169_cast_fp16)[name = tensor("op_22096_cast_fp16")]; tensor var_22097_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2171_cast_fp16)[name = tensor("op_22097_cast_fp16")]; tensor var_22098_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2173_cast_fp16)[name = tensor("op_22098_cast_fp16")]; tensor var_22099_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2175_cast_fp16)[name = tensor("op_22099_cast_fp16")]; tensor var_22100_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2177_cast_fp16)[name = tensor("op_22100_cast_fp16")]; tensor var_22101_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2179_cast_fp16)[name = tensor("op_22101_cast_fp16")]; tensor var_22102_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2181_cast_fp16)[name = tensor("op_22102_cast_fp16")]; tensor var_22103_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2183_cast_fp16)[name = tensor("op_22103_cast_fp16")]; tensor var_22104_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2185_cast_fp16)[name = tensor("op_22104_cast_fp16")]; tensor var_22105_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2187_cast_fp16)[name = tensor("op_22105_cast_fp16")]; tensor var_22106_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2189_cast_fp16)[name = tensor("op_22106_cast_fp16")]; tensor var_22107_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2191_cast_fp16)[name = tensor("op_22107_cast_fp16")]; tensor var_22108_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2193_cast_fp16)[name = tensor("op_22108_cast_fp16")]; tensor var_22109_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2195_cast_fp16)[name = tensor("op_22109_cast_fp16")]; tensor var_22110_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2197_cast_fp16)[name = tensor("op_22110_cast_fp16")]; tensor var_22111_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2199_cast_fp16)[name = tensor("op_22111_cast_fp16")]; tensor var_22112_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2201_cast_fp16)[name = tensor("op_22112_cast_fp16")]; tensor var_22113_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2203_cast_fp16)[name = tensor("op_22113_cast_fp16")]; tensor var_22114_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2205_cast_fp16)[name = tensor("op_22114_cast_fp16")]; tensor var_22115_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2207_cast_fp16)[name = tensor("op_22115_cast_fp16")]; tensor var_22116_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2209_cast_fp16)[name = tensor("op_22116_cast_fp16")]; tensor var_22117_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2211_cast_fp16)[name = tensor("op_22117_cast_fp16")]; tensor var_22118_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2213_cast_fp16)[name = tensor("op_22118_cast_fp16")]; tensor var_22119_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2215_cast_fp16)[name = tensor("op_22119_cast_fp16")]; tensor var_22120_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2217_cast_fp16)[name = tensor("op_22120_cast_fp16")]; tensor var_22121_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2219_cast_fp16)[name = tensor("op_22121_cast_fp16")]; tensor var_22122_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2221_cast_fp16)[name = tensor("op_22122_cast_fp16")]; tensor var_22123_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2223_cast_fp16)[name = tensor("op_22123_cast_fp16")]; tensor var_22124_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2225_cast_fp16)[name = tensor("op_22124_cast_fp16")]; tensor var_22125_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2227_cast_fp16)[name = tensor("op_22125_cast_fp16")]; tensor var_22126_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2229_cast_fp16)[name = tensor("op_22126_cast_fp16")]; tensor var_22127_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2231_cast_fp16)[name = tensor("op_22127_cast_fp16")]; tensor var_22128_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2233_cast_fp16)[name = tensor("op_22128_cast_fp16")]; tensor var_22129_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2235_cast_fp16)[name = tensor("op_22129_cast_fp16")]; tensor var_22130_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2237_cast_fp16)[name = tensor("op_22130_cast_fp16")]; tensor var_22131_cast_fp16 = softmax(axis = var_20850, x = aw_chunk_2239_cast_fp16)[name = tensor("op_22131_cast_fp16")]; tensor var_22133_equation_0 = const()[name = tensor("op_22133_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22133_cast_fp16 = einsum(equation = var_22133_equation_0, values = (var_21653_cast_fp16, var_22052_cast_fp16))[name = tensor("op_22133_cast_fp16")]; tensor var_22135_equation_0 = const()[name = tensor("op_22135_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22135_cast_fp16 = einsum(equation = var_22135_equation_0, values = (var_21653_cast_fp16, var_22053_cast_fp16))[name = tensor("op_22135_cast_fp16")]; tensor var_22137_equation_0 = const()[name = tensor("op_22137_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22137_cast_fp16 = einsum(equation = var_22137_equation_0, values = (var_21653_cast_fp16, var_22054_cast_fp16))[name = tensor("op_22137_cast_fp16")]; tensor var_22139_equation_0 = const()[name = tensor("op_22139_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22139_cast_fp16 = einsum(equation = var_22139_equation_0, values = (var_21653_cast_fp16, var_22055_cast_fp16))[name = tensor("op_22139_cast_fp16")]; tensor var_22141_equation_0 = const()[name = tensor("op_22141_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22141_cast_fp16 = einsum(equation = var_22141_equation_0, values = (var_21657_cast_fp16, var_22056_cast_fp16))[name = tensor("op_22141_cast_fp16")]; tensor var_22143_equation_0 = const()[name = tensor("op_22143_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22143_cast_fp16 = einsum(equation = var_22143_equation_0, values = (var_21657_cast_fp16, var_22057_cast_fp16))[name = tensor("op_22143_cast_fp16")]; tensor var_22145_equation_0 = const()[name = tensor("op_22145_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22145_cast_fp16 = einsum(equation = var_22145_equation_0, values = (var_21657_cast_fp16, var_22058_cast_fp16))[name = tensor("op_22145_cast_fp16")]; tensor var_22147_equation_0 = const()[name = tensor("op_22147_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22147_cast_fp16 = einsum(equation = var_22147_equation_0, values = (var_21657_cast_fp16, var_22059_cast_fp16))[name = tensor("op_22147_cast_fp16")]; tensor var_22149_equation_0 = const()[name = tensor("op_22149_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22149_cast_fp16 = einsum(equation = var_22149_equation_0, values = (var_21661_cast_fp16, var_22060_cast_fp16))[name = tensor("op_22149_cast_fp16")]; tensor var_22151_equation_0 = const()[name = tensor("op_22151_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22151_cast_fp16 = einsum(equation = var_22151_equation_0, values = (var_21661_cast_fp16, var_22061_cast_fp16))[name = tensor("op_22151_cast_fp16")]; tensor var_22153_equation_0 = const()[name = tensor("op_22153_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22153_cast_fp16 = einsum(equation = var_22153_equation_0, values = (var_21661_cast_fp16, var_22062_cast_fp16))[name = tensor("op_22153_cast_fp16")]; tensor var_22155_equation_0 = const()[name = tensor("op_22155_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22155_cast_fp16 = einsum(equation = var_22155_equation_0, values = (var_21661_cast_fp16, var_22063_cast_fp16))[name = tensor("op_22155_cast_fp16")]; tensor var_22157_equation_0 = const()[name = tensor("op_22157_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22157_cast_fp16 = einsum(equation = var_22157_equation_0, values = (var_21665_cast_fp16, var_22064_cast_fp16))[name = tensor("op_22157_cast_fp16")]; tensor var_22159_equation_0 = const()[name = tensor("op_22159_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22159_cast_fp16 = einsum(equation = var_22159_equation_0, values = (var_21665_cast_fp16, var_22065_cast_fp16))[name = tensor("op_22159_cast_fp16")]; tensor var_22161_equation_0 = const()[name = tensor("op_22161_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22161_cast_fp16 = einsum(equation = var_22161_equation_0, values = (var_21665_cast_fp16, var_22066_cast_fp16))[name = tensor("op_22161_cast_fp16")]; tensor var_22163_equation_0 = const()[name = tensor("op_22163_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22163_cast_fp16 = einsum(equation = var_22163_equation_0, values = (var_21665_cast_fp16, var_22067_cast_fp16))[name = tensor("op_22163_cast_fp16")]; tensor var_22165_equation_0 = const()[name = tensor("op_22165_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22165_cast_fp16 = einsum(equation = var_22165_equation_0, values = (var_21669_cast_fp16, var_22068_cast_fp16))[name = tensor("op_22165_cast_fp16")]; tensor var_22167_equation_0 = const()[name = tensor("op_22167_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22167_cast_fp16 = einsum(equation = var_22167_equation_0, values = (var_21669_cast_fp16, var_22069_cast_fp16))[name = tensor("op_22167_cast_fp16")]; tensor var_22169_equation_0 = const()[name = tensor("op_22169_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22169_cast_fp16 = einsum(equation = var_22169_equation_0, values = (var_21669_cast_fp16, var_22070_cast_fp16))[name = tensor("op_22169_cast_fp16")]; tensor var_22171_equation_0 = const()[name = tensor("op_22171_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22171_cast_fp16 = einsum(equation = var_22171_equation_0, values = (var_21669_cast_fp16, var_22071_cast_fp16))[name = tensor("op_22171_cast_fp16")]; tensor var_22173_equation_0 = const()[name = tensor("op_22173_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22173_cast_fp16 = einsum(equation = var_22173_equation_0, values = (var_21673_cast_fp16, var_22072_cast_fp16))[name = tensor("op_22173_cast_fp16")]; tensor var_22175_equation_0 = const()[name = tensor("op_22175_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22175_cast_fp16 = einsum(equation = var_22175_equation_0, values = (var_21673_cast_fp16, var_22073_cast_fp16))[name = tensor("op_22175_cast_fp16")]; tensor var_22177_equation_0 = const()[name = tensor("op_22177_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22177_cast_fp16 = einsum(equation = var_22177_equation_0, values = (var_21673_cast_fp16, var_22074_cast_fp16))[name = tensor("op_22177_cast_fp16")]; tensor var_22179_equation_0 = const()[name = tensor("op_22179_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22179_cast_fp16 = einsum(equation = var_22179_equation_0, values = (var_21673_cast_fp16, var_22075_cast_fp16))[name = tensor("op_22179_cast_fp16")]; tensor var_22181_equation_0 = const()[name = tensor("op_22181_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22181_cast_fp16 = einsum(equation = var_22181_equation_0, values = (var_21677_cast_fp16, var_22076_cast_fp16))[name = tensor("op_22181_cast_fp16")]; tensor var_22183_equation_0 = const()[name = tensor("op_22183_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22183_cast_fp16 = einsum(equation = var_22183_equation_0, values = (var_21677_cast_fp16, var_22077_cast_fp16))[name = tensor("op_22183_cast_fp16")]; tensor var_22185_equation_0 = const()[name = tensor("op_22185_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22185_cast_fp16 = einsum(equation = var_22185_equation_0, values = (var_21677_cast_fp16, var_22078_cast_fp16))[name = tensor("op_22185_cast_fp16")]; tensor var_22187_equation_0 = const()[name = tensor("op_22187_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22187_cast_fp16 = einsum(equation = var_22187_equation_0, values = (var_21677_cast_fp16, var_22079_cast_fp16))[name = tensor("op_22187_cast_fp16")]; tensor var_22189_equation_0 = const()[name = tensor("op_22189_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22189_cast_fp16 = einsum(equation = var_22189_equation_0, values = (var_21681_cast_fp16, var_22080_cast_fp16))[name = tensor("op_22189_cast_fp16")]; tensor var_22191_equation_0 = const()[name = tensor("op_22191_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22191_cast_fp16 = einsum(equation = var_22191_equation_0, values = (var_21681_cast_fp16, var_22081_cast_fp16))[name = tensor("op_22191_cast_fp16")]; tensor var_22193_equation_0 = const()[name = tensor("op_22193_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22193_cast_fp16 = einsum(equation = var_22193_equation_0, values = (var_21681_cast_fp16, var_22082_cast_fp16))[name = tensor("op_22193_cast_fp16")]; tensor var_22195_equation_0 = const()[name = tensor("op_22195_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22195_cast_fp16 = einsum(equation = var_22195_equation_0, values = (var_21681_cast_fp16, var_22083_cast_fp16))[name = tensor("op_22195_cast_fp16")]; tensor var_22197_equation_0 = const()[name = tensor("op_22197_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22197_cast_fp16 = einsum(equation = var_22197_equation_0, values = (var_21685_cast_fp16, var_22084_cast_fp16))[name = tensor("op_22197_cast_fp16")]; tensor var_22199_equation_0 = const()[name = tensor("op_22199_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22199_cast_fp16 = einsum(equation = var_22199_equation_0, values = (var_21685_cast_fp16, var_22085_cast_fp16))[name = tensor("op_22199_cast_fp16")]; tensor var_22201_equation_0 = const()[name = tensor("op_22201_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22201_cast_fp16 = einsum(equation = var_22201_equation_0, values = (var_21685_cast_fp16, var_22086_cast_fp16))[name = tensor("op_22201_cast_fp16")]; tensor var_22203_equation_0 = const()[name = tensor("op_22203_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22203_cast_fp16 = einsum(equation = var_22203_equation_0, values = (var_21685_cast_fp16, var_22087_cast_fp16))[name = tensor("op_22203_cast_fp16")]; tensor var_22205_equation_0 = const()[name = tensor("op_22205_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22205_cast_fp16 = einsum(equation = var_22205_equation_0, values = (var_21689_cast_fp16, var_22088_cast_fp16))[name = tensor("op_22205_cast_fp16")]; tensor var_22207_equation_0 = const()[name = tensor("op_22207_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22207_cast_fp16 = einsum(equation = var_22207_equation_0, values = (var_21689_cast_fp16, var_22089_cast_fp16))[name = tensor("op_22207_cast_fp16")]; tensor var_22209_equation_0 = const()[name = tensor("op_22209_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22209_cast_fp16 = einsum(equation = var_22209_equation_0, values = (var_21689_cast_fp16, var_22090_cast_fp16))[name = tensor("op_22209_cast_fp16")]; tensor var_22211_equation_0 = const()[name = tensor("op_22211_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22211_cast_fp16 = einsum(equation = var_22211_equation_0, values = (var_21689_cast_fp16, var_22091_cast_fp16))[name = tensor("op_22211_cast_fp16")]; tensor var_22213_equation_0 = const()[name = tensor("op_22213_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22213_cast_fp16 = einsum(equation = var_22213_equation_0, values = (var_21693_cast_fp16, var_22092_cast_fp16))[name = tensor("op_22213_cast_fp16")]; tensor var_22215_equation_0 = const()[name = tensor("op_22215_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22215_cast_fp16 = einsum(equation = var_22215_equation_0, values = (var_21693_cast_fp16, var_22093_cast_fp16))[name = tensor("op_22215_cast_fp16")]; tensor var_22217_equation_0 = const()[name = tensor("op_22217_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22217_cast_fp16 = einsum(equation = var_22217_equation_0, values = (var_21693_cast_fp16, var_22094_cast_fp16))[name = tensor("op_22217_cast_fp16")]; tensor var_22219_equation_0 = const()[name = tensor("op_22219_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22219_cast_fp16 = einsum(equation = var_22219_equation_0, values = (var_21693_cast_fp16, var_22095_cast_fp16))[name = tensor("op_22219_cast_fp16")]; tensor var_22221_equation_0 = const()[name = tensor("op_22221_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22221_cast_fp16 = einsum(equation = var_22221_equation_0, values = (var_21697_cast_fp16, var_22096_cast_fp16))[name = tensor("op_22221_cast_fp16")]; tensor var_22223_equation_0 = const()[name = tensor("op_22223_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22223_cast_fp16 = einsum(equation = var_22223_equation_0, values = (var_21697_cast_fp16, var_22097_cast_fp16))[name = tensor("op_22223_cast_fp16")]; tensor var_22225_equation_0 = const()[name = tensor("op_22225_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22225_cast_fp16 = einsum(equation = var_22225_equation_0, values = (var_21697_cast_fp16, var_22098_cast_fp16))[name = tensor("op_22225_cast_fp16")]; tensor var_22227_equation_0 = const()[name = tensor("op_22227_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22227_cast_fp16 = einsum(equation = var_22227_equation_0, values = (var_21697_cast_fp16, var_22099_cast_fp16))[name = tensor("op_22227_cast_fp16")]; tensor var_22229_equation_0 = const()[name = tensor("op_22229_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22229_cast_fp16 = einsum(equation = var_22229_equation_0, values = (var_21701_cast_fp16, var_22100_cast_fp16))[name = tensor("op_22229_cast_fp16")]; tensor var_22231_equation_0 = const()[name = tensor("op_22231_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22231_cast_fp16 = einsum(equation = var_22231_equation_0, values = (var_21701_cast_fp16, var_22101_cast_fp16))[name = tensor("op_22231_cast_fp16")]; tensor var_22233_equation_0 = const()[name = tensor("op_22233_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22233_cast_fp16 = einsum(equation = var_22233_equation_0, values = (var_21701_cast_fp16, var_22102_cast_fp16))[name = tensor("op_22233_cast_fp16")]; tensor var_22235_equation_0 = const()[name = tensor("op_22235_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22235_cast_fp16 = einsum(equation = var_22235_equation_0, values = (var_21701_cast_fp16, var_22103_cast_fp16))[name = tensor("op_22235_cast_fp16")]; tensor var_22237_equation_0 = const()[name = tensor("op_22237_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22237_cast_fp16 = einsum(equation = var_22237_equation_0, values = (var_21705_cast_fp16, var_22104_cast_fp16))[name = tensor("op_22237_cast_fp16")]; tensor var_22239_equation_0 = const()[name = tensor("op_22239_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22239_cast_fp16 = einsum(equation = var_22239_equation_0, values = (var_21705_cast_fp16, var_22105_cast_fp16))[name = tensor("op_22239_cast_fp16")]; tensor var_22241_equation_0 = const()[name = tensor("op_22241_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22241_cast_fp16 = einsum(equation = var_22241_equation_0, values = (var_21705_cast_fp16, var_22106_cast_fp16))[name = tensor("op_22241_cast_fp16")]; tensor var_22243_equation_0 = const()[name = tensor("op_22243_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22243_cast_fp16 = einsum(equation = var_22243_equation_0, values = (var_21705_cast_fp16, var_22107_cast_fp16))[name = tensor("op_22243_cast_fp16")]; tensor var_22245_equation_0 = const()[name = tensor("op_22245_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22245_cast_fp16 = einsum(equation = var_22245_equation_0, values = (var_21709_cast_fp16, var_22108_cast_fp16))[name = tensor("op_22245_cast_fp16")]; tensor var_22247_equation_0 = const()[name = tensor("op_22247_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22247_cast_fp16 = einsum(equation = var_22247_equation_0, values = (var_21709_cast_fp16, var_22109_cast_fp16))[name = tensor("op_22247_cast_fp16")]; tensor var_22249_equation_0 = const()[name = tensor("op_22249_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22249_cast_fp16 = einsum(equation = var_22249_equation_0, values = (var_21709_cast_fp16, var_22110_cast_fp16))[name = tensor("op_22249_cast_fp16")]; tensor var_22251_equation_0 = const()[name = tensor("op_22251_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22251_cast_fp16 = einsum(equation = var_22251_equation_0, values = (var_21709_cast_fp16, var_22111_cast_fp16))[name = tensor("op_22251_cast_fp16")]; tensor var_22253_equation_0 = const()[name = tensor("op_22253_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22253_cast_fp16 = einsum(equation = var_22253_equation_0, values = (var_21713_cast_fp16, var_22112_cast_fp16))[name = tensor("op_22253_cast_fp16")]; tensor var_22255_equation_0 = const()[name = tensor("op_22255_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22255_cast_fp16 = einsum(equation = var_22255_equation_0, values = (var_21713_cast_fp16, var_22113_cast_fp16))[name = tensor("op_22255_cast_fp16")]; tensor var_22257_equation_0 = const()[name = tensor("op_22257_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22257_cast_fp16 = einsum(equation = var_22257_equation_0, values = (var_21713_cast_fp16, var_22114_cast_fp16))[name = tensor("op_22257_cast_fp16")]; tensor var_22259_equation_0 = const()[name = tensor("op_22259_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22259_cast_fp16 = einsum(equation = var_22259_equation_0, values = (var_21713_cast_fp16, var_22115_cast_fp16))[name = tensor("op_22259_cast_fp16")]; tensor var_22261_equation_0 = const()[name = tensor("op_22261_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22261_cast_fp16 = einsum(equation = var_22261_equation_0, values = (var_21717_cast_fp16, var_22116_cast_fp16))[name = tensor("op_22261_cast_fp16")]; tensor var_22263_equation_0 = const()[name = tensor("op_22263_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22263_cast_fp16 = einsum(equation = var_22263_equation_0, values = (var_21717_cast_fp16, var_22117_cast_fp16))[name = tensor("op_22263_cast_fp16")]; tensor var_22265_equation_0 = const()[name = tensor("op_22265_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22265_cast_fp16 = einsum(equation = var_22265_equation_0, values = (var_21717_cast_fp16, var_22118_cast_fp16))[name = tensor("op_22265_cast_fp16")]; tensor var_22267_equation_0 = const()[name = tensor("op_22267_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22267_cast_fp16 = einsum(equation = var_22267_equation_0, values = (var_21717_cast_fp16, var_22119_cast_fp16))[name = tensor("op_22267_cast_fp16")]; tensor var_22269_equation_0 = const()[name = tensor("op_22269_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22269_cast_fp16 = einsum(equation = var_22269_equation_0, values = (var_21721_cast_fp16, var_22120_cast_fp16))[name = tensor("op_22269_cast_fp16")]; tensor var_22271_equation_0 = const()[name = tensor("op_22271_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22271_cast_fp16 = einsum(equation = var_22271_equation_0, values = (var_21721_cast_fp16, var_22121_cast_fp16))[name = tensor("op_22271_cast_fp16")]; tensor var_22273_equation_0 = const()[name = tensor("op_22273_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22273_cast_fp16 = einsum(equation = var_22273_equation_0, values = (var_21721_cast_fp16, var_22122_cast_fp16))[name = tensor("op_22273_cast_fp16")]; tensor var_22275_equation_0 = const()[name = tensor("op_22275_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22275_cast_fp16 = einsum(equation = var_22275_equation_0, values = (var_21721_cast_fp16, var_22123_cast_fp16))[name = tensor("op_22275_cast_fp16")]; tensor var_22277_equation_0 = const()[name = tensor("op_22277_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22277_cast_fp16 = einsum(equation = var_22277_equation_0, values = (var_21725_cast_fp16, var_22124_cast_fp16))[name = tensor("op_22277_cast_fp16")]; tensor var_22279_equation_0 = const()[name = tensor("op_22279_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22279_cast_fp16 = einsum(equation = var_22279_equation_0, values = (var_21725_cast_fp16, var_22125_cast_fp16))[name = tensor("op_22279_cast_fp16")]; tensor var_22281_equation_0 = const()[name = tensor("op_22281_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22281_cast_fp16 = einsum(equation = var_22281_equation_0, values = (var_21725_cast_fp16, var_22126_cast_fp16))[name = tensor("op_22281_cast_fp16")]; tensor var_22283_equation_0 = const()[name = tensor("op_22283_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22283_cast_fp16 = einsum(equation = var_22283_equation_0, values = (var_21725_cast_fp16, var_22127_cast_fp16))[name = tensor("op_22283_cast_fp16")]; tensor var_22285_equation_0 = const()[name = tensor("op_22285_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22285_cast_fp16 = einsum(equation = var_22285_equation_0, values = (var_21729_cast_fp16, var_22128_cast_fp16))[name = tensor("op_22285_cast_fp16")]; tensor var_22287_equation_0 = const()[name = tensor("op_22287_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22287_cast_fp16 = einsum(equation = var_22287_equation_0, values = (var_21729_cast_fp16, var_22129_cast_fp16))[name = tensor("op_22287_cast_fp16")]; tensor var_22289_equation_0 = const()[name = tensor("op_22289_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22289_cast_fp16 = einsum(equation = var_22289_equation_0, values = (var_21729_cast_fp16, var_22130_cast_fp16))[name = tensor("op_22289_cast_fp16")]; tensor var_22291_equation_0 = const()[name = tensor("op_22291_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_22291_cast_fp16 = einsum(equation = var_22291_equation_0, values = (var_21729_cast_fp16, var_22131_cast_fp16))[name = tensor("op_22291_cast_fp16")]; tensor var_22293_interleave_0 = const()[name = tensor("op_22293_interleave_0"), val = tensor(false)]; tensor var_22293_cast_fp16 = concat(axis = var_20825, interleave = var_22293_interleave_0, values = (var_22133_cast_fp16, var_22135_cast_fp16, var_22137_cast_fp16, var_22139_cast_fp16))[name = tensor("op_22293_cast_fp16")]; tensor var_22295_interleave_0 = const()[name = tensor("op_22295_interleave_0"), val = tensor(false)]; tensor var_22295_cast_fp16 = concat(axis = var_20825, interleave = var_22295_interleave_0, values = (var_22141_cast_fp16, var_22143_cast_fp16, var_22145_cast_fp16, var_22147_cast_fp16))[name = tensor("op_22295_cast_fp16")]; tensor var_22297_interleave_0 = const()[name = tensor("op_22297_interleave_0"), val = tensor(false)]; tensor var_22297_cast_fp16 = concat(axis = var_20825, interleave = var_22297_interleave_0, values = (var_22149_cast_fp16, var_22151_cast_fp16, var_22153_cast_fp16, var_22155_cast_fp16))[name = tensor("op_22297_cast_fp16")]; tensor var_22299_interleave_0 = const()[name = tensor("op_22299_interleave_0"), val = tensor(false)]; tensor var_22299_cast_fp16 = concat(axis = var_20825, interleave = var_22299_interleave_0, values = (var_22157_cast_fp16, var_22159_cast_fp16, var_22161_cast_fp16, var_22163_cast_fp16))[name = tensor("op_22299_cast_fp16")]; tensor var_22301_interleave_0 = const()[name = tensor("op_22301_interleave_0"), val = tensor(false)]; tensor var_22301_cast_fp16 = concat(axis = var_20825, interleave = var_22301_interleave_0, values = (var_22165_cast_fp16, var_22167_cast_fp16, var_22169_cast_fp16, var_22171_cast_fp16))[name = tensor("op_22301_cast_fp16")]; tensor var_22303_interleave_0 = const()[name = tensor("op_22303_interleave_0"), val = tensor(false)]; tensor var_22303_cast_fp16 = concat(axis = var_20825, interleave = var_22303_interleave_0, values = (var_22173_cast_fp16, var_22175_cast_fp16, var_22177_cast_fp16, var_22179_cast_fp16))[name = tensor("op_22303_cast_fp16")]; tensor var_22305_interleave_0 = const()[name = tensor("op_22305_interleave_0"), val = tensor(false)]; tensor var_22305_cast_fp16 = concat(axis = var_20825, interleave = var_22305_interleave_0, values = (var_22181_cast_fp16, var_22183_cast_fp16, var_22185_cast_fp16, var_22187_cast_fp16))[name = tensor("op_22305_cast_fp16")]; tensor var_22307_interleave_0 = const()[name = tensor("op_22307_interleave_0"), val = tensor(false)]; tensor var_22307_cast_fp16 = concat(axis = var_20825, interleave = var_22307_interleave_0, values = (var_22189_cast_fp16, var_22191_cast_fp16, var_22193_cast_fp16, var_22195_cast_fp16))[name = tensor("op_22307_cast_fp16")]; tensor var_22309_interleave_0 = const()[name = tensor("op_22309_interleave_0"), val = tensor(false)]; tensor var_22309_cast_fp16 = concat(axis = var_20825, interleave = var_22309_interleave_0, values = (var_22197_cast_fp16, var_22199_cast_fp16, var_22201_cast_fp16, var_22203_cast_fp16))[name = tensor("op_22309_cast_fp16")]; tensor var_22311_interleave_0 = const()[name = tensor("op_22311_interleave_0"), val = tensor(false)]; tensor var_22311_cast_fp16 = concat(axis = var_20825, interleave = var_22311_interleave_0, values = (var_22205_cast_fp16, var_22207_cast_fp16, var_22209_cast_fp16, var_22211_cast_fp16))[name = tensor("op_22311_cast_fp16")]; tensor var_22313_interleave_0 = const()[name = tensor("op_22313_interleave_0"), val = tensor(false)]; tensor var_22313_cast_fp16 = concat(axis = var_20825, interleave = var_22313_interleave_0, values = (var_22213_cast_fp16, var_22215_cast_fp16, var_22217_cast_fp16, var_22219_cast_fp16))[name = tensor("op_22313_cast_fp16")]; tensor var_22315_interleave_0 = const()[name = tensor("op_22315_interleave_0"), val = tensor(false)]; tensor var_22315_cast_fp16 = concat(axis = var_20825, interleave = var_22315_interleave_0, values = (var_22221_cast_fp16, var_22223_cast_fp16, var_22225_cast_fp16, var_22227_cast_fp16))[name = tensor("op_22315_cast_fp16")]; tensor var_22317_interleave_0 = const()[name = tensor("op_22317_interleave_0"), val = tensor(false)]; tensor var_22317_cast_fp16 = concat(axis = var_20825, interleave = var_22317_interleave_0, values = (var_22229_cast_fp16, var_22231_cast_fp16, var_22233_cast_fp16, var_22235_cast_fp16))[name = tensor("op_22317_cast_fp16")]; tensor var_22319_interleave_0 = const()[name = tensor("op_22319_interleave_0"), val = tensor(false)]; tensor var_22319_cast_fp16 = concat(axis = var_20825, interleave = var_22319_interleave_0, values = (var_22237_cast_fp16, var_22239_cast_fp16, var_22241_cast_fp16, var_22243_cast_fp16))[name = tensor("op_22319_cast_fp16")]; tensor var_22321_interleave_0 = const()[name = tensor("op_22321_interleave_0"), val = tensor(false)]; tensor var_22321_cast_fp16 = concat(axis = var_20825, interleave = var_22321_interleave_0, values = (var_22245_cast_fp16, var_22247_cast_fp16, var_22249_cast_fp16, var_22251_cast_fp16))[name = tensor("op_22321_cast_fp16")]; tensor var_22323_interleave_0 = const()[name = tensor("op_22323_interleave_0"), val = tensor(false)]; tensor var_22323_cast_fp16 = concat(axis = var_20825, interleave = var_22323_interleave_0, values = (var_22253_cast_fp16, var_22255_cast_fp16, var_22257_cast_fp16, var_22259_cast_fp16))[name = tensor("op_22323_cast_fp16")]; tensor var_22325_interleave_0 = const()[name = tensor("op_22325_interleave_0"), val = tensor(false)]; tensor var_22325_cast_fp16 = concat(axis = var_20825, interleave = var_22325_interleave_0, values = (var_22261_cast_fp16, var_22263_cast_fp16, var_22265_cast_fp16, var_22267_cast_fp16))[name = tensor("op_22325_cast_fp16")]; tensor var_22327_interleave_0 = const()[name = tensor("op_22327_interleave_0"), val = tensor(false)]; tensor var_22327_cast_fp16 = concat(axis = var_20825, interleave = var_22327_interleave_0, values = (var_22269_cast_fp16, var_22271_cast_fp16, var_22273_cast_fp16, var_22275_cast_fp16))[name = tensor("op_22327_cast_fp16")]; tensor var_22329_interleave_0 = const()[name = tensor("op_22329_interleave_0"), val = tensor(false)]; tensor var_22329_cast_fp16 = concat(axis = var_20825, interleave = var_22329_interleave_0, values = (var_22277_cast_fp16, var_22279_cast_fp16, var_22281_cast_fp16, var_22283_cast_fp16))[name = tensor("op_22329_cast_fp16")]; tensor var_22331_interleave_0 = const()[name = tensor("op_22331_interleave_0"), val = tensor(false)]; tensor var_22331_cast_fp16 = concat(axis = var_20825, interleave = var_22331_interleave_0, values = (var_22285_cast_fp16, var_22287_cast_fp16, var_22289_cast_fp16, var_22291_cast_fp16))[name = tensor("op_22331_cast_fp16")]; tensor input_105_interleave_0 = const()[name = tensor("input_105_interleave_0"), val = tensor(false)]; tensor input_105_cast_fp16 = concat(axis = var_20850, interleave = input_105_interleave_0, values = (var_22293_cast_fp16, var_22295_cast_fp16, var_22297_cast_fp16, var_22299_cast_fp16, var_22301_cast_fp16, var_22303_cast_fp16, var_22305_cast_fp16, var_22307_cast_fp16, var_22309_cast_fp16, var_22311_cast_fp16, var_22313_cast_fp16, var_22315_cast_fp16, var_22317_cast_fp16, var_22319_cast_fp16, var_22321_cast_fp16, var_22323_cast_fp16, var_22325_cast_fp16, var_22327_cast_fp16, var_22329_cast_fp16, var_22331_cast_fp16))[name = tensor("input_105_cast_fp16")]; tensor var_22342_pad_type_0 = const()[name = tensor("op_22342_pad_type_0"), val = tensor("valid")]; tensor var_22342_strides_0 = const()[name = tensor("op_22342_strides_0"), val = tensor([1, 1])]; tensor var_22342_pad_0 = const()[name = tensor("op_22342_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22342_dilations_0 = const()[name = tensor("op_22342_dilations_0"), val = tensor([1, 1])]; tensor var_22342_groups_0 = const()[name = tensor("op_22342_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(186516800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187336064))), name = tensor("layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_13_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187336192)))]; tensor var_22342_cast_fp16 = conv(bias = layers_13_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_22342_dilations_0, groups = var_22342_groups_0, pad = var_22342_pad_0, pad_type = var_22342_pad_type_0, strides = var_22342_strides_0, weight = layers_13_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_105_cast_fp16)[name = tensor("op_22342_cast_fp16")]; tensor var_22348_pad_type_0 = const()[name = tensor("op_22348_pad_type_0"), val = tensor("valid")]; tensor var_22348_strides_0 = const()[name = tensor("op_22348_strides_0"), val = tensor([1, 1])]; tensor var_22348_pad_0 = const()[name = tensor("op_22348_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22348_dilations_0 = const()[name = tensor("op_22348_dilations_0"), val = tensor([1, 1])]; tensor var_22348_groups_0 = const()[name = tensor("op_22348_groups_0"), val = tensor(1)]; tensor layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187358784))), name = tensor("layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187338816))), shape = tensor([1280, 1280, 1, 1])]; tensor var_22348_cast_fp16 = conv(dilations = var_22348_dilations_0, groups = var_22348_groups_0, pad = var_22348_pad_0, pad_type = var_22348_pad_type_0, strides = var_22348_strides_0, weight = layers_13_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_105_cast_fp16)[name = tensor("op_22348_cast_fp16")]; tensor obj_55_cast_fp16 = add(x = var_22342_cast_fp16, y = var_22348_cast_fp16)[name = tensor("obj_55_cast_fp16")]; tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = obj_55_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; tensor out_55_axes_0 = const()[name = tensor("out_55_axes_0"), val = tensor([1])]; tensor var_22359_to_fp16 = const()[name = tensor("op_22359_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_55_cast_fp16 = layer_norm(axes = out_55_axes_0, epsilon = var_22359_to_fp16, x = inputs_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; tensor input_107_gamma_0_to_fp16 = const()[name = tensor("input_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187563648)))]; tensor input_107_beta_0_to_fp16 = const()[name = tensor("input_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187566272)))]; tensor input_107_epsilon_0_to_fp16 = const()[name = tensor("input_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_107_cast_fp16 = batch_norm(beta = input_107_beta_0_to_fp16, epsilon = input_107_epsilon_0_to_fp16, gamma = input_107_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("input_107_cast_fp16")]; tensor var_22377_pad_type_0 = const()[name = tensor("op_22377_pad_type_0"), val = tensor("valid")]; tensor var_22377_strides_0 = const()[name = tensor("op_22377_strides_0"), val = tensor([1, 1])]; tensor var_22377_pad_0 = const()[name = tensor("op_22377_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22377_dilations_0 = const()[name = tensor("op_22377_dilations_0"), val = tensor([1, 1])]; tensor var_22377_groups_0 = const()[name = tensor("op_22377_groups_0"), val = tensor(1)]; tensor layers_13_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(187568896))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190845760))), name = tensor("layers_13_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_13_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190845888)))]; tensor var_22377_cast_fp16 = conv(bias = layers_13_fc1_inlier_module_bias_to_fp16, dilations = var_22377_dilations_0, groups = var_22377_groups_0, pad = var_22377_pad_0, pad_type = var_22377_pad_type_0, strides = var_22377_strides_0, weight = layers_13_fc1_inlier_module_weight_to_fp16_palettized, x = input_107_cast_fp16)[name = tensor("op_22377_cast_fp16")]; tensor var_22383_pad_type_0 = const()[name = tensor("op_22383_pad_type_0"), val = tensor("valid")]; tensor var_22383_strides_0 = const()[name = tensor("op_22383_strides_0"), val = tensor([1, 1])]; tensor var_22383_pad_0 = const()[name = tensor("op_22383_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22383_dilations_0 = const()[name = tensor("op_22383_dilations_0"), val = tensor([1, 1])]; tensor var_22383_groups_0 = const()[name = tensor("op_22383_groups_0"), val = tensor(1)]; tensor layers_13_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190903232))), name = tensor("layers_13_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(190856192))), shape = tensor([5120, 1280, 1, 1])]; tensor var_22383_cast_fp16 = conv(dilations = var_22383_dilations_0, groups = var_22383_groups_0, pad = var_22383_pad_0, pad_type = var_22383_pad_type_0, strides = var_22383_strides_0, weight = layers_13_fc1_outlier_module_weight_to_fp16_sparsified, x = input_107_cast_fp16)[name = tensor("op_22383_cast_fp16")]; tensor input_109_cast_fp16 = add(x = var_22377_cast_fp16, y = var_22383_cast_fp16)[name = tensor("input_109_cast_fp16")]; tensor input_111_mode_0 = const()[name = tensor("input_111_mode_0"), val = tensor("EXACT")]; tensor input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor("input_111_cast_fp16")]; tensor var_22394_pad_type_0 = const()[name = tensor("op_22394_pad_type_0"), val = tensor("valid")]; tensor var_22394_strides_0 = const()[name = tensor("op_22394_strides_0"), val = tensor([1, 1])]; tensor var_22394_pad_0 = const()[name = tensor("op_22394_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22394_dilations_0 = const()[name = tensor("op_22394_dilations_0"), val = tensor([1, 1])]; tensor var_22394_groups_0 = const()[name = tensor("op_22394_groups_0"), val = tensor(1)]; tensor layers_13_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(191722496))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194999360))), name = tensor("layers_13_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_13_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_13_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(194999488)))]; tensor var_22394_cast_fp16 = conv(bias = layers_13_fc2_inlier_module_bias_to_fp16, dilations = var_22394_dilations_0, groups = var_22394_groups_0, pad = var_22394_pad_0, pad_type = var_22394_pad_type_0, strides = var_22394_strides_0, weight = layers_13_fc2_inlier_module_weight_to_fp16_palettized, x = input_111_cast_fp16)[name = tensor("op_22394_cast_fp16")]; tensor var_22400_pad_type_0 = const()[name = tensor("op_22400_pad_type_0"), val = tensor("valid")]; tensor var_22400_strides_0 = const()[name = tensor("op_22400_strides_0"), val = tensor([1, 1])]; tensor var_22400_pad_0 = const()[name = tensor("op_22400_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22400_dilations_0 = const()[name = tensor("op_22400_dilations_0"), val = tensor([1, 1])]; tensor var_22400_groups_0 = const()[name = tensor("op_22400_groups_0"), val = tensor(1)]; tensor layers_13_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195176960))), name = tensor("layers_13_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195002112))), shape = tensor([1280, 5120, 1, 1])]; tensor var_22400_cast_fp16 = conv(dilations = var_22400_dilations_0, groups = var_22400_groups_0, pad = var_22400_pad_0, pad_type = var_22400_pad_type_0, strides = var_22400_strides_0, weight = layers_13_fc2_outlier_module_weight_to_fp16_sparsified, x = input_111_cast_fp16)[name = tensor("op_22400_cast_fp16")]; tensor hidden_states_31_cast_fp16 = add(x = var_22394_cast_fp16, y = var_22400_cast_fp16)[name = tensor("hidden_states_31_cast_fp16")]; tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = hidden_states_31_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; tensor var_22406 = const()[name = tensor("op_22406"), val = tensor(3)]; tensor var_22431 = const()[name = tensor("op_22431"), val = tensor(1)]; tensor out_57_axes_0 = const()[name = tensor("out_57_axes_0"), val = tensor([1])]; tensor var_22448_to_fp16 = const()[name = tensor("op_22448_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_57_cast_fp16 = layer_norm(axes = out_57_axes_0, epsilon = var_22448_to_fp16, x = inputs_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195996224)))]; tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(195998848)))]; tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_57_cast_fp16")]; tensor var_22470_pad_type_0 = const()[name = tensor("op_22470_pad_type_0"), val = tensor("valid")]; tensor var_22470_strides_0 = const()[name = tensor("op_22470_strides_0"), val = tensor([1, 1])]; tensor var_22470_pad_0 = const()[name = tensor("op_22470_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22470_dilations_0 = const()[name = tensor("op_22470_dilations_0"), val = tensor([1, 1])]; tensor var_22470_groups_0 = const()[name = tensor("op_22470_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196001472))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196820736))), name = tensor("layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_14_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196820864)))]; tensor var_22470_cast_fp16 = conv(bias = layers_14_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_22470_dilations_0, groups = var_22470_groups_0, pad = var_22470_pad_0, pad_type = var_22470_pad_type_0, strides = var_22470_strides_0, weight = layers_14_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_22470_cast_fp16")]; tensor var_22476_pad_type_0 = const()[name = tensor("op_22476_pad_type_0"), val = tensor("valid")]; tensor var_22476_strides_0 = const()[name = tensor("op_22476_strides_0"), val = tensor([1, 1])]; tensor var_22476_pad_0 = const()[name = tensor("op_22476_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22476_dilations_0 = const()[name = tensor("op_22476_dilations_0"), val = tensor([1, 1])]; tensor var_22476_groups_0 = const()[name = tensor("op_22476_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196868352))), name = tensor("layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(196823488))), shape = tensor([1280, 1280, 1, 1])]; tensor var_22476_cast_fp16 = conv(dilations = var_22476_dilations_0, groups = var_22476_groups_0, pad = var_22476_pad_0, pad_type = var_22476_pad_type_0, strides = var_22476_strides_0, weight = layers_14_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_22476_cast_fp16")]; tensor query_29_cast_fp16 = add(x = var_22470_cast_fp16, y = var_22476_cast_fp16)[name = tensor("query_29_cast_fp16")]; tensor var_22485_pad_type_0 = const()[name = tensor("op_22485_pad_type_0"), val = tensor("valid")]; tensor var_22485_strides_0 = const()[name = tensor("op_22485_strides_0"), val = tensor([1, 1])]; tensor var_22485_pad_0 = const()[name = tensor("op_22485_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22485_dilations_0 = const()[name = tensor("op_22485_dilations_0"), val = tensor([1, 1])]; tensor var_22485_groups_0 = const()[name = tensor("op_22485_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197073216))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197892480))), name = tensor("layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_22485_cast_fp16 = conv(dilations = var_22485_dilations_0, groups = var_22485_groups_0, pad = var_22485_pad_0, pad_type = var_22485_pad_type_0, strides = var_22485_strides_0, weight = layers_14_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_22485_cast_fp16")]; tensor var_22491_pad_type_0 = const()[name = tensor("op_22491_pad_type_0"), val = tensor("valid")]; tensor var_22491_strides_0 = const()[name = tensor("op_22491_strides_0"), val = tensor([1, 1])]; tensor var_22491_pad_0 = const()[name = tensor("op_22491_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22491_dilations_0 = const()[name = tensor("op_22491_dilations_0"), val = tensor([1, 1])]; tensor var_22491_groups_0 = const()[name = tensor("op_22491_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197921344))), name = tensor("layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197892608))), shape = tensor([1280, 1280, 1, 1])]; tensor var_22491_cast_fp16 = conv(dilations = var_22491_dilations_0, groups = var_22491_groups_0, pad = var_22491_pad_0, pad_type = var_22491_pad_type_0, strides = var_22491_strides_0, weight = layers_14_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_22491_cast_fp16")]; tensor key_29_cast_fp16 = add(x = var_22485_cast_fp16, y = var_22491_cast_fp16)[name = tensor("key_29_cast_fp16")]; tensor var_22501_pad_type_0 = const()[name = tensor("op_22501_pad_type_0"), val = tensor("valid")]; tensor var_22501_strides_0 = const()[name = tensor("op_22501_strides_0"), val = tensor([1, 1])]; tensor var_22501_pad_0 = const()[name = tensor("op_22501_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22501_dilations_0 = const()[name = tensor("op_22501_dilations_0"), val = tensor([1, 1])]; tensor var_22501_groups_0 = const()[name = tensor("op_22501_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198126208))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198945472))), name = tensor("layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_14_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198945600)))]; tensor var_22501_cast_fp16 = conv(bias = layers_14_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_22501_dilations_0, groups = var_22501_groups_0, pad = var_22501_pad_0, pad_type = var_22501_pad_type_0, strides = var_22501_strides_0, weight = layers_14_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_57_cast_fp16)[name = tensor("op_22501_cast_fp16")]; tensor var_22507_pad_type_0 = const()[name = tensor("op_22507_pad_type_0"), val = tensor("valid")]; tensor var_22507_strides_0 = const()[name = tensor("op_22507_strides_0"), val = tensor([1, 1])]; tensor var_22507_pad_0 = const()[name = tensor("op_22507_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_22507_dilations_0 = const()[name = tensor("op_22507_dilations_0"), val = tensor([1, 1])]; tensor var_22507_groups_0 = const()[name = tensor("op_22507_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198963968))), name = tensor("layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(198948224))), shape = tensor([1280, 1280, 1, 1])]; tensor var_22507_cast_fp16 = conv(dilations = var_22507_dilations_0, groups = var_22507_groups_0, pad = var_22507_pad_0, pad_type = var_22507_pad_type_0, strides = var_22507_strides_0, weight = layers_14_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_57_cast_fp16)[name = tensor("op_22507_cast_fp16")]; tensor value_29_cast_fp16 = add(x = var_22501_cast_fp16, y = var_22507_cast_fp16)[name = tensor("value_29_cast_fp16")]; tensor var_22513_begin_0 = const()[name = tensor("op_22513_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22513_end_0 = const()[name = tensor("op_22513_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22513_end_mask_0 = const()[name = tensor("op_22513_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22513_cast_fp16 = slice_by_index(begin = var_22513_begin_0, end = var_22513_end_0, end_mask = var_22513_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22513_cast_fp16")]; tensor var_22517_begin_0 = const()[name = tensor("op_22517_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_22517_end_0 = const()[name = tensor("op_22517_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_22517_end_mask_0 = const()[name = tensor("op_22517_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22517_cast_fp16 = slice_by_index(begin = var_22517_begin_0, end = var_22517_end_0, end_mask = var_22517_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22517_cast_fp16")]; tensor var_22521_begin_0 = const()[name = tensor("op_22521_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_22521_end_0 = const()[name = tensor("op_22521_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_22521_end_mask_0 = const()[name = tensor("op_22521_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22521_cast_fp16 = slice_by_index(begin = var_22521_begin_0, end = var_22521_end_0, end_mask = var_22521_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22521_cast_fp16")]; tensor var_22525_begin_0 = const()[name = tensor("op_22525_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_22525_end_0 = const()[name = tensor("op_22525_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_22525_end_mask_0 = const()[name = tensor("op_22525_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22525_cast_fp16 = slice_by_index(begin = var_22525_begin_0, end = var_22525_end_0, end_mask = var_22525_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22525_cast_fp16")]; tensor var_22529_begin_0 = const()[name = tensor("op_22529_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_22529_end_0 = const()[name = tensor("op_22529_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_22529_end_mask_0 = const()[name = tensor("op_22529_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22529_cast_fp16 = slice_by_index(begin = var_22529_begin_0, end = var_22529_end_0, end_mask = var_22529_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22529_cast_fp16")]; tensor var_22533_begin_0 = const()[name = tensor("op_22533_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_22533_end_0 = const()[name = tensor("op_22533_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_22533_end_mask_0 = const()[name = tensor("op_22533_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22533_cast_fp16 = slice_by_index(begin = var_22533_begin_0, end = var_22533_end_0, end_mask = var_22533_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22533_cast_fp16")]; tensor var_22537_begin_0 = const()[name = tensor("op_22537_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_22537_end_0 = const()[name = tensor("op_22537_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_22537_end_mask_0 = const()[name = tensor("op_22537_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22537_cast_fp16 = slice_by_index(begin = var_22537_begin_0, end = var_22537_end_0, end_mask = var_22537_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22537_cast_fp16")]; tensor var_22541_begin_0 = const()[name = tensor("op_22541_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_22541_end_0 = const()[name = tensor("op_22541_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_22541_end_mask_0 = const()[name = tensor("op_22541_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22541_cast_fp16 = slice_by_index(begin = var_22541_begin_0, end = var_22541_end_0, end_mask = var_22541_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22541_cast_fp16")]; tensor var_22545_begin_0 = const()[name = tensor("op_22545_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_22545_end_0 = const()[name = tensor("op_22545_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_22545_end_mask_0 = const()[name = tensor("op_22545_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22545_cast_fp16 = slice_by_index(begin = var_22545_begin_0, end = var_22545_end_0, end_mask = var_22545_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22545_cast_fp16")]; tensor var_22549_begin_0 = const()[name = tensor("op_22549_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_22549_end_0 = const()[name = tensor("op_22549_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_22549_end_mask_0 = const()[name = tensor("op_22549_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22549_cast_fp16 = slice_by_index(begin = var_22549_begin_0, end = var_22549_end_0, end_mask = var_22549_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22549_cast_fp16")]; tensor var_22553_begin_0 = const()[name = tensor("op_22553_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_22553_end_0 = const()[name = tensor("op_22553_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_22553_end_mask_0 = const()[name = tensor("op_22553_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22553_cast_fp16 = slice_by_index(begin = var_22553_begin_0, end = var_22553_end_0, end_mask = var_22553_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22553_cast_fp16")]; tensor var_22557_begin_0 = const()[name = tensor("op_22557_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_22557_end_0 = const()[name = tensor("op_22557_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_22557_end_mask_0 = const()[name = tensor("op_22557_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22557_cast_fp16 = slice_by_index(begin = var_22557_begin_0, end = var_22557_end_0, end_mask = var_22557_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22557_cast_fp16")]; tensor var_22561_begin_0 = const()[name = tensor("op_22561_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_22561_end_0 = const()[name = tensor("op_22561_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_22561_end_mask_0 = const()[name = tensor("op_22561_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22561_cast_fp16 = slice_by_index(begin = var_22561_begin_0, end = var_22561_end_0, end_mask = var_22561_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22561_cast_fp16")]; tensor var_22565_begin_0 = const()[name = tensor("op_22565_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_22565_end_0 = const()[name = tensor("op_22565_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_22565_end_mask_0 = const()[name = tensor("op_22565_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22565_cast_fp16 = slice_by_index(begin = var_22565_begin_0, end = var_22565_end_0, end_mask = var_22565_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22565_cast_fp16")]; tensor var_22569_begin_0 = const()[name = tensor("op_22569_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_22569_end_0 = const()[name = tensor("op_22569_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_22569_end_mask_0 = const()[name = tensor("op_22569_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22569_cast_fp16 = slice_by_index(begin = var_22569_begin_0, end = var_22569_end_0, end_mask = var_22569_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22569_cast_fp16")]; tensor var_22573_begin_0 = const()[name = tensor("op_22573_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_22573_end_0 = const()[name = tensor("op_22573_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_22573_end_mask_0 = const()[name = tensor("op_22573_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22573_cast_fp16 = slice_by_index(begin = var_22573_begin_0, end = var_22573_end_0, end_mask = var_22573_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22573_cast_fp16")]; tensor var_22577_begin_0 = const()[name = tensor("op_22577_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_22577_end_0 = const()[name = tensor("op_22577_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_22577_end_mask_0 = const()[name = tensor("op_22577_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22577_cast_fp16 = slice_by_index(begin = var_22577_begin_0, end = var_22577_end_0, end_mask = var_22577_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22577_cast_fp16")]; tensor var_22581_begin_0 = const()[name = tensor("op_22581_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_22581_end_0 = const()[name = tensor("op_22581_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_22581_end_mask_0 = const()[name = tensor("op_22581_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22581_cast_fp16 = slice_by_index(begin = var_22581_begin_0, end = var_22581_end_0, end_mask = var_22581_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22581_cast_fp16")]; tensor var_22585_begin_0 = const()[name = tensor("op_22585_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_22585_end_0 = const()[name = tensor("op_22585_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_22585_end_mask_0 = const()[name = tensor("op_22585_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22585_cast_fp16 = slice_by_index(begin = var_22585_begin_0, end = var_22585_end_0, end_mask = var_22585_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22585_cast_fp16")]; tensor var_22589_begin_0 = const()[name = tensor("op_22589_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_22589_end_0 = const()[name = tensor("op_22589_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_22589_end_mask_0 = const()[name = tensor("op_22589_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_22589_cast_fp16 = slice_by_index(begin = var_22589_begin_0, end = var_22589_end_0, end_mask = var_22589_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_22589_cast_fp16")]; tensor var_22598_begin_0 = const()[name = tensor("op_22598_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22598_end_0 = const()[name = tensor("op_22598_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22598_end_mask_0 = const()[name = tensor("op_22598_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22598_cast_fp16 = slice_by_index(begin = var_22598_begin_0, end = var_22598_end_0, end_mask = var_22598_end_mask_0, x = var_22513_cast_fp16)[name = tensor("op_22598_cast_fp16")]; tensor var_22605_begin_0 = const()[name = tensor("op_22605_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22605_end_0 = const()[name = tensor("op_22605_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22605_end_mask_0 = const()[name = tensor("op_22605_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22605_cast_fp16 = slice_by_index(begin = var_22605_begin_0, end = var_22605_end_0, end_mask = var_22605_end_mask_0, x = var_22513_cast_fp16)[name = tensor("op_22605_cast_fp16")]; tensor var_22612_begin_0 = const()[name = tensor("op_22612_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22612_end_0 = const()[name = tensor("op_22612_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22612_end_mask_0 = const()[name = tensor("op_22612_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22612_cast_fp16 = slice_by_index(begin = var_22612_begin_0, end = var_22612_end_0, end_mask = var_22612_end_mask_0, x = var_22513_cast_fp16)[name = tensor("op_22612_cast_fp16")]; tensor var_22619_begin_0 = const()[name = tensor("op_22619_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22619_end_0 = const()[name = tensor("op_22619_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22619_end_mask_0 = const()[name = tensor("op_22619_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22619_cast_fp16 = slice_by_index(begin = var_22619_begin_0, end = var_22619_end_0, end_mask = var_22619_end_mask_0, x = var_22513_cast_fp16)[name = tensor("op_22619_cast_fp16")]; tensor var_22626_begin_0 = const()[name = tensor("op_22626_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22626_end_0 = const()[name = tensor("op_22626_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22626_end_mask_0 = const()[name = tensor("op_22626_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22626_cast_fp16 = slice_by_index(begin = var_22626_begin_0, end = var_22626_end_0, end_mask = var_22626_end_mask_0, x = var_22517_cast_fp16)[name = tensor("op_22626_cast_fp16")]; tensor var_22633_begin_0 = const()[name = tensor("op_22633_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22633_end_0 = const()[name = tensor("op_22633_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22633_end_mask_0 = const()[name = tensor("op_22633_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22633_cast_fp16 = slice_by_index(begin = var_22633_begin_0, end = var_22633_end_0, end_mask = var_22633_end_mask_0, x = var_22517_cast_fp16)[name = tensor("op_22633_cast_fp16")]; tensor var_22640_begin_0 = const()[name = tensor("op_22640_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22640_end_0 = const()[name = tensor("op_22640_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22640_end_mask_0 = const()[name = tensor("op_22640_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22640_cast_fp16 = slice_by_index(begin = var_22640_begin_0, end = var_22640_end_0, end_mask = var_22640_end_mask_0, x = var_22517_cast_fp16)[name = tensor("op_22640_cast_fp16")]; tensor var_22647_begin_0 = const()[name = tensor("op_22647_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22647_end_0 = const()[name = tensor("op_22647_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22647_end_mask_0 = const()[name = tensor("op_22647_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22647_cast_fp16 = slice_by_index(begin = var_22647_begin_0, end = var_22647_end_0, end_mask = var_22647_end_mask_0, x = var_22517_cast_fp16)[name = tensor("op_22647_cast_fp16")]; tensor var_22654_begin_0 = const()[name = tensor("op_22654_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22654_end_0 = const()[name = tensor("op_22654_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22654_end_mask_0 = const()[name = tensor("op_22654_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22654_cast_fp16 = slice_by_index(begin = var_22654_begin_0, end = var_22654_end_0, end_mask = var_22654_end_mask_0, x = var_22521_cast_fp16)[name = tensor("op_22654_cast_fp16")]; tensor var_22661_begin_0 = const()[name = tensor("op_22661_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22661_end_0 = const()[name = tensor("op_22661_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22661_end_mask_0 = const()[name = tensor("op_22661_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22661_cast_fp16 = slice_by_index(begin = var_22661_begin_0, end = var_22661_end_0, end_mask = var_22661_end_mask_0, x = var_22521_cast_fp16)[name = tensor("op_22661_cast_fp16")]; tensor var_22668_begin_0 = const()[name = tensor("op_22668_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22668_end_0 = const()[name = tensor("op_22668_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22668_end_mask_0 = const()[name = tensor("op_22668_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22668_cast_fp16 = slice_by_index(begin = var_22668_begin_0, end = var_22668_end_0, end_mask = var_22668_end_mask_0, x = var_22521_cast_fp16)[name = tensor("op_22668_cast_fp16")]; tensor var_22675_begin_0 = const()[name = tensor("op_22675_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22675_end_0 = const()[name = tensor("op_22675_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22675_end_mask_0 = const()[name = tensor("op_22675_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22675_cast_fp16 = slice_by_index(begin = var_22675_begin_0, end = var_22675_end_0, end_mask = var_22675_end_mask_0, x = var_22521_cast_fp16)[name = tensor("op_22675_cast_fp16")]; tensor var_22682_begin_0 = const()[name = tensor("op_22682_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22682_end_0 = const()[name = tensor("op_22682_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22682_end_mask_0 = const()[name = tensor("op_22682_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22682_cast_fp16 = slice_by_index(begin = var_22682_begin_0, end = var_22682_end_0, end_mask = var_22682_end_mask_0, x = var_22525_cast_fp16)[name = tensor("op_22682_cast_fp16")]; tensor var_22689_begin_0 = const()[name = tensor("op_22689_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22689_end_0 = const()[name = tensor("op_22689_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22689_end_mask_0 = const()[name = tensor("op_22689_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22689_cast_fp16 = slice_by_index(begin = var_22689_begin_0, end = var_22689_end_0, end_mask = var_22689_end_mask_0, x = var_22525_cast_fp16)[name = tensor("op_22689_cast_fp16")]; tensor var_22696_begin_0 = const()[name = tensor("op_22696_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22696_end_0 = const()[name = tensor("op_22696_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22696_end_mask_0 = const()[name = tensor("op_22696_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22696_cast_fp16 = slice_by_index(begin = var_22696_begin_0, end = var_22696_end_0, end_mask = var_22696_end_mask_0, x = var_22525_cast_fp16)[name = tensor("op_22696_cast_fp16")]; tensor var_22703_begin_0 = const()[name = tensor("op_22703_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22703_end_0 = const()[name = tensor("op_22703_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22703_end_mask_0 = const()[name = tensor("op_22703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22703_cast_fp16 = slice_by_index(begin = var_22703_begin_0, end = var_22703_end_0, end_mask = var_22703_end_mask_0, x = var_22525_cast_fp16)[name = tensor("op_22703_cast_fp16")]; tensor var_22710_begin_0 = const()[name = tensor("op_22710_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22710_end_0 = const()[name = tensor("op_22710_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22710_end_mask_0 = const()[name = tensor("op_22710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22710_cast_fp16 = slice_by_index(begin = var_22710_begin_0, end = var_22710_end_0, end_mask = var_22710_end_mask_0, x = var_22529_cast_fp16)[name = tensor("op_22710_cast_fp16")]; tensor var_22717_begin_0 = const()[name = tensor("op_22717_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22717_end_0 = const()[name = tensor("op_22717_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22717_end_mask_0 = const()[name = tensor("op_22717_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22717_cast_fp16 = slice_by_index(begin = var_22717_begin_0, end = var_22717_end_0, end_mask = var_22717_end_mask_0, x = var_22529_cast_fp16)[name = tensor("op_22717_cast_fp16")]; tensor var_22724_begin_0 = const()[name = tensor("op_22724_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22724_end_0 = const()[name = tensor("op_22724_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22724_end_mask_0 = const()[name = tensor("op_22724_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22724_cast_fp16 = slice_by_index(begin = var_22724_begin_0, end = var_22724_end_0, end_mask = var_22724_end_mask_0, x = var_22529_cast_fp16)[name = tensor("op_22724_cast_fp16")]; tensor var_22731_begin_0 = const()[name = tensor("op_22731_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22731_end_0 = const()[name = tensor("op_22731_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22731_end_mask_0 = const()[name = tensor("op_22731_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22731_cast_fp16 = slice_by_index(begin = var_22731_begin_0, end = var_22731_end_0, end_mask = var_22731_end_mask_0, x = var_22529_cast_fp16)[name = tensor("op_22731_cast_fp16")]; tensor var_22738_begin_0 = const()[name = tensor("op_22738_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22738_end_0 = const()[name = tensor("op_22738_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22738_end_mask_0 = const()[name = tensor("op_22738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22738_cast_fp16 = slice_by_index(begin = var_22738_begin_0, end = var_22738_end_0, end_mask = var_22738_end_mask_0, x = var_22533_cast_fp16)[name = tensor("op_22738_cast_fp16")]; tensor var_22745_begin_0 = const()[name = tensor("op_22745_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22745_end_0 = const()[name = tensor("op_22745_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22745_end_mask_0 = const()[name = tensor("op_22745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22745_cast_fp16 = slice_by_index(begin = var_22745_begin_0, end = var_22745_end_0, end_mask = var_22745_end_mask_0, x = var_22533_cast_fp16)[name = tensor("op_22745_cast_fp16")]; tensor var_22752_begin_0 = const()[name = tensor("op_22752_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22752_end_0 = const()[name = tensor("op_22752_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22752_end_mask_0 = const()[name = tensor("op_22752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22752_cast_fp16 = slice_by_index(begin = var_22752_begin_0, end = var_22752_end_0, end_mask = var_22752_end_mask_0, x = var_22533_cast_fp16)[name = tensor("op_22752_cast_fp16")]; tensor var_22759_begin_0 = const()[name = tensor("op_22759_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22759_end_0 = const()[name = tensor("op_22759_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22759_end_mask_0 = const()[name = tensor("op_22759_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22759_cast_fp16 = slice_by_index(begin = var_22759_begin_0, end = var_22759_end_0, end_mask = var_22759_end_mask_0, x = var_22533_cast_fp16)[name = tensor("op_22759_cast_fp16")]; tensor var_22766_begin_0 = const()[name = tensor("op_22766_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22766_end_0 = const()[name = tensor("op_22766_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22766_end_mask_0 = const()[name = tensor("op_22766_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22766_cast_fp16 = slice_by_index(begin = var_22766_begin_0, end = var_22766_end_0, end_mask = var_22766_end_mask_0, x = var_22537_cast_fp16)[name = tensor("op_22766_cast_fp16")]; tensor var_22773_begin_0 = const()[name = tensor("op_22773_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22773_end_0 = const()[name = tensor("op_22773_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22773_end_mask_0 = const()[name = tensor("op_22773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22773_cast_fp16 = slice_by_index(begin = var_22773_begin_0, end = var_22773_end_0, end_mask = var_22773_end_mask_0, x = var_22537_cast_fp16)[name = tensor("op_22773_cast_fp16")]; tensor var_22780_begin_0 = const()[name = tensor("op_22780_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22780_end_0 = const()[name = tensor("op_22780_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22780_end_mask_0 = const()[name = tensor("op_22780_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22780_cast_fp16 = slice_by_index(begin = var_22780_begin_0, end = var_22780_end_0, end_mask = var_22780_end_mask_0, x = var_22537_cast_fp16)[name = tensor("op_22780_cast_fp16")]; tensor var_22787_begin_0 = const()[name = tensor("op_22787_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22787_end_0 = const()[name = tensor("op_22787_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22787_end_mask_0 = const()[name = tensor("op_22787_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22787_cast_fp16 = slice_by_index(begin = var_22787_begin_0, end = var_22787_end_0, end_mask = var_22787_end_mask_0, x = var_22537_cast_fp16)[name = tensor("op_22787_cast_fp16")]; tensor var_22794_begin_0 = const()[name = tensor("op_22794_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22794_end_0 = const()[name = tensor("op_22794_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22794_end_mask_0 = const()[name = tensor("op_22794_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22794_cast_fp16 = slice_by_index(begin = var_22794_begin_0, end = var_22794_end_0, end_mask = var_22794_end_mask_0, x = var_22541_cast_fp16)[name = tensor("op_22794_cast_fp16")]; tensor var_22801_begin_0 = const()[name = tensor("op_22801_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22801_end_0 = const()[name = tensor("op_22801_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22801_end_mask_0 = const()[name = tensor("op_22801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22801_cast_fp16 = slice_by_index(begin = var_22801_begin_0, end = var_22801_end_0, end_mask = var_22801_end_mask_0, x = var_22541_cast_fp16)[name = tensor("op_22801_cast_fp16")]; tensor var_22808_begin_0 = const()[name = tensor("op_22808_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22808_end_0 = const()[name = tensor("op_22808_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22808_end_mask_0 = const()[name = tensor("op_22808_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22808_cast_fp16 = slice_by_index(begin = var_22808_begin_0, end = var_22808_end_0, end_mask = var_22808_end_mask_0, x = var_22541_cast_fp16)[name = tensor("op_22808_cast_fp16")]; tensor var_22815_begin_0 = const()[name = tensor("op_22815_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22815_end_0 = const()[name = tensor("op_22815_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22815_end_mask_0 = const()[name = tensor("op_22815_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22815_cast_fp16 = slice_by_index(begin = var_22815_begin_0, end = var_22815_end_0, end_mask = var_22815_end_mask_0, x = var_22541_cast_fp16)[name = tensor("op_22815_cast_fp16")]; tensor var_22822_begin_0 = const()[name = tensor("op_22822_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22822_end_0 = const()[name = tensor("op_22822_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22822_end_mask_0 = const()[name = tensor("op_22822_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22822_cast_fp16 = slice_by_index(begin = var_22822_begin_0, end = var_22822_end_0, end_mask = var_22822_end_mask_0, x = var_22545_cast_fp16)[name = tensor("op_22822_cast_fp16")]; tensor var_22829_begin_0 = const()[name = tensor("op_22829_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22829_end_0 = const()[name = tensor("op_22829_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22829_end_mask_0 = const()[name = tensor("op_22829_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22829_cast_fp16 = slice_by_index(begin = var_22829_begin_0, end = var_22829_end_0, end_mask = var_22829_end_mask_0, x = var_22545_cast_fp16)[name = tensor("op_22829_cast_fp16")]; tensor var_22836_begin_0 = const()[name = tensor("op_22836_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22836_end_0 = const()[name = tensor("op_22836_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22836_end_mask_0 = const()[name = tensor("op_22836_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22836_cast_fp16 = slice_by_index(begin = var_22836_begin_0, end = var_22836_end_0, end_mask = var_22836_end_mask_0, x = var_22545_cast_fp16)[name = tensor("op_22836_cast_fp16")]; tensor var_22843_begin_0 = const()[name = tensor("op_22843_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22843_end_0 = const()[name = tensor("op_22843_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22843_end_mask_0 = const()[name = tensor("op_22843_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22843_cast_fp16 = slice_by_index(begin = var_22843_begin_0, end = var_22843_end_0, end_mask = var_22843_end_mask_0, x = var_22545_cast_fp16)[name = tensor("op_22843_cast_fp16")]; tensor var_22850_begin_0 = const()[name = tensor("op_22850_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22850_end_0 = const()[name = tensor("op_22850_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22850_end_mask_0 = const()[name = tensor("op_22850_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22850_cast_fp16 = slice_by_index(begin = var_22850_begin_0, end = var_22850_end_0, end_mask = var_22850_end_mask_0, x = var_22549_cast_fp16)[name = tensor("op_22850_cast_fp16")]; tensor var_22857_begin_0 = const()[name = tensor("op_22857_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22857_end_0 = const()[name = tensor("op_22857_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22857_end_mask_0 = const()[name = tensor("op_22857_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22857_cast_fp16 = slice_by_index(begin = var_22857_begin_0, end = var_22857_end_0, end_mask = var_22857_end_mask_0, x = var_22549_cast_fp16)[name = tensor("op_22857_cast_fp16")]; tensor var_22864_begin_0 = const()[name = tensor("op_22864_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22864_end_0 = const()[name = tensor("op_22864_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22864_end_mask_0 = const()[name = tensor("op_22864_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22864_cast_fp16 = slice_by_index(begin = var_22864_begin_0, end = var_22864_end_0, end_mask = var_22864_end_mask_0, x = var_22549_cast_fp16)[name = tensor("op_22864_cast_fp16")]; tensor var_22871_begin_0 = const()[name = tensor("op_22871_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22871_end_0 = const()[name = tensor("op_22871_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22871_end_mask_0 = const()[name = tensor("op_22871_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22871_cast_fp16 = slice_by_index(begin = var_22871_begin_0, end = var_22871_end_0, end_mask = var_22871_end_mask_0, x = var_22549_cast_fp16)[name = tensor("op_22871_cast_fp16")]; tensor var_22878_begin_0 = const()[name = tensor("op_22878_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22878_end_0 = const()[name = tensor("op_22878_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22878_end_mask_0 = const()[name = tensor("op_22878_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22878_cast_fp16 = slice_by_index(begin = var_22878_begin_0, end = var_22878_end_0, end_mask = var_22878_end_mask_0, x = var_22553_cast_fp16)[name = tensor("op_22878_cast_fp16")]; tensor var_22885_begin_0 = const()[name = tensor("op_22885_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22885_end_0 = const()[name = tensor("op_22885_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22885_end_mask_0 = const()[name = tensor("op_22885_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22885_cast_fp16 = slice_by_index(begin = var_22885_begin_0, end = var_22885_end_0, end_mask = var_22885_end_mask_0, x = var_22553_cast_fp16)[name = tensor("op_22885_cast_fp16")]; tensor var_22892_begin_0 = const()[name = tensor("op_22892_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22892_end_0 = const()[name = tensor("op_22892_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22892_end_mask_0 = const()[name = tensor("op_22892_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22892_cast_fp16 = slice_by_index(begin = var_22892_begin_0, end = var_22892_end_0, end_mask = var_22892_end_mask_0, x = var_22553_cast_fp16)[name = tensor("op_22892_cast_fp16")]; tensor var_22899_begin_0 = const()[name = tensor("op_22899_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22899_end_0 = const()[name = tensor("op_22899_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22899_end_mask_0 = const()[name = tensor("op_22899_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22899_cast_fp16 = slice_by_index(begin = var_22899_begin_0, end = var_22899_end_0, end_mask = var_22899_end_mask_0, x = var_22553_cast_fp16)[name = tensor("op_22899_cast_fp16")]; tensor var_22906_begin_0 = const()[name = tensor("op_22906_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22906_end_0 = const()[name = tensor("op_22906_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22906_end_mask_0 = const()[name = tensor("op_22906_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22906_cast_fp16 = slice_by_index(begin = var_22906_begin_0, end = var_22906_end_0, end_mask = var_22906_end_mask_0, x = var_22557_cast_fp16)[name = tensor("op_22906_cast_fp16")]; tensor var_22913_begin_0 = const()[name = tensor("op_22913_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22913_end_0 = const()[name = tensor("op_22913_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22913_end_mask_0 = const()[name = tensor("op_22913_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22913_cast_fp16 = slice_by_index(begin = var_22913_begin_0, end = var_22913_end_0, end_mask = var_22913_end_mask_0, x = var_22557_cast_fp16)[name = tensor("op_22913_cast_fp16")]; tensor var_22920_begin_0 = const()[name = tensor("op_22920_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22920_end_0 = const()[name = tensor("op_22920_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22920_end_mask_0 = const()[name = tensor("op_22920_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22920_cast_fp16 = slice_by_index(begin = var_22920_begin_0, end = var_22920_end_0, end_mask = var_22920_end_mask_0, x = var_22557_cast_fp16)[name = tensor("op_22920_cast_fp16")]; tensor var_22927_begin_0 = const()[name = tensor("op_22927_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22927_end_0 = const()[name = tensor("op_22927_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22927_end_mask_0 = const()[name = tensor("op_22927_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22927_cast_fp16 = slice_by_index(begin = var_22927_begin_0, end = var_22927_end_0, end_mask = var_22927_end_mask_0, x = var_22557_cast_fp16)[name = tensor("op_22927_cast_fp16")]; tensor var_22934_begin_0 = const()[name = tensor("op_22934_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22934_end_0 = const()[name = tensor("op_22934_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22934_end_mask_0 = const()[name = tensor("op_22934_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22934_cast_fp16 = slice_by_index(begin = var_22934_begin_0, end = var_22934_end_0, end_mask = var_22934_end_mask_0, x = var_22561_cast_fp16)[name = tensor("op_22934_cast_fp16")]; tensor var_22941_begin_0 = const()[name = tensor("op_22941_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22941_end_0 = const()[name = tensor("op_22941_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22941_end_mask_0 = const()[name = tensor("op_22941_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22941_cast_fp16 = slice_by_index(begin = var_22941_begin_0, end = var_22941_end_0, end_mask = var_22941_end_mask_0, x = var_22561_cast_fp16)[name = tensor("op_22941_cast_fp16")]; tensor var_22948_begin_0 = const()[name = tensor("op_22948_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22948_end_0 = const()[name = tensor("op_22948_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22948_end_mask_0 = const()[name = tensor("op_22948_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22948_cast_fp16 = slice_by_index(begin = var_22948_begin_0, end = var_22948_end_0, end_mask = var_22948_end_mask_0, x = var_22561_cast_fp16)[name = tensor("op_22948_cast_fp16")]; tensor var_22955_begin_0 = const()[name = tensor("op_22955_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22955_end_0 = const()[name = tensor("op_22955_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22955_end_mask_0 = const()[name = tensor("op_22955_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22955_cast_fp16 = slice_by_index(begin = var_22955_begin_0, end = var_22955_end_0, end_mask = var_22955_end_mask_0, x = var_22561_cast_fp16)[name = tensor("op_22955_cast_fp16")]; tensor var_22962_begin_0 = const()[name = tensor("op_22962_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22962_end_0 = const()[name = tensor("op_22962_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22962_end_mask_0 = const()[name = tensor("op_22962_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22962_cast_fp16 = slice_by_index(begin = var_22962_begin_0, end = var_22962_end_0, end_mask = var_22962_end_mask_0, x = var_22565_cast_fp16)[name = tensor("op_22962_cast_fp16")]; tensor var_22969_begin_0 = const()[name = tensor("op_22969_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22969_end_0 = const()[name = tensor("op_22969_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22969_end_mask_0 = const()[name = tensor("op_22969_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22969_cast_fp16 = slice_by_index(begin = var_22969_begin_0, end = var_22969_end_0, end_mask = var_22969_end_mask_0, x = var_22565_cast_fp16)[name = tensor("op_22969_cast_fp16")]; tensor var_22976_begin_0 = const()[name = tensor("op_22976_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_22976_end_0 = const()[name = tensor("op_22976_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_22976_end_mask_0 = const()[name = tensor("op_22976_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22976_cast_fp16 = slice_by_index(begin = var_22976_begin_0, end = var_22976_end_0, end_mask = var_22976_end_mask_0, x = var_22565_cast_fp16)[name = tensor("op_22976_cast_fp16")]; tensor var_22983_begin_0 = const()[name = tensor("op_22983_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_22983_end_0 = const()[name = tensor("op_22983_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_22983_end_mask_0 = const()[name = tensor("op_22983_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22983_cast_fp16 = slice_by_index(begin = var_22983_begin_0, end = var_22983_end_0, end_mask = var_22983_end_mask_0, x = var_22565_cast_fp16)[name = tensor("op_22983_cast_fp16")]; tensor var_22990_begin_0 = const()[name = tensor("op_22990_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_22990_end_0 = const()[name = tensor("op_22990_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_22990_end_mask_0 = const()[name = tensor("op_22990_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22990_cast_fp16 = slice_by_index(begin = var_22990_begin_0, end = var_22990_end_0, end_mask = var_22990_end_mask_0, x = var_22569_cast_fp16)[name = tensor("op_22990_cast_fp16")]; tensor var_22997_begin_0 = const()[name = tensor("op_22997_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_22997_end_0 = const()[name = tensor("op_22997_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_22997_end_mask_0 = const()[name = tensor("op_22997_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_22997_cast_fp16 = slice_by_index(begin = var_22997_begin_0, end = var_22997_end_0, end_mask = var_22997_end_mask_0, x = var_22569_cast_fp16)[name = tensor("op_22997_cast_fp16")]; tensor var_23004_begin_0 = const()[name = tensor("op_23004_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_23004_end_0 = const()[name = tensor("op_23004_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_23004_end_mask_0 = const()[name = tensor("op_23004_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23004_cast_fp16 = slice_by_index(begin = var_23004_begin_0, end = var_23004_end_0, end_mask = var_23004_end_mask_0, x = var_22569_cast_fp16)[name = tensor("op_23004_cast_fp16")]; tensor var_23011_begin_0 = const()[name = tensor("op_23011_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_23011_end_0 = const()[name = tensor("op_23011_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_23011_end_mask_0 = const()[name = tensor("op_23011_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23011_cast_fp16 = slice_by_index(begin = var_23011_begin_0, end = var_23011_end_0, end_mask = var_23011_end_mask_0, x = var_22569_cast_fp16)[name = tensor("op_23011_cast_fp16")]; tensor var_23018_begin_0 = const()[name = tensor("op_23018_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_23018_end_0 = const()[name = tensor("op_23018_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_23018_end_mask_0 = const()[name = tensor("op_23018_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23018_cast_fp16 = slice_by_index(begin = var_23018_begin_0, end = var_23018_end_0, end_mask = var_23018_end_mask_0, x = var_22573_cast_fp16)[name = tensor("op_23018_cast_fp16")]; tensor var_23025_begin_0 = const()[name = tensor("op_23025_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_23025_end_0 = const()[name = tensor("op_23025_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_23025_end_mask_0 = const()[name = tensor("op_23025_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23025_cast_fp16 = slice_by_index(begin = var_23025_begin_0, end = var_23025_end_0, end_mask = var_23025_end_mask_0, x = var_22573_cast_fp16)[name = tensor("op_23025_cast_fp16")]; tensor var_23032_begin_0 = const()[name = tensor("op_23032_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_23032_end_0 = const()[name = tensor("op_23032_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_23032_end_mask_0 = const()[name = tensor("op_23032_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23032_cast_fp16 = slice_by_index(begin = var_23032_begin_0, end = var_23032_end_0, end_mask = var_23032_end_mask_0, x = var_22573_cast_fp16)[name = tensor("op_23032_cast_fp16")]; tensor var_23039_begin_0 = const()[name = tensor("op_23039_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_23039_end_0 = const()[name = tensor("op_23039_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_23039_end_mask_0 = const()[name = tensor("op_23039_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23039_cast_fp16 = slice_by_index(begin = var_23039_begin_0, end = var_23039_end_0, end_mask = var_23039_end_mask_0, x = var_22573_cast_fp16)[name = tensor("op_23039_cast_fp16")]; tensor var_23046_begin_0 = const()[name = tensor("op_23046_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_23046_end_0 = const()[name = tensor("op_23046_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_23046_end_mask_0 = const()[name = tensor("op_23046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23046_cast_fp16 = slice_by_index(begin = var_23046_begin_0, end = var_23046_end_0, end_mask = var_23046_end_mask_0, x = var_22577_cast_fp16)[name = tensor("op_23046_cast_fp16")]; tensor var_23053_begin_0 = const()[name = tensor("op_23053_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_23053_end_0 = const()[name = tensor("op_23053_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_23053_end_mask_0 = const()[name = tensor("op_23053_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23053_cast_fp16 = slice_by_index(begin = var_23053_begin_0, end = var_23053_end_0, end_mask = var_23053_end_mask_0, x = var_22577_cast_fp16)[name = tensor("op_23053_cast_fp16")]; tensor var_23060_begin_0 = const()[name = tensor("op_23060_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_23060_end_0 = const()[name = tensor("op_23060_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_23060_end_mask_0 = const()[name = tensor("op_23060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23060_cast_fp16 = slice_by_index(begin = var_23060_begin_0, end = var_23060_end_0, end_mask = var_23060_end_mask_0, x = var_22577_cast_fp16)[name = tensor("op_23060_cast_fp16")]; tensor var_23067_begin_0 = const()[name = tensor("op_23067_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_23067_end_0 = const()[name = tensor("op_23067_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_23067_end_mask_0 = const()[name = tensor("op_23067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23067_cast_fp16 = slice_by_index(begin = var_23067_begin_0, end = var_23067_end_0, end_mask = var_23067_end_mask_0, x = var_22577_cast_fp16)[name = tensor("op_23067_cast_fp16")]; tensor var_23074_begin_0 = const()[name = tensor("op_23074_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_23074_end_0 = const()[name = tensor("op_23074_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_23074_end_mask_0 = const()[name = tensor("op_23074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23074_cast_fp16 = slice_by_index(begin = var_23074_begin_0, end = var_23074_end_0, end_mask = var_23074_end_mask_0, x = var_22581_cast_fp16)[name = tensor("op_23074_cast_fp16")]; tensor var_23081_begin_0 = const()[name = tensor("op_23081_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_23081_end_0 = const()[name = tensor("op_23081_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_23081_end_mask_0 = const()[name = tensor("op_23081_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23081_cast_fp16 = slice_by_index(begin = var_23081_begin_0, end = var_23081_end_0, end_mask = var_23081_end_mask_0, x = var_22581_cast_fp16)[name = tensor("op_23081_cast_fp16")]; tensor var_23088_begin_0 = const()[name = tensor("op_23088_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_23088_end_0 = const()[name = tensor("op_23088_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_23088_end_mask_0 = const()[name = tensor("op_23088_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23088_cast_fp16 = slice_by_index(begin = var_23088_begin_0, end = var_23088_end_0, end_mask = var_23088_end_mask_0, x = var_22581_cast_fp16)[name = tensor("op_23088_cast_fp16")]; tensor var_23095_begin_0 = const()[name = tensor("op_23095_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_23095_end_0 = const()[name = tensor("op_23095_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_23095_end_mask_0 = const()[name = tensor("op_23095_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23095_cast_fp16 = slice_by_index(begin = var_23095_begin_0, end = var_23095_end_0, end_mask = var_23095_end_mask_0, x = var_22581_cast_fp16)[name = tensor("op_23095_cast_fp16")]; tensor var_23102_begin_0 = const()[name = tensor("op_23102_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_23102_end_0 = const()[name = tensor("op_23102_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_23102_end_mask_0 = const()[name = tensor("op_23102_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23102_cast_fp16 = slice_by_index(begin = var_23102_begin_0, end = var_23102_end_0, end_mask = var_23102_end_mask_0, x = var_22585_cast_fp16)[name = tensor("op_23102_cast_fp16")]; tensor var_23109_begin_0 = const()[name = tensor("op_23109_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_23109_end_0 = const()[name = tensor("op_23109_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_23109_end_mask_0 = const()[name = tensor("op_23109_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23109_cast_fp16 = slice_by_index(begin = var_23109_begin_0, end = var_23109_end_0, end_mask = var_23109_end_mask_0, x = var_22585_cast_fp16)[name = tensor("op_23109_cast_fp16")]; tensor var_23116_begin_0 = const()[name = tensor("op_23116_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_23116_end_0 = const()[name = tensor("op_23116_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_23116_end_mask_0 = const()[name = tensor("op_23116_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23116_cast_fp16 = slice_by_index(begin = var_23116_begin_0, end = var_23116_end_0, end_mask = var_23116_end_mask_0, x = var_22585_cast_fp16)[name = tensor("op_23116_cast_fp16")]; tensor var_23123_begin_0 = const()[name = tensor("op_23123_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_23123_end_0 = const()[name = tensor("op_23123_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_23123_end_mask_0 = const()[name = tensor("op_23123_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23123_cast_fp16 = slice_by_index(begin = var_23123_begin_0, end = var_23123_end_0, end_mask = var_23123_end_mask_0, x = var_22585_cast_fp16)[name = tensor("op_23123_cast_fp16")]; tensor var_23130_begin_0 = const()[name = tensor("op_23130_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_23130_end_0 = const()[name = tensor("op_23130_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_23130_end_mask_0 = const()[name = tensor("op_23130_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23130_cast_fp16 = slice_by_index(begin = var_23130_begin_0, end = var_23130_end_0, end_mask = var_23130_end_mask_0, x = var_22589_cast_fp16)[name = tensor("op_23130_cast_fp16")]; tensor var_23137_begin_0 = const()[name = tensor("op_23137_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_23137_end_0 = const()[name = tensor("op_23137_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_23137_end_mask_0 = const()[name = tensor("op_23137_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23137_cast_fp16 = slice_by_index(begin = var_23137_begin_0, end = var_23137_end_0, end_mask = var_23137_end_mask_0, x = var_22589_cast_fp16)[name = tensor("op_23137_cast_fp16")]; tensor var_23144_begin_0 = const()[name = tensor("op_23144_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_23144_end_0 = const()[name = tensor("op_23144_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_23144_end_mask_0 = const()[name = tensor("op_23144_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23144_cast_fp16 = slice_by_index(begin = var_23144_begin_0, end = var_23144_end_0, end_mask = var_23144_end_mask_0, x = var_22589_cast_fp16)[name = tensor("op_23144_cast_fp16")]; tensor var_23151_begin_0 = const()[name = tensor("op_23151_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_23151_end_0 = const()[name = tensor("op_23151_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_23151_end_mask_0 = const()[name = tensor("op_23151_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23151_cast_fp16 = slice_by_index(begin = var_23151_begin_0, end = var_23151_end_0, end_mask = var_23151_end_mask_0, x = var_22589_cast_fp16)[name = tensor("op_23151_cast_fp16")]; tensor k_29_perm_0 = const()[name = tensor("k_29_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_23156_begin_0 = const()[name = tensor("op_23156_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_23156_end_0 = const()[name = tensor("op_23156_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_23156_end_mask_0 = const()[name = tensor("op_23156_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = key_29_cast_fp16)[name = tensor("transpose_17")]; tensor var_23156_cast_fp16 = slice_by_index(begin = var_23156_begin_0, end = var_23156_end_0, end_mask = var_23156_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23156_cast_fp16")]; tensor var_23160_begin_0 = const()[name = tensor("op_23160_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_23160_end_0 = const()[name = tensor("op_23160_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_23160_end_mask_0 = const()[name = tensor("op_23160_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23160_cast_fp16 = slice_by_index(begin = var_23160_begin_0, end = var_23160_end_0, end_mask = var_23160_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23160_cast_fp16")]; tensor var_23164_begin_0 = const()[name = tensor("op_23164_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_23164_end_0 = const()[name = tensor("op_23164_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_23164_end_mask_0 = const()[name = tensor("op_23164_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23164_cast_fp16 = slice_by_index(begin = var_23164_begin_0, end = var_23164_end_0, end_mask = var_23164_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23164_cast_fp16")]; tensor var_23168_begin_0 = const()[name = tensor("op_23168_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_23168_end_0 = const()[name = tensor("op_23168_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_23168_end_mask_0 = const()[name = tensor("op_23168_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23168_cast_fp16 = slice_by_index(begin = var_23168_begin_0, end = var_23168_end_0, end_mask = var_23168_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23168_cast_fp16")]; tensor var_23172_begin_0 = const()[name = tensor("op_23172_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_23172_end_0 = const()[name = tensor("op_23172_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_23172_end_mask_0 = const()[name = tensor("op_23172_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23172_cast_fp16 = slice_by_index(begin = var_23172_begin_0, end = var_23172_end_0, end_mask = var_23172_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23172_cast_fp16")]; tensor var_23176_begin_0 = const()[name = tensor("op_23176_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_23176_end_0 = const()[name = tensor("op_23176_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_23176_end_mask_0 = const()[name = tensor("op_23176_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23176_cast_fp16 = slice_by_index(begin = var_23176_begin_0, end = var_23176_end_0, end_mask = var_23176_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23176_cast_fp16")]; tensor var_23180_begin_0 = const()[name = tensor("op_23180_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_23180_end_0 = const()[name = tensor("op_23180_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_23180_end_mask_0 = const()[name = tensor("op_23180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23180_cast_fp16 = slice_by_index(begin = var_23180_begin_0, end = var_23180_end_0, end_mask = var_23180_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23180_cast_fp16")]; tensor var_23184_begin_0 = const()[name = tensor("op_23184_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_23184_end_0 = const()[name = tensor("op_23184_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_23184_end_mask_0 = const()[name = tensor("op_23184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23184_cast_fp16 = slice_by_index(begin = var_23184_begin_0, end = var_23184_end_0, end_mask = var_23184_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23184_cast_fp16")]; tensor var_23188_begin_0 = const()[name = tensor("op_23188_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_23188_end_0 = const()[name = tensor("op_23188_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_23188_end_mask_0 = const()[name = tensor("op_23188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23188_cast_fp16 = slice_by_index(begin = var_23188_begin_0, end = var_23188_end_0, end_mask = var_23188_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23188_cast_fp16")]; tensor var_23192_begin_0 = const()[name = tensor("op_23192_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_23192_end_0 = const()[name = tensor("op_23192_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_23192_end_mask_0 = const()[name = tensor("op_23192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23192_cast_fp16 = slice_by_index(begin = var_23192_begin_0, end = var_23192_end_0, end_mask = var_23192_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23192_cast_fp16")]; tensor var_23196_begin_0 = const()[name = tensor("op_23196_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_23196_end_0 = const()[name = tensor("op_23196_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_23196_end_mask_0 = const()[name = tensor("op_23196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23196_cast_fp16 = slice_by_index(begin = var_23196_begin_0, end = var_23196_end_0, end_mask = var_23196_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23196_cast_fp16")]; tensor var_23200_begin_0 = const()[name = tensor("op_23200_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_23200_end_0 = const()[name = tensor("op_23200_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_23200_end_mask_0 = const()[name = tensor("op_23200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23200_cast_fp16 = slice_by_index(begin = var_23200_begin_0, end = var_23200_end_0, end_mask = var_23200_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23200_cast_fp16")]; tensor var_23204_begin_0 = const()[name = tensor("op_23204_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_23204_end_0 = const()[name = tensor("op_23204_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_23204_end_mask_0 = const()[name = tensor("op_23204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23204_cast_fp16 = slice_by_index(begin = var_23204_begin_0, end = var_23204_end_0, end_mask = var_23204_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23204_cast_fp16")]; tensor var_23208_begin_0 = const()[name = tensor("op_23208_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_23208_end_0 = const()[name = tensor("op_23208_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_23208_end_mask_0 = const()[name = tensor("op_23208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23208_cast_fp16 = slice_by_index(begin = var_23208_begin_0, end = var_23208_end_0, end_mask = var_23208_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23208_cast_fp16")]; tensor var_23212_begin_0 = const()[name = tensor("op_23212_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_23212_end_0 = const()[name = tensor("op_23212_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_23212_end_mask_0 = const()[name = tensor("op_23212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23212_cast_fp16 = slice_by_index(begin = var_23212_begin_0, end = var_23212_end_0, end_mask = var_23212_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23212_cast_fp16")]; tensor var_23216_begin_0 = const()[name = tensor("op_23216_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_23216_end_0 = const()[name = tensor("op_23216_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_23216_end_mask_0 = const()[name = tensor("op_23216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23216_cast_fp16 = slice_by_index(begin = var_23216_begin_0, end = var_23216_end_0, end_mask = var_23216_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23216_cast_fp16")]; tensor var_23220_begin_0 = const()[name = tensor("op_23220_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_23220_end_0 = const()[name = tensor("op_23220_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_23220_end_mask_0 = const()[name = tensor("op_23220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23220_cast_fp16 = slice_by_index(begin = var_23220_begin_0, end = var_23220_end_0, end_mask = var_23220_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23220_cast_fp16")]; tensor var_23224_begin_0 = const()[name = tensor("op_23224_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_23224_end_0 = const()[name = tensor("op_23224_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_23224_end_mask_0 = const()[name = tensor("op_23224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23224_cast_fp16 = slice_by_index(begin = var_23224_begin_0, end = var_23224_end_0, end_mask = var_23224_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23224_cast_fp16")]; tensor var_23228_begin_0 = const()[name = tensor("op_23228_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_23228_end_0 = const()[name = tensor("op_23228_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_23228_end_mask_0 = const()[name = tensor("op_23228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23228_cast_fp16 = slice_by_index(begin = var_23228_begin_0, end = var_23228_end_0, end_mask = var_23228_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23228_cast_fp16")]; tensor var_23232_begin_0 = const()[name = tensor("op_23232_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_23232_end_0 = const()[name = tensor("op_23232_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_23232_end_mask_0 = const()[name = tensor("op_23232_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_23232_cast_fp16 = slice_by_index(begin = var_23232_begin_0, end = var_23232_end_0, end_mask = var_23232_end_mask_0, x = k_29_cast_fp16)[name = tensor("op_23232_cast_fp16")]; tensor var_23234_begin_0 = const()[name = tensor("op_23234_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_23234_end_0 = const()[name = tensor("op_23234_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_23234_end_mask_0 = const()[name = tensor("op_23234_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23234_cast_fp16 = slice_by_index(begin = var_23234_begin_0, end = var_23234_end_0, end_mask = var_23234_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23234_cast_fp16")]; tensor var_23238_begin_0 = const()[name = tensor("op_23238_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_23238_end_0 = const()[name = tensor("op_23238_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_23238_end_mask_0 = const()[name = tensor("op_23238_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23238_cast_fp16 = slice_by_index(begin = var_23238_begin_0, end = var_23238_end_0, end_mask = var_23238_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23238_cast_fp16")]; tensor var_23242_begin_0 = const()[name = tensor("op_23242_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_23242_end_0 = const()[name = tensor("op_23242_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_23242_end_mask_0 = const()[name = tensor("op_23242_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23242_cast_fp16 = slice_by_index(begin = var_23242_begin_0, end = var_23242_end_0, end_mask = var_23242_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23242_cast_fp16")]; tensor var_23246_begin_0 = const()[name = tensor("op_23246_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_23246_end_0 = const()[name = tensor("op_23246_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_23246_end_mask_0 = const()[name = tensor("op_23246_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23246_cast_fp16 = slice_by_index(begin = var_23246_begin_0, end = var_23246_end_0, end_mask = var_23246_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23246_cast_fp16")]; tensor var_23250_begin_0 = const()[name = tensor("op_23250_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_23250_end_0 = const()[name = tensor("op_23250_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_23250_end_mask_0 = const()[name = tensor("op_23250_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23250_cast_fp16 = slice_by_index(begin = var_23250_begin_0, end = var_23250_end_0, end_mask = var_23250_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23250_cast_fp16")]; tensor var_23254_begin_0 = const()[name = tensor("op_23254_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_23254_end_0 = const()[name = tensor("op_23254_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_23254_end_mask_0 = const()[name = tensor("op_23254_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23254_cast_fp16 = slice_by_index(begin = var_23254_begin_0, end = var_23254_end_0, end_mask = var_23254_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23254_cast_fp16")]; tensor var_23258_begin_0 = const()[name = tensor("op_23258_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_23258_end_0 = const()[name = tensor("op_23258_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_23258_end_mask_0 = const()[name = tensor("op_23258_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23258_cast_fp16 = slice_by_index(begin = var_23258_begin_0, end = var_23258_end_0, end_mask = var_23258_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23258_cast_fp16")]; tensor var_23262_begin_0 = const()[name = tensor("op_23262_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_23262_end_0 = const()[name = tensor("op_23262_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_23262_end_mask_0 = const()[name = tensor("op_23262_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23262_cast_fp16 = slice_by_index(begin = var_23262_begin_0, end = var_23262_end_0, end_mask = var_23262_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23262_cast_fp16")]; tensor var_23266_begin_0 = const()[name = tensor("op_23266_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_23266_end_0 = const()[name = tensor("op_23266_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_23266_end_mask_0 = const()[name = tensor("op_23266_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23266_cast_fp16 = slice_by_index(begin = var_23266_begin_0, end = var_23266_end_0, end_mask = var_23266_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23266_cast_fp16")]; tensor var_23270_begin_0 = const()[name = tensor("op_23270_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_23270_end_0 = const()[name = tensor("op_23270_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_23270_end_mask_0 = const()[name = tensor("op_23270_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23270_cast_fp16 = slice_by_index(begin = var_23270_begin_0, end = var_23270_end_0, end_mask = var_23270_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23270_cast_fp16")]; tensor var_23274_begin_0 = const()[name = tensor("op_23274_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_23274_end_0 = const()[name = tensor("op_23274_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_23274_end_mask_0 = const()[name = tensor("op_23274_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23274_cast_fp16 = slice_by_index(begin = var_23274_begin_0, end = var_23274_end_0, end_mask = var_23274_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23274_cast_fp16")]; tensor var_23278_begin_0 = const()[name = tensor("op_23278_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_23278_end_0 = const()[name = tensor("op_23278_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_23278_end_mask_0 = const()[name = tensor("op_23278_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23278_cast_fp16 = slice_by_index(begin = var_23278_begin_0, end = var_23278_end_0, end_mask = var_23278_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23278_cast_fp16")]; tensor var_23282_begin_0 = const()[name = tensor("op_23282_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_23282_end_0 = const()[name = tensor("op_23282_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_23282_end_mask_0 = const()[name = tensor("op_23282_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23282_cast_fp16 = slice_by_index(begin = var_23282_begin_0, end = var_23282_end_0, end_mask = var_23282_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23282_cast_fp16")]; tensor var_23286_begin_0 = const()[name = tensor("op_23286_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_23286_end_0 = const()[name = tensor("op_23286_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_23286_end_mask_0 = const()[name = tensor("op_23286_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23286_cast_fp16 = slice_by_index(begin = var_23286_begin_0, end = var_23286_end_0, end_mask = var_23286_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23286_cast_fp16")]; tensor var_23290_begin_0 = const()[name = tensor("op_23290_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_23290_end_0 = const()[name = tensor("op_23290_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_23290_end_mask_0 = const()[name = tensor("op_23290_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23290_cast_fp16 = slice_by_index(begin = var_23290_begin_0, end = var_23290_end_0, end_mask = var_23290_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23290_cast_fp16")]; tensor var_23294_begin_0 = const()[name = tensor("op_23294_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_23294_end_0 = const()[name = tensor("op_23294_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_23294_end_mask_0 = const()[name = tensor("op_23294_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23294_cast_fp16 = slice_by_index(begin = var_23294_begin_0, end = var_23294_end_0, end_mask = var_23294_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23294_cast_fp16")]; tensor var_23298_begin_0 = const()[name = tensor("op_23298_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_23298_end_0 = const()[name = tensor("op_23298_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_23298_end_mask_0 = const()[name = tensor("op_23298_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23298_cast_fp16 = slice_by_index(begin = var_23298_begin_0, end = var_23298_end_0, end_mask = var_23298_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23298_cast_fp16")]; tensor var_23302_begin_0 = const()[name = tensor("op_23302_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_23302_end_0 = const()[name = tensor("op_23302_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_23302_end_mask_0 = const()[name = tensor("op_23302_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23302_cast_fp16 = slice_by_index(begin = var_23302_begin_0, end = var_23302_end_0, end_mask = var_23302_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23302_cast_fp16")]; tensor var_23306_begin_0 = const()[name = tensor("op_23306_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_23306_end_0 = const()[name = tensor("op_23306_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_23306_end_mask_0 = const()[name = tensor("op_23306_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23306_cast_fp16 = slice_by_index(begin = var_23306_begin_0, end = var_23306_end_0, end_mask = var_23306_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23306_cast_fp16")]; tensor var_23310_begin_0 = const()[name = tensor("op_23310_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_23310_end_0 = const()[name = tensor("op_23310_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_23310_end_mask_0 = const()[name = tensor("op_23310_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_23310_cast_fp16 = slice_by_index(begin = var_23310_begin_0, end = var_23310_end_0, end_mask = var_23310_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_23310_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2241_equation_0, values = (var_23156_cast_fp16, var_22598_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2243_equation_0, values = (var_23156_cast_fp16, var_22605_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2245_equation_0, values = (var_23156_cast_fp16, var_22612_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2247_equation_0, values = (var_23156_cast_fp16, var_22619_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2249_equation_0, values = (var_23160_cast_fp16, var_22626_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2251_equation_0, values = (var_23160_cast_fp16, var_22633_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2253_equation_0, values = (var_23160_cast_fp16, var_22640_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2255_equation_0, values = (var_23160_cast_fp16, var_22647_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2257_equation_0, values = (var_23164_cast_fp16, var_22654_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2259_equation_0, values = (var_23164_cast_fp16, var_22661_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2261_equation_0, values = (var_23164_cast_fp16, var_22668_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2263_equation_0, values = (var_23164_cast_fp16, var_22675_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2265_equation_0, values = (var_23168_cast_fp16, var_22682_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2267_equation_0, values = (var_23168_cast_fp16, var_22689_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2269_equation_0, values = (var_23168_cast_fp16, var_22696_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2271_equation_0, values = (var_23168_cast_fp16, var_22703_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2273_equation_0, values = (var_23172_cast_fp16, var_22710_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2275_equation_0, values = (var_23172_cast_fp16, var_22717_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2277_equation_0, values = (var_23172_cast_fp16, var_22724_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2279_equation_0, values = (var_23172_cast_fp16, var_22731_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2281_equation_0, values = (var_23176_cast_fp16, var_22738_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2283_equation_0, values = (var_23176_cast_fp16, var_22745_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2285_equation_0, values = (var_23176_cast_fp16, var_22752_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2287_equation_0, values = (var_23176_cast_fp16, var_22759_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2287_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2289_equation_0, values = (var_23180_cast_fp16, var_22766_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2291_equation_0, values = (var_23180_cast_fp16, var_22773_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2293_equation_0, values = (var_23180_cast_fp16, var_22780_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2295_equation_0, values = (var_23180_cast_fp16, var_22787_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2295_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2297_equation_0, values = (var_23184_cast_fp16, var_22794_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2299_equation_0, values = (var_23184_cast_fp16, var_22801_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2301_equation_0, values = (var_23184_cast_fp16, var_22808_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2303_equation_0, values = (var_23184_cast_fp16, var_22815_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2305_equation_0, values = (var_23188_cast_fp16, var_22822_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2307_equation_0, values = (var_23188_cast_fp16, var_22829_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2309_equation_0, values = (var_23188_cast_fp16, var_22836_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2311_equation_0, values = (var_23188_cast_fp16, var_22843_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2313_equation_0, values = (var_23192_cast_fp16, var_22850_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2315_equation_0, values = (var_23192_cast_fp16, var_22857_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2317_equation_0, values = (var_23192_cast_fp16, var_22864_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2319_equation_0, values = (var_23192_cast_fp16, var_22871_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2319_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2321_equation_0, values = (var_23196_cast_fp16, var_22878_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2323_equation_0, values = (var_23196_cast_fp16, var_22885_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2325_equation_0, values = (var_23196_cast_fp16, var_22892_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2327_equation_0, values = (var_23196_cast_fp16, var_22899_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2329_equation_0, values = (var_23200_cast_fp16, var_22906_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2331_equation_0, values = (var_23200_cast_fp16, var_22913_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2333_equation_0, values = (var_23200_cast_fp16, var_22920_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2335_equation_0, values = (var_23200_cast_fp16, var_22927_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2337_equation_0, values = (var_23204_cast_fp16, var_22934_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2339_equation_0, values = (var_23204_cast_fp16, var_22941_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2341_equation_0, values = (var_23204_cast_fp16, var_22948_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2343_equation_0, values = (var_23204_cast_fp16, var_22955_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2345_equation_0, values = (var_23208_cast_fp16, var_22962_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2347_equation_0, values = (var_23208_cast_fp16, var_22969_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2349_equation_0, values = (var_23208_cast_fp16, var_22976_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2351_equation_0, values = (var_23208_cast_fp16, var_22983_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2353_equation_0, values = (var_23212_cast_fp16, var_22990_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2355_equation_0, values = (var_23212_cast_fp16, var_22997_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2357_equation_0, values = (var_23212_cast_fp16, var_23004_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2359_equation_0, values = (var_23212_cast_fp16, var_23011_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2359_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2361_equation_0, values = (var_23216_cast_fp16, var_23018_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2363_equation_0, values = (var_23216_cast_fp16, var_23025_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2365_equation_0, values = (var_23216_cast_fp16, var_23032_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2367_equation_0, values = (var_23216_cast_fp16, var_23039_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2369_equation_0, values = (var_23220_cast_fp16, var_23046_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2371_equation_0, values = (var_23220_cast_fp16, var_23053_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2373_equation_0, values = (var_23220_cast_fp16, var_23060_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2375_equation_0, values = (var_23220_cast_fp16, var_23067_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2377_equation_0, values = (var_23224_cast_fp16, var_23074_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2379_equation_0, values = (var_23224_cast_fp16, var_23081_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2381_equation_0, values = (var_23224_cast_fp16, var_23088_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2383_equation_0, values = (var_23224_cast_fp16, var_23095_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2383_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2385_equation_0, values = (var_23228_cast_fp16, var_23102_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2387_equation_0, values = (var_23228_cast_fp16, var_23109_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2389_equation_0, values = (var_23228_cast_fp16, var_23116_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2391_equation_0, values = (var_23228_cast_fp16, var_23123_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2393_equation_0, values = (var_23232_cast_fp16, var_23130_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2395_equation_0, values = (var_23232_cast_fp16, var_23137_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2397_equation_0, values = (var_23232_cast_fp16, var_23144_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2399_equation_0, values = (var_23232_cast_fp16, var_23151_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2399_cast_fp16")]; tensor var_23473_to_fp16 = const()[name = tensor("op_23473_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2241_cast_fp16, y = var_23473_to_fp16)[name = tensor("aw_chunk_2241_cast_fp16")]; tensor var_23475_to_fp16 = const()[name = tensor("op_23475_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2243_cast_fp16, y = var_23475_to_fp16)[name = tensor("aw_chunk_2243_cast_fp16")]; tensor var_23477_to_fp16 = const()[name = tensor("op_23477_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2245_cast_fp16, y = var_23477_to_fp16)[name = tensor("aw_chunk_2245_cast_fp16")]; tensor var_23479_to_fp16 = const()[name = tensor("op_23479_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2247_cast_fp16, y = var_23479_to_fp16)[name = tensor("aw_chunk_2247_cast_fp16")]; tensor var_23481_to_fp16 = const()[name = tensor("op_23481_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2249_cast_fp16, y = var_23481_to_fp16)[name = tensor("aw_chunk_2249_cast_fp16")]; tensor var_23483_to_fp16 = const()[name = tensor("op_23483_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2251_cast_fp16, y = var_23483_to_fp16)[name = tensor("aw_chunk_2251_cast_fp16")]; tensor var_23485_to_fp16 = const()[name = tensor("op_23485_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2253_cast_fp16, y = var_23485_to_fp16)[name = tensor("aw_chunk_2253_cast_fp16")]; tensor var_23487_to_fp16 = const()[name = tensor("op_23487_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2255_cast_fp16, y = var_23487_to_fp16)[name = tensor("aw_chunk_2255_cast_fp16")]; tensor var_23489_to_fp16 = const()[name = tensor("op_23489_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2257_cast_fp16, y = var_23489_to_fp16)[name = tensor("aw_chunk_2257_cast_fp16")]; tensor var_23491_to_fp16 = const()[name = tensor("op_23491_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2259_cast_fp16, y = var_23491_to_fp16)[name = tensor("aw_chunk_2259_cast_fp16")]; tensor var_23493_to_fp16 = const()[name = tensor("op_23493_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2261_cast_fp16, y = var_23493_to_fp16)[name = tensor("aw_chunk_2261_cast_fp16")]; tensor var_23495_to_fp16 = const()[name = tensor("op_23495_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2263_cast_fp16, y = var_23495_to_fp16)[name = tensor("aw_chunk_2263_cast_fp16")]; tensor var_23497_to_fp16 = const()[name = tensor("op_23497_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2265_cast_fp16, y = var_23497_to_fp16)[name = tensor("aw_chunk_2265_cast_fp16")]; tensor var_23499_to_fp16 = const()[name = tensor("op_23499_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2267_cast_fp16, y = var_23499_to_fp16)[name = tensor("aw_chunk_2267_cast_fp16")]; tensor var_23501_to_fp16 = const()[name = tensor("op_23501_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2269_cast_fp16, y = var_23501_to_fp16)[name = tensor("aw_chunk_2269_cast_fp16")]; tensor var_23503_to_fp16 = const()[name = tensor("op_23503_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2271_cast_fp16, y = var_23503_to_fp16)[name = tensor("aw_chunk_2271_cast_fp16")]; tensor var_23505_to_fp16 = const()[name = tensor("op_23505_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2273_cast_fp16, y = var_23505_to_fp16)[name = tensor("aw_chunk_2273_cast_fp16")]; tensor var_23507_to_fp16 = const()[name = tensor("op_23507_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2275_cast_fp16, y = var_23507_to_fp16)[name = tensor("aw_chunk_2275_cast_fp16")]; tensor var_23509_to_fp16 = const()[name = tensor("op_23509_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2277_cast_fp16, y = var_23509_to_fp16)[name = tensor("aw_chunk_2277_cast_fp16")]; tensor var_23511_to_fp16 = const()[name = tensor("op_23511_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2279_cast_fp16, y = var_23511_to_fp16)[name = tensor("aw_chunk_2279_cast_fp16")]; tensor var_23513_to_fp16 = const()[name = tensor("op_23513_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2281_cast_fp16, y = var_23513_to_fp16)[name = tensor("aw_chunk_2281_cast_fp16")]; tensor var_23515_to_fp16 = const()[name = tensor("op_23515_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2283_cast_fp16, y = var_23515_to_fp16)[name = tensor("aw_chunk_2283_cast_fp16")]; tensor var_23517_to_fp16 = const()[name = tensor("op_23517_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2285_cast_fp16, y = var_23517_to_fp16)[name = tensor("aw_chunk_2285_cast_fp16")]; tensor var_23519_to_fp16 = const()[name = tensor("op_23519_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2287_cast_fp16, y = var_23519_to_fp16)[name = tensor("aw_chunk_2287_cast_fp16")]; tensor var_23521_to_fp16 = const()[name = tensor("op_23521_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2289_cast_fp16, y = var_23521_to_fp16)[name = tensor("aw_chunk_2289_cast_fp16")]; tensor var_23523_to_fp16 = const()[name = tensor("op_23523_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2291_cast_fp16, y = var_23523_to_fp16)[name = tensor("aw_chunk_2291_cast_fp16")]; tensor var_23525_to_fp16 = const()[name = tensor("op_23525_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2293_cast_fp16, y = var_23525_to_fp16)[name = tensor("aw_chunk_2293_cast_fp16")]; tensor var_23527_to_fp16 = const()[name = tensor("op_23527_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2295_cast_fp16, y = var_23527_to_fp16)[name = tensor("aw_chunk_2295_cast_fp16")]; tensor var_23529_to_fp16 = const()[name = tensor("op_23529_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2297_cast_fp16, y = var_23529_to_fp16)[name = tensor("aw_chunk_2297_cast_fp16")]; tensor var_23531_to_fp16 = const()[name = tensor("op_23531_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2299_cast_fp16, y = var_23531_to_fp16)[name = tensor("aw_chunk_2299_cast_fp16")]; tensor var_23533_to_fp16 = const()[name = tensor("op_23533_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2301_cast_fp16, y = var_23533_to_fp16)[name = tensor("aw_chunk_2301_cast_fp16")]; tensor var_23535_to_fp16 = const()[name = tensor("op_23535_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2303_cast_fp16, y = var_23535_to_fp16)[name = tensor("aw_chunk_2303_cast_fp16")]; tensor var_23537_to_fp16 = const()[name = tensor("op_23537_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2305_cast_fp16, y = var_23537_to_fp16)[name = tensor("aw_chunk_2305_cast_fp16")]; tensor var_23539_to_fp16 = const()[name = tensor("op_23539_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2307_cast_fp16, y = var_23539_to_fp16)[name = tensor("aw_chunk_2307_cast_fp16")]; tensor var_23541_to_fp16 = const()[name = tensor("op_23541_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2309_cast_fp16, y = var_23541_to_fp16)[name = tensor("aw_chunk_2309_cast_fp16")]; tensor var_23543_to_fp16 = const()[name = tensor("op_23543_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2311_cast_fp16, y = var_23543_to_fp16)[name = tensor("aw_chunk_2311_cast_fp16")]; tensor var_23545_to_fp16 = const()[name = tensor("op_23545_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2313_cast_fp16, y = var_23545_to_fp16)[name = tensor("aw_chunk_2313_cast_fp16")]; tensor var_23547_to_fp16 = const()[name = tensor("op_23547_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2315_cast_fp16, y = var_23547_to_fp16)[name = tensor("aw_chunk_2315_cast_fp16")]; tensor var_23549_to_fp16 = const()[name = tensor("op_23549_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2317_cast_fp16, y = var_23549_to_fp16)[name = tensor("aw_chunk_2317_cast_fp16")]; tensor var_23551_to_fp16 = const()[name = tensor("op_23551_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2319_cast_fp16, y = var_23551_to_fp16)[name = tensor("aw_chunk_2319_cast_fp16")]; tensor var_23553_to_fp16 = const()[name = tensor("op_23553_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2321_cast_fp16, y = var_23553_to_fp16)[name = tensor("aw_chunk_2321_cast_fp16")]; tensor var_23555_to_fp16 = const()[name = tensor("op_23555_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2323_cast_fp16, y = var_23555_to_fp16)[name = tensor("aw_chunk_2323_cast_fp16")]; tensor var_23557_to_fp16 = const()[name = tensor("op_23557_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2325_cast_fp16, y = var_23557_to_fp16)[name = tensor("aw_chunk_2325_cast_fp16")]; tensor var_23559_to_fp16 = const()[name = tensor("op_23559_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2327_cast_fp16, y = var_23559_to_fp16)[name = tensor("aw_chunk_2327_cast_fp16")]; tensor var_23561_to_fp16 = const()[name = tensor("op_23561_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2329_cast_fp16, y = var_23561_to_fp16)[name = tensor("aw_chunk_2329_cast_fp16")]; tensor var_23563_to_fp16 = const()[name = tensor("op_23563_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2331_cast_fp16, y = var_23563_to_fp16)[name = tensor("aw_chunk_2331_cast_fp16")]; tensor var_23565_to_fp16 = const()[name = tensor("op_23565_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2333_cast_fp16, y = var_23565_to_fp16)[name = tensor("aw_chunk_2333_cast_fp16")]; tensor var_23567_to_fp16 = const()[name = tensor("op_23567_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2335_cast_fp16, y = var_23567_to_fp16)[name = tensor("aw_chunk_2335_cast_fp16")]; tensor var_23569_to_fp16 = const()[name = tensor("op_23569_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2337_cast_fp16, y = var_23569_to_fp16)[name = tensor("aw_chunk_2337_cast_fp16")]; tensor var_23571_to_fp16 = const()[name = tensor("op_23571_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2339_cast_fp16, y = var_23571_to_fp16)[name = tensor("aw_chunk_2339_cast_fp16")]; tensor var_23573_to_fp16 = const()[name = tensor("op_23573_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2341_cast_fp16, y = var_23573_to_fp16)[name = tensor("aw_chunk_2341_cast_fp16")]; tensor var_23575_to_fp16 = const()[name = tensor("op_23575_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2343_cast_fp16, y = var_23575_to_fp16)[name = tensor("aw_chunk_2343_cast_fp16")]; tensor var_23577_to_fp16 = const()[name = tensor("op_23577_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2345_cast_fp16, y = var_23577_to_fp16)[name = tensor("aw_chunk_2345_cast_fp16")]; tensor var_23579_to_fp16 = const()[name = tensor("op_23579_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2347_cast_fp16, y = var_23579_to_fp16)[name = tensor("aw_chunk_2347_cast_fp16")]; tensor var_23581_to_fp16 = const()[name = tensor("op_23581_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2349_cast_fp16, y = var_23581_to_fp16)[name = tensor("aw_chunk_2349_cast_fp16")]; tensor var_23583_to_fp16 = const()[name = tensor("op_23583_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2351_cast_fp16, y = var_23583_to_fp16)[name = tensor("aw_chunk_2351_cast_fp16")]; tensor var_23585_to_fp16 = const()[name = tensor("op_23585_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2353_cast_fp16, y = var_23585_to_fp16)[name = tensor("aw_chunk_2353_cast_fp16")]; tensor var_23587_to_fp16 = const()[name = tensor("op_23587_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2355_cast_fp16, y = var_23587_to_fp16)[name = tensor("aw_chunk_2355_cast_fp16")]; tensor var_23589_to_fp16 = const()[name = tensor("op_23589_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2357_cast_fp16, y = var_23589_to_fp16)[name = tensor("aw_chunk_2357_cast_fp16")]; tensor var_23591_to_fp16 = const()[name = tensor("op_23591_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2359_cast_fp16, y = var_23591_to_fp16)[name = tensor("aw_chunk_2359_cast_fp16")]; tensor var_23593_to_fp16 = const()[name = tensor("op_23593_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2361_cast_fp16, y = var_23593_to_fp16)[name = tensor("aw_chunk_2361_cast_fp16")]; tensor var_23595_to_fp16 = const()[name = tensor("op_23595_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2363_cast_fp16, y = var_23595_to_fp16)[name = tensor("aw_chunk_2363_cast_fp16")]; tensor var_23597_to_fp16 = const()[name = tensor("op_23597_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2365_cast_fp16, y = var_23597_to_fp16)[name = tensor("aw_chunk_2365_cast_fp16")]; tensor var_23599_to_fp16 = const()[name = tensor("op_23599_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2367_cast_fp16, y = var_23599_to_fp16)[name = tensor("aw_chunk_2367_cast_fp16")]; tensor var_23601_to_fp16 = const()[name = tensor("op_23601_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2369_cast_fp16, y = var_23601_to_fp16)[name = tensor("aw_chunk_2369_cast_fp16")]; tensor var_23603_to_fp16 = const()[name = tensor("op_23603_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2371_cast_fp16, y = var_23603_to_fp16)[name = tensor("aw_chunk_2371_cast_fp16")]; tensor var_23605_to_fp16 = const()[name = tensor("op_23605_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2373_cast_fp16, y = var_23605_to_fp16)[name = tensor("aw_chunk_2373_cast_fp16")]; tensor var_23607_to_fp16 = const()[name = tensor("op_23607_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2375_cast_fp16, y = var_23607_to_fp16)[name = tensor("aw_chunk_2375_cast_fp16")]; tensor var_23609_to_fp16 = const()[name = tensor("op_23609_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2377_cast_fp16, y = var_23609_to_fp16)[name = tensor("aw_chunk_2377_cast_fp16")]; tensor var_23611_to_fp16 = const()[name = tensor("op_23611_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2379_cast_fp16, y = var_23611_to_fp16)[name = tensor("aw_chunk_2379_cast_fp16")]; tensor var_23613_to_fp16 = const()[name = tensor("op_23613_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2381_cast_fp16, y = var_23613_to_fp16)[name = tensor("aw_chunk_2381_cast_fp16")]; tensor var_23615_to_fp16 = const()[name = tensor("op_23615_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2383_cast_fp16, y = var_23615_to_fp16)[name = tensor("aw_chunk_2383_cast_fp16")]; tensor var_23617_to_fp16 = const()[name = tensor("op_23617_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2385_cast_fp16, y = var_23617_to_fp16)[name = tensor("aw_chunk_2385_cast_fp16")]; tensor var_23619_to_fp16 = const()[name = tensor("op_23619_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2387_cast_fp16, y = var_23619_to_fp16)[name = tensor("aw_chunk_2387_cast_fp16")]; tensor var_23621_to_fp16 = const()[name = tensor("op_23621_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2389_cast_fp16, y = var_23621_to_fp16)[name = tensor("aw_chunk_2389_cast_fp16")]; tensor var_23623_to_fp16 = const()[name = tensor("op_23623_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2391_cast_fp16, y = var_23623_to_fp16)[name = tensor("aw_chunk_2391_cast_fp16")]; tensor var_23625_to_fp16 = const()[name = tensor("op_23625_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2393_cast_fp16, y = var_23625_to_fp16)[name = tensor("aw_chunk_2393_cast_fp16")]; tensor var_23627_to_fp16 = const()[name = tensor("op_23627_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2395_cast_fp16, y = var_23627_to_fp16)[name = tensor("aw_chunk_2395_cast_fp16")]; tensor var_23629_to_fp16 = const()[name = tensor("op_23629_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2397_cast_fp16, y = var_23629_to_fp16)[name = tensor("aw_chunk_2397_cast_fp16")]; tensor var_23631_to_fp16 = const()[name = tensor("op_23631_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2399_cast_fp16, y = var_23631_to_fp16)[name = tensor("aw_chunk_2399_cast_fp16")]; tensor var_23633_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2241_cast_fp16)[name = tensor("op_23633_cast_fp16")]; tensor var_23634_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2243_cast_fp16)[name = tensor("op_23634_cast_fp16")]; tensor var_23635_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2245_cast_fp16)[name = tensor("op_23635_cast_fp16")]; tensor var_23636_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2247_cast_fp16)[name = tensor("op_23636_cast_fp16")]; tensor var_23637_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2249_cast_fp16)[name = tensor("op_23637_cast_fp16")]; tensor var_23638_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2251_cast_fp16)[name = tensor("op_23638_cast_fp16")]; tensor var_23639_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2253_cast_fp16)[name = tensor("op_23639_cast_fp16")]; tensor var_23640_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2255_cast_fp16)[name = tensor("op_23640_cast_fp16")]; tensor var_23641_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2257_cast_fp16)[name = tensor("op_23641_cast_fp16")]; tensor var_23642_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2259_cast_fp16)[name = tensor("op_23642_cast_fp16")]; tensor var_23643_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2261_cast_fp16)[name = tensor("op_23643_cast_fp16")]; tensor var_23644_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2263_cast_fp16)[name = tensor("op_23644_cast_fp16")]; tensor var_23645_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2265_cast_fp16)[name = tensor("op_23645_cast_fp16")]; tensor var_23646_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2267_cast_fp16)[name = tensor("op_23646_cast_fp16")]; tensor var_23647_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2269_cast_fp16)[name = tensor("op_23647_cast_fp16")]; tensor var_23648_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2271_cast_fp16)[name = tensor("op_23648_cast_fp16")]; tensor var_23649_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2273_cast_fp16)[name = tensor("op_23649_cast_fp16")]; tensor var_23650_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2275_cast_fp16)[name = tensor("op_23650_cast_fp16")]; tensor var_23651_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2277_cast_fp16)[name = tensor("op_23651_cast_fp16")]; tensor var_23652_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2279_cast_fp16)[name = tensor("op_23652_cast_fp16")]; tensor var_23653_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2281_cast_fp16)[name = tensor("op_23653_cast_fp16")]; tensor var_23654_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2283_cast_fp16)[name = tensor("op_23654_cast_fp16")]; tensor var_23655_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2285_cast_fp16)[name = tensor("op_23655_cast_fp16")]; tensor var_23656_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2287_cast_fp16)[name = tensor("op_23656_cast_fp16")]; tensor var_23657_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2289_cast_fp16)[name = tensor("op_23657_cast_fp16")]; tensor var_23658_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2291_cast_fp16)[name = tensor("op_23658_cast_fp16")]; tensor var_23659_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2293_cast_fp16)[name = tensor("op_23659_cast_fp16")]; tensor var_23660_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2295_cast_fp16)[name = tensor("op_23660_cast_fp16")]; tensor var_23661_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2297_cast_fp16)[name = tensor("op_23661_cast_fp16")]; tensor var_23662_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2299_cast_fp16)[name = tensor("op_23662_cast_fp16")]; tensor var_23663_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2301_cast_fp16)[name = tensor("op_23663_cast_fp16")]; tensor var_23664_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2303_cast_fp16)[name = tensor("op_23664_cast_fp16")]; tensor var_23665_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2305_cast_fp16)[name = tensor("op_23665_cast_fp16")]; tensor var_23666_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2307_cast_fp16)[name = tensor("op_23666_cast_fp16")]; tensor var_23667_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2309_cast_fp16)[name = tensor("op_23667_cast_fp16")]; tensor var_23668_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2311_cast_fp16)[name = tensor("op_23668_cast_fp16")]; tensor var_23669_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2313_cast_fp16)[name = tensor("op_23669_cast_fp16")]; tensor var_23670_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2315_cast_fp16)[name = tensor("op_23670_cast_fp16")]; tensor var_23671_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2317_cast_fp16)[name = tensor("op_23671_cast_fp16")]; tensor var_23672_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2319_cast_fp16)[name = tensor("op_23672_cast_fp16")]; tensor var_23673_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2321_cast_fp16)[name = tensor("op_23673_cast_fp16")]; tensor var_23674_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2323_cast_fp16)[name = tensor("op_23674_cast_fp16")]; tensor var_23675_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2325_cast_fp16)[name = tensor("op_23675_cast_fp16")]; tensor var_23676_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2327_cast_fp16)[name = tensor("op_23676_cast_fp16")]; tensor var_23677_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2329_cast_fp16)[name = tensor("op_23677_cast_fp16")]; tensor var_23678_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2331_cast_fp16)[name = tensor("op_23678_cast_fp16")]; tensor var_23679_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2333_cast_fp16)[name = tensor("op_23679_cast_fp16")]; tensor var_23680_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2335_cast_fp16)[name = tensor("op_23680_cast_fp16")]; tensor var_23681_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2337_cast_fp16)[name = tensor("op_23681_cast_fp16")]; tensor var_23682_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2339_cast_fp16)[name = tensor("op_23682_cast_fp16")]; tensor var_23683_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2341_cast_fp16)[name = tensor("op_23683_cast_fp16")]; tensor var_23684_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2343_cast_fp16)[name = tensor("op_23684_cast_fp16")]; tensor var_23685_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2345_cast_fp16)[name = tensor("op_23685_cast_fp16")]; tensor var_23686_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2347_cast_fp16)[name = tensor("op_23686_cast_fp16")]; tensor var_23687_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2349_cast_fp16)[name = tensor("op_23687_cast_fp16")]; tensor var_23688_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2351_cast_fp16)[name = tensor("op_23688_cast_fp16")]; tensor var_23689_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2353_cast_fp16)[name = tensor("op_23689_cast_fp16")]; tensor var_23690_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2355_cast_fp16)[name = tensor("op_23690_cast_fp16")]; tensor var_23691_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2357_cast_fp16)[name = tensor("op_23691_cast_fp16")]; tensor var_23692_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2359_cast_fp16)[name = tensor("op_23692_cast_fp16")]; tensor var_23693_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2361_cast_fp16)[name = tensor("op_23693_cast_fp16")]; tensor var_23694_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2363_cast_fp16)[name = tensor("op_23694_cast_fp16")]; tensor var_23695_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2365_cast_fp16)[name = tensor("op_23695_cast_fp16")]; tensor var_23696_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2367_cast_fp16)[name = tensor("op_23696_cast_fp16")]; tensor var_23697_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2369_cast_fp16)[name = tensor("op_23697_cast_fp16")]; tensor var_23698_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2371_cast_fp16)[name = tensor("op_23698_cast_fp16")]; tensor var_23699_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2373_cast_fp16)[name = tensor("op_23699_cast_fp16")]; tensor var_23700_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2375_cast_fp16)[name = tensor("op_23700_cast_fp16")]; tensor var_23701_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2377_cast_fp16)[name = tensor("op_23701_cast_fp16")]; tensor var_23702_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2379_cast_fp16)[name = tensor("op_23702_cast_fp16")]; tensor var_23703_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2381_cast_fp16)[name = tensor("op_23703_cast_fp16")]; tensor var_23704_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2383_cast_fp16)[name = tensor("op_23704_cast_fp16")]; tensor var_23705_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2385_cast_fp16)[name = tensor("op_23705_cast_fp16")]; tensor var_23706_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2387_cast_fp16)[name = tensor("op_23706_cast_fp16")]; tensor var_23707_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2389_cast_fp16)[name = tensor("op_23707_cast_fp16")]; tensor var_23708_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2391_cast_fp16)[name = tensor("op_23708_cast_fp16")]; tensor var_23709_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2393_cast_fp16)[name = tensor("op_23709_cast_fp16")]; tensor var_23710_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2395_cast_fp16)[name = tensor("op_23710_cast_fp16")]; tensor var_23711_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2397_cast_fp16)[name = tensor("op_23711_cast_fp16")]; tensor var_23712_cast_fp16 = softmax(axis = var_22431, x = aw_chunk_2399_cast_fp16)[name = tensor("op_23712_cast_fp16")]; tensor var_23714_equation_0 = const()[name = tensor("op_23714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23714_cast_fp16 = einsum(equation = var_23714_equation_0, values = (var_23234_cast_fp16, var_23633_cast_fp16))[name = tensor("op_23714_cast_fp16")]; tensor var_23716_equation_0 = const()[name = tensor("op_23716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23716_cast_fp16 = einsum(equation = var_23716_equation_0, values = (var_23234_cast_fp16, var_23634_cast_fp16))[name = tensor("op_23716_cast_fp16")]; tensor var_23718_equation_0 = const()[name = tensor("op_23718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23718_cast_fp16 = einsum(equation = var_23718_equation_0, values = (var_23234_cast_fp16, var_23635_cast_fp16))[name = tensor("op_23718_cast_fp16")]; tensor var_23720_equation_0 = const()[name = tensor("op_23720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23720_cast_fp16 = einsum(equation = var_23720_equation_0, values = (var_23234_cast_fp16, var_23636_cast_fp16))[name = tensor("op_23720_cast_fp16")]; tensor var_23722_equation_0 = const()[name = tensor("op_23722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23722_cast_fp16 = einsum(equation = var_23722_equation_0, values = (var_23238_cast_fp16, var_23637_cast_fp16))[name = tensor("op_23722_cast_fp16")]; tensor var_23724_equation_0 = const()[name = tensor("op_23724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23724_cast_fp16 = einsum(equation = var_23724_equation_0, values = (var_23238_cast_fp16, var_23638_cast_fp16))[name = tensor("op_23724_cast_fp16")]; tensor var_23726_equation_0 = const()[name = tensor("op_23726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23726_cast_fp16 = einsum(equation = var_23726_equation_0, values = (var_23238_cast_fp16, var_23639_cast_fp16))[name = tensor("op_23726_cast_fp16")]; tensor var_23728_equation_0 = const()[name = tensor("op_23728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23728_cast_fp16 = einsum(equation = var_23728_equation_0, values = (var_23238_cast_fp16, var_23640_cast_fp16))[name = tensor("op_23728_cast_fp16")]; tensor var_23730_equation_0 = const()[name = tensor("op_23730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23730_cast_fp16 = einsum(equation = var_23730_equation_0, values = (var_23242_cast_fp16, var_23641_cast_fp16))[name = tensor("op_23730_cast_fp16")]; tensor var_23732_equation_0 = const()[name = tensor("op_23732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23732_cast_fp16 = einsum(equation = var_23732_equation_0, values = (var_23242_cast_fp16, var_23642_cast_fp16))[name = tensor("op_23732_cast_fp16")]; tensor var_23734_equation_0 = const()[name = tensor("op_23734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23734_cast_fp16 = einsum(equation = var_23734_equation_0, values = (var_23242_cast_fp16, var_23643_cast_fp16))[name = tensor("op_23734_cast_fp16")]; tensor var_23736_equation_0 = const()[name = tensor("op_23736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23736_cast_fp16 = einsum(equation = var_23736_equation_0, values = (var_23242_cast_fp16, var_23644_cast_fp16))[name = tensor("op_23736_cast_fp16")]; tensor var_23738_equation_0 = const()[name = tensor("op_23738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23738_cast_fp16 = einsum(equation = var_23738_equation_0, values = (var_23246_cast_fp16, var_23645_cast_fp16))[name = tensor("op_23738_cast_fp16")]; tensor var_23740_equation_0 = const()[name = tensor("op_23740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23740_cast_fp16 = einsum(equation = var_23740_equation_0, values = (var_23246_cast_fp16, var_23646_cast_fp16))[name = tensor("op_23740_cast_fp16")]; tensor var_23742_equation_0 = const()[name = tensor("op_23742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23742_cast_fp16 = einsum(equation = var_23742_equation_0, values = (var_23246_cast_fp16, var_23647_cast_fp16))[name = tensor("op_23742_cast_fp16")]; tensor var_23744_equation_0 = const()[name = tensor("op_23744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23744_cast_fp16 = einsum(equation = var_23744_equation_0, values = (var_23246_cast_fp16, var_23648_cast_fp16))[name = tensor("op_23744_cast_fp16")]; tensor var_23746_equation_0 = const()[name = tensor("op_23746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23746_cast_fp16 = einsum(equation = var_23746_equation_0, values = (var_23250_cast_fp16, var_23649_cast_fp16))[name = tensor("op_23746_cast_fp16")]; tensor var_23748_equation_0 = const()[name = tensor("op_23748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23748_cast_fp16 = einsum(equation = var_23748_equation_0, values = (var_23250_cast_fp16, var_23650_cast_fp16))[name = tensor("op_23748_cast_fp16")]; tensor var_23750_equation_0 = const()[name = tensor("op_23750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23750_cast_fp16 = einsum(equation = var_23750_equation_0, values = (var_23250_cast_fp16, var_23651_cast_fp16))[name = tensor("op_23750_cast_fp16")]; tensor var_23752_equation_0 = const()[name = tensor("op_23752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23752_cast_fp16 = einsum(equation = var_23752_equation_0, values = (var_23250_cast_fp16, var_23652_cast_fp16))[name = tensor("op_23752_cast_fp16")]; tensor var_23754_equation_0 = const()[name = tensor("op_23754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23754_cast_fp16 = einsum(equation = var_23754_equation_0, values = (var_23254_cast_fp16, var_23653_cast_fp16))[name = tensor("op_23754_cast_fp16")]; tensor var_23756_equation_0 = const()[name = tensor("op_23756_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23756_cast_fp16 = einsum(equation = var_23756_equation_0, values = (var_23254_cast_fp16, var_23654_cast_fp16))[name = tensor("op_23756_cast_fp16")]; tensor var_23758_equation_0 = const()[name = tensor("op_23758_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23758_cast_fp16 = einsum(equation = var_23758_equation_0, values = (var_23254_cast_fp16, var_23655_cast_fp16))[name = tensor("op_23758_cast_fp16")]; tensor var_23760_equation_0 = const()[name = tensor("op_23760_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23760_cast_fp16 = einsum(equation = var_23760_equation_0, values = (var_23254_cast_fp16, var_23656_cast_fp16))[name = tensor("op_23760_cast_fp16")]; tensor var_23762_equation_0 = const()[name = tensor("op_23762_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23762_cast_fp16 = einsum(equation = var_23762_equation_0, values = (var_23258_cast_fp16, var_23657_cast_fp16))[name = tensor("op_23762_cast_fp16")]; tensor var_23764_equation_0 = const()[name = tensor("op_23764_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23764_cast_fp16 = einsum(equation = var_23764_equation_0, values = (var_23258_cast_fp16, var_23658_cast_fp16))[name = tensor("op_23764_cast_fp16")]; tensor var_23766_equation_0 = const()[name = tensor("op_23766_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23766_cast_fp16 = einsum(equation = var_23766_equation_0, values = (var_23258_cast_fp16, var_23659_cast_fp16))[name = tensor("op_23766_cast_fp16")]; tensor var_23768_equation_0 = const()[name = tensor("op_23768_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23768_cast_fp16 = einsum(equation = var_23768_equation_0, values = (var_23258_cast_fp16, var_23660_cast_fp16))[name = tensor("op_23768_cast_fp16")]; tensor var_23770_equation_0 = const()[name = tensor("op_23770_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23770_cast_fp16 = einsum(equation = var_23770_equation_0, values = (var_23262_cast_fp16, var_23661_cast_fp16))[name = tensor("op_23770_cast_fp16")]; tensor var_23772_equation_0 = const()[name = tensor("op_23772_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23772_cast_fp16 = einsum(equation = var_23772_equation_0, values = (var_23262_cast_fp16, var_23662_cast_fp16))[name = tensor("op_23772_cast_fp16")]; tensor var_23774_equation_0 = const()[name = tensor("op_23774_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23774_cast_fp16 = einsum(equation = var_23774_equation_0, values = (var_23262_cast_fp16, var_23663_cast_fp16))[name = tensor("op_23774_cast_fp16")]; tensor var_23776_equation_0 = const()[name = tensor("op_23776_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23776_cast_fp16 = einsum(equation = var_23776_equation_0, values = (var_23262_cast_fp16, var_23664_cast_fp16))[name = tensor("op_23776_cast_fp16")]; tensor var_23778_equation_0 = const()[name = tensor("op_23778_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23778_cast_fp16 = einsum(equation = var_23778_equation_0, values = (var_23266_cast_fp16, var_23665_cast_fp16))[name = tensor("op_23778_cast_fp16")]; tensor var_23780_equation_0 = const()[name = tensor("op_23780_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23780_cast_fp16 = einsum(equation = var_23780_equation_0, values = (var_23266_cast_fp16, var_23666_cast_fp16))[name = tensor("op_23780_cast_fp16")]; tensor var_23782_equation_0 = const()[name = tensor("op_23782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23782_cast_fp16 = einsum(equation = var_23782_equation_0, values = (var_23266_cast_fp16, var_23667_cast_fp16))[name = tensor("op_23782_cast_fp16")]; tensor var_23784_equation_0 = const()[name = tensor("op_23784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23784_cast_fp16 = einsum(equation = var_23784_equation_0, values = (var_23266_cast_fp16, var_23668_cast_fp16))[name = tensor("op_23784_cast_fp16")]; tensor var_23786_equation_0 = const()[name = tensor("op_23786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23786_cast_fp16 = einsum(equation = var_23786_equation_0, values = (var_23270_cast_fp16, var_23669_cast_fp16))[name = tensor("op_23786_cast_fp16")]; tensor var_23788_equation_0 = const()[name = tensor("op_23788_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23788_cast_fp16 = einsum(equation = var_23788_equation_0, values = (var_23270_cast_fp16, var_23670_cast_fp16))[name = tensor("op_23788_cast_fp16")]; tensor var_23790_equation_0 = const()[name = tensor("op_23790_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23790_cast_fp16 = einsum(equation = var_23790_equation_0, values = (var_23270_cast_fp16, var_23671_cast_fp16))[name = tensor("op_23790_cast_fp16")]; tensor var_23792_equation_0 = const()[name = tensor("op_23792_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23792_cast_fp16 = einsum(equation = var_23792_equation_0, values = (var_23270_cast_fp16, var_23672_cast_fp16))[name = tensor("op_23792_cast_fp16")]; tensor var_23794_equation_0 = const()[name = tensor("op_23794_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23794_cast_fp16 = einsum(equation = var_23794_equation_0, values = (var_23274_cast_fp16, var_23673_cast_fp16))[name = tensor("op_23794_cast_fp16")]; tensor var_23796_equation_0 = const()[name = tensor("op_23796_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23796_cast_fp16 = einsum(equation = var_23796_equation_0, values = (var_23274_cast_fp16, var_23674_cast_fp16))[name = tensor("op_23796_cast_fp16")]; tensor var_23798_equation_0 = const()[name = tensor("op_23798_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23798_cast_fp16 = einsum(equation = var_23798_equation_0, values = (var_23274_cast_fp16, var_23675_cast_fp16))[name = tensor("op_23798_cast_fp16")]; tensor var_23800_equation_0 = const()[name = tensor("op_23800_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23800_cast_fp16 = einsum(equation = var_23800_equation_0, values = (var_23274_cast_fp16, var_23676_cast_fp16))[name = tensor("op_23800_cast_fp16")]; tensor var_23802_equation_0 = const()[name = tensor("op_23802_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23802_cast_fp16 = einsum(equation = var_23802_equation_0, values = (var_23278_cast_fp16, var_23677_cast_fp16))[name = tensor("op_23802_cast_fp16")]; tensor var_23804_equation_0 = const()[name = tensor("op_23804_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23804_cast_fp16 = einsum(equation = var_23804_equation_0, values = (var_23278_cast_fp16, var_23678_cast_fp16))[name = tensor("op_23804_cast_fp16")]; tensor var_23806_equation_0 = const()[name = tensor("op_23806_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23806_cast_fp16 = einsum(equation = var_23806_equation_0, values = (var_23278_cast_fp16, var_23679_cast_fp16))[name = tensor("op_23806_cast_fp16")]; tensor var_23808_equation_0 = const()[name = tensor("op_23808_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23808_cast_fp16 = einsum(equation = var_23808_equation_0, values = (var_23278_cast_fp16, var_23680_cast_fp16))[name = tensor("op_23808_cast_fp16")]; tensor var_23810_equation_0 = const()[name = tensor("op_23810_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23810_cast_fp16 = einsum(equation = var_23810_equation_0, values = (var_23282_cast_fp16, var_23681_cast_fp16))[name = tensor("op_23810_cast_fp16")]; tensor var_23812_equation_0 = const()[name = tensor("op_23812_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23812_cast_fp16 = einsum(equation = var_23812_equation_0, values = (var_23282_cast_fp16, var_23682_cast_fp16))[name = tensor("op_23812_cast_fp16")]; tensor var_23814_equation_0 = const()[name = tensor("op_23814_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23814_cast_fp16 = einsum(equation = var_23814_equation_0, values = (var_23282_cast_fp16, var_23683_cast_fp16))[name = tensor("op_23814_cast_fp16")]; tensor var_23816_equation_0 = const()[name = tensor("op_23816_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23816_cast_fp16 = einsum(equation = var_23816_equation_0, values = (var_23282_cast_fp16, var_23684_cast_fp16))[name = tensor("op_23816_cast_fp16")]; tensor var_23818_equation_0 = const()[name = tensor("op_23818_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23818_cast_fp16 = einsum(equation = var_23818_equation_0, values = (var_23286_cast_fp16, var_23685_cast_fp16))[name = tensor("op_23818_cast_fp16")]; tensor var_23820_equation_0 = const()[name = tensor("op_23820_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23820_cast_fp16 = einsum(equation = var_23820_equation_0, values = (var_23286_cast_fp16, var_23686_cast_fp16))[name = tensor("op_23820_cast_fp16")]; tensor var_23822_equation_0 = const()[name = tensor("op_23822_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23822_cast_fp16 = einsum(equation = var_23822_equation_0, values = (var_23286_cast_fp16, var_23687_cast_fp16))[name = tensor("op_23822_cast_fp16")]; tensor var_23824_equation_0 = const()[name = tensor("op_23824_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23824_cast_fp16 = einsum(equation = var_23824_equation_0, values = (var_23286_cast_fp16, var_23688_cast_fp16))[name = tensor("op_23824_cast_fp16")]; tensor var_23826_equation_0 = const()[name = tensor("op_23826_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23826_cast_fp16 = einsum(equation = var_23826_equation_0, values = (var_23290_cast_fp16, var_23689_cast_fp16))[name = tensor("op_23826_cast_fp16")]; tensor var_23828_equation_0 = const()[name = tensor("op_23828_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23828_cast_fp16 = einsum(equation = var_23828_equation_0, values = (var_23290_cast_fp16, var_23690_cast_fp16))[name = tensor("op_23828_cast_fp16")]; tensor var_23830_equation_0 = const()[name = tensor("op_23830_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23830_cast_fp16 = einsum(equation = var_23830_equation_0, values = (var_23290_cast_fp16, var_23691_cast_fp16))[name = tensor("op_23830_cast_fp16")]; tensor var_23832_equation_0 = const()[name = tensor("op_23832_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23832_cast_fp16 = einsum(equation = var_23832_equation_0, values = (var_23290_cast_fp16, var_23692_cast_fp16))[name = tensor("op_23832_cast_fp16")]; tensor var_23834_equation_0 = const()[name = tensor("op_23834_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23834_cast_fp16 = einsum(equation = var_23834_equation_0, values = (var_23294_cast_fp16, var_23693_cast_fp16))[name = tensor("op_23834_cast_fp16")]; tensor var_23836_equation_0 = const()[name = tensor("op_23836_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23836_cast_fp16 = einsum(equation = var_23836_equation_0, values = (var_23294_cast_fp16, var_23694_cast_fp16))[name = tensor("op_23836_cast_fp16")]; tensor var_23838_equation_0 = const()[name = tensor("op_23838_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23838_cast_fp16 = einsum(equation = var_23838_equation_0, values = (var_23294_cast_fp16, var_23695_cast_fp16))[name = tensor("op_23838_cast_fp16")]; tensor var_23840_equation_0 = const()[name = tensor("op_23840_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23840_cast_fp16 = einsum(equation = var_23840_equation_0, values = (var_23294_cast_fp16, var_23696_cast_fp16))[name = tensor("op_23840_cast_fp16")]; tensor var_23842_equation_0 = const()[name = tensor("op_23842_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23842_cast_fp16 = einsum(equation = var_23842_equation_0, values = (var_23298_cast_fp16, var_23697_cast_fp16))[name = tensor("op_23842_cast_fp16")]; tensor var_23844_equation_0 = const()[name = tensor("op_23844_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23844_cast_fp16 = einsum(equation = var_23844_equation_0, values = (var_23298_cast_fp16, var_23698_cast_fp16))[name = tensor("op_23844_cast_fp16")]; tensor var_23846_equation_0 = const()[name = tensor("op_23846_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23846_cast_fp16 = einsum(equation = var_23846_equation_0, values = (var_23298_cast_fp16, var_23699_cast_fp16))[name = tensor("op_23846_cast_fp16")]; tensor var_23848_equation_0 = const()[name = tensor("op_23848_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23848_cast_fp16 = einsum(equation = var_23848_equation_0, values = (var_23298_cast_fp16, var_23700_cast_fp16))[name = tensor("op_23848_cast_fp16")]; tensor var_23850_equation_0 = const()[name = tensor("op_23850_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23850_cast_fp16 = einsum(equation = var_23850_equation_0, values = (var_23302_cast_fp16, var_23701_cast_fp16))[name = tensor("op_23850_cast_fp16")]; tensor var_23852_equation_0 = const()[name = tensor("op_23852_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23852_cast_fp16 = einsum(equation = var_23852_equation_0, values = (var_23302_cast_fp16, var_23702_cast_fp16))[name = tensor("op_23852_cast_fp16")]; tensor var_23854_equation_0 = const()[name = tensor("op_23854_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23854_cast_fp16 = einsum(equation = var_23854_equation_0, values = (var_23302_cast_fp16, var_23703_cast_fp16))[name = tensor("op_23854_cast_fp16")]; tensor var_23856_equation_0 = const()[name = tensor("op_23856_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23856_cast_fp16 = einsum(equation = var_23856_equation_0, values = (var_23302_cast_fp16, var_23704_cast_fp16))[name = tensor("op_23856_cast_fp16")]; tensor var_23858_equation_0 = const()[name = tensor("op_23858_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23858_cast_fp16 = einsum(equation = var_23858_equation_0, values = (var_23306_cast_fp16, var_23705_cast_fp16))[name = tensor("op_23858_cast_fp16")]; tensor var_23860_equation_0 = const()[name = tensor("op_23860_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23860_cast_fp16 = einsum(equation = var_23860_equation_0, values = (var_23306_cast_fp16, var_23706_cast_fp16))[name = tensor("op_23860_cast_fp16")]; tensor var_23862_equation_0 = const()[name = tensor("op_23862_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23862_cast_fp16 = einsum(equation = var_23862_equation_0, values = (var_23306_cast_fp16, var_23707_cast_fp16))[name = tensor("op_23862_cast_fp16")]; tensor var_23864_equation_0 = const()[name = tensor("op_23864_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23864_cast_fp16 = einsum(equation = var_23864_equation_0, values = (var_23306_cast_fp16, var_23708_cast_fp16))[name = tensor("op_23864_cast_fp16")]; tensor var_23866_equation_0 = const()[name = tensor("op_23866_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23866_cast_fp16 = einsum(equation = var_23866_equation_0, values = (var_23310_cast_fp16, var_23709_cast_fp16))[name = tensor("op_23866_cast_fp16")]; tensor var_23868_equation_0 = const()[name = tensor("op_23868_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23868_cast_fp16 = einsum(equation = var_23868_equation_0, values = (var_23310_cast_fp16, var_23710_cast_fp16))[name = tensor("op_23868_cast_fp16")]; tensor var_23870_equation_0 = const()[name = tensor("op_23870_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23870_cast_fp16 = einsum(equation = var_23870_equation_0, values = (var_23310_cast_fp16, var_23711_cast_fp16))[name = tensor("op_23870_cast_fp16")]; tensor var_23872_equation_0 = const()[name = tensor("op_23872_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_23872_cast_fp16 = einsum(equation = var_23872_equation_0, values = (var_23310_cast_fp16, var_23712_cast_fp16))[name = tensor("op_23872_cast_fp16")]; tensor var_23874_interleave_0 = const()[name = tensor("op_23874_interleave_0"), val = tensor(false)]; tensor var_23874_cast_fp16 = concat(axis = var_22406, interleave = var_23874_interleave_0, values = (var_23714_cast_fp16, var_23716_cast_fp16, var_23718_cast_fp16, var_23720_cast_fp16))[name = tensor("op_23874_cast_fp16")]; tensor var_23876_interleave_0 = const()[name = tensor("op_23876_interleave_0"), val = tensor(false)]; tensor var_23876_cast_fp16 = concat(axis = var_22406, interleave = var_23876_interleave_0, values = (var_23722_cast_fp16, var_23724_cast_fp16, var_23726_cast_fp16, var_23728_cast_fp16))[name = tensor("op_23876_cast_fp16")]; tensor var_23878_interleave_0 = const()[name = tensor("op_23878_interleave_0"), val = tensor(false)]; tensor var_23878_cast_fp16 = concat(axis = var_22406, interleave = var_23878_interleave_0, values = (var_23730_cast_fp16, var_23732_cast_fp16, var_23734_cast_fp16, var_23736_cast_fp16))[name = tensor("op_23878_cast_fp16")]; tensor var_23880_interleave_0 = const()[name = tensor("op_23880_interleave_0"), val = tensor(false)]; tensor var_23880_cast_fp16 = concat(axis = var_22406, interleave = var_23880_interleave_0, values = (var_23738_cast_fp16, var_23740_cast_fp16, var_23742_cast_fp16, var_23744_cast_fp16))[name = tensor("op_23880_cast_fp16")]; tensor var_23882_interleave_0 = const()[name = tensor("op_23882_interleave_0"), val = tensor(false)]; tensor var_23882_cast_fp16 = concat(axis = var_22406, interleave = var_23882_interleave_0, values = (var_23746_cast_fp16, var_23748_cast_fp16, var_23750_cast_fp16, var_23752_cast_fp16))[name = tensor("op_23882_cast_fp16")]; tensor var_23884_interleave_0 = const()[name = tensor("op_23884_interleave_0"), val = tensor(false)]; tensor var_23884_cast_fp16 = concat(axis = var_22406, interleave = var_23884_interleave_0, values = (var_23754_cast_fp16, var_23756_cast_fp16, var_23758_cast_fp16, var_23760_cast_fp16))[name = tensor("op_23884_cast_fp16")]; tensor var_23886_interleave_0 = const()[name = tensor("op_23886_interleave_0"), val = tensor(false)]; tensor var_23886_cast_fp16 = concat(axis = var_22406, interleave = var_23886_interleave_0, values = (var_23762_cast_fp16, var_23764_cast_fp16, var_23766_cast_fp16, var_23768_cast_fp16))[name = tensor("op_23886_cast_fp16")]; tensor var_23888_interleave_0 = const()[name = tensor("op_23888_interleave_0"), val = tensor(false)]; tensor var_23888_cast_fp16 = concat(axis = var_22406, interleave = var_23888_interleave_0, values = (var_23770_cast_fp16, var_23772_cast_fp16, var_23774_cast_fp16, var_23776_cast_fp16))[name = tensor("op_23888_cast_fp16")]; tensor var_23890_interleave_0 = const()[name = tensor("op_23890_interleave_0"), val = tensor(false)]; tensor var_23890_cast_fp16 = concat(axis = var_22406, interleave = var_23890_interleave_0, values = (var_23778_cast_fp16, var_23780_cast_fp16, var_23782_cast_fp16, var_23784_cast_fp16))[name = tensor("op_23890_cast_fp16")]; tensor var_23892_interleave_0 = const()[name = tensor("op_23892_interleave_0"), val = tensor(false)]; tensor var_23892_cast_fp16 = concat(axis = var_22406, interleave = var_23892_interleave_0, values = (var_23786_cast_fp16, var_23788_cast_fp16, var_23790_cast_fp16, var_23792_cast_fp16))[name = tensor("op_23892_cast_fp16")]; tensor var_23894_interleave_0 = const()[name = tensor("op_23894_interleave_0"), val = tensor(false)]; tensor var_23894_cast_fp16 = concat(axis = var_22406, interleave = var_23894_interleave_0, values = (var_23794_cast_fp16, var_23796_cast_fp16, var_23798_cast_fp16, var_23800_cast_fp16))[name = tensor("op_23894_cast_fp16")]; tensor var_23896_interleave_0 = const()[name = tensor("op_23896_interleave_0"), val = tensor(false)]; tensor var_23896_cast_fp16 = concat(axis = var_22406, interleave = var_23896_interleave_0, values = (var_23802_cast_fp16, var_23804_cast_fp16, var_23806_cast_fp16, var_23808_cast_fp16))[name = tensor("op_23896_cast_fp16")]; tensor var_23898_interleave_0 = const()[name = tensor("op_23898_interleave_0"), val = tensor(false)]; tensor var_23898_cast_fp16 = concat(axis = var_22406, interleave = var_23898_interleave_0, values = (var_23810_cast_fp16, var_23812_cast_fp16, var_23814_cast_fp16, var_23816_cast_fp16))[name = tensor("op_23898_cast_fp16")]; tensor var_23900_interleave_0 = const()[name = tensor("op_23900_interleave_0"), val = tensor(false)]; tensor var_23900_cast_fp16 = concat(axis = var_22406, interleave = var_23900_interleave_0, values = (var_23818_cast_fp16, var_23820_cast_fp16, var_23822_cast_fp16, var_23824_cast_fp16))[name = tensor("op_23900_cast_fp16")]; tensor var_23902_interleave_0 = const()[name = tensor("op_23902_interleave_0"), val = tensor(false)]; tensor var_23902_cast_fp16 = concat(axis = var_22406, interleave = var_23902_interleave_0, values = (var_23826_cast_fp16, var_23828_cast_fp16, var_23830_cast_fp16, var_23832_cast_fp16))[name = tensor("op_23902_cast_fp16")]; tensor var_23904_interleave_0 = const()[name = tensor("op_23904_interleave_0"), val = tensor(false)]; tensor var_23904_cast_fp16 = concat(axis = var_22406, interleave = var_23904_interleave_0, values = (var_23834_cast_fp16, var_23836_cast_fp16, var_23838_cast_fp16, var_23840_cast_fp16))[name = tensor("op_23904_cast_fp16")]; tensor var_23906_interleave_0 = const()[name = tensor("op_23906_interleave_0"), val = tensor(false)]; tensor var_23906_cast_fp16 = concat(axis = var_22406, interleave = var_23906_interleave_0, values = (var_23842_cast_fp16, var_23844_cast_fp16, var_23846_cast_fp16, var_23848_cast_fp16))[name = tensor("op_23906_cast_fp16")]; tensor var_23908_interleave_0 = const()[name = tensor("op_23908_interleave_0"), val = tensor(false)]; tensor var_23908_cast_fp16 = concat(axis = var_22406, interleave = var_23908_interleave_0, values = (var_23850_cast_fp16, var_23852_cast_fp16, var_23854_cast_fp16, var_23856_cast_fp16))[name = tensor("op_23908_cast_fp16")]; tensor var_23910_interleave_0 = const()[name = tensor("op_23910_interleave_0"), val = tensor(false)]; tensor var_23910_cast_fp16 = concat(axis = var_22406, interleave = var_23910_interleave_0, values = (var_23858_cast_fp16, var_23860_cast_fp16, var_23862_cast_fp16, var_23864_cast_fp16))[name = tensor("op_23910_cast_fp16")]; tensor var_23912_interleave_0 = const()[name = tensor("op_23912_interleave_0"), val = tensor(false)]; tensor var_23912_cast_fp16 = concat(axis = var_22406, interleave = var_23912_interleave_0, values = (var_23866_cast_fp16, var_23868_cast_fp16, var_23870_cast_fp16, var_23872_cast_fp16))[name = tensor("op_23912_cast_fp16")]; tensor input_113_interleave_0 = const()[name = tensor("input_113_interleave_0"), val = tensor(false)]; tensor input_113_cast_fp16 = concat(axis = var_22431, interleave = input_113_interleave_0, values = (var_23874_cast_fp16, var_23876_cast_fp16, var_23878_cast_fp16, var_23880_cast_fp16, var_23882_cast_fp16, var_23884_cast_fp16, var_23886_cast_fp16, var_23888_cast_fp16, var_23890_cast_fp16, var_23892_cast_fp16, var_23894_cast_fp16, var_23896_cast_fp16, var_23898_cast_fp16, var_23900_cast_fp16, var_23902_cast_fp16, var_23904_cast_fp16, var_23906_cast_fp16, var_23908_cast_fp16, var_23910_cast_fp16, var_23912_cast_fp16))[name = tensor("input_113_cast_fp16")]; tensor var_23923_pad_type_0 = const()[name = tensor("op_23923_pad_type_0"), val = tensor("valid")]; tensor var_23923_strides_0 = const()[name = tensor("op_23923_strides_0"), val = tensor([1, 1])]; tensor var_23923_pad_0 = const()[name = tensor("op_23923_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_23923_dilations_0 = const()[name = tensor("op_23923_dilations_0"), val = tensor([1, 1])]; tensor var_23923_groups_0 = const()[name = tensor("op_23923_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199168832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199988096))), name = tensor("layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_14_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199988224)))]; tensor var_23923_cast_fp16 = conv(bias = layers_14_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_23923_dilations_0, groups = var_23923_groups_0, pad = var_23923_pad_0, pad_type = var_23923_pad_type_0, strides = var_23923_strides_0, weight = layers_14_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_113_cast_fp16)[name = tensor("op_23923_cast_fp16")]; tensor var_23929_pad_type_0 = const()[name = tensor("op_23929_pad_type_0"), val = tensor("valid")]; tensor var_23929_strides_0 = const()[name = tensor("op_23929_strides_0"), val = tensor([1, 1])]; tensor var_23929_pad_0 = const()[name = tensor("op_23929_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_23929_dilations_0 = const()[name = tensor("op_23929_dilations_0"), val = tensor([1, 1])]; tensor var_23929_groups_0 = const()[name = tensor("op_23929_groups_0"), val = tensor(1)]; tensor layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200005632))), name = tensor("layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(199990848))), shape = tensor([1280, 1280, 1, 1])]; tensor var_23929_cast_fp16 = conv(dilations = var_23929_dilations_0, groups = var_23929_groups_0, pad = var_23929_pad_0, pad_type = var_23929_pad_type_0, strides = var_23929_strides_0, weight = layers_14_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_113_cast_fp16)[name = tensor("op_23929_cast_fp16")]; tensor obj_59_cast_fp16 = add(x = var_23923_cast_fp16, y = var_23929_cast_fp16)[name = tensor("obj_59_cast_fp16")]; tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_59_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; tensor out_59_axes_0 = const()[name = tensor("out_59_axes_0"), val = tensor([1])]; tensor var_23940_to_fp16 = const()[name = tensor("op_23940_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_59_cast_fp16 = layer_norm(axes = out_59_axes_0, epsilon = var_23940_to_fp16, x = inputs_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200210496)))]; tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200213120)))]; tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor("input_115_cast_fp16")]; tensor var_23958_pad_type_0 = const()[name = tensor("op_23958_pad_type_0"), val = tensor("valid")]; tensor var_23958_strides_0 = const()[name = tensor("op_23958_strides_0"), val = tensor([1, 1])]; tensor var_23958_pad_0 = const()[name = tensor("op_23958_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_23958_dilations_0 = const()[name = tensor("op_23958_dilations_0"), val = tensor([1, 1])]; tensor var_23958_groups_0 = const()[name = tensor("op_23958_groups_0"), val = tensor(1)]; tensor layers_14_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(200215744))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203492608))), name = tensor("layers_14_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_14_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203492736)))]; tensor var_23958_cast_fp16 = conv(bias = layers_14_fc1_inlier_module_bias_to_fp16, dilations = var_23958_dilations_0, groups = var_23958_groups_0, pad = var_23958_pad_0, pad_type = var_23958_pad_type_0, strides = var_23958_strides_0, weight = layers_14_fc1_inlier_module_weight_to_fp16_palettized, x = input_115_cast_fp16)[name = tensor("op_23958_cast_fp16")]; tensor var_23964_pad_type_0 = const()[name = tensor("op_23964_pad_type_0"), val = tensor("valid")]; tensor var_23964_strides_0 = const()[name = tensor("op_23964_strides_0"), val = tensor([1, 1])]; tensor var_23964_pad_0 = const()[name = tensor("op_23964_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_23964_dilations_0 = const()[name = tensor("op_23964_dilations_0"), val = tensor([1, 1])]; tensor var_23964_groups_0 = const()[name = tensor("op_23964_groups_0"), val = tensor(1)]; tensor layers_14_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203552448))), name = tensor("layers_14_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(203503040))), shape = tensor([5120, 1280, 1, 1])]; tensor var_23964_cast_fp16 = conv(dilations = var_23964_dilations_0, groups = var_23964_groups_0, pad = var_23964_pad_0, pad_type = var_23964_pad_type_0, strides = var_23964_strides_0, weight = layers_14_fc1_outlier_module_weight_to_fp16_sparsified, x = input_115_cast_fp16)[name = tensor("op_23964_cast_fp16")]; tensor input_117_cast_fp16 = add(x = var_23958_cast_fp16, y = var_23964_cast_fp16)[name = tensor("input_117_cast_fp16")]; tensor input_119_mode_0 = const()[name = tensor("input_119_mode_0"), val = tensor("EXACT")]; tensor input_119_cast_fp16 = gelu(mode = input_119_mode_0, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; tensor var_23975_pad_type_0 = const()[name = tensor("op_23975_pad_type_0"), val = tensor("valid")]; tensor var_23975_strides_0 = const()[name = tensor("op_23975_strides_0"), val = tensor([1, 1])]; tensor var_23975_pad_0 = const()[name = tensor("op_23975_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_23975_dilations_0 = const()[name = tensor("op_23975_dilations_0"), val = tensor([1, 1])]; tensor var_23975_groups_0 = const()[name = tensor("op_23975_groups_0"), val = tensor(1)]; tensor layers_14_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(204371712))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207648576))), name = tensor("layers_14_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_14_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_14_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207648704)))]; tensor var_23975_cast_fp16 = conv(bias = layers_14_fc2_inlier_module_bias_to_fp16, dilations = var_23975_dilations_0, groups = var_23975_groups_0, pad = var_23975_pad_0, pad_type = var_23975_pad_type_0, strides = var_23975_strides_0, weight = layers_14_fc2_inlier_module_weight_to_fp16_palettized, x = input_119_cast_fp16)[name = tensor("op_23975_cast_fp16")]; tensor var_23981_pad_type_0 = const()[name = tensor("op_23981_pad_type_0"), val = tensor("valid")]; tensor var_23981_strides_0 = const()[name = tensor("op_23981_strides_0"), val = tensor([1, 1])]; tensor var_23981_pad_0 = const()[name = tensor("op_23981_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_23981_dilations_0 = const()[name = tensor("op_23981_dilations_0"), val = tensor([1, 1])]; tensor var_23981_groups_0 = const()[name = tensor("op_23981_groups_0"), val = tensor(1)]; tensor layers_14_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207800128))), name = tensor("layers_14_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(207651328))), shape = tensor([1280, 5120, 1, 1])]; tensor var_23981_cast_fp16 = conv(dilations = var_23981_dilations_0, groups = var_23981_groups_0, pad = var_23981_pad_0, pad_type = var_23981_pad_type_0, strides = var_23981_strides_0, weight = layers_14_fc2_outlier_module_weight_to_fp16_sparsified, x = input_119_cast_fp16)[name = tensor("op_23981_cast_fp16")]; tensor hidden_states_33_cast_fp16 = add(x = var_23975_cast_fp16, y = var_23981_cast_fp16)[name = tensor("hidden_states_33_cast_fp16")]; tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_33_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; tensor var_23987 = const()[name = tensor("op_23987"), val = tensor(3)]; tensor var_24012 = const()[name = tensor("op_24012"), val = tensor(1)]; tensor out_61_axes_0 = const()[name = tensor("out_61_axes_0"), val = tensor([1])]; tensor var_24029_to_fp16 = const()[name = tensor("op_24029_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_61_cast_fp16 = layer_norm(axes = out_61_axes_0, epsilon = var_24029_to_fp16, x = inputs_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; tensor obj_61_gamma_0_to_fp16 = const()[name = tensor("obj_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208619392)))]; tensor obj_61_beta_0_to_fp16 = const()[name = tensor("obj_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208622016)))]; tensor obj_61_epsilon_0_to_fp16 = const()[name = tensor("obj_61_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_61_cast_fp16")]; tensor var_24051_pad_type_0 = const()[name = tensor("op_24051_pad_type_0"), val = tensor("valid")]; tensor var_24051_strides_0 = const()[name = tensor("op_24051_strides_0"), val = tensor([1, 1])]; tensor var_24051_pad_0 = const()[name = tensor("op_24051_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_24051_dilations_0 = const()[name = tensor("op_24051_dilations_0"), val = tensor([1, 1])]; tensor var_24051_groups_0 = const()[name = tensor("op_24051_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(208624640))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209443904))), name = tensor("layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_15_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209444032)))]; tensor var_24051_cast_fp16 = conv(bias = layers_15_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_24051_dilations_0, groups = var_24051_groups_0, pad = var_24051_pad_0, pad_type = var_24051_pad_type_0, strides = var_24051_strides_0, weight = layers_15_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = tensor("op_24051_cast_fp16")]; tensor var_24057_pad_type_0 = const()[name = tensor("op_24057_pad_type_0"), val = tensor("valid")]; tensor var_24057_strides_0 = const()[name = tensor("op_24057_strides_0"), val = tensor([1, 1])]; tensor var_24057_pad_0 = const()[name = tensor("op_24057_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_24057_dilations_0 = const()[name = tensor("op_24057_dilations_0"), val = tensor([1, 1])]; tensor var_24057_groups_0 = const()[name = tensor("op_24057_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209492416))), name = tensor("layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209446656))), shape = tensor([1280, 1280, 1, 1])]; tensor var_24057_cast_fp16 = conv(dilations = var_24057_dilations_0, groups = var_24057_groups_0, pad = var_24057_pad_0, pad_type = var_24057_pad_type_0, strides = var_24057_strides_0, weight = layers_15_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = tensor("op_24057_cast_fp16")]; tensor query_31_cast_fp16 = add(x = var_24051_cast_fp16, y = var_24057_cast_fp16)[name = tensor("query_31_cast_fp16")]; tensor var_24066_pad_type_0 = const()[name = tensor("op_24066_pad_type_0"), val = tensor("valid")]; tensor var_24066_strides_0 = const()[name = tensor("op_24066_strides_0"), val = tensor([1, 1])]; tensor var_24066_pad_0 = const()[name = tensor("op_24066_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_24066_dilations_0 = const()[name = tensor("op_24066_dilations_0"), val = tensor([1, 1])]; tensor var_24066_groups_0 = const()[name = tensor("op_24066_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(209697280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210516544))), name = tensor("layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_24066_cast_fp16 = conv(dilations = var_24066_dilations_0, groups = var_24066_groups_0, pad = var_24066_pad_0, pad_type = var_24066_pad_type_0, strides = var_24066_strides_0, weight = layers_15_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = tensor("op_24066_cast_fp16")]; tensor var_24072_pad_type_0 = const()[name = tensor("op_24072_pad_type_0"), val = tensor("valid")]; tensor var_24072_strides_0 = const()[name = tensor("op_24072_strides_0"), val = tensor([1, 1])]; tensor var_24072_pad_0 = const()[name = tensor("op_24072_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_24072_dilations_0 = const()[name = tensor("op_24072_dilations_0"), val = tensor([1, 1])]; tensor var_24072_groups_0 = const()[name = tensor("op_24072_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210542144))), name = tensor("layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210516672))), shape = tensor([1280, 1280, 1, 1])]; tensor var_24072_cast_fp16 = conv(dilations = var_24072_dilations_0, groups = var_24072_groups_0, pad = var_24072_pad_0, pad_type = var_24072_pad_type_0, strides = var_24072_strides_0, weight = layers_15_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = tensor("op_24072_cast_fp16")]; tensor key_31_cast_fp16 = add(x = var_24066_cast_fp16, y = var_24072_cast_fp16)[name = tensor("key_31_cast_fp16")]; tensor var_24082_pad_type_0 = const()[name = tensor("op_24082_pad_type_0"), val = tensor("valid")]; tensor var_24082_strides_0 = const()[name = tensor("op_24082_strides_0"), val = tensor([1, 1])]; tensor var_24082_pad_0 = const()[name = tensor("op_24082_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_24082_dilations_0 = const()[name = tensor("op_24082_dilations_0"), val = tensor([1, 1])]; tensor var_24082_groups_0 = const()[name = tensor("op_24082_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(210747008))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211566272))), name = tensor("layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_15_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211566400)))]; tensor var_24082_cast_fp16 = conv(bias = layers_15_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_24082_dilations_0, groups = var_24082_groups_0, pad = var_24082_pad_0, pad_type = var_24082_pad_type_0, strides = var_24082_strides_0, weight = layers_15_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_61_cast_fp16)[name = tensor("op_24082_cast_fp16")]; tensor var_24088_pad_type_0 = const()[name = tensor("op_24088_pad_type_0"), val = tensor("valid")]; tensor var_24088_strides_0 = const()[name = tensor("op_24088_strides_0"), val = tensor([1, 1])]; tensor var_24088_pad_0 = const()[name = tensor("op_24088_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_24088_dilations_0 = const()[name = tensor("op_24088_dilations_0"), val = tensor([1, 1])]; tensor var_24088_groups_0 = const()[name = tensor("op_24088_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211586240))), name = tensor("layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211569024))), shape = tensor([1280, 1280, 1, 1])]; tensor var_24088_cast_fp16 = conv(dilations = var_24088_dilations_0, groups = var_24088_groups_0, pad = var_24088_pad_0, pad_type = var_24088_pad_type_0, strides = var_24088_strides_0, weight = layers_15_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_61_cast_fp16)[name = tensor("op_24088_cast_fp16")]; tensor value_31_cast_fp16 = add(x = var_24082_cast_fp16, y = var_24088_cast_fp16)[name = tensor("value_31_cast_fp16")]; tensor var_24094_begin_0 = const()[name = tensor("op_24094_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24094_end_0 = const()[name = tensor("op_24094_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24094_end_mask_0 = const()[name = tensor("op_24094_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24094_cast_fp16 = slice_by_index(begin = var_24094_begin_0, end = var_24094_end_0, end_mask = var_24094_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24094_cast_fp16")]; tensor var_24098_begin_0 = const()[name = tensor("op_24098_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_24098_end_0 = const()[name = tensor("op_24098_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_24098_end_mask_0 = const()[name = tensor("op_24098_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24098_cast_fp16 = slice_by_index(begin = var_24098_begin_0, end = var_24098_end_0, end_mask = var_24098_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24098_cast_fp16")]; tensor var_24102_begin_0 = const()[name = tensor("op_24102_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_24102_end_0 = const()[name = tensor("op_24102_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_24102_end_mask_0 = const()[name = tensor("op_24102_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24102_cast_fp16 = slice_by_index(begin = var_24102_begin_0, end = var_24102_end_0, end_mask = var_24102_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24102_cast_fp16")]; tensor var_24106_begin_0 = const()[name = tensor("op_24106_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_24106_end_0 = const()[name = tensor("op_24106_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_24106_end_mask_0 = const()[name = tensor("op_24106_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24106_cast_fp16 = slice_by_index(begin = var_24106_begin_0, end = var_24106_end_0, end_mask = var_24106_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24106_cast_fp16")]; tensor var_24110_begin_0 = const()[name = tensor("op_24110_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_24110_end_0 = const()[name = tensor("op_24110_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_24110_end_mask_0 = const()[name = tensor("op_24110_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24110_cast_fp16 = slice_by_index(begin = var_24110_begin_0, end = var_24110_end_0, end_mask = var_24110_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24110_cast_fp16")]; tensor var_24114_begin_0 = const()[name = tensor("op_24114_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_24114_end_0 = const()[name = tensor("op_24114_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_24114_end_mask_0 = const()[name = tensor("op_24114_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24114_cast_fp16 = slice_by_index(begin = var_24114_begin_0, end = var_24114_end_0, end_mask = var_24114_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24114_cast_fp16")]; tensor var_24118_begin_0 = const()[name = tensor("op_24118_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_24118_end_0 = const()[name = tensor("op_24118_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_24118_end_mask_0 = const()[name = tensor("op_24118_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24118_cast_fp16 = slice_by_index(begin = var_24118_begin_0, end = var_24118_end_0, end_mask = var_24118_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24118_cast_fp16")]; tensor var_24122_begin_0 = const()[name = tensor("op_24122_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_24122_end_0 = const()[name = tensor("op_24122_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_24122_end_mask_0 = const()[name = tensor("op_24122_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24122_cast_fp16 = slice_by_index(begin = var_24122_begin_0, end = var_24122_end_0, end_mask = var_24122_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24122_cast_fp16")]; tensor var_24126_begin_0 = const()[name = tensor("op_24126_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_24126_end_0 = const()[name = tensor("op_24126_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_24126_end_mask_0 = const()[name = tensor("op_24126_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24126_cast_fp16 = slice_by_index(begin = var_24126_begin_0, end = var_24126_end_0, end_mask = var_24126_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24126_cast_fp16")]; tensor var_24130_begin_0 = const()[name = tensor("op_24130_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_24130_end_0 = const()[name = tensor("op_24130_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_24130_end_mask_0 = const()[name = tensor("op_24130_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24130_cast_fp16 = slice_by_index(begin = var_24130_begin_0, end = var_24130_end_0, end_mask = var_24130_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24130_cast_fp16")]; tensor var_24134_begin_0 = const()[name = tensor("op_24134_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_24134_end_0 = const()[name = tensor("op_24134_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_24134_end_mask_0 = const()[name = tensor("op_24134_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24134_cast_fp16 = slice_by_index(begin = var_24134_begin_0, end = var_24134_end_0, end_mask = var_24134_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24134_cast_fp16")]; tensor var_24138_begin_0 = const()[name = tensor("op_24138_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_24138_end_0 = const()[name = tensor("op_24138_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_24138_end_mask_0 = const()[name = tensor("op_24138_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24138_cast_fp16 = slice_by_index(begin = var_24138_begin_0, end = var_24138_end_0, end_mask = var_24138_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24138_cast_fp16")]; tensor var_24142_begin_0 = const()[name = tensor("op_24142_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_24142_end_0 = const()[name = tensor("op_24142_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_24142_end_mask_0 = const()[name = tensor("op_24142_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24142_cast_fp16 = slice_by_index(begin = var_24142_begin_0, end = var_24142_end_0, end_mask = var_24142_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24142_cast_fp16")]; tensor var_24146_begin_0 = const()[name = tensor("op_24146_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_24146_end_0 = const()[name = tensor("op_24146_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_24146_end_mask_0 = const()[name = tensor("op_24146_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24146_cast_fp16 = slice_by_index(begin = var_24146_begin_0, end = var_24146_end_0, end_mask = var_24146_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24146_cast_fp16")]; tensor var_24150_begin_0 = const()[name = tensor("op_24150_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_24150_end_0 = const()[name = tensor("op_24150_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_24150_end_mask_0 = const()[name = tensor("op_24150_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24150_cast_fp16 = slice_by_index(begin = var_24150_begin_0, end = var_24150_end_0, end_mask = var_24150_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24150_cast_fp16")]; tensor var_24154_begin_0 = const()[name = tensor("op_24154_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_24154_end_0 = const()[name = tensor("op_24154_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_24154_end_mask_0 = const()[name = tensor("op_24154_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24154_cast_fp16 = slice_by_index(begin = var_24154_begin_0, end = var_24154_end_0, end_mask = var_24154_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24154_cast_fp16")]; tensor var_24158_begin_0 = const()[name = tensor("op_24158_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_24158_end_0 = const()[name = tensor("op_24158_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_24158_end_mask_0 = const()[name = tensor("op_24158_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24158_cast_fp16 = slice_by_index(begin = var_24158_begin_0, end = var_24158_end_0, end_mask = var_24158_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24158_cast_fp16")]; tensor var_24162_begin_0 = const()[name = tensor("op_24162_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_24162_end_0 = const()[name = tensor("op_24162_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_24162_end_mask_0 = const()[name = tensor("op_24162_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24162_cast_fp16 = slice_by_index(begin = var_24162_begin_0, end = var_24162_end_0, end_mask = var_24162_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24162_cast_fp16")]; tensor var_24166_begin_0 = const()[name = tensor("op_24166_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_24166_end_0 = const()[name = tensor("op_24166_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_24166_end_mask_0 = const()[name = tensor("op_24166_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24166_cast_fp16 = slice_by_index(begin = var_24166_begin_0, end = var_24166_end_0, end_mask = var_24166_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24166_cast_fp16")]; tensor var_24170_begin_0 = const()[name = tensor("op_24170_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_24170_end_0 = const()[name = tensor("op_24170_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_24170_end_mask_0 = const()[name = tensor("op_24170_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24170_cast_fp16 = slice_by_index(begin = var_24170_begin_0, end = var_24170_end_0, end_mask = var_24170_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_24170_cast_fp16")]; tensor var_24179_begin_0 = const()[name = tensor("op_24179_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24179_end_0 = const()[name = tensor("op_24179_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24179_end_mask_0 = const()[name = tensor("op_24179_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24179_cast_fp16 = slice_by_index(begin = var_24179_begin_0, end = var_24179_end_0, end_mask = var_24179_end_mask_0, x = var_24094_cast_fp16)[name = tensor("op_24179_cast_fp16")]; tensor var_24186_begin_0 = const()[name = tensor("op_24186_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24186_end_0 = const()[name = tensor("op_24186_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24186_end_mask_0 = const()[name = tensor("op_24186_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24186_cast_fp16 = slice_by_index(begin = var_24186_begin_0, end = var_24186_end_0, end_mask = var_24186_end_mask_0, x = var_24094_cast_fp16)[name = tensor("op_24186_cast_fp16")]; tensor var_24193_begin_0 = const()[name = tensor("op_24193_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24193_end_0 = const()[name = tensor("op_24193_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24193_end_mask_0 = const()[name = tensor("op_24193_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24193_cast_fp16 = slice_by_index(begin = var_24193_begin_0, end = var_24193_end_0, end_mask = var_24193_end_mask_0, x = var_24094_cast_fp16)[name = tensor("op_24193_cast_fp16")]; tensor var_24200_begin_0 = const()[name = tensor("op_24200_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24200_end_0 = const()[name = tensor("op_24200_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24200_end_mask_0 = const()[name = tensor("op_24200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24200_cast_fp16 = slice_by_index(begin = var_24200_begin_0, end = var_24200_end_0, end_mask = var_24200_end_mask_0, x = var_24094_cast_fp16)[name = tensor("op_24200_cast_fp16")]; tensor var_24207_begin_0 = const()[name = tensor("op_24207_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24207_end_0 = const()[name = tensor("op_24207_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24207_end_mask_0 = const()[name = tensor("op_24207_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24207_cast_fp16 = slice_by_index(begin = var_24207_begin_0, end = var_24207_end_0, end_mask = var_24207_end_mask_0, x = var_24098_cast_fp16)[name = tensor("op_24207_cast_fp16")]; tensor var_24214_begin_0 = const()[name = tensor("op_24214_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24214_end_0 = const()[name = tensor("op_24214_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24214_end_mask_0 = const()[name = tensor("op_24214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24214_cast_fp16 = slice_by_index(begin = var_24214_begin_0, end = var_24214_end_0, end_mask = var_24214_end_mask_0, x = var_24098_cast_fp16)[name = tensor("op_24214_cast_fp16")]; tensor var_24221_begin_0 = const()[name = tensor("op_24221_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24221_end_0 = const()[name = tensor("op_24221_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24221_end_mask_0 = const()[name = tensor("op_24221_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24221_cast_fp16 = slice_by_index(begin = var_24221_begin_0, end = var_24221_end_0, end_mask = var_24221_end_mask_0, x = var_24098_cast_fp16)[name = tensor("op_24221_cast_fp16")]; tensor var_24228_begin_0 = const()[name = tensor("op_24228_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24228_end_0 = const()[name = tensor("op_24228_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24228_end_mask_0 = const()[name = tensor("op_24228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24228_cast_fp16 = slice_by_index(begin = var_24228_begin_0, end = var_24228_end_0, end_mask = var_24228_end_mask_0, x = var_24098_cast_fp16)[name = tensor("op_24228_cast_fp16")]; tensor var_24235_begin_0 = const()[name = tensor("op_24235_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24235_end_0 = const()[name = tensor("op_24235_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24235_end_mask_0 = const()[name = tensor("op_24235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24235_cast_fp16 = slice_by_index(begin = var_24235_begin_0, end = var_24235_end_0, end_mask = var_24235_end_mask_0, x = var_24102_cast_fp16)[name = tensor("op_24235_cast_fp16")]; tensor var_24242_begin_0 = const()[name = tensor("op_24242_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24242_end_0 = const()[name = tensor("op_24242_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24242_end_mask_0 = const()[name = tensor("op_24242_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24242_cast_fp16 = slice_by_index(begin = var_24242_begin_0, end = var_24242_end_0, end_mask = var_24242_end_mask_0, x = var_24102_cast_fp16)[name = tensor("op_24242_cast_fp16")]; tensor var_24249_begin_0 = const()[name = tensor("op_24249_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24249_end_0 = const()[name = tensor("op_24249_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24249_end_mask_0 = const()[name = tensor("op_24249_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24249_cast_fp16 = slice_by_index(begin = var_24249_begin_0, end = var_24249_end_0, end_mask = var_24249_end_mask_0, x = var_24102_cast_fp16)[name = tensor("op_24249_cast_fp16")]; tensor var_24256_begin_0 = const()[name = tensor("op_24256_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24256_end_0 = const()[name = tensor("op_24256_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24256_end_mask_0 = const()[name = tensor("op_24256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24256_cast_fp16 = slice_by_index(begin = var_24256_begin_0, end = var_24256_end_0, end_mask = var_24256_end_mask_0, x = var_24102_cast_fp16)[name = tensor("op_24256_cast_fp16")]; tensor var_24263_begin_0 = const()[name = tensor("op_24263_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24263_end_0 = const()[name = tensor("op_24263_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24263_end_mask_0 = const()[name = tensor("op_24263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24263_cast_fp16 = slice_by_index(begin = var_24263_begin_0, end = var_24263_end_0, end_mask = var_24263_end_mask_0, x = var_24106_cast_fp16)[name = tensor("op_24263_cast_fp16")]; tensor var_24270_begin_0 = const()[name = tensor("op_24270_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24270_end_0 = const()[name = tensor("op_24270_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24270_end_mask_0 = const()[name = tensor("op_24270_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24270_cast_fp16 = slice_by_index(begin = var_24270_begin_0, end = var_24270_end_0, end_mask = var_24270_end_mask_0, x = var_24106_cast_fp16)[name = tensor("op_24270_cast_fp16")]; tensor var_24277_begin_0 = const()[name = tensor("op_24277_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24277_end_0 = const()[name = tensor("op_24277_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24277_end_mask_0 = const()[name = tensor("op_24277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24277_cast_fp16 = slice_by_index(begin = var_24277_begin_0, end = var_24277_end_0, end_mask = var_24277_end_mask_0, x = var_24106_cast_fp16)[name = tensor("op_24277_cast_fp16")]; tensor var_24284_begin_0 = const()[name = tensor("op_24284_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24284_end_0 = const()[name = tensor("op_24284_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24284_end_mask_0 = const()[name = tensor("op_24284_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24284_cast_fp16 = slice_by_index(begin = var_24284_begin_0, end = var_24284_end_0, end_mask = var_24284_end_mask_0, x = var_24106_cast_fp16)[name = tensor("op_24284_cast_fp16")]; tensor var_24291_begin_0 = const()[name = tensor("op_24291_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24291_end_0 = const()[name = tensor("op_24291_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24291_end_mask_0 = const()[name = tensor("op_24291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24291_cast_fp16 = slice_by_index(begin = var_24291_begin_0, end = var_24291_end_0, end_mask = var_24291_end_mask_0, x = var_24110_cast_fp16)[name = tensor("op_24291_cast_fp16")]; tensor var_24298_begin_0 = const()[name = tensor("op_24298_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24298_end_0 = const()[name = tensor("op_24298_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24298_end_mask_0 = const()[name = tensor("op_24298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24298_cast_fp16 = slice_by_index(begin = var_24298_begin_0, end = var_24298_end_0, end_mask = var_24298_end_mask_0, x = var_24110_cast_fp16)[name = tensor("op_24298_cast_fp16")]; tensor var_24305_begin_0 = const()[name = tensor("op_24305_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24305_end_0 = const()[name = tensor("op_24305_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24305_end_mask_0 = const()[name = tensor("op_24305_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24305_cast_fp16 = slice_by_index(begin = var_24305_begin_0, end = var_24305_end_0, end_mask = var_24305_end_mask_0, x = var_24110_cast_fp16)[name = tensor("op_24305_cast_fp16")]; tensor var_24312_begin_0 = const()[name = tensor("op_24312_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24312_end_0 = const()[name = tensor("op_24312_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24312_end_mask_0 = const()[name = tensor("op_24312_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24312_cast_fp16 = slice_by_index(begin = var_24312_begin_0, end = var_24312_end_0, end_mask = var_24312_end_mask_0, x = var_24110_cast_fp16)[name = tensor("op_24312_cast_fp16")]; tensor var_24319_begin_0 = const()[name = tensor("op_24319_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24319_end_0 = const()[name = tensor("op_24319_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24319_end_mask_0 = const()[name = tensor("op_24319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24319_cast_fp16 = slice_by_index(begin = var_24319_begin_0, end = var_24319_end_0, end_mask = var_24319_end_mask_0, x = var_24114_cast_fp16)[name = tensor("op_24319_cast_fp16")]; tensor var_24326_begin_0 = const()[name = tensor("op_24326_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24326_end_0 = const()[name = tensor("op_24326_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24326_end_mask_0 = const()[name = tensor("op_24326_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24326_cast_fp16 = slice_by_index(begin = var_24326_begin_0, end = var_24326_end_0, end_mask = var_24326_end_mask_0, x = var_24114_cast_fp16)[name = tensor("op_24326_cast_fp16")]; tensor var_24333_begin_0 = const()[name = tensor("op_24333_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24333_end_0 = const()[name = tensor("op_24333_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24333_end_mask_0 = const()[name = tensor("op_24333_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24333_cast_fp16 = slice_by_index(begin = var_24333_begin_0, end = var_24333_end_0, end_mask = var_24333_end_mask_0, x = var_24114_cast_fp16)[name = tensor("op_24333_cast_fp16")]; tensor var_24340_begin_0 = const()[name = tensor("op_24340_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24340_end_0 = const()[name = tensor("op_24340_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24340_end_mask_0 = const()[name = tensor("op_24340_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24340_cast_fp16 = slice_by_index(begin = var_24340_begin_0, end = var_24340_end_0, end_mask = var_24340_end_mask_0, x = var_24114_cast_fp16)[name = tensor("op_24340_cast_fp16")]; tensor var_24347_begin_0 = const()[name = tensor("op_24347_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24347_end_0 = const()[name = tensor("op_24347_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24347_end_mask_0 = const()[name = tensor("op_24347_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24347_cast_fp16 = slice_by_index(begin = var_24347_begin_0, end = var_24347_end_0, end_mask = var_24347_end_mask_0, x = var_24118_cast_fp16)[name = tensor("op_24347_cast_fp16")]; tensor var_24354_begin_0 = const()[name = tensor("op_24354_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24354_end_0 = const()[name = tensor("op_24354_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24354_end_mask_0 = const()[name = tensor("op_24354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24354_cast_fp16 = slice_by_index(begin = var_24354_begin_0, end = var_24354_end_0, end_mask = var_24354_end_mask_0, x = var_24118_cast_fp16)[name = tensor("op_24354_cast_fp16")]; tensor var_24361_begin_0 = const()[name = tensor("op_24361_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24361_end_0 = const()[name = tensor("op_24361_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24361_end_mask_0 = const()[name = tensor("op_24361_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24361_cast_fp16 = slice_by_index(begin = var_24361_begin_0, end = var_24361_end_0, end_mask = var_24361_end_mask_0, x = var_24118_cast_fp16)[name = tensor("op_24361_cast_fp16")]; tensor var_24368_begin_0 = const()[name = tensor("op_24368_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24368_end_0 = const()[name = tensor("op_24368_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24368_end_mask_0 = const()[name = tensor("op_24368_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24368_cast_fp16 = slice_by_index(begin = var_24368_begin_0, end = var_24368_end_0, end_mask = var_24368_end_mask_0, x = var_24118_cast_fp16)[name = tensor("op_24368_cast_fp16")]; tensor var_24375_begin_0 = const()[name = tensor("op_24375_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24375_end_0 = const()[name = tensor("op_24375_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24375_end_mask_0 = const()[name = tensor("op_24375_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24375_cast_fp16 = slice_by_index(begin = var_24375_begin_0, end = var_24375_end_0, end_mask = var_24375_end_mask_0, x = var_24122_cast_fp16)[name = tensor("op_24375_cast_fp16")]; tensor var_24382_begin_0 = const()[name = tensor("op_24382_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24382_end_0 = const()[name = tensor("op_24382_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24382_end_mask_0 = const()[name = tensor("op_24382_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24382_cast_fp16 = slice_by_index(begin = var_24382_begin_0, end = var_24382_end_0, end_mask = var_24382_end_mask_0, x = var_24122_cast_fp16)[name = tensor("op_24382_cast_fp16")]; tensor var_24389_begin_0 = const()[name = tensor("op_24389_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24389_end_0 = const()[name = tensor("op_24389_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24389_end_mask_0 = const()[name = tensor("op_24389_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24389_cast_fp16 = slice_by_index(begin = var_24389_begin_0, end = var_24389_end_0, end_mask = var_24389_end_mask_0, x = var_24122_cast_fp16)[name = tensor("op_24389_cast_fp16")]; tensor var_24396_begin_0 = const()[name = tensor("op_24396_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24396_end_0 = const()[name = tensor("op_24396_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24396_end_mask_0 = const()[name = tensor("op_24396_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24396_cast_fp16 = slice_by_index(begin = var_24396_begin_0, end = var_24396_end_0, end_mask = var_24396_end_mask_0, x = var_24122_cast_fp16)[name = tensor("op_24396_cast_fp16")]; tensor var_24403_begin_0 = const()[name = tensor("op_24403_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24403_end_0 = const()[name = tensor("op_24403_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24403_end_mask_0 = const()[name = tensor("op_24403_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24403_cast_fp16 = slice_by_index(begin = var_24403_begin_0, end = var_24403_end_0, end_mask = var_24403_end_mask_0, x = var_24126_cast_fp16)[name = tensor("op_24403_cast_fp16")]; tensor var_24410_begin_0 = const()[name = tensor("op_24410_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24410_end_0 = const()[name = tensor("op_24410_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24410_end_mask_0 = const()[name = tensor("op_24410_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24410_cast_fp16 = slice_by_index(begin = var_24410_begin_0, end = var_24410_end_0, end_mask = var_24410_end_mask_0, x = var_24126_cast_fp16)[name = tensor("op_24410_cast_fp16")]; tensor var_24417_begin_0 = const()[name = tensor("op_24417_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24417_end_0 = const()[name = tensor("op_24417_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24417_end_mask_0 = const()[name = tensor("op_24417_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24417_cast_fp16 = slice_by_index(begin = var_24417_begin_0, end = var_24417_end_0, end_mask = var_24417_end_mask_0, x = var_24126_cast_fp16)[name = tensor("op_24417_cast_fp16")]; tensor var_24424_begin_0 = const()[name = tensor("op_24424_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24424_end_0 = const()[name = tensor("op_24424_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24424_end_mask_0 = const()[name = tensor("op_24424_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24424_cast_fp16 = slice_by_index(begin = var_24424_begin_0, end = var_24424_end_0, end_mask = var_24424_end_mask_0, x = var_24126_cast_fp16)[name = tensor("op_24424_cast_fp16")]; tensor var_24431_begin_0 = const()[name = tensor("op_24431_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24431_end_0 = const()[name = tensor("op_24431_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24431_end_mask_0 = const()[name = tensor("op_24431_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24431_cast_fp16 = slice_by_index(begin = var_24431_begin_0, end = var_24431_end_0, end_mask = var_24431_end_mask_0, x = var_24130_cast_fp16)[name = tensor("op_24431_cast_fp16")]; tensor var_24438_begin_0 = const()[name = tensor("op_24438_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24438_end_0 = const()[name = tensor("op_24438_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24438_end_mask_0 = const()[name = tensor("op_24438_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24438_cast_fp16 = slice_by_index(begin = var_24438_begin_0, end = var_24438_end_0, end_mask = var_24438_end_mask_0, x = var_24130_cast_fp16)[name = tensor("op_24438_cast_fp16")]; tensor var_24445_begin_0 = const()[name = tensor("op_24445_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24445_end_0 = const()[name = tensor("op_24445_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24445_end_mask_0 = const()[name = tensor("op_24445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24445_cast_fp16 = slice_by_index(begin = var_24445_begin_0, end = var_24445_end_0, end_mask = var_24445_end_mask_0, x = var_24130_cast_fp16)[name = tensor("op_24445_cast_fp16")]; tensor var_24452_begin_0 = const()[name = tensor("op_24452_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24452_end_0 = const()[name = tensor("op_24452_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24452_end_mask_0 = const()[name = tensor("op_24452_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24452_cast_fp16 = slice_by_index(begin = var_24452_begin_0, end = var_24452_end_0, end_mask = var_24452_end_mask_0, x = var_24130_cast_fp16)[name = tensor("op_24452_cast_fp16")]; tensor var_24459_begin_0 = const()[name = tensor("op_24459_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24459_end_0 = const()[name = tensor("op_24459_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24459_end_mask_0 = const()[name = tensor("op_24459_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24459_cast_fp16 = slice_by_index(begin = var_24459_begin_0, end = var_24459_end_0, end_mask = var_24459_end_mask_0, x = var_24134_cast_fp16)[name = tensor("op_24459_cast_fp16")]; tensor var_24466_begin_0 = const()[name = tensor("op_24466_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24466_end_0 = const()[name = tensor("op_24466_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24466_end_mask_0 = const()[name = tensor("op_24466_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24466_cast_fp16 = slice_by_index(begin = var_24466_begin_0, end = var_24466_end_0, end_mask = var_24466_end_mask_0, x = var_24134_cast_fp16)[name = tensor("op_24466_cast_fp16")]; tensor var_24473_begin_0 = const()[name = tensor("op_24473_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24473_end_0 = const()[name = tensor("op_24473_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24473_end_mask_0 = const()[name = tensor("op_24473_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24473_cast_fp16 = slice_by_index(begin = var_24473_begin_0, end = var_24473_end_0, end_mask = var_24473_end_mask_0, x = var_24134_cast_fp16)[name = tensor("op_24473_cast_fp16")]; tensor var_24480_begin_0 = const()[name = tensor("op_24480_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24480_end_0 = const()[name = tensor("op_24480_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24480_end_mask_0 = const()[name = tensor("op_24480_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24480_cast_fp16 = slice_by_index(begin = var_24480_begin_0, end = var_24480_end_0, end_mask = var_24480_end_mask_0, x = var_24134_cast_fp16)[name = tensor("op_24480_cast_fp16")]; tensor var_24487_begin_0 = const()[name = tensor("op_24487_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24487_end_0 = const()[name = tensor("op_24487_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24487_end_mask_0 = const()[name = tensor("op_24487_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24487_cast_fp16 = slice_by_index(begin = var_24487_begin_0, end = var_24487_end_0, end_mask = var_24487_end_mask_0, x = var_24138_cast_fp16)[name = tensor("op_24487_cast_fp16")]; tensor var_24494_begin_0 = const()[name = tensor("op_24494_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24494_end_0 = const()[name = tensor("op_24494_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24494_end_mask_0 = const()[name = tensor("op_24494_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24494_cast_fp16 = slice_by_index(begin = var_24494_begin_0, end = var_24494_end_0, end_mask = var_24494_end_mask_0, x = var_24138_cast_fp16)[name = tensor("op_24494_cast_fp16")]; tensor var_24501_begin_0 = const()[name = tensor("op_24501_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24501_end_0 = const()[name = tensor("op_24501_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24501_end_mask_0 = const()[name = tensor("op_24501_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24501_cast_fp16 = slice_by_index(begin = var_24501_begin_0, end = var_24501_end_0, end_mask = var_24501_end_mask_0, x = var_24138_cast_fp16)[name = tensor("op_24501_cast_fp16")]; tensor var_24508_begin_0 = const()[name = tensor("op_24508_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24508_end_0 = const()[name = tensor("op_24508_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24508_end_mask_0 = const()[name = tensor("op_24508_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24508_cast_fp16 = slice_by_index(begin = var_24508_begin_0, end = var_24508_end_0, end_mask = var_24508_end_mask_0, x = var_24138_cast_fp16)[name = tensor("op_24508_cast_fp16")]; tensor var_24515_begin_0 = const()[name = tensor("op_24515_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24515_end_0 = const()[name = tensor("op_24515_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24515_end_mask_0 = const()[name = tensor("op_24515_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24515_cast_fp16 = slice_by_index(begin = var_24515_begin_0, end = var_24515_end_0, end_mask = var_24515_end_mask_0, x = var_24142_cast_fp16)[name = tensor("op_24515_cast_fp16")]; tensor var_24522_begin_0 = const()[name = tensor("op_24522_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24522_end_0 = const()[name = tensor("op_24522_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24522_end_mask_0 = const()[name = tensor("op_24522_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24522_cast_fp16 = slice_by_index(begin = var_24522_begin_0, end = var_24522_end_0, end_mask = var_24522_end_mask_0, x = var_24142_cast_fp16)[name = tensor("op_24522_cast_fp16")]; tensor var_24529_begin_0 = const()[name = tensor("op_24529_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24529_end_0 = const()[name = tensor("op_24529_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24529_end_mask_0 = const()[name = tensor("op_24529_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24529_cast_fp16 = slice_by_index(begin = var_24529_begin_0, end = var_24529_end_0, end_mask = var_24529_end_mask_0, x = var_24142_cast_fp16)[name = tensor("op_24529_cast_fp16")]; tensor var_24536_begin_0 = const()[name = tensor("op_24536_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24536_end_0 = const()[name = tensor("op_24536_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24536_end_mask_0 = const()[name = tensor("op_24536_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24536_cast_fp16 = slice_by_index(begin = var_24536_begin_0, end = var_24536_end_0, end_mask = var_24536_end_mask_0, x = var_24142_cast_fp16)[name = tensor("op_24536_cast_fp16")]; tensor var_24543_begin_0 = const()[name = tensor("op_24543_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24543_end_0 = const()[name = tensor("op_24543_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24543_end_mask_0 = const()[name = tensor("op_24543_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24543_cast_fp16 = slice_by_index(begin = var_24543_begin_0, end = var_24543_end_0, end_mask = var_24543_end_mask_0, x = var_24146_cast_fp16)[name = tensor("op_24543_cast_fp16")]; tensor var_24550_begin_0 = const()[name = tensor("op_24550_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24550_end_0 = const()[name = tensor("op_24550_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24550_end_mask_0 = const()[name = tensor("op_24550_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24550_cast_fp16 = slice_by_index(begin = var_24550_begin_0, end = var_24550_end_0, end_mask = var_24550_end_mask_0, x = var_24146_cast_fp16)[name = tensor("op_24550_cast_fp16")]; tensor var_24557_begin_0 = const()[name = tensor("op_24557_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24557_end_0 = const()[name = tensor("op_24557_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24557_end_mask_0 = const()[name = tensor("op_24557_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24557_cast_fp16 = slice_by_index(begin = var_24557_begin_0, end = var_24557_end_0, end_mask = var_24557_end_mask_0, x = var_24146_cast_fp16)[name = tensor("op_24557_cast_fp16")]; tensor var_24564_begin_0 = const()[name = tensor("op_24564_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24564_end_0 = const()[name = tensor("op_24564_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24564_end_mask_0 = const()[name = tensor("op_24564_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24564_cast_fp16 = slice_by_index(begin = var_24564_begin_0, end = var_24564_end_0, end_mask = var_24564_end_mask_0, x = var_24146_cast_fp16)[name = tensor("op_24564_cast_fp16")]; tensor var_24571_begin_0 = const()[name = tensor("op_24571_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24571_end_0 = const()[name = tensor("op_24571_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24571_end_mask_0 = const()[name = tensor("op_24571_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24571_cast_fp16 = slice_by_index(begin = var_24571_begin_0, end = var_24571_end_0, end_mask = var_24571_end_mask_0, x = var_24150_cast_fp16)[name = tensor("op_24571_cast_fp16")]; tensor var_24578_begin_0 = const()[name = tensor("op_24578_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24578_end_0 = const()[name = tensor("op_24578_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24578_end_mask_0 = const()[name = tensor("op_24578_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24578_cast_fp16 = slice_by_index(begin = var_24578_begin_0, end = var_24578_end_0, end_mask = var_24578_end_mask_0, x = var_24150_cast_fp16)[name = tensor("op_24578_cast_fp16")]; tensor var_24585_begin_0 = const()[name = tensor("op_24585_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24585_end_0 = const()[name = tensor("op_24585_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24585_end_mask_0 = const()[name = tensor("op_24585_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24585_cast_fp16 = slice_by_index(begin = var_24585_begin_0, end = var_24585_end_0, end_mask = var_24585_end_mask_0, x = var_24150_cast_fp16)[name = tensor("op_24585_cast_fp16")]; tensor var_24592_begin_0 = const()[name = tensor("op_24592_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24592_end_0 = const()[name = tensor("op_24592_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24592_end_mask_0 = const()[name = tensor("op_24592_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24592_cast_fp16 = slice_by_index(begin = var_24592_begin_0, end = var_24592_end_0, end_mask = var_24592_end_mask_0, x = var_24150_cast_fp16)[name = tensor("op_24592_cast_fp16")]; tensor var_24599_begin_0 = const()[name = tensor("op_24599_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24599_end_0 = const()[name = tensor("op_24599_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24599_end_mask_0 = const()[name = tensor("op_24599_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24599_cast_fp16 = slice_by_index(begin = var_24599_begin_0, end = var_24599_end_0, end_mask = var_24599_end_mask_0, x = var_24154_cast_fp16)[name = tensor("op_24599_cast_fp16")]; tensor var_24606_begin_0 = const()[name = tensor("op_24606_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24606_end_0 = const()[name = tensor("op_24606_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24606_end_mask_0 = const()[name = tensor("op_24606_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24606_cast_fp16 = slice_by_index(begin = var_24606_begin_0, end = var_24606_end_0, end_mask = var_24606_end_mask_0, x = var_24154_cast_fp16)[name = tensor("op_24606_cast_fp16")]; tensor var_24613_begin_0 = const()[name = tensor("op_24613_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24613_end_0 = const()[name = tensor("op_24613_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24613_end_mask_0 = const()[name = tensor("op_24613_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24613_cast_fp16 = slice_by_index(begin = var_24613_begin_0, end = var_24613_end_0, end_mask = var_24613_end_mask_0, x = var_24154_cast_fp16)[name = tensor("op_24613_cast_fp16")]; tensor var_24620_begin_0 = const()[name = tensor("op_24620_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24620_end_0 = const()[name = tensor("op_24620_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24620_end_mask_0 = const()[name = tensor("op_24620_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24620_cast_fp16 = slice_by_index(begin = var_24620_begin_0, end = var_24620_end_0, end_mask = var_24620_end_mask_0, x = var_24154_cast_fp16)[name = tensor("op_24620_cast_fp16")]; tensor var_24627_begin_0 = const()[name = tensor("op_24627_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24627_end_0 = const()[name = tensor("op_24627_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24627_end_mask_0 = const()[name = tensor("op_24627_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24627_cast_fp16 = slice_by_index(begin = var_24627_begin_0, end = var_24627_end_0, end_mask = var_24627_end_mask_0, x = var_24158_cast_fp16)[name = tensor("op_24627_cast_fp16")]; tensor var_24634_begin_0 = const()[name = tensor("op_24634_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24634_end_0 = const()[name = tensor("op_24634_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24634_end_mask_0 = const()[name = tensor("op_24634_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24634_cast_fp16 = slice_by_index(begin = var_24634_begin_0, end = var_24634_end_0, end_mask = var_24634_end_mask_0, x = var_24158_cast_fp16)[name = tensor("op_24634_cast_fp16")]; tensor var_24641_begin_0 = const()[name = tensor("op_24641_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24641_end_0 = const()[name = tensor("op_24641_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24641_end_mask_0 = const()[name = tensor("op_24641_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24641_cast_fp16 = slice_by_index(begin = var_24641_begin_0, end = var_24641_end_0, end_mask = var_24641_end_mask_0, x = var_24158_cast_fp16)[name = tensor("op_24641_cast_fp16")]; tensor var_24648_begin_0 = const()[name = tensor("op_24648_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24648_end_0 = const()[name = tensor("op_24648_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24648_end_mask_0 = const()[name = tensor("op_24648_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24648_cast_fp16 = slice_by_index(begin = var_24648_begin_0, end = var_24648_end_0, end_mask = var_24648_end_mask_0, x = var_24158_cast_fp16)[name = tensor("op_24648_cast_fp16")]; tensor var_24655_begin_0 = const()[name = tensor("op_24655_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24655_end_0 = const()[name = tensor("op_24655_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24655_end_mask_0 = const()[name = tensor("op_24655_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24655_cast_fp16 = slice_by_index(begin = var_24655_begin_0, end = var_24655_end_0, end_mask = var_24655_end_mask_0, x = var_24162_cast_fp16)[name = tensor("op_24655_cast_fp16")]; tensor var_24662_begin_0 = const()[name = tensor("op_24662_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24662_end_0 = const()[name = tensor("op_24662_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24662_end_mask_0 = const()[name = tensor("op_24662_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24662_cast_fp16 = slice_by_index(begin = var_24662_begin_0, end = var_24662_end_0, end_mask = var_24662_end_mask_0, x = var_24162_cast_fp16)[name = tensor("op_24662_cast_fp16")]; tensor var_24669_begin_0 = const()[name = tensor("op_24669_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24669_end_0 = const()[name = tensor("op_24669_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24669_end_mask_0 = const()[name = tensor("op_24669_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24669_cast_fp16 = slice_by_index(begin = var_24669_begin_0, end = var_24669_end_0, end_mask = var_24669_end_mask_0, x = var_24162_cast_fp16)[name = tensor("op_24669_cast_fp16")]; tensor var_24676_begin_0 = const()[name = tensor("op_24676_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24676_end_0 = const()[name = tensor("op_24676_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24676_end_mask_0 = const()[name = tensor("op_24676_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24676_cast_fp16 = slice_by_index(begin = var_24676_begin_0, end = var_24676_end_0, end_mask = var_24676_end_mask_0, x = var_24162_cast_fp16)[name = tensor("op_24676_cast_fp16")]; tensor var_24683_begin_0 = const()[name = tensor("op_24683_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24683_end_0 = const()[name = tensor("op_24683_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24683_end_mask_0 = const()[name = tensor("op_24683_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24683_cast_fp16 = slice_by_index(begin = var_24683_begin_0, end = var_24683_end_0, end_mask = var_24683_end_mask_0, x = var_24166_cast_fp16)[name = tensor("op_24683_cast_fp16")]; tensor var_24690_begin_0 = const()[name = tensor("op_24690_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24690_end_0 = const()[name = tensor("op_24690_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24690_end_mask_0 = const()[name = tensor("op_24690_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24690_cast_fp16 = slice_by_index(begin = var_24690_begin_0, end = var_24690_end_0, end_mask = var_24690_end_mask_0, x = var_24166_cast_fp16)[name = tensor("op_24690_cast_fp16")]; tensor var_24697_begin_0 = const()[name = tensor("op_24697_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24697_end_0 = const()[name = tensor("op_24697_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24697_end_mask_0 = const()[name = tensor("op_24697_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24697_cast_fp16 = slice_by_index(begin = var_24697_begin_0, end = var_24697_end_0, end_mask = var_24697_end_mask_0, x = var_24166_cast_fp16)[name = tensor("op_24697_cast_fp16")]; tensor var_24704_begin_0 = const()[name = tensor("op_24704_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24704_end_0 = const()[name = tensor("op_24704_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24704_end_mask_0 = const()[name = tensor("op_24704_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24704_cast_fp16 = slice_by_index(begin = var_24704_begin_0, end = var_24704_end_0, end_mask = var_24704_end_mask_0, x = var_24166_cast_fp16)[name = tensor("op_24704_cast_fp16")]; tensor var_24711_begin_0 = const()[name = tensor("op_24711_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24711_end_0 = const()[name = tensor("op_24711_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_24711_end_mask_0 = const()[name = tensor("op_24711_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24711_cast_fp16 = slice_by_index(begin = var_24711_begin_0, end = var_24711_end_0, end_mask = var_24711_end_mask_0, x = var_24170_cast_fp16)[name = tensor("op_24711_cast_fp16")]; tensor var_24718_begin_0 = const()[name = tensor("op_24718_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_24718_end_0 = const()[name = tensor("op_24718_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_24718_end_mask_0 = const()[name = tensor("op_24718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24718_cast_fp16 = slice_by_index(begin = var_24718_begin_0, end = var_24718_end_0, end_mask = var_24718_end_mask_0, x = var_24170_cast_fp16)[name = tensor("op_24718_cast_fp16")]; tensor var_24725_begin_0 = const()[name = tensor("op_24725_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_24725_end_0 = const()[name = tensor("op_24725_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_24725_end_mask_0 = const()[name = tensor("op_24725_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24725_cast_fp16 = slice_by_index(begin = var_24725_begin_0, end = var_24725_end_0, end_mask = var_24725_end_mask_0, x = var_24170_cast_fp16)[name = tensor("op_24725_cast_fp16")]; tensor var_24732_begin_0 = const()[name = tensor("op_24732_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_24732_end_0 = const()[name = tensor("op_24732_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24732_end_mask_0 = const()[name = tensor("op_24732_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24732_cast_fp16 = slice_by_index(begin = var_24732_begin_0, end = var_24732_end_0, end_mask = var_24732_end_mask_0, x = var_24170_cast_fp16)[name = tensor("op_24732_cast_fp16")]; tensor k_31_perm_0 = const()[name = tensor("k_31_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_24737_begin_0 = const()[name = tensor("op_24737_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24737_end_0 = const()[name = tensor("op_24737_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_24737_end_mask_0 = const()[name = tensor("op_24737_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_31_cast_fp16 = transpose(perm = k_31_perm_0, x = key_31_cast_fp16)[name = tensor("transpose_16")]; tensor var_24737_cast_fp16 = slice_by_index(begin = var_24737_begin_0, end = var_24737_end_0, end_mask = var_24737_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24737_cast_fp16")]; tensor var_24741_begin_0 = const()[name = tensor("op_24741_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_24741_end_0 = const()[name = tensor("op_24741_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_24741_end_mask_0 = const()[name = tensor("op_24741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24741_cast_fp16 = slice_by_index(begin = var_24741_begin_0, end = var_24741_end_0, end_mask = var_24741_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24741_cast_fp16")]; tensor var_24745_begin_0 = const()[name = tensor("op_24745_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_24745_end_0 = const()[name = tensor("op_24745_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_24745_end_mask_0 = const()[name = tensor("op_24745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24745_cast_fp16 = slice_by_index(begin = var_24745_begin_0, end = var_24745_end_0, end_mask = var_24745_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24745_cast_fp16")]; tensor var_24749_begin_0 = const()[name = tensor("op_24749_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_24749_end_0 = const()[name = tensor("op_24749_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_24749_end_mask_0 = const()[name = tensor("op_24749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24749_cast_fp16 = slice_by_index(begin = var_24749_begin_0, end = var_24749_end_0, end_mask = var_24749_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24749_cast_fp16")]; tensor var_24753_begin_0 = const()[name = tensor("op_24753_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_24753_end_0 = const()[name = tensor("op_24753_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_24753_end_mask_0 = const()[name = tensor("op_24753_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24753_cast_fp16 = slice_by_index(begin = var_24753_begin_0, end = var_24753_end_0, end_mask = var_24753_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24753_cast_fp16")]; tensor var_24757_begin_0 = const()[name = tensor("op_24757_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_24757_end_0 = const()[name = tensor("op_24757_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_24757_end_mask_0 = const()[name = tensor("op_24757_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24757_cast_fp16 = slice_by_index(begin = var_24757_begin_0, end = var_24757_end_0, end_mask = var_24757_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24757_cast_fp16")]; tensor var_24761_begin_0 = const()[name = tensor("op_24761_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_24761_end_0 = const()[name = tensor("op_24761_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_24761_end_mask_0 = const()[name = tensor("op_24761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24761_cast_fp16 = slice_by_index(begin = var_24761_begin_0, end = var_24761_end_0, end_mask = var_24761_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24761_cast_fp16")]; tensor var_24765_begin_0 = const()[name = tensor("op_24765_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_24765_end_0 = const()[name = tensor("op_24765_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_24765_end_mask_0 = const()[name = tensor("op_24765_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24765_cast_fp16 = slice_by_index(begin = var_24765_begin_0, end = var_24765_end_0, end_mask = var_24765_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24765_cast_fp16")]; tensor var_24769_begin_0 = const()[name = tensor("op_24769_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_24769_end_0 = const()[name = tensor("op_24769_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_24769_end_mask_0 = const()[name = tensor("op_24769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24769_cast_fp16 = slice_by_index(begin = var_24769_begin_0, end = var_24769_end_0, end_mask = var_24769_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24769_cast_fp16")]; tensor var_24773_begin_0 = const()[name = tensor("op_24773_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_24773_end_0 = const()[name = tensor("op_24773_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_24773_end_mask_0 = const()[name = tensor("op_24773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24773_cast_fp16 = slice_by_index(begin = var_24773_begin_0, end = var_24773_end_0, end_mask = var_24773_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24773_cast_fp16")]; tensor var_24777_begin_0 = const()[name = tensor("op_24777_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_24777_end_0 = const()[name = tensor("op_24777_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_24777_end_mask_0 = const()[name = tensor("op_24777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24777_cast_fp16 = slice_by_index(begin = var_24777_begin_0, end = var_24777_end_0, end_mask = var_24777_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24777_cast_fp16")]; tensor var_24781_begin_0 = const()[name = tensor("op_24781_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_24781_end_0 = const()[name = tensor("op_24781_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_24781_end_mask_0 = const()[name = tensor("op_24781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24781_cast_fp16 = slice_by_index(begin = var_24781_begin_0, end = var_24781_end_0, end_mask = var_24781_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24781_cast_fp16")]; tensor var_24785_begin_0 = const()[name = tensor("op_24785_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_24785_end_0 = const()[name = tensor("op_24785_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_24785_end_mask_0 = const()[name = tensor("op_24785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24785_cast_fp16 = slice_by_index(begin = var_24785_begin_0, end = var_24785_end_0, end_mask = var_24785_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24785_cast_fp16")]; tensor var_24789_begin_0 = const()[name = tensor("op_24789_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_24789_end_0 = const()[name = tensor("op_24789_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_24789_end_mask_0 = const()[name = tensor("op_24789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24789_cast_fp16 = slice_by_index(begin = var_24789_begin_0, end = var_24789_end_0, end_mask = var_24789_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24789_cast_fp16")]; tensor var_24793_begin_0 = const()[name = tensor("op_24793_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_24793_end_0 = const()[name = tensor("op_24793_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_24793_end_mask_0 = const()[name = tensor("op_24793_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24793_cast_fp16 = slice_by_index(begin = var_24793_begin_0, end = var_24793_end_0, end_mask = var_24793_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24793_cast_fp16")]; tensor var_24797_begin_0 = const()[name = tensor("op_24797_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_24797_end_0 = const()[name = tensor("op_24797_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_24797_end_mask_0 = const()[name = tensor("op_24797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24797_cast_fp16 = slice_by_index(begin = var_24797_begin_0, end = var_24797_end_0, end_mask = var_24797_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24797_cast_fp16")]; tensor var_24801_begin_0 = const()[name = tensor("op_24801_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_24801_end_0 = const()[name = tensor("op_24801_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_24801_end_mask_0 = const()[name = tensor("op_24801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24801_cast_fp16 = slice_by_index(begin = var_24801_begin_0, end = var_24801_end_0, end_mask = var_24801_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24801_cast_fp16")]; tensor var_24805_begin_0 = const()[name = tensor("op_24805_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_24805_end_0 = const()[name = tensor("op_24805_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_24805_end_mask_0 = const()[name = tensor("op_24805_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24805_cast_fp16 = slice_by_index(begin = var_24805_begin_0, end = var_24805_end_0, end_mask = var_24805_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24805_cast_fp16")]; tensor var_24809_begin_0 = const()[name = tensor("op_24809_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_24809_end_0 = const()[name = tensor("op_24809_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_24809_end_mask_0 = const()[name = tensor("op_24809_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24809_cast_fp16 = slice_by_index(begin = var_24809_begin_0, end = var_24809_end_0, end_mask = var_24809_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24809_cast_fp16")]; tensor var_24813_begin_0 = const()[name = tensor("op_24813_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_24813_end_0 = const()[name = tensor("op_24813_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_24813_end_mask_0 = const()[name = tensor("op_24813_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_24813_cast_fp16 = slice_by_index(begin = var_24813_begin_0, end = var_24813_end_0, end_mask = var_24813_end_mask_0, x = k_31_cast_fp16)[name = tensor("op_24813_cast_fp16")]; tensor var_24815_begin_0 = const()[name = tensor("op_24815_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_24815_end_0 = const()[name = tensor("op_24815_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_24815_end_mask_0 = const()[name = tensor("op_24815_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24815_cast_fp16 = slice_by_index(begin = var_24815_begin_0, end = var_24815_end_0, end_mask = var_24815_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24815_cast_fp16")]; tensor var_24819_begin_0 = const()[name = tensor("op_24819_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_24819_end_0 = const()[name = tensor("op_24819_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_24819_end_mask_0 = const()[name = tensor("op_24819_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24819_cast_fp16 = slice_by_index(begin = var_24819_begin_0, end = var_24819_end_0, end_mask = var_24819_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24819_cast_fp16")]; tensor var_24823_begin_0 = const()[name = tensor("op_24823_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_24823_end_0 = const()[name = tensor("op_24823_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_24823_end_mask_0 = const()[name = tensor("op_24823_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24823_cast_fp16 = slice_by_index(begin = var_24823_begin_0, end = var_24823_end_0, end_mask = var_24823_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24823_cast_fp16")]; tensor var_24827_begin_0 = const()[name = tensor("op_24827_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_24827_end_0 = const()[name = tensor("op_24827_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_24827_end_mask_0 = const()[name = tensor("op_24827_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24827_cast_fp16 = slice_by_index(begin = var_24827_begin_0, end = var_24827_end_0, end_mask = var_24827_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24827_cast_fp16")]; tensor var_24831_begin_0 = const()[name = tensor("op_24831_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_24831_end_0 = const()[name = tensor("op_24831_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_24831_end_mask_0 = const()[name = tensor("op_24831_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24831_cast_fp16 = slice_by_index(begin = var_24831_begin_0, end = var_24831_end_0, end_mask = var_24831_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24831_cast_fp16")]; tensor var_24835_begin_0 = const()[name = tensor("op_24835_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_24835_end_0 = const()[name = tensor("op_24835_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_24835_end_mask_0 = const()[name = tensor("op_24835_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24835_cast_fp16 = slice_by_index(begin = var_24835_begin_0, end = var_24835_end_0, end_mask = var_24835_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24835_cast_fp16")]; tensor var_24839_begin_0 = const()[name = tensor("op_24839_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_24839_end_0 = const()[name = tensor("op_24839_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_24839_end_mask_0 = const()[name = tensor("op_24839_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24839_cast_fp16 = slice_by_index(begin = var_24839_begin_0, end = var_24839_end_0, end_mask = var_24839_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24839_cast_fp16")]; tensor var_24843_begin_0 = const()[name = tensor("op_24843_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_24843_end_0 = const()[name = tensor("op_24843_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_24843_end_mask_0 = const()[name = tensor("op_24843_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24843_cast_fp16 = slice_by_index(begin = var_24843_begin_0, end = var_24843_end_0, end_mask = var_24843_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24843_cast_fp16")]; tensor var_24847_begin_0 = const()[name = tensor("op_24847_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_24847_end_0 = const()[name = tensor("op_24847_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_24847_end_mask_0 = const()[name = tensor("op_24847_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24847_cast_fp16 = slice_by_index(begin = var_24847_begin_0, end = var_24847_end_0, end_mask = var_24847_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24847_cast_fp16")]; tensor var_24851_begin_0 = const()[name = tensor("op_24851_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_24851_end_0 = const()[name = tensor("op_24851_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_24851_end_mask_0 = const()[name = tensor("op_24851_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24851_cast_fp16 = slice_by_index(begin = var_24851_begin_0, end = var_24851_end_0, end_mask = var_24851_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24851_cast_fp16")]; tensor var_24855_begin_0 = const()[name = tensor("op_24855_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_24855_end_0 = const()[name = tensor("op_24855_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_24855_end_mask_0 = const()[name = tensor("op_24855_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24855_cast_fp16 = slice_by_index(begin = var_24855_begin_0, end = var_24855_end_0, end_mask = var_24855_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24855_cast_fp16")]; tensor var_24859_begin_0 = const()[name = tensor("op_24859_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_24859_end_0 = const()[name = tensor("op_24859_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_24859_end_mask_0 = const()[name = tensor("op_24859_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24859_cast_fp16 = slice_by_index(begin = var_24859_begin_0, end = var_24859_end_0, end_mask = var_24859_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24859_cast_fp16")]; tensor var_24863_begin_0 = const()[name = tensor("op_24863_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_24863_end_0 = const()[name = tensor("op_24863_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_24863_end_mask_0 = const()[name = tensor("op_24863_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24863_cast_fp16 = slice_by_index(begin = var_24863_begin_0, end = var_24863_end_0, end_mask = var_24863_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24863_cast_fp16")]; tensor var_24867_begin_0 = const()[name = tensor("op_24867_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_24867_end_0 = const()[name = tensor("op_24867_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_24867_end_mask_0 = const()[name = tensor("op_24867_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24867_cast_fp16 = slice_by_index(begin = var_24867_begin_0, end = var_24867_end_0, end_mask = var_24867_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24867_cast_fp16")]; tensor var_24871_begin_0 = const()[name = tensor("op_24871_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_24871_end_0 = const()[name = tensor("op_24871_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_24871_end_mask_0 = const()[name = tensor("op_24871_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24871_cast_fp16 = slice_by_index(begin = var_24871_begin_0, end = var_24871_end_0, end_mask = var_24871_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24871_cast_fp16")]; tensor var_24875_begin_0 = const()[name = tensor("op_24875_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_24875_end_0 = const()[name = tensor("op_24875_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_24875_end_mask_0 = const()[name = tensor("op_24875_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24875_cast_fp16 = slice_by_index(begin = var_24875_begin_0, end = var_24875_end_0, end_mask = var_24875_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24875_cast_fp16")]; tensor var_24879_begin_0 = const()[name = tensor("op_24879_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_24879_end_0 = const()[name = tensor("op_24879_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_24879_end_mask_0 = const()[name = tensor("op_24879_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24879_cast_fp16 = slice_by_index(begin = var_24879_begin_0, end = var_24879_end_0, end_mask = var_24879_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24879_cast_fp16")]; tensor var_24883_begin_0 = const()[name = tensor("op_24883_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_24883_end_0 = const()[name = tensor("op_24883_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_24883_end_mask_0 = const()[name = tensor("op_24883_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24883_cast_fp16 = slice_by_index(begin = var_24883_begin_0, end = var_24883_end_0, end_mask = var_24883_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24883_cast_fp16")]; tensor var_24887_begin_0 = const()[name = tensor("op_24887_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_24887_end_0 = const()[name = tensor("op_24887_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_24887_end_mask_0 = const()[name = tensor("op_24887_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24887_cast_fp16 = slice_by_index(begin = var_24887_begin_0, end = var_24887_end_0, end_mask = var_24887_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24887_cast_fp16")]; tensor var_24891_begin_0 = const()[name = tensor("op_24891_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_24891_end_0 = const()[name = tensor("op_24891_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_24891_end_mask_0 = const()[name = tensor("op_24891_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_24891_cast_fp16 = slice_by_index(begin = var_24891_begin_0, end = var_24891_end_0, end_mask = var_24891_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_24891_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2401_equation_0, values = (var_24737_cast_fp16, var_24179_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2403_equation_0, values = (var_24737_cast_fp16, var_24186_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2405_equation_0, values = (var_24737_cast_fp16, var_24193_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2407_equation_0, values = (var_24737_cast_fp16, var_24200_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2409_equation_0, values = (var_24741_cast_fp16, var_24207_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2411_equation_0, values = (var_24741_cast_fp16, var_24214_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2413_equation_0, values = (var_24741_cast_fp16, var_24221_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2415_equation_0, values = (var_24741_cast_fp16, var_24228_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2417_equation_0, values = (var_24745_cast_fp16, var_24235_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2419_equation_0, values = (var_24745_cast_fp16, var_24242_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2421_equation_0, values = (var_24745_cast_fp16, var_24249_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2423_equation_0, values = (var_24745_cast_fp16, var_24256_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2425_equation_0, values = (var_24749_cast_fp16, var_24263_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2427_equation_0, values = (var_24749_cast_fp16, var_24270_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2429_equation_0, values = (var_24749_cast_fp16, var_24277_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2431_equation_0, values = (var_24749_cast_fp16, var_24284_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2431_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2433_equation_0, values = (var_24753_cast_fp16, var_24291_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2435_equation_0, values = (var_24753_cast_fp16, var_24298_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2437_equation_0, values = (var_24753_cast_fp16, var_24305_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2439_equation_0, values = (var_24753_cast_fp16, var_24312_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2439_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2441_equation_0, values = (var_24757_cast_fp16, var_24319_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2443_equation_0, values = (var_24757_cast_fp16, var_24326_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2445_equation_0, values = (var_24757_cast_fp16, var_24333_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2447_equation_0, values = (var_24757_cast_fp16, var_24340_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2449_equation_0, values = (var_24761_cast_fp16, var_24347_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2451_equation_0, values = (var_24761_cast_fp16, var_24354_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2453_equation_0, values = (var_24761_cast_fp16, var_24361_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2455_equation_0, values = (var_24761_cast_fp16, var_24368_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2457_equation_0, values = (var_24765_cast_fp16, var_24375_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2459_equation_0, values = (var_24765_cast_fp16, var_24382_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2461_equation_0, values = (var_24765_cast_fp16, var_24389_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2463_equation_0, values = (var_24765_cast_fp16, var_24396_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2465_equation_0, values = (var_24769_cast_fp16, var_24403_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2467_equation_0, values = (var_24769_cast_fp16, var_24410_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2469_equation_0, values = (var_24769_cast_fp16, var_24417_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2471_equation_0, values = (var_24769_cast_fp16, var_24424_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2473_equation_0, values = (var_24773_cast_fp16, var_24431_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2475_equation_0, values = (var_24773_cast_fp16, var_24438_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2477_equation_0, values = (var_24773_cast_fp16, var_24445_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2479_equation_0, values = (var_24773_cast_fp16, var_24452_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2479_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2481_equation_0, values = (var_24777_cast_fp16, var_24459_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2483_equation_0, values = (var_24777_cast_fp16, var_24466_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2485_equation_0, values = (var_24777_cast_fp16, var_24473_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2487_equation_0, values = (var_24777_cast_fp16, var_24480_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2489_equation_0, values = (var_24781_cast_fp16, var_24487_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2491_equation_0, values = (var_24781_cast_fp16, var_24494_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2493_equation_0, values = (var_24781_cast_fp16, var_24501_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2495_equation_0, values = (var_24781_cast_fp16, var_24508_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2497_equation_0, values = (var_24785_cast_fp16, var_24515_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2499_equation_0, values = (var_24785_cast_fp16, var_24522_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2501_equation_0, values = (var_24785_cast_fp16, var_24529_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2503_equation_0, values = (var_24785_cast_fp16, var_24536_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2505_equation_0, values = (var_24789_cast_fp16, var_24543_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2507_equation_0, values = (var_24789_cast_fp16, var_24550_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2509_equation_0, values = (var_24789_cast_fp16, var_24557_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2511_equation_0, values = (var_24789_cast_fp16, var_24564_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2513_equation_0, values = (var_24793_cast_fp16, var_24571_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2515_equation_0, values = (var_24793_cast_fp16, var_24578_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2517_equation_0, values = (var_24793_cast_fp16, var_24585_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2519_equation_0, values = (var_24793_cast_fp16, var_24592_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2519_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2521_equation_0, values = (var_24797_cast_fp16, var_24599_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2523_equation_0, values = (var_24797_cast_fp16, var_24606_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2525_equation_0, values = (var_24797_cast_fp16, var_24613_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2527_equation_0, values = (var_24797_cast_fp16, var_24620_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2529_equation_0, values = (var_24801_cast_fp16, var_24627_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2531_equation_0, values = (var_24801_cast_fp16, var_24634_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2533_equation_0, values = (var_24801_cast_fp16, var_24641_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2535_equation_0, values = (var_24801_cast_fp16, var_24648_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2537_equation_0, values = (var_24805_cast_fp16, var_24655_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2539_equation_0, values = (var_24805_cast_fp16, var_24662_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2541_equation_0, values = (var_24805_cast_fp16, var_24669_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2543_equation_0, values = (var_24805_cast_fp16, var_24676_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2545_equation_0, values = (var_24809_cast_fp16, var_24683_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2547_equation_0, values = (var_24809_cast_fp16, var_24690_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2549_equation_0, values = (var_24809_cast_fp16, var_24697_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2551_equation_0, values = (var_24809_cast_fp16, var_24704_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2553_equation_0, values = (var_24813_cast_fp16, var_24711_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2555_equation_0, values = (var_24813_cast_fp16, var_24718_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2557_equation_0, values = (var_24813_cast_fp16, var_24725_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2559_equation_0, values = (var_24813_cast_fp16, var_24732_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2559_cast_fp16")]; tensor var_25054_to_fp16 = const()[name = tensor("op_25054_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2401_cast_fp16, y = var_25054_to_fp16)[name = tensor("aw_chunk_2401_cast_fp16")]; tensor var_25056_to_fp16 = const()[name = tensor("op_25056_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2403_cast_fp16, y = var_25056_to_fp16)[name = tensor("aw_chunk_2403_cast_fp16")]; tensor var_25058_to_fp16 = const()[name = tensor("op_25058_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2405_cast_fp16, y = var_25058_to_fp16)[name = tensor("aw_chunk_2405_cast_fp16")]; tensor var_25060_to_fp16 = const()[name = tensor("op_25060_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2407_cast_fp16, y = var_25060_to_fp16)[name = tensor("aw_chunk_2407_cast_fp16")]; tensor var_25062_to_fp16 = const()[name = tensor("op_25062_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2409_cast_fp16, y = var_25062_to_fp16)[name = tensor("aw_chunk_2409_cast_fp16")]; tensor var_25064_to_fp16 = const()[name = tensor("op_25064_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2411_cast_fp16, y = var_25064_to_fp16)[name = tensor("aw_chunk_2411_cast_fp16")]; tensor var_25066_to_fp16 = const()[name = tensor("op_25066_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2413_cast_fp16, y = var_25066_to_fp16)[name = tensor("aw_chunk_2413_cast_fp16")]; tensor var_25068_to_fp16 = const()[name = tensor("op_25068_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2415_cast_fp16, y = var_25068_to_fp16)[name = tensor("aw_chunk_2415_cast_fp16")]; tensor var_25070_to_fp16 = const()[name = tensor("op_25070_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2417_cast_fp16, y = var_25070_to_fp16)[name = tensor("aw_chunk_2417_cast_fp16")]; tensor var_25072_to_fp16 = const()[name = tensor("op_25072_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2419_cast_fp16, y = var_25072_to_fp16)[name = tensor("aw_chunk_2419_cast_fp16")]; tensor var_25074_to_fp16 = const()[name = tensor("op_25074_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2421_cast_fp16, y = var_25074_to_fp16)[name = tensor("aw_chunk_2421_cast_fp16")]; tensor var_25076_to_fp16 = const()[name = tensor("op_25076_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2423_cast_fp16, y = var_25076_to_fp16)[name = tensor("aw_chunk_2423_cast_fp16")]; tensor var_25078_to_fp16 = const()[name = tensor("op_25078_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2425_cast_fp16, y = var_25078_to_fp16)[name = tensor("aw_chunk_2425_cast_fp16")]; tensor var_25080_to_fp16 = const()[name = tensor("op_25080_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2427_cast_fp16, y = var_25080_to_fp16)[name = tensor("aw_chunk_2427_cast_fp16")]; tensor var_25082_to_fp16 = const()[name = tensor("op_25082_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2429_cast_fp16, y = var_25082_to_fp16)[name = tensor("aw_chunk_2429_cast_fp16")]; tensor var_25084_to_fp16 = const()[name = tensor("op_25084_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2431_cast_fp16, y = var_25084_to_fp16)[name = tensor("aw_chunk_2431_cast_fp16")]; tensor var_25086_to_fp16 = const()[name = tensor("op_25086_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2433_cast_fp16, y = var_25086_to_fp16)[name = tensor("aw_chunk_2433_cast_fp16")]; tensor var_25088_to_fp16 = const()[name = tensor("op_25088_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2435_cast_fp16, y = var_25088_to_fp16)[name = tensor("aw_chunk_2435_cast_fp16")]; tensor var_25090_to_fp16 = const()[name = tensor("op_25090_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2437_cast_fp16, y = var_25090_to_fp16)[name = tensor("aw_chunk_2437_cast_fp16")]; tensor var_25092_to_fp16 = const()[name = tensor("op_25092_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2439_cast_fp16, y = var_25092_to_fp16)[name = tensor("aw_chunk_2439_cast_fp16")]; tensor var_25094_to_fp16 = const()[name = tensor("op_25094_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2441_cast_fp16, y = var_25094_to_fp16)[name = tensor("aw_chunk_2441_cast_fp16")]; tensor var_25096_to_fp16 = const()[name = tensor("op_25096_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2443_cast_fp16, y = var_25096_to_fp16)[name = tensor("aw_chunk_2443_cast_fp16")]; tensor var_25098_to_fp16 = const()[name = tensor("op_25098_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2445_cast_fp16, y = var_25098_to_fp16)[name = tensor("aw_chunk_2445_cast_fp16")]; tensor var_25100_to_fp16 = const()[name = tensor("op_25100_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2447_cast_fp16, y = var_25100_to_fp16)[name = tensor("aw_chunk_2447_cast_fp16")]; tensor var_25102_to_fp16 = const()[name = tensor("op_25102_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2449_cast_fp16, y = var_25102_to_fp16)[name = tensor("aw_chunk_2449_cast_fp16")]; tensor var_25104_to_fp16 = const()[name = tensor("op_25104_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2451_cast_fp16, y = var_25104_to_fp16)[name = tensor("aw_chunk_2451_cast_fp16")]; tensor var_25106_to_fp16 = const()[name = tensor("op_25106_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2453_cast_fp16, y = var_25106_to_fp16)[name = tensor("aw_chunk_2453_cast_fp16")]; tensor var_25108_to_fp16 = const()[name = tensor("op_25108_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2455_cast_fp16, y = var_25108_to_fp16)[name = tensor("aw_chunk_2455_cast_fp16")]; tensor var_25110_to_fp16 = const()[name = tensor("op_25110_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2457_cast_fp16, y = var_25110_to_fp16)[name = tensor("aw_chunk_2457_cast_fp16")]; tensor var_25112_to_fp16 = const()[name = tensor("op_25112_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2459_cast_fp16, y = var_25112_to_fp16)[name = tensor("aw_chunk_2459_cast_fp16")]; tensor var_25114_to_fp16 = const()[name = tensor("op_25114_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2461_cast_fp16, y = var_25114_to_fp16)[name = tensor("aw_chunk_2461_cast_fp16")]; tensor var_25116_to_fp16 = const()[name = tensor("op_25116_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2463_cast_fp16, y = var_25116_to_fp16)[name = tensor("aw_chunk_2463_cast_fp16")]; tensor var_25118_to_fp16 = const()[name = tensor("op_25118_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2465_cast_fp16, y = var_25118_to_fp16)[name = tensor("aw_chunk_2465_cast_fp16")]; tensor var_25120_to_fp16 = const()[name = tensor("op_25120_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2467_cast_fp16, y = var_25120_to_fp16)[name = tensor("aw_chunk_2467_cast_fp16")]; tensor var_25122_to_fp16 = const()[name = tensor("op_25122_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2469_cast_fp16, y = var_25122_to_fp16)[name = tensor("aw_chunk_2469_cast_fp16")]; tensor var_25124_to_fp16 = const()[name = tensor("op_25124_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2471_cast_fp16, y = var_25124_to_fp16)[name = tensor("aw_chunk_2471_cast_fp16")]; tensor var_25126_to_fp16 = const()[name = tensor("op_25126_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2473_cast_fp16, y = var_25126_to_fp16)[name = tensor("aw_chunk_2473_cast_fp16")]; tensor var_25128_to_fp16 = const()[name = tensor("op_25128_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2475_cast_fp16, y = var_25128_to_fp16)[name = tensor("aw_chunk_2475_cast_fp16")]; tensor var_25130_to_fp16 = const()[name = tensor("op_25130_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2477_cast_fp16, y = var_25130_to_fp16)[name = tensor("aw_chunk_2477_cast_fp16")]; tensor var_25132_to_fp16 = const()[name = tensor("op_25132_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2479_cast_fp16, y = var_25132_to_fp16)[name = tensor("aw_chunk_2479_cast_fp16")]; tensor var_25134_to_fp16 = const()[name = tensor("op_25134_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2481_cast_fp16, y = var_25134_to_fp16)[name = tensor("aw_chunk_2481_cast_fp16")]; tensor var_25136_to_fp16 = const()[name = tensor("op_25136_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2483_cast_fp16, y = var_25136_to_fp16)[name = tensor("aw_chunk_2483_cast_fp16")]; tensor var_25138_to_fp16 = const()[name = tensor("op_25138_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2485_cast_fp16, y = var_25138_to_fp16)[name = tensor("aw_chunk_2485_cast_fp16")]; tensor var_25140_to_fp16 = const()[name = tensor("op_25140_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2487_cast_fp16, y = var_25140_to_fp16)[name = tensor("aw_chunk_2487_cast_fp16")]; tensor var_25142_to_fp16 = const()[name = tensor("op_25142_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2489_cast_fp16, y = var_25142_to_fp16)[name = tensor("aw_chunk_2489_cast_fp16")]; tensor var_25144_to_fp16 = const()[name = tensor("op_25144_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2491_cast_fp16, y = var_25144_to_fp16)[name = tensor("aw_chunk_2491_cast_fp16")]; tensor var_25146_to_fp16 = const()[name = tensor("op_25146_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2493_cast_fp16, y = var_25146_to_fp16)[name = tensor("aw_chunk_2493_cast_fp16")]; tensor var_25148_to_fp16 = const()[name = tensor("op_25148_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2495_cast_fp16, y = var_25148_to_fp16)[name = tensor("aw_chunk_2495_cast_fp16")]; tensor var_25150_to_fp16 = const()[name = tensor("op_25150_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2497_cast_fp16, y = var_25150_to_fp16)[name = tensor("aw_chunk_2497_cast_fp16")]; tensor var_25152_to_fp16 = const()[name = tensor("op_25152_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2499_cast_fp16, y = var_25152_to_fp16)[name = tensor("aw_chunk_2499_cast_fp16")]; tensor var_25154_to_fp16 = const()[name = tensor("op_25154_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2501_cast_fp16, y = var_25154_to_fp16)[name = tensor("aw_chunk_2501_cast_fp16")]; tensor var_25156_to_fp16 = const()[name = tensor("op_25156_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2503_cast_fp16, y = var_25156_to_fp16)[name = tensor("aw_chunk_2503_cast_fp16")]; tensor var_25158_to_fp16 = const()[name = tensor("op_25158_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2505_cast_fp16, y = var_25158_to_fp16)[name = tensor("aw_chunk_2505_cast_fp16")]; tensor var_25160_to_fp16 = const()[name = tensor("op_25160_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2507_cast_fp16, y = var_25160_to_fp16)[name = tensor("aw_chunk_2507_cast_fp16")]; tensor var_25162_to_fp16 = const()[name = tensor("op_25162_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2509_cast_fp16, y = var_25162_to_fp16)[name = tensor("aw_chunk_2509_cast_fp16")]; tensor var_25164_to_fp16 = const()[name = tensor("op_25164_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2511_cast_fp16, y = var_25164_to_fp16)[name = tensor("aw_chunk_2511_cast_fp16")]; tensor var_25166_to_fp16 = const()[name = tensor("op_25166_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2513_cast_fp16, y = var_25166_to_fp16)[name = tensor("aw_chunk_2513_cast_fp16")]; tensor var_25168_to_fp16 = const()[name = tensor("op_25168_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2515_cast_fp16, y = var_25168_to_fp16)[name = tensor("aw_chunk_2515_cast_fp16")]; tensor var_25170_to_fp16 = const()[name = tensor("op_25170_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2517_cast_fp16, y = var_25170_to_fp16)[name = tensor("aw_chunk_2517_cast_fp16")]; tensor var_25172_to_fp16 = const()[name = tensor("op_25172_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2519_cast_fp16, y = var_25172_to_fp16)[name = tensor("aw_chunk_2519_cast_fp16")]; tensor var_25174_to_fp16 = const()[name = tensor("op_25174_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2521_cast_fp16, y = var_25174_to_fp16)[name = tensor("aw_chunk_2521_cast_fp16")]; tensor var_25176_to_fp16 = const()[name = tensor("op_25176_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2523_cast_fp16, y = var_25176_to_fp16)[name = tensor("aw_chunk_2523_cast_fp16")]; tensor var_25178_to_fp16 = const()[name = tensor("op_25178_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2525_cast_fp16, y = var_25178_to_fp16)[name = tensor("aw_chunk_2525_cast_fp16")]; tensor var_25180_to_fp16 = const()[name = tensor("op_25180_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2527_cast_fp16, y = var_25180_to_fp16)[name = tensor("aw_chunk_2527_cast_fp16")]; tensor var_25182_to_fp16 = const()[name = tensor("op_25182_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2529_cast_fp16, y = var_25182_to_fp16)[name = tensor("aw_chunk_2529_cast_fp16")]; tensor var_25184_to_fp16 = const()[name = tensor("op_25184_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2531_cast_fp16, y = var_25184_to_fp16)[name = tensor("aw_chunk_2531_cast_fp16")]; tensor var_25186_to_fp16 = const()[name = tensor("op_25186_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2533_cast_fp16, y = var_25186_to_fp16)[name = tensor("aw_chunk_2533_cast_fp16")]; tensor var_25188_to_fp16 = const()[name = tensor("op_25188_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2535_cast_fp16, y = var_25188_to_fp16)[name = tensor("aw_chunk_2535_cast_fp16")]; tensor var_25190_to_fp16 = const()[name = tensor("op_25190_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2537_cast_fp16, y = var_25190_to_fp16)[name = tensor("aw_chunk_2537_cast_fp16")]; tensor var_25192_to_fp16 = const()[name = tensor("op_25192_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2539_cast_fp16, y = var_25192_to_fp16)[name = tensor("aw_chunk_2539_cast_fp16")]; tensor var_25194_to_fp16 = const()[name = tensor("op_25194_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2541_cast_fp16, y = var_25194_to_fp16)[name = tensor("aw_chunk_2541_cast_fp16")]; tensor var_25196_to_fp16 = const()[name = tensor("op_25196_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2543_cast_fp16, y = var_25196_to_fp16)[name = tensor("aw_chunk_2543_cast_fp16")]; tensor var_25198_to_fp16 = const()[name = tensor("op_25198_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2545_cast_fp16, y = var_25198_to_fp16)[name = tensor("aw_chunk_2545_cast_fp16")]; tensor var_25200_to_fp16 = const()[name = tensor("op_25200_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2547_cast_fp16, y = var_25200_to_fp16)[name = tensor("aw_chunk_2547_cast_fp16")]; tensor var_25202_to_fp16 = const()[name = tensor("op_25202_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2549_cast_fp16, y = var_25202_to_fp16)[name = tensor("aw_chunk_2549_cast_fp16")]; tensor var_25204_to_fp16 = const()[name = tensor("op_25204_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2551_cast_fp16, y = var_25204_to_fp16)[name = tensor("aw_chunk_2551_cast_fp16")]; tensor var_25206_to_fp16 = const()[name = tensor("op_25206_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2553_cast_fp16, y = var_25206_to_fp16)[name = tensor("aw_chunk_2553_cast_fp16")]; tensor var_25208_to_fp16 = const()[name = tensor("op_25208_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2555_cast_fp16, y = var_25208_to_fp16)[name = tensor("aw_chunk_2555_cast_fp16")]; tensor var_25210_to_fp16 = const()[name = tensor("op_25210_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2557_cast_fp16, y = var_25210_to_fp16)[name = tensor("aw_chunk_2557_cast_fp16")]; tensor var_25212_to_fp16 = const()[name = tensor("op_25212_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2559_cast_fp16, y = var_25212_to_fp16)[name = tensor("aw_chunk_2559_cast_fp16")]; tensor var_25214_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2401_cast_fp16)[name = tensor("op_25214_cast_fp16")]; tensor var_25215_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2403_cast_fp16)[name = tensor("op_25215_cast_fp16")]; tensor var_25216_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2405_cast_fp16)[name = tensor("op_25216_cast_fp16")]; tensor var_25217_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2407_cast_fp16)[name = tensor("op_25217_cast_fp16")]; tensor var_25218_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2409_cast_fp16)[name = tensor("op_25218_cast_fp16")]; tensor var_25219_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2411_cast_fp16)[name = tensor("op_25219_cast_fp16")]; tensor var_25220_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2413_cast_fp16)[name = tensor("op_25220_cast_fp16")]; tensor var_25221_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2415_cast_fp16)[name = tensor("op_25221_cast_fp16")]; tensor var_25222_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2417_cast_fp16)[name = tensor("op_25222_cast_fp16")]; tensor var_25223_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2419_cast_fp16)[name = tensor("op_25223_cast_fp16")]; tensor var_25224_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2421_cast_fp16)[name = tensor("op_25224_cast_fp16")]; tensor var_25225_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2423_cast_fp16)[name = tensor("op_25225_cast_fp16")]; tensor var_25226_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2425_cast_fp16)[name = tensor("op_25226_cast_fp16")]; tensor var_25227_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2427_cast_fp16)[name = tensor("op_25227_cast_fp16")]; tensor var_25228_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2429_cast_fp16)[name = tensor("op_25228_cast_fp16")]; tensor var_25229_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2431_cast_fp16)[name = tensor("op_25229_cast_fp16")]; tensor var_25230_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2433_cast_fp16)[name = tensor("op_25230_cast_fp16")]; tensor var_25231_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2435_cast_fp16)[name = tensor("op_25231_cast_fp16")]; tensor var_25232_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2437_cast_fp16)[name = tensor("op_25232_cast_fp16")]; tensor var_25233_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2439_cast_fp16)[name = tensor("op_25233_cast_fp16")]; tensor var_25234_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2441_cast_fp16)[name = tensor("op_25234_cast_fp16")]; tensor var_25235_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2443_cast_fp16)[name = tensor("op_25235_cast_fp16")]; tensor var_25236_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2445_cast_fp16)[name = tensor("op_25236_cast_fp16")]; tensor var_25237_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2447_cast_fp16)[name = tensor("op_25237_cast_fp16")]; tensor var_25238_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2449_cast_fp16)[name = tensor("op_25238_cast_fp16")]; tensor var_25239_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2451_cast_fp16)[name = tensor("op_25239_cast_fp16")]; tensor var_25240_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2453_cast_fp16)[name = tensor("op_25240_cast_fp16")]; tensor var_25241_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2455_cast_fp16)[name = tensor("op_25241_cast_fp16")]; tensor var_25242_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2457_cast_fp16)[name = tensor("op_25242_cast_fp16")]; tensor var_25243_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2459_cast_fp16)[name = tensor("op_25243_cast_fp16")]; tensor var_25244_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2461_cast_fp16)[name = tensor("op_25244_cast_fp16")]; tensor var_25245_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2463_cast_fp16)[name = tensor("op_25245_cast_fp16")]; tensor var_25246_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2465_cast_fp16)[name = tensor("op_25246_cast_fp16")]; tensor var_25247_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2467_cast_fp16)[name = tensor("op_25247_cast_fp16")]; tensor var_25248_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2469_cast_fp16)[name = tensor("op_25248_cast_fp16")]; tensor var_25249_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2471_cast_fp16)[name = tensor("op_25249_cast_fp16")]; tensor var_25250_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2473_cast_fp16)[name = tensor("op_25250_cast_fp16")]; tensor var_25251_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2475_cast_fp16)[name = tensor("op_25251_cast_fp16")]; tensor var_25252_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2477_cast_fp16)[name = tensor("op_25252_cast_fp16")]; tensor var_25253_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2479_cast_fp16)[name = tensor("op_25253_cast_fp16")]; tensor var_25254_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2481_cast_fp16)[name = tensor("op_25254_cast_fp16")]; tensor var_25255_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2483_cast_fp16)[name = tensor("op_25255_cast_fp16")]; tensor var_25256_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2485_cast_fp16)[name = tensor("op_25256_cast_fp16")]; tensor var_25257_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2487_cast_fp16)[name = tensor("op_25257_cast_fp16")]; tensor var_25258_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2489_cast_fp16)[name = tensor("op_25258_cast_fp16")]; tensor var_25259_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2491_cast_fp16)[name = tensor("op_25259_cast_fp16")]; tensor var_25260_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2493_cast_fp16)[name = tensor("op_25260_cast_fp16")]; tensor var_25261_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2495_cast_fp16)[name = tensor("op_25261_cast_fp16")]; tensor var_25262_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2497_cast_fp16)[name = tensor("op_25262_cast_fp16")]; tensor var_25263_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2499_cast_fp16)[name = tensor("op_25263_cast_fp16")]; tensor var_25264_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2501_cast_fp16)[name = tensor("op_25264_cast_fp16")]; tensor var_25265_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2503_cast_fp16)[name = tensor("op_25265_cast_fp16")]; tensor var_25266_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2505_cast_fp16)[name = tensor("op_25266_cast_fp16")]; tensor var_25267_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2507_cast_fp16)[name = tensor("op_25267_cast_fp16")]; tensor var_25268_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2509_cast_fp16)[name = tensor("op_25268_cast_fp16")]; tensor var_25269_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2511_cast_fp16)[name = tensor("op_25269_cast_fp16")]; tensor var_25270_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2513_cast_fp16)[name = tensor("op_25270_cast_fp16")]; tensor var_25271_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2515_cast_fp16)[name = tensor("op_25271_cast_fp16")]; tensor var_25272_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2517_cast_fp16)[name = tensor("op_25272_cast_fp16")]; tensor var_25273_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2519_cast_fp16)[name = tensor("op_25273_cast_fp16")]; tensor var_25274_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2521_cast_fp16)[name = tensor("op_25274_cast_fp16")]; tensor var_25275_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2523_cast_fp16)[name = tensor("op_25275_cast_fp16")]; tensor var_25276_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2525_cast_fp16)[name = tensor("op_25276_cast_fp16")]; tensor var_25277_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2527_cast_fp16)[name = tensor("op_25277_cast_fp16")]; tensor var_25278_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2529_cast_fp16)[name = tensor("op_25278_cast_fp16")]; tensor var_25279_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2531_cast_fp16)[name = tensor("op_25279_cast_fp16")]; tensor var_25280_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2533_cast_fp16)[name = tensor("op_25280_cast_fp16")]; tensor var_25281_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2535_cast_fp16)[name = tensor("op_25281_cast_fp16")]; tensor var_25282_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2537_cast_fp16)[name = tensor("op_25282_cast_fp16")]; tensor var_25283_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2539_cast_fp16)[name = tensor("op_25283_cast_fp16")]; tensor var_25284_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2541_cast_fp16)[name = tensor("op_25284_cast_fp16")]; tensor var_25285_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2543_cast_fp16)[name = tensor("op_25285_cast_fp16")]; tensor var_25286_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2545_cast_fp16)[name = tensor("op_25286_cast_fp16")]; tensor var_25287_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2547_cast_fp16)[name = tensor("op_25287_cast_fp16")]; tensor var_25288_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2549_cast_fp16)[name = tensor("op_25288_cast_fp16")]; tensor var_25289_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2551_cast_fp16)[name = tensor("op_25289_cast_fp16")]; tensor var_25290_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2553_cast_fp16)[name = tensor("op_25290_cast_fp16")]; tensor var_25291_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2555_cast_fp16)[name = tensor("op_25291_cast_fp16")]; tensor var_25292_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2557_cast_fp16)[name = tensor("op_25292_cast_fp16")]; tensor var_25293_cast_fp16 = softmax(axis = var_24012, x = aw_chunk_2559_cast_fp16)[name = tensor("op_25293_cast_fp16")]; tensor var_25295_equation_0 = const()[name = tensor("op_25295_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25295_cast_fp16 = einsum(equation = var_25295_equation_0, values = (var_24815_cast_fp16, var_25214_cast_fp16))[name = tensor("op_25295_cast_fp16")]; tensor var_25297_equation_0 = const()[name = tensor("op_25297_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25297_cast_fp16 = einsum(equation = var_25297_equation_0, values = (var_24815_cast_fp16, var_25215_cast_fp16))[name = tensor("op_25297_cast_fp16")]; tensor var_25299_equation_0 = const()[name = tensor("op_25299_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25299_cast_fp16 = einsum(equation = var_25299_equation_0, values = (var_24815_cast_fp16, var_25216_cast_fp16))[name = tensor("op_25299_cast_fp16")]; tensor var_25301_equation_0 = const()[name = tensor("op_25301_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25301_cast_fp16 = einsum(equation = var_25301_equation_0, values = (var_24815_cast_fp16, var_25217_cast_fp16))[name = tensor("op_25301_cast_fp16")]; tensor var_25303_equation_0 = const()[name = tensor("op_25303_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25303_cast_fp16 = einsum(equation = var_25303_equation_0, values = (var_24819_cast_fp16, var_25218_cast_fp16))[name = tensor("op_25303_cast_fp16")]; tensor var_25305_equation_0 = const()[name = tensor("op_25305_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25305_cast_fp16 = einsum(equation = var_25305_equation_0, values = (var_24819_cast_fp16, var_25219_cast_fp16))[name = tensor("op_25305_cast_fp16")]; tensor var_25307_equation_0 = const()[name = tensor("op_25307_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25307_cast_fp16 = einsum(equation = var_25307_equation_0, values = (var_24819_cast_fp16, var_25220_cast_fp16))[name = tensor("op_25307_cast_fp16")]; tensor var_25309_equation_0 = const()[name = tensor("op_25309_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25309_cast_fp16 = einsum(equation = var_25309_equation_0, values = (var_24819_cast_fp16, var_25221_cast_fp16))[name = tensor("op_25309_cast_fp16")]; tensor var_25311_equation_0 = const()[name = tensor("op_25311_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25311_cast_fp16 = einsum(equation = var_25311_equation_0, values = (var_24823_cast_fp16, var_25222_cast_fp16))[name = tensor("op_25311_cast_fp16")]; tensor var_25313_equation_0 = const()[name = tensor("op_25313_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25313_cast_fp16 = einsum(equation = var_25313_equation_0, values = (var_24823_cast_fp16, var_25223_cast_fp16))[name = tensor("op_25313_cast_fp16")]; tensor var_25315_equation_0 = const()[name = tensor("op_25315_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25315_cast_fp16 = einsum(equation = var_25315_equation_0, values = (var_24823_cast_fp16, var_25224_cast_fp16))[name = tensor("op_25315_cast_fp16")]; tensor var_25317_equation_0 = const()[name = tensor("op_25317_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25317_cast_fp16 = einsum(equation = var_25317_equation_0, values = (var_24823_cast_fp16, var_25225_cast_fp16))[name = tensor("op_25317_cast_fp16")]; tensor var_25319_equation_0 = const()[name = tensor("op_25319_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25319_cast_fp16 = einsum(equation = var_25319_equation_0, values = (var_24827_cast_fp16, var_25226_cast_fp16))[name = tensor("op_25319_cast_fp16")]; tensor var_25321_equation_0 = const()[name = tensor("op_25321_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25321_cast_fp16 = einsum(equation = var_25321_equation_0, values = (var_24827_cast_fp16, var_25227_cast_fp16))[name = tensor("op_25321_cast_fp16")]; tensor var_25323_equation_0 = const()[name = tensor("op_25323_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25323_cast_fp16 = einsum(equation = var_25323_equation_0, values = (var_24827_cast_fp16, var_25228_cast_fp16))[name = tensor("op_25323_cast_fp16")]; tensor var_25325_equation_0 = const()[name = tensor("op_25325_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25325_cast_fp16 = einsum(equation = var_25325_equation_0, values = (var_24827_cast_fp16, var_25229_cast_fp16))[name = tensor("op_25325_cast_fp16")]; tensor var_25327_equation_0 = const()[name = tensor("op_25327_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25327_cast_fp16 = einsum(equation = var_25327_equation_0, values = (var_24831_cast_fp16, var_25230_cast_fp16))[name = tensor("op_25327_cast_fp16")]; tensor var_25329_equation_0 = const()[name = tensor("op_25329_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25329_cast_fp16 = einsum(equation = var_25329_equation_0, values = (var_24831_cast_fp16, var_25231_cast_fp16))[name = tensor("op_25329_cast_fp16")]; tensor var_25331_equation_0 = const()[name = tensor("op_25331_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25331_cast_fp16 = einsum(equation = var_25331_equation_0, values = (var_24831_cast_fp16, var_25232_cast_fp16))[name = tensor("op_25331_cast_fp16")]; tensor var_25333_equation_0 = const()[name = tensor("op_25333_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25333_cast_fp16 = einsum(equation = var_25333_equation_0, values = (var_24831_cast_fp16, var_25233_cast_fp16))[name = tensor("op_25333_cast_fp16")]; tensor var_25335_equation_0 = const()[name = tensor("op_25335_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25335_cast_fp16 = einsum(equation = var_25335_equation_0, values = (var_24835_cast_fp16, var_25234_cast_fp16))[name = tensor("op_25335_cast_fp16")]; tensor var_25337_equation_0 = const()[name = tensor("op_25337_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25337_cast_fp16 = einsum(equation = var_25337_equation_0, values = (var_24835_cast_fp16, var_25235_cast_fp16))[name = tensor("op_25337_cast_fp16")]; tensor var_25339_equation_0 = const()[name = tensor("op_25339_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25339_cast_fp16 = einsum(equation = var_25339_equation_0, values = (var_24835_cast_fp16, var_25236_cast_fp16))[name = tensor("op_25339_cast_fp16")]; tensor var_25341_equation_0 = const()[name = tensor("op_25341_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25341_cast_fp16 = einsum(equation = var_25341_equation_0, values = (var_24835_cast_fp16, var_25237_cast_fp16))[name = tensor("op_25341_cast_fp16")]; tensor var_25343_equation_0 = const()[name = tensor("op_25343_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25343_cast_fp16 = einsum(equation = var_25343_equation_0, values = (var_24839_cast_fp16, var_25238_cast_fp16))[name = tensor("op_25343_cast_fp16")]; tensor var_25345_equation_0 = const()[name = tensor("op_25345_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25345_cast_fp16 = einsum(equation = var_25345_equation_0, values = (var_24839_cast_fp16, var_25239_cast_fp16))[name = tensor("op_25345_cast_fp16")]; tensor var_25347_equation_0 = const()[name = tensor("op_25347_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25347_cast_fp16 = einsum(equation = var_25347_equation_0, values = (var_24839_cast_fp16, var_25240_cast_fp16))[name = tensor("op_25347_cast_fp16")]; tensor var_25349_equation_0 = const()[name = tensor("op_25349_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25349_cast_fp16 = einsum(equation = var_25349_equation_0, values = (var_24839_cast_fp16, var_25241_cast_fp16))[name = tensor("op_25349_cast_fp16")]; tensor var_25351_equation_0 = const()[name = tensor("op_25351_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25351_cast_fp16 = einsum(equation = var_25351_equation_0, values = (var_24843_cast_fp16, var_25242_cast_fp16))[name = tensor("op_25351_cast_fp16")]; tensor var_25353_equation_0 = const()[name = tensor("op_25353_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25353_cast_fp16 = einsum(equation = var_25353_equation_0, values = (var_24843_cast_fp16, var_25243_cast_fp16))[name = tensor("op_25353_cast_fp16")]; tensor var_25355_equation_0 = const()[name = tensor("op_25355_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25355_cast_fp16 = einsum(equation = var_25355_equation_0, values = (var_24843_cast_fp16, var_25244_cast_fp16))[name = tensor("op_25355_cast_fp16")]; tensor var_25357_equation_0 = const()[name = tensor("op_25357_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25357_cast_fp16 = einsum(equation = var_25357_equation_0, values = (var_24843_cast_fp16, var_25245_cast_fp16))[name = tensor("op_25357_cast_fp16")]; tensor var_25359_equation_0 = const()[name = tensor("op_25359_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25359_cast_fp16 = einsum(equation = var_25359_equation_0, values = (var_24847_cast_fp16, var_25246_cast_fp16))[name = tensor("op_25359_cast_fp16")]; tensor var_25361_equation_0 = const()[name = tensor("op_25361_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25361_cast_fp16 = einsum(equation = var_25361_equation_0, values = (var_24847_cast_fp16, var_25247_cast_fp16))[name = tensor("op_25361_cast_fp16")]; tensor var_25363_equation_0 = const()[name = tensor("op_25363_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25363_cast_fp16 = einsum(equation = var_25363_equation_0, values = (var_24847_cast_fp16, var_25248_cast_fp16))[name = tensor("op_25363_cast_fp16")]; tensor var_25365_equation_0 = const()[name = tensor("op_25365_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25365_cast_fp16 = einsum(equation = var_25365_equation_0, values = (var_24847_cast_fp16, var_25249_cast_fp16))[name = tensor("op_25365_cast_fp16")]; tensor var_25367_equation_0 = const()[name = tensor("op_25367_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25367_cast_fp16 = einsum(equation = var_25367_equation_0, values = (var_24851_cast_fp16, var_25250_cast_fp16))[name = tensor("op_25367_cast_fp16")]; tensor var_25369_equation_0 = const()[name = tensor("op_25369_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25369_cast_fp16 = einsum(equation = var_25369_equation_0, values = (var_24851_cast_fp16, var_25251_cast_fp16))[name = tensor("op_25369_cast_fp16")]; tensor var_25371_equation_0 = const()[name = tensor("op_25371_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25371_cast_fp16 = einsum(equation = var_25371_equation_0, values = (var_24851_cast_fp16, var_25252_cast_fp16))[name = tensor("op_25371_cast_fp16")]; tensor var_25373_equation_0 = const()[name = tensor("op_25373_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25373_cast_fp16 = einsum(equation = var_25373_equation_0, values = (var_24851_cast_fp16, var_25253_cast_fp16))[name = tensor("op_25373_cast_fp16")]; tensor var_25375_equation_0 = const()[name = tensor("op_25375_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25375_cast_fp16 = einsum(equation = var_25375_equation_0, values = (var_24855_cast_fp16, var_25254_cast_fp16))[name = tensor("op_25375_cast_fp16")]; tensor var_25377_equation_0 = const()[name = tensor("op_25377_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25377_cast_fp16 = einsum(equation = var_25377_equation_0, values = (var_24855_cast_fp16, var_25255_cast_fp16))[name = tensor("op_25377_cast_fp16")]; tensor var_25379_equation_0 = const()[name = tensor("op_25379_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25379_cast_fp16 = einsum(equation = var_25379_equation_0, values = (var_24855_cast_fp16, var_25256_cast_fp16))[name = tensor("op_25379_cast_fp16")]; tensor var_25381_equation_0 = const()[name = tensor("op_25381_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25381_cast_fp16 = einsum(equation = var_25381_equation_0, values = (var_24855_cast_fp16, var_25257_cast_fp16))[name = tensor("op_25381_cast_fp16")]; tensor var_25383_equation_0 = const()[name = tensor("op_25383_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25383_cast_fp16 = einsum(equation = var_25383_equation_0, values = (var_24859_cast_fp16, var_25258_cast_fp16))[name = tensor("op_25383_cast_fp16")]; tensor var_25385_equation_0 = const()[name = tensor("op_25385_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25385_cast_fp16 = einsum(equation = var_25385_equation_0, values = (var_24859_cast_fp16, var_25259_cast_fp16))[name = tensor("op_25385_cast_fp16")]; tensor var_25387_equation_0 = const()[name = tensor("op_25387_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25387_cast_fp16 = einsum(equation = var_25387_equation_0, values = (var_24859_cast_fp16, var_25260_cast_fp16))[name = tensor("op_25387_cast_fp16")]; tensor var_25389_equation_0 = const()[name = tensor("op_25389_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25389_cast_fp16 = einsum(equation = var_25389_equation_0, values = (var_24859_cast_fp16, var_25261_cast_fp16))[name = tensor("op_25389_cast_fp16")]; tensor var_25391_equation_0 = const()[name = tensor("op_25391_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25391_cast_fp16 = einsum(equation = var_25391_equation_0, values = (var_24863_cast_fp16, var_25262_cast_fp16))[name = tensor("op_25391_cast_fp16")]; tensor var_25393_equation_0 = const()[name = tensor("op_25393_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25393_cast_fp16 = einsum(equation = var_25393_equation_0, values = (var_24863_cast_fp16, var_25263_cast_fp16))[name = tensor("op_25393_cast_fp16")]; tensor var_25395_equation_0 = const()[name = tensor("op_25395_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25395_cast_fp16 = einsum(equation = var_25395_equation_0, values = (var_24863_cast_fp16, var_25264_cast_fp16))[name = tensor("op_25395_cast_fp16")]; tensor var_25397_equation_0 = const()[name = tensor("op_25397_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25397_cast_fp16 = einsum(equation = var_25397_equation_0, values = (var_24863_cast_fp16, var_25265_cast_fp16))[name = tensor("op_25397_cast_fp16")]; tensor var_25399_equation_0 = const()[name = tensor("op_25399_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25399_cast_fp16 = einsum(equation = var_25399_equation_0, values = (var_24867_cast_fp16, var_25266_cast_fp16))[name = tensor("op_25399_cast_fp16")]; tensor var_25401_equation_0 = const()[name = tensor("op_25401_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25401_cast_fp16 = einsum(equation = var_25401_equation_0, values = (var_24867_cast_fp16, var_25267_cast_fp16))[name = tensor("op_25401_cast_fp16")]; tensor var_25403_equation_0 = const()[name = tensor("op_25403_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25403_cast_fp16 = einsum(equation = var_25403_equation_0, values = (var_24867_cast_fp16, var_25268_cast_fp16))[name = tensor("op_25403_cast_fp16")]; tensor var_25405_equation_0 = const()[name = tensor("op_25405_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25405_cast_fp16 = einsum(equation = var_25405_equation_0, values = (var_24867_cast_fp16, var_25269_cast_fp16))[name = tensor("op_25405_cast_fp16")]; tensor var_25407_equation_0 = const()[name = tensor("op_25407_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25407_cast_fp16 = einsum(equation = var_25407_equation_0, values = (var_24871_cast_fp16, var_25270_cast_fp16))[name = tensor("op_25407_cast_fp16")]; tensor var_25409_equation_0 = const()[name = tensor("op_25409_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25409_cast_fp16 = einsum(equation = var_25409_equation_0, values = (var_24871_cast_fp16, var_25271_cast_fp16))[name = tensor("op_25409_cast_fp16")]; tensor var_25411_equation_0 = const()[name = tensor("op_25411_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25411_cast_fp16 = einsum(equation = var_25411_equation_0, values = (var_24871_cast_fp16, var_25272_cast_fp16))[name = tensor("op_25411_cast_fp16")]; tensor var_25413_equation_0 = const()[name = tensor("op_25413_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25413_cast_fp16 = einsum(equation = var_25413_equation_0, values = (var_24871_cast_fp16, var_25273_cast_fp16))[name = tensor("op_25413_cast_fp16")]; tensor var_25415_equation_0 = const()[name = tensor("op_25415_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25415_cast_fp16 = einsum(equation = var_25415_equation_0, values = (var_24875_cast_fp16, var_25274_cast_fp16))[name = tensor("op_25415_cast_fp16")]; tensor var_25417_equation_0 = const()[name = tensor("op_25417_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25417_cast_fp16 = einsum(equation = var_25417_equation_0, values = (var_24875_cast_fp16, var_25275_cast_fp16))[name = tensor("op_25417_cast_fp16")]; tensor var_25419_equation_0 = const()[name = tensor("op_25419_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25419_cast_fp16 = einsum(equation = var_25419_equation_0, values = (var_24875_cast_fp16, var_25276_cast_fp16))[name = tensor("op_25419_cast_fp16")]; tensor var_25421_equation_0 = const()[name = tensor("op_25421_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25421_cast_fp16 = einsum(equation = var_25421_equation_0, values = (var_24875_cast_fp16, var_25277_cast_fp16))[name = tensor("op_25421_cast_fp16")]; tensor var_25423_equation_0 = const()[name = tensor("op_25423_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25423_cast_fp16 = einsum(equation = var_25423_equation_0, values = (var_24879_cast_fp16, var_25278_cast_fp16))[name = tensor("op_25423_cast_fp16")]; tensor var_25425_equation_0 = const()[name = tensor("op_25425_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25425_cast_fp16 = einsum(equation = var_25425_equation_0, values = (var_24879_cast_fp16, var_25279_cast_fp16))[name = tensor("op_25425_cast_fp16")]; tensor var_25427_equation_0 = const()[name = tensor("op_25427_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25427_cast_fp16 = einsum(equation = var_25427_equation_0, values = (var_24879_cast_fp16, var_25280_cast_fp16))[name = tensor("op_25427_cast_fp16")]; tensor var_25429_equation_0 = const()[name = tensor("op_25429_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25429_cast_fp16 = einsum(equation = var_25429_equation_0, values = (var_24879_cast_fp16, var_25281_cast_fp16))[name = tensor("op_25429_cast_fp16")]; tensor var_25431_equation_0 = const()[name = tensor("op_25431_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25431_cast_fp16 = einsum(equation = var_25431_equation_0, values = (var_24883_cast_fp16, var_25282_cast_fp16))[name = tensor("op_25431_cast_fp16")]; tensor var_25433_equation_0 = const()[name = tensor("op_25433_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25433_cast_fp16 = einsum(equation = var_25433_equation_0, values = (var_24883_cast_fp16, var_25283_cast_fp16))[name = tensor("op_25433_cast_fp16")]; tensor var_25435_equation_0 = const()[name = tensor("op_25435_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25435_cast_fp16 = einsum(equation = var_25435_equation_0, values = (var_24883_cast_fp16, var_25284_cast_fp16))[name = tensor("op_25435_cast_fp16")]; tensor var_25437_equation_0 = const()[name = tensor("op_25437_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25437_cast_fp16 = einsum(equation = var_25437_equation_0, values = (var_24883_cast_fp16, var_25285_cast_fp16))[name = tensor("op_25437_cast_fp16")]; tensor var_25439_equation_0 = const()[name = tensor("op_25439_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25439_cast_fp16 = einsum(equation = var_25439_equation_0, values = (var_24887_cast_fp16, var_25286_cast_fp16))[name = tensor("op_25439_cast_fp16")]; tensor var_25441_equation_0 = const()[name = tensor("op_25441_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25441_cast_fp16 = einsum(equation = var_25441_equation_0, values = (var_24887_cast_fp16, var_25287_cast_fp16))[name = tensor("op_25441_cast_fp16")]; tensor var_25443_equation_0 = const()[name = tensor("op_25443_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25443_cast_fp16 = einsum(equation = var_25443_equation_0, values = (var_24887_cast_fp16, var_25288_cast_fp16))[name = tensor("op_25443_cast_fp16")]; tensor var_25445_equation_0 = const()[name = tensor("op_25445_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25445_cast_fp16 = einsum(equation = var_25445_equation_0, values = (var_24887_cast_fp16, var_25289_cast_fp16))[name = tensor("op_25445_cast_fp16")]; tensor var_25447_equation_0 = const()[name = tensor("op_25447_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25447_cast_fp16 = einsum(equation = var_25447_equation_0, values = (var_24891_cast_fp16, var_25290_cast_fp16))[name = tensor("op_25447_cast_fp16")]; tensor var_25449_equation_0 = const()[name = tensor("op_25449_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25449_cast_fp16 = einsum(equation = var_25449_equation_0, values = (var_24891_cast_fp16, var_25291_cast_fp16))[name = tensor("op_25449_cast_fp16")]; tensor var_25451_equation_0 = const()[name = tensor("op_25451_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25451_cast_fp16 = einsum(equation = var_25451_equation_0, values = (var_24891_cast_fp16, var_25292_cast_fp16))[name = tensor("op_25451_cast_fp16")]; tensor var_25453_equation_0 = const()[name = tensor("op_25453_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_25453_cast_fp16 = einsum(equation = var_25453_equation_0, values = (var_24891_cast_fp16, var_25293_cast_fp16))[name = tensor("op_25453_cast_fp16")]; tensor var_25455_interleave_0 = const()[name = tensor("op_25455_interleave_0"), val = tensor(false)]; tensor var_25455_cast_fp16 = concat(axis = var_23987, interleave = var_25455_interleave_0, values = (var_25295_cast_fp16, var_25297_cast_fp16, var_25299_cast_fp16, var_25301_cast_fp16))[name = tensor("op_25455_cast_fp16")]; tensor var_25457_interleave_0 = const()[name = tensor("op_25457_interleave_0"), val = tensor(false)]; tensor var_25457_cast_fp16 = concat(axis = var_23987, interleave = var_25457_interleave_0, values = (var_25303_cast_fp16, var_25305_cast_fp16, var_25307_cast_fp16, var_25309_cast_fp16))[name = tensor("op_25457_cast_fp16")]; tensor var_25459_interleave_0 = const()[name = tensor("op_25459_interleave_0"), val = tensor(false)]; tensor var_25459_cast_fp16 = concat(axis = var_23987, interleave = var_25459_interleave_0, values = (var_25311_cast_fp16, var_25313_cast_fp16, var_25315_cast_fp16, var_25317_cast_fp16))[name = tensor("op_25459_cast_fp16")]; tensor var_25461_interleave_0 = const()[name = tensor("op_25461_interleave_0"), val = tensor(false)]; tensor var_25461_cast_fp16 = concat(axis = var_23987, interleave = var_25461_interleave_0, values = (var_25319_cast_fp16, var_25321_cast_fp16, var_25323_cast_fp16, var_25325_cast_fp16))[name = tensor("op_25461_cast_fp16")]; tensor var_25463_interleave_0 = const()[name = tensor("op_25463_interleave_0"), val = tensor(false)]; tensor var_25463_cast_fp16 = concat(axis = var_23987, interleave = var_25463_interleave_0, values = (var_25327_cast_fp16, var_25329_cast_fp16, var_25331_cast_fp16, var_25333_cast_fp16))[name = tensor("op_25463_cast_fp16")]; tensor var_25465_interleave_0 = const()[name = tensor("op_25465_interleave_0"), val = tensor(false)]; tensor var_25465_cast_fp16 = concat(axis = var_23987, interleave = var_25465_interleave_0, values = (var_25335_cast_fp16, var_25337_cast_fp16, var_25339_cast_fp16, var_25341_cast_fp16))[name = tensor("op_25465_cast_fp16")]; tensor var_25467_interleave_0 = const()[name = tensor("op_25467_interleave_0"), val = tensor(false)]; tensor var_25467_cast_fp16 = concat(axis = var_23987, interleave = var_25467_interleave_0, values = (var_25343_cast_fp16, var_25345_cast_fp16, var_25347_cast_fp16, var_25349_cast_fp16))[name = tensor("op_25467_cast_fp16")]; tensor var_25469_interleave_0 = const()[name = tensor("op_25469_interleave_0"), val = tensor(false)]; tensor var_25469_cast_fp16 = concat(axis = var_23987, interleave = var_25469_interleave_0, values = (var_25351_cast_fp16, var_25353_cast_fp16, var_25355_cast_fp16, var_25357_cast_fp16))[name = tensor("op_25469_cast_fp16")]; tensor var_25471_interleave_0 = const()[name = tensor("op_25471_interleave_0"), val = tensor(false)]; tensor var_25471_cast_fp16 = concat(axis = var_23987, interleave = var_25471_interleave_0, values = (var_25359_cast_fp16, var_25361_cast_fp16, var_25363_cast_fp16, var_25365_cast_fp16))[name = tensor("op_25471_cast_fp16")]; tensor var_25473_interleave_0 = const()[name = tensor("op_25473_interleave_0"), val = tensor(false)]; tensor var_25473_cast_fp16 = concat(axis = var_23987, interleave = var_25473_interleave_0, values = (var_25367_cast_fp16, var_25369_cast_fp16, var_25371_cast_fp16, var_25373_cast_fp16))[name = tensor("op_25473_cast_fp16")]; tensor var_25475_interleave_0 = const()[name = tensor("op_25475_interleave_0"), val = tensor(false)]; tensor var_25475_cast_fp16 = concat(axis = var_23987, interleave = var_25475_interleave_0, values = (var_25375_cast_fp16, var_25377_cast_fp16, var_25379_cast_fp16, var_25381_cast_fp16))[name = tensor("op_25475_cast_fp16")]; tensor var_25477_interleave_0 = const()[name = tensor("op_25477_interleave_0"), val = tensor(false)]; tensor var_25477_cast_fp16 = concat(axis = var_23987, interleave = var_25477_interleave_0, values = (var_25383_cast_fp16, var_25385_cast_fp16, var_25387_cast_fp16, var_25389_cast_fp16))[name = tensor("op_25477_cast_fp16")]; tensor var_25479_interleave_0 = const()[name = tensor("op_25479_interleave_0"), val = tensor(false)]; tensor var_25479_cast_fp16 = concat(axis = var_23987, interleave = var_25479_interleave_0, values = (var_25391_cast_fp16, var_25393_cast_fp16, var_25395_cast_fp16, var_25397_cast_fp16))[name = tensor("op_25479_cast_fp16")]; tensor var_25481_interleave_0 = const()[name = tensor("op_25481_interleave_0"), val = tensor(false)]; tensor var_25481_cast_fp16 = concat(axis = var_23987, interleave = var_25481_interleave_0, values = (var_25399_cast_fp16, var_25401_cast_fp16, var_25403_cast_fp16, var_25405_cast_fp16))[name = tensor("op_25481_cast_fp16")]; tensor var_25483_interleave_0 = const()[name = tensor("op_25483_interleave_0"), val = tensor(false)]; tensor var_25483_cast_fp16 = concat(axis = var_23987, interleave = var_25483_interleave_0, values = (var_25407_cast_fp16, var_25409_cast_fp16, var_25411_cast_fp16, var_25413_cast_fp16))[name = tensor("op_25483_cast_fp16")]; tensor var_25485_interleave_0 = const()[name = tensor("op_25485_interleave_0"), val = tensor(false)]; tensor var_25485_cast_fp16 = concat(axis = var_23987, interleave = var_25485_interleave_0, values = (var_25415_cast_fp16, var_25417_cast_fp16, var_25419_cast_fp16, var_25421_cast_fp16))[name = tensor("op_25485_cast_fp16")]; tensor var_25487_interleave_0 = const()[name = tensor("op_25487_interleave_0"), val = tensor(false)]; tensor var_25487_cast_fp16 = concat(axis = var_23987, interleave = var_25487_interleave_0, values = (var_25423_cast_fp16, var_25425_cast_fp16, var_25427_cast_fp16, var_25429_cast_fp16))[name = tensor("op_25487_cast_fp16")]; tensor var_25489_interleave_0 = const()[name = tensor("op_25489_interleave_0"), val = tensor(false)]; tensor var_25489_cast_fp16 = concat(axis = var_23987, interleave = var_25489_interleave_0, values = (var_25431_cast_fp16, var_25433_cast_fp16, var_25435_cast_fp16, var_25437_cast_fp16))[name = tensor("op_25489_cast_fp16")]; tensor var_25491_interleave_0 = const()[name = tensor("op_25491_interleave_0"), val = tensor(false)]; tensor var_25491_cast_fp16 = concat(axis = var_23987, interleave = var_25491_interleave_0, values = (var_25439_cast_fp16, var_25441_cast_fp16, var_25443_cast_fp16, var_25445_cast_fp16))[name = tensor("op_25491_cast_fp16")]; tensor var_25493_interleave_0 = const()[name = tensor("op_25493_interleave_0"), val = tensor(false)]; tensor var_25493_cast_fp16 = concat(axis = var_23987, interleave = var_25493_interleave_0, values = (var_25447_cast_fp16, var_25449_cast_fp16, var_25451_cast_fp16, var_25453_cast_fp16))[name = tensor("op_25493_cast_fp16")]; tensor input_121_interleave_0 = const()[name = tensor("input_121_interleave_0"), val = tensor(false)]; tensor input_121_cast_fp16 = concat(axis = var_24012, interleave = input_121_interleave_0, values = (var_25455_cast_fp16, var_25457_cast_fp16, var_25459_cast_fp16, var_25461_cast_fp16, var_25463_cast_fp16, var_25465_cast_fp16, var_25467_cast_fp16, var_25469_cast_fp16, var_25471_cast_fp16, var_25473_cast_fp16, var_25475_cast_fp16, var_25477_cast_fp16, var_25479_cast_fp16, var_25481_cast_fp16, var_25483_cast_fp16, var_25485_cast_fp16, var_25487_cast_fp16, var_25489_cast_fp16, var_25491_cast_fp16, var_25493_cast_fp16))[name = tensor("input_121_cast_fp16")]; tensor var_25504_pad_type_0 = const()[name = tensor("op_25504_pad_type_0"), val = tensor("valid")]; tensor var_25504_strides_0 = const()[name = tensor("op_25504_strides_0"), val = tensor([1, 1])]; tensor var_25504_pad_0 = const()[name = tensor("op_25504_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25504_dilations_0 = const()[name = tensor("op_25504_dilations_0"), val = tensor([1, 1])]; tensor var_25504_groups_0 = const()[name = tensor("op_25504_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211791104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212610368))), name = tensor("layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_15_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212610496)))]; tensor var_25504_cast_fp16 = conv(bias = layers_15_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_25504_dilations_0, groups = var_25504_groups_0, pad = var_25504_pad_0, pad_type = var_25504_pad_type_0, strides = var_25504_strides_0, weight = layers_15_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_121_cast_fp16)[name = tensor("op_25504_cast_fp16")]; tensor var_25510_pad_type_0 = const()[name = tensor("op_25510_pad_type_0"), val = tensor("valid")]; tensor var_25510_strides_0 = const()[name = tensor("op_25510_strides_0"), val = tensor([1, 1])]; tensor var_25510_pad_0 = const()[name = tensor("op_25510_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25510_dilations_0 = const()[name = tensor("op_25510_dilations_0"), val = tensor([1, 1])]; tensor var_25510_groups_0 = const()[name = tensor("op_25510_groups_0"), val = tensor(1)]; tensor layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212633664))), name = tensor("layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212613120))), shape = tensor([1280, 1280, 1, 1])]; tensor var_25510_cast_fp16 = conv(dilations = var_25510_dilations_0, groups = var_25510_groups_0, pad = var_25510_pad_0, pad_type = var_25510_pad_type_0, strides = var_25510_strides_0, weight = layers_15_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_121_cast_fp16)[name = tensor("op_25510_cast_fp16")]; tensor obj_63_cast_fp16 = add(x = var_25504_cast_fp16, y = var_25510_cast_fp16)[name = tensor("obj_63_cast_fp16")]; tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; tensor out_63_axes_0 = const()[name = tensor("out_63_axes_0"), val = tensor([1])]; tensor var_25521_to_fp16 = const()[name = tensor("op_25521_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_63_cast_fp16 = layer_norm(axes = out_63_axes_0, epsilon = var_25521_to_fp16, x = inputs_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; tensor input_123_gamma_0_to_fp16 = const()[name = tensor("input_123_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212838528)))]; tensor input_123_beta_0_to_fp16 = const()[name = tensor("input_123_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212841152)))]; tensor input_123_epsilon_0_to_fp16 = const()[name = tensor("input_123_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_123_cast_fp16 = batch_norm(beta = input_123_beta_0_to_fp16, epsilon = input_123_epsilon_0_to_fp16, gamma = input_123_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("input_123_cast_fp16")]; tensor var_25539_pad_type_0 = const()[name = tensor("op_25539_pad_type_0"), val = tensor("valid")]; tensor var_25539_strides_0 = const()[name = tensor("op_25539_strides_0"), val = tensor([1, 1])]; tensor var_25539_pad_0 = const()[name = tensor("op_25539_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25539_dilations_0 = const()[name = tensor("op_25539_dilations_0"), val = tensor([1, 1])]; tensor var_25539_groups_0 = const()[name = tensor("op_25539_groups_0"), val = tensor(1)]; tensor layers_15_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(212843776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216120640))), name = tensor("layers_15_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_15_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216120768)))]; tensor var_25539_cast_fp16 = conv(bias = layers_15_fc1_inlier_module_bias_to_fp16, dilations = var_25539_dilations_0, groups = var_25539_groups_0, pad = var_25539_pad_0, pad_type = var_25539_pad_type_0, strides = var_25539_strides_0, weight = layers_15_fc1_inlier_module_weight_to_fp16_palettized, x = input_123_cast_fp16)[name = tensor("op_25539_cast_fp16")]; tensor var_25545_pad_type_0 = const()[name = tensor("op_25545_pad_type_0"), val = tensor("valid")]; tensor var_25545_strides_0 = const()[name = tensor("op_25545_strides_0"), val = tensor([1, 1])]; tensor var_25545_pad_0 = const()[name = tensor("op_25545_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25545_dilations_0 = const()[name = tensor("op_25545_dilations_0"), val = tensor([1, 1])]; tensor var_25545_groups_0 = const()[name = tensor("op_25545_groups_0"), val = tensor(1)]; tensor layers_15_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216176896))), name = tensor("layers_15_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216131072))), shape = tensor([5120, 1280, 1, 1])]; tensor var_25545_cast_fp16 = conv(dilations = var_25545_dilations_0, groups = var_25545_groups_0, pad = var_25545_pad_0, pad_type = var_25545_pad_type_0, strides = var_25545_strides_0, weight = layers_15_fc1_outlier_module_weight_to_fp16_sparsified, x = input_123_cast_fp16)[name = tensor("op_25545_cast_fp16")]; tensor input_125_cast_fp16 = add(x = var_25539_cast_fp16, y = var_25545_cast_fp16)[name = tensor("input_125_cast_fp16")]; tensor input_127_mode_0 = const()[name = tensor("input_127_mode_0"), val = tensor("EXACT")]; tensor input_127_cast_fp16 = gelu(mode = input_127_mode_0, x = input_125_cast_fp16)[name = tensor("input_127_cast_fp16")]; tensor var_25556_pad_type_0 = const()[name = tensor("op_25556_pad_type_0"), val = tensor("valid")]; tensor var_25556_strides_0 = const()[name = tensor("op_25556_strides_0"), val = tensor([1, 1])]; tensor var_25556_pad_0 = const()[name = tensor("op_25556_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25556_dilations_0 = const()[name = tensor("op_25556_dilations_0"), val = tensor([1, 1])]; tensor var_25556_groups_0 = const()[name = tensor("op_25556_groups_0"), val = tensor(1)]; tensor layers_15_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(216996160))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220273024))), name = tensor("layers_15_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_15_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_15_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220273152)))]; tensor var_25556_cast_fp16 = conv(bias = layers_15_fc2_inlier_module_bias_to_fp16, dilations = var_25556_dilations_0, groups = var_25556_groups_0, pad = var_25556_pad_0, pad_type = var_25556_pad_type_0, strides = var_25556_strides_0, weight = layers_15_fc2_inlier_module_weight_to_fp16_palettized, x = input_127_cast_fp16)[name = tensor("op_25556_cast_fp16")]; tensor var_25562_pad_type_0 = const()[name = tensor("op_25562_pad_type_0"), val = tensor("valid")]; tensor var_25562_strides_0 = const()[name = tensor("op_25562_strides_0"), val = tensor([1, 1])]; tensor var_25562_pad_0 = const()[name = tensor("op_25562_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25562_dilations_0 = const()[name = tensor("op_25562_dilations_0"), val = tensor([1, 1])]; tensor var_25562_groups_0 = const()[name = tensor("op_25562_groups_0"), val = tensor(1)]; tensor layers_15_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220449024))), name = tensor("layers_15_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220275776))), shape = tensor([1280, 5120, 1, 1])]; tensor var_25562_cast_fp16 = conv(dilations = var_25562_dilations_0, groups = var_25562_groups_0, pad = var_25562_pad_0, pad_type = var_25562_pad_type_0, strides = var_25562_strides_0, weight = layers_15_fc2_outlier_module_weight_to_fp16_sparsified, x = input_127_cast_fp16)[name = tensor("op_25562_cast_fp16")]; tensor hidden_states_35_cast_fp16 = add(x = var_25556_cast_fp16, y = var_25562_cast_fp16)[name = tensor("hidden_states_35_cast_fp16")]; tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = hidden_states_35_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; tensor var_25568 = const()[name = tensor("op_25568"), val = tensor(3)]; tensor var_25593 = const()[name = tensor("op_25593"), val = tensor(1)]; tensor out_65_axes_0 = const()[name = tensor("out_65_axes_0"), val = tensor([1])]; tensor var_25610_to_fp16 = const()[name = tensor("op_25610_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_65_cast_fp16 = layer_norm(axes = out_65_axes_0, epsilon = var_25610_to_fp16, x = inputs_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221268288)))]; tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221270912)))]; tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("obj_65_cast_fp16")]; tensor var_25632_pad_type_0 = const()[name = tensor("op_25632_pad_type_0"), val = tensor("valid")]; tensor var_25632_strides_0 = const()[name = tensor("op_25632_strides_0"), val = tensor([1, 1])]; tensor var_25632_pad_0 = const()[name = tensor("op_25632_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25632_dilations_0 = const()[name = tensor("op_25632_dilations_0"), val = tensor([1, 1])]; tensor var_25632_groups_0 = const()[name = tensor("op_25632_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(221273536))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222092800))), name = tensor("layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_16_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222092928)))]; tensor var_25632_cast_fp16 = conv(bias = layers_16_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_25632_dilations_0, groups = var_25632_groups_0, pad = var_25632_pad_0, pad_type = var_25632_pad_type_0, strides = var_25632_strides_0, weight = layers_16_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_25632_cast_fp16")]; tensor var_25638_pad_type_0 = const()[name = tensor("op_25638_pad_type_0"), val = tensor("valid")]; tensor var_25638_strides_0 = const()[name = tensor("op_25638_strides_0"), val = tensor([1, 1])]; tensor var_25638_pad_0 = const()[name = tensor("op_25638_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25638_dilations_0 = const()[name = tensor("op_25638_dilations_0"), val = tensor([1, 1])]; tensor var_25638_groups_0 = const()[name = tensor("op_25638_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222164416))), name = tensor("layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222095552))), shape = tensor([1280, 1280, 1, 1])]; tensor var_25638_cast_fp16 = conv(dilations = var_25638_dilations_0, groups = var_25638_groups_0, pad = var_25638_pad_0, pad_type = var_25638_pad_type_0, strides = var_25638_strides_0, weight = layers_16_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_25638_cast_fp16")]; tensor query_33_cast_fp16 = add(x = var_25632_cast_fp16, y = var_25638_cast_fp16)[name = tensor("query_33_cast_fp16")]; tensor var_25647_pad_type_0 = const()[name = tensor("op_25647_pad_type_0"), val = tensor("valid")]; tensor var_25647_strides_0 = const()[name = tensor("op_25647_strides_0"), val = tensor([1, 1])]; tensor var_25647_pad_0 = const()[name = tensor("op_25647_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25647_dilations_0 = const()[name = tensor("op_25647_dilations_0"), val = tensor([1, 1])]; tensor var_25647_groups_0 = const()[name = tensor("op_25647_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(222369280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223188544))), name = tensor("layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_25647_cast_fp16 = conv(dilations = var_25647_dilations_0, groups = var_25647_groups_0, pad = var_25647_pad_0, pad_type = var_25647_pad_type_0, strides = var_25647_strides_0, weight = layers_16_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_25647_cast_fp16")]; tensor var_25653_pad_type_0 = const()[name = tensor("op_25653_pad_type_0"), val = tensor("valid")]; tensor var_25653_strides_0 = const()[name = tensor("op_25653_strides_0"), val = tensor([1, 1])]; tensor var_25653_pad_0 = const()[name = tensor("op_25653_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25653_dilations_0 = const()[name = tensor("op_25653_dilations_0"), val = tensor([1, 1])]; tensor var_25653_groups_0 = const()[name = tensor("op_25653_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223217792))), name = tensor("layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223188672))), shape = tensor([1280, 1280, 1, 1])]; tensor var_25653_cast_fp16 = conv(dilations = var_25653_dilations_0, groups = var_25653_groups_0, pad = var_25653_pad_0, pad_type = var_25653_pad_type_0, strides = var_25653_strides_0, weight = layers_16_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_25653_cast_fp16")]; tensor key_33_cast_fp16 = add(x = var_25647_cast_fp16, y = var_25653_cast_fp16)[name = tensor("key_33_cast_fp16")]; tensor var_25663_pad_type_0 = const()[name = tensor("op_25663_pad_type_0"), val = tensor("valid")]; tensor var_25663_strides_0 = const()[name = tensor("op_25663_strides_0"), val = tensor([1, 1])]; tensor var_25663_pad_0 = const()[name = tensor("op_25663_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25663_dilations_0 = const()[name = tensor("op_25663_dilations_0"), val = tensor([1, 1])]; tensor var_25663_groups_0 = const()[name = tensor("op_25663_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(223422656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224241920))), name = tensor("layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_16_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224242048)))]; tensor var_25663_cast_fp16 = conv(bias = layers_16_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_25663_dilations_0, groups = var_25663_groups_0, pad = var_25663_pad_0, pad_type = var_25663_pad_type_0, strides = var_25663_strides_0, weight = layers_16_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_65_cast_fp16)[name = tensor("op_25663_cast_fp16")]; tensor var_25669_pad_type_0 = const()[name = tensor("op_25669_pad_type_0"), val = tensor("valid")]; tensor var_25669_strides_0 = const()[name = tensor("op_25669_strides_0"), val = tensor([1, 1])]; tensor var_25669_pad_0 = const()[name = tensor("op_25669_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_25669_dilations_0 = const()[name = tensor("op_25669_dilations_0"), val = tensor([1, 1])]; tensor var_25669_groups_0 = const()[name = tensor("op_25669_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224261888))), name = tensor("layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224244672))), shape = tensor([1280, 1280, 1, 1])]; tensor var_25669_cast_fp16 = conv(dilations = var_25669_dilations_0, groups = var_25669_groups_0, pad = var_25669_pad_0, pad_type = var_25669_pad_type_0, strides = var_25669_strides_0, weight = layers_16_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_65_cast_fp16)[name = tensor("op_25669_cast_fp16")]; tensor value_33_cast_fp16 = add(x = var_25663_cast_fp16, y = var_25669_cast_fp16)[name = tensor("value_33_cast_fp16")]; tensor var_25675_begin_0 = const()[name = tensor("op_25675_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25675_end_0 = const()[name = tensor("op_25675_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25675_end_mask_0 = const()[name = tensor("op_25675_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25675_cast_fp16 = slice_by_index(begin = var_25675_begin_0, end = var_25675_end_0, end_mask = var_25675_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25675_cast_fp16")]; tensor var_25679_begin_0 = const()[name = tensor("op_25679_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_25679_end_0 = const()[name = tensor("op_25679_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_25679_end_mask_0 = const()[name = tensor("op_25679_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25679_cast_fp16 = slice_by_index(begin = var_25679_begin_0, end = var_25679_end_0, end_mask = var_25679_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25679_cast_fp16")]; tensor var_25683_begin_0 = const()[name = tensor("op_25683_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_25683_end_0 = const()[name = tensor("op_25683_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_25683_end_mask_0 = const()[name = tensor("op_25683_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25683_cast_fp16 = slice_by_index(begin = var_25683_begin_0, end = var_25683_end_0, end_mask = var_25683_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25683_cast_fp16")]; tensor var_25687_begin_0 = const()[name = tensor("op_25687_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_25687_end_0 = const()[name = tensor("op_25687_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_25687_end_mask_0 = const()[name = tensor("op_25687_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25687_cast_fp16 = slice_by_index(begin = var_25687_begin_0, end = var_25687_end_0, end_mask = var_25687_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25687_cast_fp16")]; tensor var_25691_begin_0 = const()[name = tensor("op_25691_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_25691_end_0 = const()[name = tensor("op_25691_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_25691_end_mask_0 = const()[name = tensor("op_25691_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25691_cast_fp16 = slice_by_index(begin = var_25691_begin_0, end = var_25691_end_0, end_mask = var_25691_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25691_cast_fp16")]; tensor var_25695_begin_0 = const()[name = tensor("op_25695_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_25695_end_0 = const()[name = tensor("op_25695_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_25695_end_mask_0 = const()[name = tensor("op_25695_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25695_cast_fp16 = slice_by_index(begin = var_25695_begin_0, end = var_25695_end_0, end_mask = var_25695_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25695_cast_fp16")]; tensor var_25699_begin_0 = const()[name = tensor("op_25699_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_25699_end_0 = const()[name = tensor("op_25699_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_25699_end_mask_0 = const()[name = tensor("op_25699_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25699_cast_fp16 = slice_by_index(begin = var_25699_begin_0, end = var_25699_end_0, end_mask = var_25699_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25699_cast_fp16")]; tensor var_25703_begin_0 = const()[name = tensor("op_25703_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_25703_end_0 = const()[name = tensor("op_25703_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_25703_end_mask_0 = const()[name = tensor("op_25703_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25703_cast_fp16 = slice_by_index(begin = var_25703_begin_0, end = var_25703_end_0, end_mask = var_25703_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25703_cast_fp16")]; tensor var_25707_begin_0 = const()[name = tensor("op_25707_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_25707_end_0 = const()[name = tensor("op_25707_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_25707_end_mask_0 = const()[name = tensor("op_25707_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25707_cast_fp16 = slice_by_index(begin = var_25707_begin_0, end = var_25707_end_0, end_mask = var_25707_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25707_cast_fp16")]; tensor var_25711_begin_0 = const()[name = tensor("op_25711_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_25711_end_0 = const()[name = tensor("op_25711_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_25711_end_mask_0 = const()[name = tensor("op_25711_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25711_cast_fp16 = slice_by_index(begin = var_25711_begin_0, end = var_25711_end_0, end_mask = var_25711_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25711_cast_fp16")]; tensor var_25715_begin_0 = const()[name = tensor("op_25715_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_25715_end_0 = const()[name = tensor("op_25715_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_25715_end_mask_0 = const()[name = tensor("op_25715_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25715_cast_fp16 = slice_by_index(begin = var_25715_begin_0, end = var_25715_end_0, end_mask = var_25715_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25715_cast_fp16")]; tensor var_25719_begin_0 = const()[name = tensor("op_25719_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_25719_end_0 = const()[name = tensor("op_25719_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_25719_end_mask_0 = const()[name = tensor("op_25719_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25719_cast_fp16 = slice_by_index(begin = var_25719_begin_0, end = var_25719_end_0, end_mask = var_25719_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25719_cast_fp16")]; tensor var_25723_begin_0 = const()[name = tensor("op_25723_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_25723_end_0 = const()[name = tensor("op_25723_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_25723_end_mask_0 = const()[name = tensor("op_25723_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25723_cast_fp16 = slice_by_index(begin = var_25723_begin_0, end = var_25723_end_0, end_mask = var_25723_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25723_cast_fp16")]; tensor var_25727_begin_0 = const()[name = tensor("op_25727_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_25727_end_0 = const()[name = tensor("op_25727_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_25727_end_mask_0 = const()[name = tensor("op_25727_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25727_cast_fp16 = slice_by_index(begin = var_25727_begin_0, end = var_25727_end_0, end_mask = var_25727_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25727_cast_fp16")]; tensor var_25731_begin_0 = const()[name = tensor("op_25731_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_25731_end_0 = const()[name = tensor("op_25731_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_25731_end_mask_0 = const()[name = tensor("op_25731_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25731_cast_fp16 = slice_by_index(begin = var_25731_begin_0, end = var_25731_end_0, end_mask = var_25731_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25731_cast_fp16")]; tensor var_25735_begin_0 = const()[name = tensor("op_25735_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_25735_end_0 = const()[name = tensor("op_25735_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_25735_end_mask_0 = const()[name = tensor("op_25735_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25735_cast_fp16 = slice_by_index(begin = var_25735_begin_0, end = var_25735_end_0, end_mask = var_25735_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25735_cast_fp16")]; tensor var_25739_begin_0 = const()[name = tensor("op_25739_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_25739_end_0 = const()[name = tensor("op_25739_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_25739_end_mask_0 = const()[name = tensor("op_25739_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25739_cast_fp16 = slice_by_index(begin = var_25739_begin_0, end = var_25739_end_0, end_mask = var_25739_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25739_cast_fp16")]; tensor var_25743_begin_0 = const()[name = tensor("op_25743_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_25743_end_0 = const()[name = tensor("op_25743_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_25743_end_mask_0 = const()[name = tensor("op_25743_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25743_cast_fp16 = slice_by_index(begin = var_25743_begin_0, end = var_25743_end_0, end_mask = var_25743_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25743_cast_fp16")]; tensor var_25747_begin_0 = const()[name = tensor("op_25747_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_25747_end_0 = const()[name = tensor("op_25747_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_25747_end_mask_0 = const()[name = tensor("op_25747_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25747_cast_fp16 = slice_by_index(begin = var_25747_begin_0, end = var_25747_end_0, end_mask = var_25747_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25747_cast_fp16")]; tensor var_25751_begin_0 = const()[name = tensor("op_25751_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_25751_end_0 = const()[name = tensor("op_25751_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_25751_end_mask_0 = const()[name = tensor("op_25751_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_25751_cast_fp16 = slice_by_index(begin = var_25751_begin_0, end = var_25751_end_0, end_mask = var_25751_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_25751_cast_fp16")]; tensor var_25760_begin_0 = const()[name = tensor("op_25760_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25760_end_0 = const()[name = tensor("op_25760_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25760_end_mask_0 = const()[name = tensor("op_25760_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25760_cast_fp16 = slice_by_index(begin = var_25760_begin_0, end = var_25760_end_0, end_mask = var_25760_end_mask_0, x = var_25675_cast_fp16)[name = tensor("op_25760_cast_fp16")]; tensor var_25767_begin_0 = const()[name = tensor("op_25767_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25767_end_0 = const()[name = tensor("op_25767_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25767_end_mask_0 = const()[name = tensor("op_25767_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25767_cast_fp16 = slice_by_index(begin = var_25767_begin_0, end = var_25767_end_0, end_mask = var_25767_end_mask_0, x = var_25675_cast_fp16)[name = tensor("op_25767_cast_fp16")]; tensor var_25774_begin_0 = const()[name = tensor("op_25774_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25774_end_0 = const()[name = tensor("op_25774_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25774_end_mask_0 = const()[name = tensor("op_25774_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25774_cast_fp16 = slice_by_index(begin = var_25774_begin_0, end = var_25774_end_0, end_mask = var_25774_end_mask_0, x = var_25675_cast_fp16)[name = tensor("op_25774_cast_fp16")]; tensor var_25781_begin_0 = const()[name = tensor("op_25781_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25781_end_0 = const()[name = tensor("op_25781_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25781_end_mask_0 = const()[name = tensor("op_25781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25781_cast_fp16 = slice_by_index(begin = var_25781_begin_0, end = var_25781_end_0, end_mask = var_25781_end_mask_0, x = var_25675_cast_fp16)[name = tensor("op_25781_cast_fp16")]; tensor var_25788_begin_0 = const()[name = tensor("op_25788_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25788_end_0 = const()[name = tensor("op_25788_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25788_end_mask_0 = const()[name = tensor("op_25788_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25788_cast_fp16 = slice_by_index(begin = var_25788_begin_0, end = var_25788_end_0, end_mask = var_25788_end_mask_0, x = var_25679_cast_fp16)[name = tensor("op_25788_cast_fp16")]; tensor var_25795_begin_0 = const()[name = tensor("op_25795_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25795_end_0 = const()[name = tensor("op_25795_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25795_end_mask_0 = const()[name = tensor("op_25795_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25795_cast_fp16 = slice_by_index(begin = var_25795_begin_0, end = var_25795_end_0, end_mask = var_25795_end_mask_0, x = var_25679_cast_fp16)[name = tensor("op_25795_cast_fp16")]; tensor var_25802_begin_0 = const()[name = tensor("op_25802_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25802_end_0 = const()[name = tensor("op_25802_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25802_end_mask_0 = const()[name = tensor("op_25802_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25802_cast_fp16 = slice_by_index(begin = var_25802_begin_0, end = var_25802_end_0, end_mask = var_25802_end_mask_0, x = var_25679_cast_fp16)[name = tensor("op_25802_cast_fp16")]; tensor var_25809_begin_0 = const()[name = tensor("op_25809_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25809_end_0 = const()[name = tensor("op_25809_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25809_end_mask_0 = const()[name = tensor("op_25809_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25809_cast_fp16 = slice_by_index(begin = var_25809_begin_0, end = var_25809_end_0, end_mask = var_25809_end_mask_0, x = var_25679_cast_fp16)[name = tensor("op_25809_cast_fp16")]; tensor var_25816_begin_0 = const()[name = tensor("op_25816_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25816_end_0 = const()[name = tensor("op_25816_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25816_end_mask_0 = const()[name = tensor("op_25816_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25816_cast_fp16 = slice_by_index(begin = var_25816_begin_0, end = var_25816_end_0, end_mask = var_25816_end_mask_0, x = var_25683_cast_fp16)[name = tensor("op_25816_cast_fp16")]; tensor var_25823_begin_0 = const()[name = tensor("op_25823_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25823_end_0 = const()[name = tensor("op_25823_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25823_end_mask_0 = const()[name = tensor("op_25823_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25823_cast_fp16 = slice_by_index(begin = var_25823_begin_0, end = var_25823_end_0, end_mask = var_25823_end_mask_0, x = var_25683_cast_fp16)[name = tensor("op_25823_cast_fp16")]; tensor var_25830_begin_0 = const()[name = tensor("op_25830_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25830_end_0 = const()[name = tensor("op_25830_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25830_end_mask_0 = const()[name = tensor("op_25830_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25830_cast_fp16 = slice_by_index(begin = var_25830_begin_0, end = var_25830_end_0, end_mask = var_25830_end_mask_0, x = var_25683_cast_fp16)[name = tensor("op_25830_cast_fp16")]; tensor var_25837_begin_0 = const()[name = tensor("op_25837_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25837_end_0 = const()[name = tensor("op_25837_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25837_end_mask_0 = const()[name = tensor("op_25837_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25837_cast_fp16 = slice_by_index(begin = var_25837_begin_0, end = var_25837_end_0, end_mask = var_25837_end_mask_0, x = var_25683_cast_fp16)[name = tensor("op_25837_cast_fp16")]; tensor var_25844_begin_0 = const()[name = tensor("op_25844_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25844_end_0 = const()[name = tensor("op_25844_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25844_end_mask_0 = const()[name = tensor("op_25844_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25844_cast_fp16 = slice_by_index(begin = var_25844_begin_0, end = var_25844_end_0, end_mask = var_25844_end_mask_0, x = var_25687_cast_fp16)[name = tensor("op_25844_cast_fp16")]; tensor var_25851_begin_0 = const()[name = tensor("op_25851_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25851_end_0 = const()[name = tensor("op_25851_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25851_end_mask_0 = const()[name = tensor("op_25851_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25851_cast_fp16 = slice_by_index(begin = var_25851_begin_0, end = var_25851_end_0, end_mask = var_25851_end_mask_0, x = var_25687_cast_fp16)[name = tensor("op_25851_cast_fp16")]; tensor var_25858_begin_0 = const()[name = tensor("op_25858_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25858_end_0 = const()[name = tensor("op_25858_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25858_end_mask_0 = const()[name = tensor("op_25858_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25858_cast_fp16 = slice_by_index(begin = var_25858_begin_0, end = var_25858_end_0, end_mask = var_25858_end_mask_0, x = var_25687_cast_fp16)[name = tensor("op_25858_cast_fp16")]; tensor var_25865_begin_0 = const()[name = tensor("op_25865_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25865_end_0 = const()[name = tensor("op_25865_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25865_end_mask_0 = const()[name = tensor("op_25865_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25865_cast_fp16 = slice_by_index(begin = var_25865_begin_0, end = var_25865_end_0, end_mask = var_25865_end_mask_0, x = var_25687_cast_fp16)[name = tensor("op_25865_cast_fp16")]; tensor var_25872_begin_0 = const()[name = tensor("op_25872_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25872_end_0 = const()[name = tensor("op_25872_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25872_end_mask_0 = const()[name = tensor("op_25872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25872_cast_fp16 = slice_by_index(begin = var_25872_begin_0, end = var_25872_end_0, end_mask = var_25872_end_mask_0, x = var_25691_cast_fp16)[name = tensor("op_25872_cast_fp16")]; tensor var_25879_begin_0 = const()[name = tensor("op_25879_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25879_end_0 = const()[name = tensor("op_25879_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25879_end_mask_0 = const()[name = tensor("op_25879_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25879_cast_fp16 = slice_by_index(begin = var_25879_begin_0, end = var_25879_end_0, end_mask = var_25879_end_mask_0, x = var_25691_cast_fp16)[name = tensor("op_25879_cast_fp16")]; tensor var_25886_begin_0 = const()[name = tensor("op_25886_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25886_end_0 = const()[name = tensor("op_25886_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25886_end_mask_0 = const()[name = tensor("op_25886_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25886_cast_fp16 = slice_by_index(begin = var_25886_begin_0, end = var_25886_end_0, end_mask = var_25886_end_mask_0, x = var_25691_cast_fp16)[name = tensor("op_25886_cast_fp16")]; tensor var_25893_begin_0 = const()[name = tensor("op_25893_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25893_end_0 = const()[name = tensor("op_25893_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25893_end_mask_0 = const()[name = tensor("op_25893_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25893_cast_fp16 = slice_by_index(begin = var_25893_begin_0, end = var_25893_end_0, end_mask = var_25893_end_mask_0, x = var_25691_cast_fp16)[name = tensor("op_25893_cast_fp16")]; tensor var_25900_begin_0 = const()[name = tensor("op_25900_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25900_end_0 = const()[name = tensor("op_25900_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25900_end_mask_0 = const()[name = tensor("op_25900_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25900_cast_fp16 = slice_by_index(begin = var_25900_begin_0, end = var_25900_end_0, end_mask = var_25900_end_mask_0, x = var_25695_cast_fp16)[name = tensor("op_25900_cast_fp16")]; tensor var_25907_begin_0 = const()[name = tensor("op_25907_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25907_end_0 = const()[name = tensor("op_25907_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25907_end_mask_0 = const()[name = tensor("op_25907_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25907_cast_fp16 = slice_by_index(begin = var_25907_begin_0, end = var_25907_end_0, end_mask = var_25907_end_mask_0, x = var_25695_cast_fp16)[name = tensor("op_25907_cast_fp16")]; tensor var_25914_begin_0 = const()[name = tensor("op_25914_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25914_end_0 = const()[name = tensor("op_25914_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25914_end_mask_0 = const()[name = tensor("op_25914_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25914_cast_fp16 = slice_by_index(begin = var_25914_begin_0, end = var_25914_end_0, end_mask = var_25914_end_mask_0, x = var_25695_cast_fp16)[name = tensor("op_25914_cast_fp16")]; tensor var_25921_begin_0 = const()[name = tensor("op_25921_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25921_end_0 = const()[name = tensor("op_25921_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25921_end_mask_0 = const()[name = tensor("op_25921_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25921_cast_fp16 = slice_by_index(begin = var_25921_begin_0, end = var_25921_end_0, end_mask = var_25921_end_mask_0, x = var_25695_cast_fp16)[name = tensor("op_25921_cast_fp16")]; tensor var_25928_begin_0 = const()[name = tensor("op_25928_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25928_end_0 = const()[name = tensor("op_25928_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25928_end_mask_0 = const()[name = tensor("op_25928_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25928_cast_fp16 = slice_by_index(begin = var_25928_begin_0, end = var_25928_end_0, end_mask = var_25928_end_mask_0, x = var_25699_cast_fp16)[name = tensor("op_25928_cast_fp16")]; tensor var_25935_begin_0 = const()[name = tensor("op_25935_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25935_end_0 = const()[name = tensor("op_25935_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25935_end_mask_0 = const()[name = tensor("op_25935_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25935_cast_fp16 = slice_by_index(begin = var_25935_begin_0, end = var_25935_end_0, end_mask = var_25935_end_mask_0, x = var_25699_cast_fp16)[name = tensor("op_25935_cast_fp16")]; tensor var_25942_begin_0 = const()[name = tensor("op_25942_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25942_end_0 = const()[name = tensor("op_25942_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25942_end_mask_0 = const()[name = tensor("op_25942_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25942_cast_fp16 = slice_by_index(begin = var_25942_begin_0, end = var_25942_end_0, end_mask = var_25942_end_mask_0, x = var_25699_cast_fp16)[name = tensor("op_25942_cast_fp16")]; tensor var_25949_begin_0 = const()[name = tensor("op_25949_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25949_end_0 = const()[name = tensor("op_25949_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25949_end_mask_0 = const()[name = tensor("op_25949_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25949_cast_fp16 = slice_by_index(begin = var_25949_begin_0, end = var_25949_end_0, end_mask = var_25949_end_mask_0, x = var_25699_cast_fp16)[name = tensor("op_25949_cast_fp16")]; tensor var_25956_begin_0 = const()[name = tensor("op_25956_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25956_end_0 = const()[name = tensor("op_25956_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25956_end_mask_0 = const()[name = tensor("op_25956_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25956_cast_fp16 = slice_by_index(begin = var_25956_begin_0, end = var_25956_end_0, end_mask = var_25956_end_mask_0, x = var_25703_cast_fp16)[name = tensor("op_25956_cast_fp16")]; tensor var_25963_begin_0 = const()[name = tensor("op_25963_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25963_end_0 = const()[name = tensor("op_25963_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25963_end_mask_0 = const()[name = tensor("op_25963_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25963_cast_fp16 = slice_by_index(begin = var_25963_begin_0, end = var_25963_end_0, end_mask = var_25963_end_mask_0, x = var_25703_cast_fp16)[name = tensor("op_25963_cast_fp16")]; tensor var_25970_begin_0 = const()[name = tensor("op_25970_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25970_end_0 = const()[name = tensor("op_25970_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25970_end_mask_0 = const()[name = tensor("op_25970_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25970_cast_fp16 = slice_by_index(begin = var_25970_begin_0, end = var_25970_end_0, end_mask = var_25970_end_mask_0, x = var_25703_cast_fp16)[name = tensor("op_25970_cast_fp16")]; tensor var_25977_begin_0 = const()[name = tensor("op_25977_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_25977_end_0 = const()[name = tensor("op_25977_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_25977_end_mask_0 = const()[name = tensor("op_25977_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25977_cast_fp16 = slice_by_index(begin = var_25977_begin_0, end = var_25977_end_0, end_mask = var_25977_end_mask_0, x = var_25703_cast_fp16)[name = tensor("op_25977_cast_fp16")]; tensor var_25984_begin_0 = const()[name = tensor("op_25984_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_25984_end_0 = const()[name = tensor("op_25984_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_25984_end_mask_0 = const()[name = tensor("op_25984_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25984_cast_fp16 = slice_by_index(begin = var_25984_begin_0, end = var_25984_end_0, end_mask = var_25984_end_mask_0, x = var_25707_cast_fp16)[name = tensor("op_25984_cast_fp16")]; tensor var_25991_begin_0 = const()[name = tensor("op_25991_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_25991_end_0 = const()[name = tensor("op_25991_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_25991_end_mask_0 = const()[name = tensor("op_25991_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25991_cast_fp16 = slice_by_index(begin = var_25991_begin_0, end = var_25991_end_0, end_mask = var_25991_end_mask_0, x = var_25707_cast_fp16)[name = tensor("op_25991_cast_fp16")]; tensor var_25998_begin_0 = const()[name = tensor("op_25998_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_25998_end_0 = const()[name = tensor("op_25998_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_25998_end_mask_0 = const()[name = tensor("op_25998_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_25998_cast_fp16 = slice_by_index(begin = var_25998_begin_0, end = var_25998_end_0, end_mask = var_25998_end_mask_0, x = var_25707_cast_fp16)[name = tensor("op_25998_cast_fp16")]; tensor var_26005_begin_0 = const()[name = tensor("op_26005_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26005_end_0 = const()[name = tensor("op_26005_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26005_end_mask_0 = const()[name = tensor("op_26005_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26005_cast_fp16 = slice_by_index(begin = var_26005_begin_0, end = var_26005_end_0, end_mask = var_26005_end_mask_0, x = var_25707_cast_fp16)[name = tensor("op_26005_cast_fp16")]; tensor var_26012_begin_0 = const()[name = tensor("op_26012_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26012_end_0 = const()[name = tensor("op_26012_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26012_end_mask_0 = const()[name = tensor("op_26012_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26012_cast_fp16 = slice_by_index(begin = var_26012_begin_0, end = var_26012_end_0, end_mask = var_26012_end_mask_0, x = var_25711_cast_fp16)[name = tensor("op_26012_cast_fp16")]; tensor var_26019_begin_0 = const()[name = tensor("op_26019_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26019_end_0 = const()[name = tensor("op_26019_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26019_end_mask_0 = const()[name = tensor("op_26019_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26019_cast_fp16 = slice_by_index(begin = var_26019_begin_0, end = var_26019_end_0, end_mask = var_26019_end_mask_0, x = var_25711_cast_fp16)[name = tensor("op_26019_cast_fp16")]; tensor var_26026_begin_0 = const()[name = tensor("op_26026_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26026_end_0 = const()[name = tensor("op_26026_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26026_end_mask_0 = const()[name = tensor("op_26026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26026_cast_fp16 = slice_by_index(begin = var_26026_begin_0, end = var_26026_end_0, end_mask = var_26026_end_mask_0, x = var_25711_cast_fp16)[name = tensor("op_26026_cast_fp16")]; tensor var_26033_begin_0 = const()[name = tensor("op_26033_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26033_end_0 = const()[name = tensor("op_26033_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26033_end_mask_0 = const()[name = tensor("op_26033_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26033_cast_fp16 = slice_by_index(begin = var_26033_begin_0, end = var_26033_end_0, end_mask = var_26033_end_mask_0, x = var_25711_cast_fp16)[name = tensor("op_26033_cast_fp16")]; tensor var_26040_begin_0 = const()[name = tensor("op_26040_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26040_end_0 = const()[name = tensor("op_26040_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26040_end_mask_0 = const()[name = tensor("op_26040_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26040_cast_fp16 = slice_by_index(begin = var_26040_begin_0, end = var_26040_end_0, end_mask = var_26040_end_mask_0, x = var_25715_cast_fp16)[name = tensor("op_26040_cast_fp16")]; tensor var_26047_begin_0 = const()[name = tensor("op_26047_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26047_end_0 = const()[name = tensor("op_26047_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26047_end_mask_0 = const()[name = tensor("op_26047_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26047_cast_fp16 = slice_by_index(begin = var_26047_begin_0, end = var_26047_end_0, end_mask = var_26047_end_mask_0, x = var_25715_cast_fp16)[name = tensor("op_26047_cast_fp16")]; tensor var_26054_begin_0 = const()[name = tensor("op_26054_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26054_end_0 = const()[name = tensor("op_26054_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26054_end_mask_0 = const()[name = tensor("op_26054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26054_cast_fp16 = slice_by_index(begin = var_26054_begin_0, end = var_26054_end_0, end_mask = var_26054_end_mask_0, x = var_25715_cast_fp16)[name = tensor("op_26054_cast_fp16")]; tensor var_26061_begin_0 = const()[name = tensor("op_26061_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26061_end_0 = const()[name = tensor("op_26061_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26061_end_mask_0 = const()[name = tensor("op_26061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26061_cast_fp16 = slice_by_index(begin = var_26061_begin_0, end = var_26061_end_0, end_mask = var_26061_end_mask_0, x = var_25715_cast_fp16)[name = tensor("op_26061_cast_fp16")]; tensor var_26068_begin_0 = const()[name = tensor("op_26068_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26068_end_0 = const()[name = tensor("op_26068_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26068_end_mask_0 = const()[name = tensor("op_26068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26068_cast_fp16 = slice_by_index(begin = var_26068_begin_0, end = var_26068_end_0, end_mask = var_26068_end_mask_0, x = var_25719_cast_fp16)[name = tensor("op_26068_cast_fp16")]; tensor var_26075_begin_0 = const()[name = tensor("op_26075_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26075_end_0 = const()[name = tensor("op_26075_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26075_end_mask_0 = const()[name = tensor("op_26075_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26075_cast_fp16 = slice_by_index(begin = var_26075_begin_0, end = var_26075_end_0, end_mask = var_26075_end_mask_0, x = var_25719_cast_fp16)[name = tensor("op_26075_cast_fp16")]; tensor var_26082_begin_0 = const()[name = tensor("op_26082_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26082_end_0 = const()[name = tensor("op_26082_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26082_end_mask_0 = const()[name = tensor("op_26082_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26082_cast_fp16 = slice_by_index(begin = var_26082_begin_0, end = var_26082_end_0, end_mask = var_26082_end_mask_0, x = var_25719_cast_fp16)[name = tensor("op_26082_cast_fp16")]; tensor var_26089_begin_0 = const()[name = tensor("op_26089_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26089_end_0 = const()[name = tensor("op_26089_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26089_end_mask_0 = const()[name = tensor("op_26089_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26089_cast_fp16 = slice_by_index(begin = var_26089_begin_0, end = var_26089_end_0, end_mask = var_26089_end_mask_0, x = var_25719_cast_fp16)[name = tensor("op_26089_cast_fp16")]; tensor var_26096_begin_0 = const()[name = tensor("op_26096_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26096_end_0 = const()[name = tensor("op_26096_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26096_end_mask_0 = const()[name = tensor("op_26096_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26096_cast_fp16 = slice_by_index(begin = var_26096_begin_0, end = var_26096_end_0, end_mask = var_26096_end_mask_0, x = var_25723_cast_fp16)[name = tensor("op_26096_cast_fp16")]; tensor var_26103_begin_0 = const()[name = tensor("op_26103_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26103_end_0 = const()[name = tensor("op_26103_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26103_end_mask_0 = const()[name = tensor("op_26103_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26103_cast_fp16 = slice_by_index(begin = var_26103_begin_0, end = var_26103_end_0, end_mask = var_26103_end_mask_0, x = var_25723_cast_fp16)[name = tensor("op_26103_cast_fp16")]; tensor var_26110_begin_0 = const()[name = tensor("op_26110_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26110_end_0 = const()[name = tensor("op_26110_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26110_end_mask_0 = const()[name = tensor("op_26110_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26110_cast_fp16 = slice_by_index(begin = var_26110_begin_0, end = var_26110_end_0, end_mask = var_26110_end_mask_0, x = var_25723_cast_fp16)[name = tensor("op_26110_cast_fp16")]; tensor var_26117_begin_0 = const()[name = tensor("op_26117_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26117_end_0 = const()[name = tensor("op_26117_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26117_end_mask_0 = const()[name = tensor("op_26117_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26117_cast_fp16 = slice_by_index(begin = var_26117_begin_0, end = var_26117_end_0, end_mask = var_26117_end_mask_0, x = var_25723_cast_fp16)[name = tensor("op_26117_cast_fp16")]; tensor var_26124_begin_0 = const()[name = tensor("op_26124_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26124_end_0 = const()[name = tensor("op_26124_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26124_end_mask_0 = const()[name = tensor("op_26124_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26124_cast_fp16 = slice_by_index(begin = var_26124_begin_0, end = var_26124_end_0, end_mask = var_26124_end_mask_0, x = var_25727_cast_fp16)[name = tensor("op_26124_cast_fp16")]; tensor var_26131_begin_0 = const()[name = tensor("op_26131_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26131_end_0 = const()[name = tensor("op_26131_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26131_end_mask_0 = const()[name = tensor("op_26131_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26131_cast_fp16 = slice_by_index(begin = var_26131_begin_0, end = var_26131_end_0, end_mask = var_26131_end_mask_0, x = var_25727_cast_fp16)[name = tensor("op_26131_cast_fp16")]; tensor var_26138_begin_0 = const()[name = tensor("op_26138_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26138_end_0 = const()[name = tensor("op_26138_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26138_end_mask_0 = const()[name = tensor("op_26138_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26138_cast_fp16 = slice_by_index(begin = var_26138_begin_0, end = var_26138_end_0, end_mask = var_26138_end_mask_0, x = var_25727_cast_fp16)[name = tensor("op_26138_cast_fp16")]; tensor var_26145_begin_0 = const()[name = tensor("op_26145_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26145_end_0 = const()[name = tensor("op_26145_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26145_end_mask_0 = const()[name = tensor("op_26145_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26145_cast_fp16 = slice_by_index(begin = var_26145_begin_0, end = var_26145_end_0, end_mask = var_26145_end_mask_0, x = var_25727_cast_fp16)[name = tensor("op_26145_cast_fp16")]; tensor var_26152_begin_0 = const()[name = tensor("op_26152_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26152_end_0 = const()[name = tensor("op_26152_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26152_end_mask_0 = const()[name = tensor("op_26152_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26152_cast_fp16 = slice_by_index(begin = var_26152_begin_0, end = var_26152_end_0, end_mask = var_26152_end_mask_0, x = var_25731_cast_fp16)[name = tensor("op_26152_cast_fp16")]; tensor var_26159_begin_0 = const()[name = tensor("op_26159_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26159_end_0 = const()[name = tensor("op_26159_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26159_end_mask_0 = const()[name = tensor("op_26159_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26159_cast_fp16 = slice_by_index(begin = var_26159_begin_0, end = var_26159_end_0, end_mask = var_26159_end_mask_0, x = var_25731_cast_fp16)[name = tensor("op_26159_cast_fp16")]; tensor var_26166_begin_0 = const()[name = tensor("op_26166_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26166_end_0 = const()[name = tensor("op_26166_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26166_end_mask_0 = const()[name = tensor("op_26166_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26166_cast_fp16 = slice_by_index(begin = var_26166_begin_0, end = var_26166_end_0, end_mask = var_26166_end_mask_0, x = var_25731_cast_fp16)[name = tensor("op_26166_cast_fp16")]; tensor var_26173_begin_0 = const()[name = tensor("op_26173_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26173_end_0 = const()[name = tensor("op_26173_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26173_end_mask_0 = const()[name = tensor("op_26173_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26173_cast_fp16 = slice_by_index(begin = var_26173_begin_0, end = var_26173_end_0, end_mask = var_26173_end_mask_0, x = var_25731_cast_fp16)[name = tensor("op_26173_cast_fp16")]; tensor var_26180_begin_0 = const()[name = tensor("op_26180_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26180_end_0 = const()[name = tensor("op_26180_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26180_end_mask_0 = const()[name = tensor("op_26180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26180_cast_fp16 = slice_by_index(begin = var_26180_begin_0, end = var_26180_end_0, end_mask = var_26180_end_mask_0, x = var_25735_cast_fp16)[name = tensor("op_26180_cast_fp16")]; tensor var_26187_begin_0 = const()[name = tensor("op_26187_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26187_end_0 = const()[name = tensor("op_26187_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26187_end_mask_0 = const()[name = tensor("op_26187_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26187_cast_fp16 = slice_by_index(begin = var_26187_begin_0, end = var_26187_end_0, end_mask = var_26187_end_mask_0, x = var_25735_cast_fp16)[name = tensor("op_26187_cast_fp16")]; tensor var_26194_begin_0 = const()[name = tensor("op_26194_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26194_end_0 = const()[name = tensor("op_26194_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26194_end_mask_0 = const()[name = tensor("op_26194_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26194_cast_fp16 = slice_by_index(begin = var_26194_begin_0, end = var_26194_end_0, end_mask = var_26194_end_mask_0, x = var_25735_cast_fp16)[name = tensor("op_26194_cast_fp16")]; tensor var_26201_begin_0 = const()[name = tensor("op_26201_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26201_end_0 = const()[name = tensor("op_26201_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26201_end_mask_0 = const()[name = tensor("op_26201_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26201_cast_fp16 = slice_by_index(begin = var_26201_begin_0, end = var_26201_end_0, end_mask = var_26201_end_mask_0, x = var_25735_cast_fp16)[name = tensor("op_26201_cast_fp16")]; tensor var_26208_begin_0 = const()[name = tensor("op_26208_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26208_end_0 = const()[name = tensor("op_26208_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26208_end_mask_0 = const()[name = tensor("op_26208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26208_cast_fp16 = slice_by_index(begin = var_26208_begin_0, end = var_26208_end_0, end_mask = var_26208_end_mask_0, x = var_25739_cast_fp16)[name = tensor("op_26208_cast_fp16")]; tensor var_26215_begin_0 = const()[name = tensor("op_26215_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26215_end_0 = const()[name = tensor("op_26215_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26215_end_mask_0 = const()[name = tensor("op_26215_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26215_cast_fp16 = slice_by_index(begin = var_26215_begin_0, end = var_26215_end_0, end_mask = var_26215_end_mask_0, x = var_25739_cast_fp16)[name = tensor("op_26215_cast_fp16")]; tensor var_26222_begin_0 = const()[name = tensor("op_26222_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26222_end_0 = const()[name = tensor("op_26222_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26222_end_mask_0 = const()[name = tensor("op_26222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26222_cast_fp16 = slice_by_index(begin = var_26222_begin_0, end = var_26222_end_0, end_mask = var_26222_end_mask_0, x = var_25739_cast_fp16)[name = tensor("op_26222_cast_fp16")]; tensor var_26229_begin_0 = const()[name = tensor("op_26229_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26229_end_0 = const()[name = tensor("op_26229_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26229_end_mask_0 = const()[name = tensor("op_26229_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26229_cast_fp16 = slice_by_index(begin = var_26229_begin_0, end = var_26229_end_0, end_mask = var_26229_end_mask_0, x = var_25739_cast_fp16)[name = tensor("op_26229_cast_fp16")]; tensor var_26236_begin_0 = const()[name = tensor("op_26236_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26236_end_0 = const()[name = tensor("op_26236_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26236_end_mask_0 = const()[name = tensor("op_26236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26236_cast_fp16 = slice_by_index(begin = var_26236_begin_0, end = var_26236_end_0, end_mask = var_26236_end_mask_0, x = var_25743_cast_fp16)[name = tensor("op_26236_cast_fp16")]; tensor var_26243_begin_0 = const()[name = tensor("op_26243_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26243_end_0 = const()[name = tensor("op_26243_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26243_end_mask_0 = const()[name = tensor("op_26243_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26243_cast_fp16 = slice_by_index(begin = var_26243_begin_0, end = var_26243_end_0, end_mask = var_26243_end_mask_0, x = var_25743_cast_fp16)[name = tensor("op_26243_cast_fp16")]; tensor var_26250_begin_0 = const()[name = tensor("op_26250_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26250_end_0 = const()[name = tensor("op_26250_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26250_end_mask_0 = const()[name = tensor("op_26250_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26250_cast_fp16 = slice_by_index(begin = var_26250_begin_0, end = var_26250_end_0, end_mask = var_26250_end_mask_0, x = var_25743_cast_fp16)[name = tensor("op_26250_cast_fp16")]; tensor var_26257_begin_0 = const()[name = tensor("op_26257_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26257_end_0 = const()[name = tensor("op_26257_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26257_end_mask_0 = const()[name = tensor("op_26257_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26257_cast_fp16 = slice_by_index(begin = var_26257_begin_0, end = var_26257_end_0, end_mask = var_26257_end_mask_0, x = var_25743_cast_fp16)[name = tensor("op_26257_cast_fp16")]; tensor var_26264_begin_0 = const()[name = tensor("op_26264_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26264_end_0 = const()[name = tensor("op_26264_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26264_end_mask_0 = const()[name = tensor("op_26264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26264_cast_fp16 = slice_by_index(begin = var_26264_begin_0, end = var_26264_end_0, end_mask = var_26264_end_mask_0, x = var_25747_cast_fp16)[name = tensor("op_26264_cast_fp16")]; tensor var_26271_begin_0 = const()[name = tensor("op_26271_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26271_end_0 = const()[name = tensor("op_26271_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26271_end_mask_0 = const()[name = tensor("op_26271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26271_cast_fp16 = slice_by_index(begin = var_26271_begin_0, end = var_26271_end_0, end_mask = var_26271_end_mask_0, x = var_25747_cast_fp16)[name = tensor("op_26271_cast_fp16")]; tensor var_26278_begin_0 = const()[name = tensor("op_26278_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26278_end_0 = const()[name = tensor("op_26278_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26278_end_mask_0 = const()[name = tensor("op_26278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26278_cast_fp16 = slice_by_index(begin = var_26278_begin_0, end = var_26278_end_0, end_mask = var_26278_end_mask_0, x = var_25747_cast_fp16)[name = tensor("op_26278_cast_fp16")]; tensor var_26285_begin_0 = const()[name = tensor("op_26285_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26285_end_0 = const()[name = tensor("op_26285_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26285_end_mask_0 = const()[name = tensor("op_26285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26285_cast_fp16 = slice_by_index(begin = var_26285_begin_0, end = var_26285_end_0, end_mask = var_26285_end_mask_0, x = var_25747_cast_fp16)[name = tensor("op_26285_cast_fp16")]; tensor var_26292_begin_0 = const()[name = tensor("op_26292_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26292_end_0 = const()[name = tensor("op_26292_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_26292_end_mask_0 = const()[name = tensor("op_26292_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26292_cast_fp16 = slice_by_index(begin = var_26292_begin_0, end = var_26292_end_0, end_mask = var_26292_end_mask_0, x = var_25751_cast_fp16)[name = tensor("op_26292_cast_fp16")]; tensor var_26299_begin_0 = const()[name = tensor("op_26299_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_26299_end_0 = const()[name = tensor("op_26299_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_26299_end_mask_0 = const()[name = tensor("op_26299_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26299_cast_fp16 = slice_by_index(begin = var_26299_begin_0, end = var_26299_end_0, end_mask = var_26299_end_mask_0, x = var_25751_cast_fp16)[name = tensor("op_26299_cast_fp16")]; tensor var_26306_begin_0 = const()[name = tensor("op_26306_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_26306_end_0 = const()[name = tensor("op_26306_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_26306_end_mask_0 = const()[name = tensor("op_26306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26306_cast_fp16 = slice_by_index(begin = var_26306_begin_0, end = var_26306_end_0, end_mask = var_26306_end_mask_0, x = var_25751_cast_fp16)[name = tensor("op_26306_cast_fp16")]; tensor var_26313_begin_0 = const()[name = tensor("op_26313_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_26313_end_0 = const()[name = tensor("op_26313_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26313_end_mask_0 = const()[name = tensor("op_26313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26313_cast_fp16 = slice_by_index(begin = var_26313_begin_0, end = var_26313_end_0, end_mask = var_26313_end_mask_0, x = var_25751_cast_fp16)[name = tensor("op_26313_cast_fp16")]; tensor k_33_perm_0 = const()[name = tensor("k_33_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_26318_begin_0 = const()[name = tensor("op_26318_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26318_end_0 = const()[name = tensor("op_26318_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_26318_end_mask_0 = const()[name = tensor("op_26318_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_33_cast_fp16 = transpose(perm = k_33_perm_0, x = key_33_cast_fp16)[name = tensor("transpose_15")]; tensor var_26318_cast_fp16 = slice_by_index(begin = var_26318_begin_0, end = var_26318_end_0, end_mask = var_26318_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26318_cast_fp16")]; tensor var_26322_begin_0 = const()[name = tensor("op_26322_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_26322_end_0 = const()[name = tensor("op_26322_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_26322_end_mask_0 = const()[name = tensor("op_26322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26322_cast_fp16 = slice_by_index(begin = var_26322_begin_0, end = var_26322_end_0, end_mask = var_26322_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26322_cast_fp16")]; tensor var_26326_begin_0 = const()[name = tensor("op_26326_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_26326_end_0 = const()[name = tensor("op_26326_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_26326_end_mask_0 = const()[name = tensor("op_26326_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26326_cast_fp16 = slice_by_index(begin = var_26326_begin_0, end = var_26326_end_0, end_mask = var_26326_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26326_cast_fp16")]; tensor var_26330_begin_0 = const()[name = tensor("op_26330_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_26330_end_0 = const()[name = tensor("op_26330_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_26330_end_mask_0 = const()[name = tensor("op_26330_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26330_cast_fp16 = slice_by_index(begin = var_26330_begin_0, end = var_26330_end_0, end_mask = var_26330_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26330_cast_fp16")]; tensor var_26334_begin_0 = const()[name = tensor("op_26334_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_26334_end_0 = const()[name = tensor("op_26334_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_26334_end_mask_0 = const()[name = tensor("op_26334_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26334_cast_fp16 = slice_by_index(begin = var_26334_begin_0, end = var_26334_end_0, end_mask = var_26334_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26334_cast_fp16")]; tensor var_26338_begin_0 = const()[name = tensor("op_26338_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_26338_end_0 = const()[name = tensor("op_26338_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_26338_end_mask_0 = const()[name = tensor("op_26338_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26338_cast_fp16 = slice_by_index(begin = var_26338_begin_0, end = var_26338_end_0, end_mask = var_26338_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26338_cast_fp16")]; tensor var_26342_begin_0 = const()[name = tensor("op_26342_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_26342_end_0 = const()[name = tensor("op_26342_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_26342_end_mask_0 = const()[name = tensor("op_26342_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26342_cast_fp16 = slice_by_index(begin = var_26342_begin_0, end = var_26342_end_0, end_mask = var_26342_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26342_cast_fp16")]; tensor var_26346_begin_0 = const()[name = tensor("op_26346_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_26346_end_0 = const()[name = tensor("op_26346_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_26346_end_mask_0 = const()[name = tensor("op_26346_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26346_cast_fp16 = slice_by_index(begin = var_26346_begin_0, end = var_26346_end_0, end_mask = var_26346_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26346_cast_fp16")]; tensor var_26350_begin_0 = const()[name = tensor("op_26350_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_26350_end_0 = const()[name = tensor("op_26350_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_26350_end_mask_0 = const()[name = tensor("op_26350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26350_cast_fp16 = slice_by_index(begin = var_26350_begin_0, end = var_26350_end_0, end_mask = var_26350_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26350_cast_fp16")]; tensor var_26354_begin_0 = const()[name = tensor("op_26354_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_26354_end_0 = const()[name = tensor("op_26354_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_26354_end_mask_0 = const()[name = tensor("op_26354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26354_cast_fp16 = slice_by_index(begin = var_26354_begin_0, end = var_26354_end_0, end_mask = var_26354_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26354_cast_fp16")]; tensor var_26358_begin_0 = const()[name = tensor("op_26358_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_26358_end_0 = const()[name = tensor("op_26358_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_26358_end_mask_0 = const()[name = tensor("op_26358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26358_cast_fp16 = slice_by_index(begin = var_26358_begin_0, end = var_26358_end_0, end_mask = var_26358_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26358_cast_fp16")]; tensor var_26362_begin_0 = const()[name = tensor("op_26362_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_26362_end_0 = const()[name = tensor("op_26362_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_26362_end_mask_0 = const()[name = tensor("op_26362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26362_cast_fp16 = slice_by_index(begin = var_26362_begin_0, end = var_26362_end_0, end_mask = var_26362_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26362_cast_fp16")]; tensor var_26366_begin_0 = const()[name = tensor("op_26366_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_26366_end_0 = const()[name = tensor("op_26366_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_26366_end_mask_0 = const()[name = tensor("op_26366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26366_cast_fp16 = slice_by_index(begin = var_26366_begin_0, end = var_26366_end_0, end_mask = var_26366_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26366_cast_fp16")]; tensor var_26370_begin_0 = const()[name = tensor("op_26370_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_26370_end_0 = const()[name = tensor("op_26370_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_26370_end_mask_0 = const()[name = tensor("op_26370_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26370_cast_fp16 = slice_by_index(begin = var_26370_begin_0, end = var_26370_end_0, end_mask = var_26370_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26370_cast_fp16")]; tensor var_26374_begin_0 = const()[name = tensor("op_26374_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_26374_end_0 = const()[name = tensor("op_26374_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_26374_end_mask_0 = const()[name = tensor("op_26374_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26374_cast_fp16 = slice_by_index(begin = var_26374_begin_0, end = var_26374_end_0, end_mask = var_26374_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26374_cast_fp16")]; tensor var_26378_begin_0 = const()[name = tensor("op_26378_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_26378_end_0 = const()[name = tensor("op_26378_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_26378_end_mask_0 = const()[name = tensor("op_26378_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26378_cast_fp16 = slice_by_index(begin = var_26378_begin_0, end = var_26378_end_0, end_mask = var_26378_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26378_cast_fp16")]; tensor var_26382_begin_0 = const()[name = tensor("op_26382_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_26382_end_0 = const()[name = tensor("op_26382_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_26382_end_mask_0 = const()[name = tensor("op_26382_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26382_cast_fp16 = slice_by_index(begin = var_26382_begin_0, end = var_26382_end_0, end_mask = var_26382_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26382_cast_fp16")]; tensor var_26386_begin_0 = const()[name = tensor("op_26386_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_26386_end_0 = const()[name = tensor("op_26386_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_26386_end_mask_0 = const()[name = tensor("op_26386_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26386_cast_fp16 = slice_by_index(begin = var_26386_begin_0, end = var_26386_end_0, end_mask = var_26386_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26386_cast_fp16")]; tensor var_26390_begin_0 = const()[name = tensor("op_26390_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_26390_end_0 = const()[name = tensor("op_26390_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_26390_end_mask_0 = const()[name = tensor("op_26390_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26390_cast_fp16 = slice_by_index(begin = var_26390_begin_0, end = var_26390_end_0, end_mask = var_26390_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26390_cast_fp16")]; tensor var_26394_begin_0 = const()[name = tensor("op_26394_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_26394_end_0 = const()[name = tensor("op_26394_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_26394_end_mask_0 = const()[name = tensor("op_26394_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_26394_cast_fp16 = slice_by_index(begin = var_26394_begin_0, end = var_26394_end_0, end_mask = var_26394_end_mask_0, x = k_33_cast_fp16)[name = tensor("op_26394_cast_fp16")]; tensor var_26396_begin_0 = const()[name = tensor("op_26396_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_26396_end_0 = const()[name = tensor("op_26396_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_26396_end_mask_0 = const()[name = tensor("op_26396_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26396_cast_fp16 = slice_by_index(begin = var_26396_begin_0, end = var_26396_end_0, end_mask = var_26396_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26396_cast_fp16")]; tensor var_26400_begin_0 = const()[name = tensor("op_26400_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_26400_end_0 = const()[name = tensor("op_26400_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_26400_end_mask_0 = const()[name = tensor("op_26400_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26400_cast_fp16 = slice_by_index(begin = var_26400_begin_0, end = var_26400_end_0, end_mask = var_26400_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26400_cast_fp16")]; tensor var_26404_begin_0 = const()[name = tensor("op_26404_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_26404_end_0 = const()[name = tensor("op_26404_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_26404_end_mask_0 = const()[name = tensor("op_26404_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26404_cast_fp16 = slice_by_index(begin = var_26404_begin_0, end = var_26404_end_0, end_mask = var_26404_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26404_cast_fp16")]; tensor var_26408_begin_0 = const()[name = tensor("op_26408_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_26408_end_0 = const()[name = tensor("op_26408_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_26408_end_mask_0 = const()[name = tensor("op_26408_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26408_cast_fp16 = slice_by_index(begin = var_26408_begin_0, end = var_26408_end_0, end_mask = var_26408_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26408_cast_fp16")]; tensor var_26412_begin_0 = const()[name = tensor("op_26412_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_26412_end_0 = const()[name = tensor("op_26412_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_26412_end_mask_0 = const()[name = tensor("op_26412_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26412_cast_fp16 = slice_by_index(begin = var_26412_begin_0, end = var_26412_end_0, end_mask = var_26412_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26412_cast_fp16")]; tensor var_26416_begin_0 = const()[name = tensor("op_26416_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_26416_end_0 = const()[name = tensor("op_26416_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_26416_end_mask_0 = const()[name = tensor("op_26416_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26416_cast_fp16 = slice_by_index(begin = var_26416_begin_0, end = var_26416_end_0, end_mask = var_26416_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26416_cast_fp16")]; tensor var_26420_begin_0 = const()[name = tensor("op_26420_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_26420_end_0 = const()[name = tensor("op_26420_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_26420_end_mask_0 = const()[name = tensor("op_26420_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26420_cast_fp16 = slice_by_index(begin = var_26420_begin_0, end = var_26420_end_0, end_mask = var_26420_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26420_cast_fp16")]; tensor var_26424_begin_0 = const()[name = tensor("op_26424_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_26424_end_0 = const()[name = tensor("op_26424_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_26424_end_mask_0 = const()[name = tensor("op_26424_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26424_cast_fp16 = slice_by_index(begin = var_26424_begin_0, end = var_26424_end_0, end_mask = var_26424_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26424_cast_fp16")]; tensor var_26428_begin_0 = const()[name = tensor("op_26428_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_26428_end_0 = const()[name = tensor("op_26428_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_26428_end_mask_0 = const()[name = tensor("op_26428_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26428_cast_fp16 = slice_by_index(begin = var_26428_begin_0, end = var_26428_end_0, end_mask = var_26428_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26428_cast_fp16")]; tensor var_26432_begin_0 = const()[name = tensor("op_26432_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_26432_end_0 = const()[name = tensor("op_26432_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_26432_end_mask_0 = const()[name = tensor("op_26432_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26432_cast_fp16 = slice_by_index(begin = var_26432_begin_0, end = var_26432_end_0, end_mask = var_26432_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26432_cast_fp16")]; tensor var_26436_begin_0 = const()[name = tensor("op_26436_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_26436_end_0 = const()[name = tensor("op_26436_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_26436_end_mask_0 = const()[name = tensor("op_26436_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26436_cast_fp16 = slice_by_index(begin = var_26436_begin_0, end = var_26436_end_0, end_mask = var_26436_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26436_cast_fp16")]; tensor var_26440_begin_0 = const()[name = tensor("op_26440_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_26440_end_0 = const()[name = tensor("op_26440_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_26440_end_mask_0 = const()[name = tensor("op_26440_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26440_cast_fp16 = slice_by_index(begin = var_26440_begin_0, end = var_26440_end_0, end_mask = var_26440_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26440_cast_fp16")]; tensor var_26444_begin_0 = const()[name = tensor("op_26444_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_26444_end_0 = const()[name = tensor("op_26444_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_26444_end_mask_0 = const()[name = tensor("op_26444_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26444_cast_fp16 = slice_by_index(begin = var_26444_begin_0, end = var_26444_end_0, end_mask = var_26444_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26444_cast_fp16")]; tensor var_26448_begin_0 = const()[name = tensor("op_26448_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_26448_end_0 = const()[name = tensor("op_26448_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_26448_end_mask_0 = const()[name = tensor("op_26448_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26448_cast_fp16 = slice_by_index(begin = var_26448_begin_0, end = var_26448_end_0, end_mask = var_26448_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26448_cast_fp16")]; tensor var_26452_begin_0 = const()[name = tensor("op_26452_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_26452_end_0 = const()[name = tensor("op_26452_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_26452_end_mask_0 = const()[name = tensor("op_26452_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26452_cast_fp16 = slice_by_index(begin = var_26452_begin_0, end = var_26452_end_0, end_mask = var_26452_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26452_cast_fp16")]; tensor var_26456_begin_0 = const()[name = tensor("op_26456_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_26456_end_0 = const()[name = tensor("op_26456_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_26456_end_mask_0 = const()[name = tensor("op_26456_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26456_cast_fp16 = slice_by_index(begin = var_26456_begin_0, end = var_26456_end_0, end_mask = var_26456_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26456_cast_fp16")]; tensor var_26460_begin_0 = const()[name = tensor("op_26460_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_26460_end_0 = const()[name = tensor("op_26460_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_26460_end_mask_0 = const()[name = tensor("op_26460_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26460_cast_fp16 = slice_by_index(begin = var_26460_begin_0, end = var_26460_end_0, end_mask = var_26460_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26460_cast_fp16")]; tensor var_26464_begin_0 = const()[name = tensor("op_26464_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_26464_end_0 = const()[name = tensor("op_26464_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_26464_end_mask_0 = const()[name = tensor("op_26464_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26464_cast_fp16 = slice_by_index(begin = var_26464_begin_0, end = var_26464_end_0, end_mask = var_26464_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26464_cast_fp16")]; tensor var_26468_begin_0 = const()[name = tensor("op_26468_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_26468_end_0 = const()[name = tensor("op_26468_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_26468_end_mask_0 = const()[name = tensor("op_26468_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26468_cast_fp16 = slice_by_index(begin = var_26468_begin_0, end = var_26468_end_0, end_mask = var_26468_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26468_cast_fp16")]; tensor var_26472_begin_0 = const()[name = tensor("op_26472_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_26472_end_0 = const()[name = tensor("op_26472_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_26472_end_mask_0 = const()[name = tensor("op_26472_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_26472_cast_fp16 = slice_by_index(begin = var_26472_begin_0, end = var_26472_end_0, end_mask = var_26472_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_26472_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2561_equation_0, values = (var_26318_cast_fp16, var_25760_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2563_equation_0, values = (var_26318_cast_fp16, var_25767_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2565_equation_0, values = (var_26318_cast_fp16, var_25774_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2567_equation_0, values = (var_26318_cast_fp16, var_25781_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2569_equation_0, values = (var_26322_cast_fp16, var_25788_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2571_equation_0, values = (var_26322_cast_fp16, var_25795_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2573_equation_0, values = (var_26322_cast_fp16, var_25802_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2575_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2575_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2575_equation_0, values = (var_26322_cast_fp16, var_25809_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2575_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2577_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2577_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2577_equation_0, values = (var_26326_cast_fp16, var_25816_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2577_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2579_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2579_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2579_equation_0, values = (var_26326_cast_fp16, var_25823_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2579_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2581_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2581_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2581_equation_0, values = (var_26326_cast_fp16, var_25830_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2581_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2583_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2583_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2583_equation_0, values = (var_26326_cast_fp16, var_25837_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2583_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2585_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2585_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2585_equation_0, values = (var_26330_cast_fp16, var_25844_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2585_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2587_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2587_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2587_equation_0, values = (var_26330_cast_fp16, var_25851_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2587_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2589_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2589_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2589_equation_0, values = (var_26330_cast_fp16, var_25858_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2589_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2591_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2591_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2591_equation_0, values = (var_26330_cast_fp16, var_25865_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2591_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2593_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2593_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2593_equation_0, values = (var_26334_cast_fp16, var_25872_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2593_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2595_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2595_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2595_equation_0, values = (var_26334_cast_fp16, var_25879_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2595_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2597_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2597_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2597_equation_0, values = (var_26334_cast_fp16, var_25886_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2597_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2599_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2599_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2599_equation_0, values = (var_26334_cast_fp16, var_25893_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2599_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2601_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2601_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2601_equation_0, values = (var_26338_cast_fp16, var_25900_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2601_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2603_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2603_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2603_equation_0, values = (var_26338_cast_fp16, var_25907_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2603_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2605_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2605_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2605_equation_0, values = (var_26338_cast_fp16, var_25914_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2605_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2607_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2607_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2607_equation_0, values = (var_26338_cast_fp16, var_25921_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2607_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2609_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2609_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2609_equation_0, values = (var_26342_cast_fp16, var_25928_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2609_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2611_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2611_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2611_equation_0, values = (var_26342_cast_fp16, var_25935_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2611_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2613_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2613_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2613_equation_0, values = (var_26342_cast_fp16, var_25942_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2613_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2615_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2615_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2615_equation_0, values = (var_26342_cast_fp16, var_25949_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2615_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2617_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2617_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2617_equation_0, values = (var_26346_cast_fp16, var_25956_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2617_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2619_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2619_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2619_equation_0, values = (var_26346_cast_fp16, var_25963_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2619_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2621_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2621_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2621_equation_0, values = (var_26346_cast_fp16, var_25970_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2621_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2623_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2623_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2623_equation_0, values = (var_26346_cast_fp16, var_25977_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2623_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2625_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2625_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2625_equation_0, values = (var_26350_cast_fp16, var_25984_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2625_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2627_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2627_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2627_equation_0, values = (var_26350_cast_fp16, var_25991_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2627_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2629_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2629_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2629_equation_0, values = (var_26350_cast_fp16, var_25998_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2629_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2631_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2631_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2631_equation_0, values = (var_26350_cast_fp16, var_26005_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2631_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2633_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2633_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2633_equation_0, values = (var_26354_cast_fp16, var_26012_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2633_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2635_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2635_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2635_equation_0, values = (var_26354_cast_fp16, var_26019_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2635_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2637_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2637_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2637_equation_0, values = (var_26354_cast_fp16, var_26026_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2637_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2639_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2639_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2639_equation_0, values = (var_26354_cast_fp16, var_26033_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2639_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2641_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2641_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2641_equation_0, values = (var_26358_cast_fp16, var_26040_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2641_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2643_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2643_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2643_equation_0, values = (var_26358_cast_fp16, var_26047_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2643_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2645_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2645_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2645_equation_0, values = (var_26358_cast_fp16, var_26054_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2645_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2647_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2647_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2647_equation_0, values = (var_26358_cast_fp16, var_26061_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2647_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2649_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2649_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2649_equation_0, values = (var_26362_cast_fp16, var_26068_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2649_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2651_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2651_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2651_equation_0, values = (var_26362_cast_fp16, var_26075_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2651_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2653_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2653_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2653_equation_0, values = (var_26362_cast_fp16, var_26082_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2653_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2655_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2655_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2655_equation_0, values = (var_26362_cast_fp16, var_26089_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2655_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2657_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2657_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2657_equation_0, values = (var_26366_cast_fp16, var_26096_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2657_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2659_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2659_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2659_equation_0, values = (var_26366_cast_fp16, var_26103_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2659_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2661_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2661_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2661_equation_0, values = (var_26366_cast_fp16, var_26110_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2661_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2663_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2663_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2663_equation_0, values = (var_26366_cast_fp16, var_26117_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2663_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2665_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2665_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2665_equation_0, values = (var_26370_cast_fp16, var_26124_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2665_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2667_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2667_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2667_equation_0, values = (var_26370_cast_fp16, var_26131_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2667_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2669_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2669_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2669_equation_0, values = (var_26370_cast_fp16, var_26138_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2669_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2671_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2671_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2671_equation_0, values = (var_26370_cast_fp16, var_26145_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2671_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2673_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2673_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2673_equation_0, values = (var_26374_cast_fp16, var_26152_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2673_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2675_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2675_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2675_equation_0, values = (var_26374_cast_fp16, var_26159_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2675_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2677_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2677_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2677_equation_0, values = (var_26374_cast_fp16, var_26166_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2677_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2679_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2679_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2679_equation_0, values = (var_26374_cast_fp16, var_26173_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2679_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2681_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2681_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2681_equation_0, values = (var_26378_cast_fp16, var_26180_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2681_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2683_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2683_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2683_equation_0, values = (var_26378_cast_fp16, var_26187_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2683_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2685_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2685_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2685_equation_0, values = (var_26378_cast_fp16, var_26194_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2685_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2687_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2687_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2687_equation_0, values = (var_26378_cast_fp16, var_26201_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2687_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2689_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2689_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2689_equation_0, values = (var_26382_cast_fp16, var_26208_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2689_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2691_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2691_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2691_equation_0, values = (var_26382_cast_fp16, var_26215_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2691_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2693_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2693_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2693_equation_0, values = (var_26382_cast_fp16, var_26222_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2693_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2695_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2695_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2695_equation_0, values = (var_26382_cast_fp16, var_26229_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2695_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2697_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2697_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2697_equation_0, values = (var_26386_cast_fp16, var_26236_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2697_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2699_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2699_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2699_equation_0, values = (var_26386_cast_fp16, var_26243_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2699_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2701_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2701_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2701_equation_0, values = (var_26386_cast_fp16, var_26250_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2703_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2703_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2703_equation_0, values = (var_26386_cast_fp16, var_26257_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2703_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2705_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2705_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2705_equation_0, values = (var_26390_cast_fp16, var_26264_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2705_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2707_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2707_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2707_equation_0, values = (var_26390_cast_fp16, var_26271_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2707_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2709_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2709_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2709_equation_0, values = (var_26390_cast_fp16, var_26278_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2709_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2711_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2711_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2711_equation_0, values = (var_26390_cast_fp16, var_26285_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2711_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2713_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2713_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2713_equation_0, values = (var_26394_cast_fp16, var_26292_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2713_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2715_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2715_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2715_equation_0, values = (var_26394_cast_fp16, var_26299_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2715_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2717_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2717_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2717_equation_0, values = (var_26394_cast_fp16, var_26306_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2717_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2719_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2719_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2719_equation_0, values = (var_26394_cast_fp16, var_26313_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2719_cast_fp16")]; tensor var_26635_to_fp16 = const()[name = tensor("op_26635_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2561_cast_fp16, y = var_26635_to_fp16)[name = tensor("aw_chunk_2561_cast_fp16")]; tensor var_26637_to_fp16 = const()[name = tensor("op_26637_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2563_cast_fp16, y = var_26637_to_fp16)[name = tensor("aw_chunk_2563_cast_fp16")]; tensor var_26639_to_fp16 = const()[name = tensor("op_26639_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2565_cast_fp16, y = var_26639_to_fp16)[name = tensor("aw_chunk_2565_cast_fp16")]; tensor var_26641_to_fp16 = const()[name = tensor("op_26641_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2567_cast_fp16, y = var_26641_to_fp16)[name = tensor("aw_chunk_2567_cast_fp16")]; tensor var_26643_to_fp16 = const()[name = tensor("op_26643_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2569_cast_fp16, y = var_26643_to_fp16)[name = tensor("aw_chunk_2569_cast_fp16")]; tensor var_26645_to_fp16 = const()[name = tensor("op_26645_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2571_cast_fp16, y = var_26645_to_fp16)[name = tensor("aw_chunk_2571_cast_fp16")]; tensor var_26647_to_fp16 = const()[name = tensor("op_26647_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2573_cast_fp16, y = var_26647_to_fp16)[name = tensor("aw_chunk_2573_cast_fp16")]; tensor var_26649_to_fp16 = const()[name = tensor("op_26649_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2575_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2575_cast_fp16, y = var_26649_to_fp16)[name = tensor("aw_chunk_2575_cast_fp16")]; tensor var_26651_to_fp16 = const()[name = tensor("op_26651_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2577_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2577_cast_fp16, y = var_26651_to_fp16)[name = tensor("aw_chunk_2577_cast_fp16")]; tensor var_26653_to_fp16 = const()[name = tensor("op_26653_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2579_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2579_cast_fp16, y = var_26653_to_fp16)[name = tensor("aw_chunk_2579_cast_fp16")]; tensor var_26655_to_fp16 = const()[name = tensor("op_26655_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2581_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2581_cast_fp16, y = var_26655_to_fp16)[name = tensor("aw_chunk_2581_cast_fp16")]; tensor var_26657_to_fp16 = const()[name = tensor("op_26657_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2583_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2583_cast_fp16, y = var_26657_to_fp16)[name = tensor("aw_chunk_2583_cast_fp16")]; tensor var_26659_to_fp16 = const()[name = tensor("op_26659_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2585_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2585_cast_fp16, y = var_26659_to_fp16)[name = tensor("aw_chunk_2585_cast_fp16")]; tensor var_26661_to_fp16 = const()[name = tensor("op_26661_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2587_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2587_cast_fp16, y = var_26661_to_fp16)[name = tensor("aw_chunk_2587_cast_fp16")]; tensor var_26663_to_fp16 = const()[name = tensor("op_26663_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2589_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2589_cast_fp16, y = var_26663_to_fp16)[name = tensor("aw_chunk_2589_cast_fp16")]; tensor var_26665_to_fp16 = const()[name = tensor("op_26665_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2591_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2591_cast_fp16, y = var_26665_to_fp16)[name = tensor("aw_chunk_2591_cast_fp16")]; tensor var_26667_to_fp16 = const()[name = tensor("op_26667_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2593_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2593_cast_fp16, y = var_26667_to_fp16)[name = tensor("aw_chunk_2593_cast_fp16")]; tensor var_26669_to_fp16 = const()[name = tensor("op_26669_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2595_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2595_cast_fp16, y = var_26669_to_fp16)[name = tensor("aw_chunk_2595_cast_fp16")]; tensor var_26671_to_fp16 = const()[name = tensor("op_26671_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2597_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2597_cast_fp16, y = var_26671_to_fp16)[name = tensor("aw_chunk_2597_cast_fp16")]; tensor var_26673_to_fp16 = const()[name = tensor("op_26673_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2599_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2599_cast_fp16, y = var_26673_to_fp16)[name = tensor("aw_chunk_2599_cast_fp16")]; tensor var_26675_to_fp16 = const()[name = tensor("op_26675_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2601_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2601_cast_fp16, y = var_26675_to_fp16)[name = tensor("aw_chunk_2601_cast_fp16")]; tensor var_26677_to_fp16 = const()[name = tensor("op_26677_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2603_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2603_cast_fp16, y = var_26677_to_fp16)[name = tensor("aw_chunk_2603_cast_fp16")]; tensor var_26679_to_fp16 = const()[name = tensor("op_26679_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2605_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2605_cast_fp16, y = var_26679_to_fp16)[name = tensor("aw_chunk_2605_cast_fp16")]; tensor var_26681_to_fp16 = const()[name = tensor("op_26681_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2607_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2607_cast_fp16, y = var_26681_to_fp16)[name = tensor("aw_chunk_2607_cast_fp16")]; tensor var_26683_to_fp16 = const()[name = tensor("op_26683_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2609_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2609_cast_fp16, y = var_26683_to_fp16)[name = tensor("aw_chunk_2609_cast_fp16")]; tensor var_26685_to_fp16 = const()[name = tensor("op_26685_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2611_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2611_cast_fp16, y = var_26685_to_fp16)[name = tensor("aw_chunk_2611_cast_fp16")]; tensor var_26687_to_fp16 = const()[name = tensor("op_26687_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2613_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2613_cast_fp16, y = var_26687_to_fp16)[name = tensor("aw_chunk_2613_cast_fp16")]; tensor var_26689_to_fp16 = const()[name = tensor("op_26689_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2615_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2615_cast_fp16, y = var_26689_to_fp16)[name = tensor("aw_chunk_2615_cast_fp16")]; tensor var_26691_to_fp16 = const()[name = tensor("op_26691_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2617_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2617_cast_fp16, y = var_26691_to_fp16)[name = tensor("aw_chunk_2617_cast_fp16")]; tensor var_26693_to_fp16 = const()[name = tensor("op_26693_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2619_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2619_cast_fp16, y = var_26693_to_fp16)[name = tensor("aw_chunk_2619_cast_fp16")]; tensor var_26695_to_fp16 = const()[name = tensor("op_26695_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2621_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2621_cast_fp16, y = var_26695_to_fp16)[name = tensor("aw_chunk_2621_cast_fp16")]; tensor var_26697_to_fp16 = const()[name = tensor("op_26697_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2623_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2623_cast_fp16, y = var_26697_to_fp16)[name = tensor("aw_chunk_2623_cast_fp16")]; tensor var_26699_to_fp16 = const()[name = tensor("op_26699_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2625_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2625_cast_fp16, y = var_26699_to_fp16)[name = tensor("aw_chunk_2625_cast_fp16")]; tensor var_26701_to_fp16 = const()[name = tensor("op_26701_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2627_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2627_cast_fp16, y = var_26701_to_fp16)[name = tensor("aw_chunk_2627_cast_fp16")]; tensor var_26703_to_fp16 = const()[name = tensor("op_26703_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2629_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2629_cast_fp16, y = var_26703_to_fp16)[name = tensor("aw_chunk_2629_cast_fp16")]; tensor var_26705_to_fp16 = const()[name = tensor("op_26705_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2631_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2631_cast_fp16, y = var_26705_to_fp16)[name = tensor("aw_chunk_2631_cast_fp16")]; tensor var_26707_to_fp16 = const()[name = tensor("op_26707_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2633_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2633_cast_fp16, y = var_26707_to_fp16)[name = tensor("aw_chunk_2633_cast_fp16")]; tensor var_26709_to_fp16 = const()[name = tensor("op_26709_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2635_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2635_cast_fp16, y = var_26709_to_fp16)[name = tensor("aw_chunk_2635_cast_fp16")]; tensor var_26711_to_fp16 = const()[name = tensor("op_26711_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2637_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2637_cast_fp16, y = var_26711_to_fp16)[name = tensor("aw_chunk_2637_cast_fp16")]; tensor var_26713_to_fp16 = const()[name = tensor("op_26713_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2639_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2639_cast_fp16, y = var_26713_to_fp16)[name = tensor("aw_chunk_2639_cast_fp16")]; tensor var_26715_to_fp16 = const()[name = tensor("op_26715_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2641_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2641_cast_fp16, y = var_26715_to_fp16)[name = tensor("aw_chunk_2641_cast_fp16")]; tensor var_26717_to_fp16 = const()[name = tensor("op_26717_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2643_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2643_cast_fp16, y = var_26717_to_fp16)[name = tensor("aw_chunk_2643_cast_fp16")]; tensor var_26719_to_fp16 = const()[name = tensor("op_26719_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2645_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2645_cast_fp16, y = var_26719_to_fp16)[name = tensor("aw_chunk_2645_cast_fp16")]; tensor var_26721_to_fp16 = const()[name = tensor("op_26721_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2647_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2647_cast_fp16, y = var_26721_to_fp16)[name = tensor("aw_chunk_2647_cast_fp16")]; tensor var_26723_to_fp16 = const()[name = tensor("op_26723_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2649_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2649_cast_fp16, y = var_26723_to_fp16)[name = tensor("aw_chunk_2649_cast_fp16")]; tensor var_26725_to_fp16 = const()[name = tensor("op_26725_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2651_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2651_cast_fp16, y = var_26725_to_fp16)[name = tensor("aw_chunk_2651_cast_fp16")]; tensor var_26727_to_fp16 = const()[name = tensor("op_26727_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2653_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2653_cast_fp16, y = var_26727_to_fp16)[name = tensor("aw_chunk_2653_cast_fp16")]; tensor var_26729_to_fp16 = const()[name = tensor("op_26729_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2655_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2655_cast_fp16, y = var_26729_to_fp16)[name = tensor("aw_chunk_2655_cast_fp16")]; tensor var_26731_to_fp16 = const()[name = tensor("op_26731_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2657_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2657_cast_fp16, y = var_26731_to_fp16)[name = tensor("aw_chunk_2657_cast_fp16")]; tensor var_26733_to_fp16 = const()[name = tensor("op_26733_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2659_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2659_cast_fp16, y = var_26733_to_fp16)[name = tensor("aw_chunk_2659_cast_fp16")]; tensor var_26735_to_fp16 = const()[name = tensor("op_26735_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2661_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2661_cast_fp16, y = var_26735_to_fp16)[name = tensor("aw_chunk_2661_cast_fp16")]; tensor var_26737_to_fp16 = const()[name = tensor("op_26737_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2663_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2663_cast_fp16, y = var_26737_to_fp16)[name = tensor("aw_chunk_2663_cast_fp16")]; tensor var_26739_to_fp16 = const()[name = tensor("op_26739_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2665_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2665_cast_fp16, y = var_26739_to_fp16)[name = tensor("aw_chunk_2665_cast_fp16")]; tensor var_26741_to_fp16 = const()[name = tensor("op_26741_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2667_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2667_cast_fp16, y = var_26741_to_fp16)[name = tensor("aw_chunk_2667_cast_fp16")]; tensor var_26743_to_fp16 = const()[name = tensor("op_26743_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2669_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2669_cast_fp16, y = var_26743_to_fp16)[name = tensor("aw_chunk_2669_cast_fp16")]; tensor var_26745_to_fp16 = const()[name = tensor("op_26745_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2671_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2671_cast_fp16, y = var_26745_to_fp16)[name = tensor("aw_chunk_2671_cast_fp16")]; tensor var_26747_to_fp16 = const()[name = tensor("op_26747_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2673_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2673_cast_fp16, y = var_26747_to_fp16)[name = tensor("aw_chunk_2673_cast_fp16")]; tensor var_26749_to_fp16 = const()[name = tensor("op_26749_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2675_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2675_cast_fp16, y = var_26749_to_fp16)[name = tensor("aw_chunk_2675_cast_fp16")]; tensor var_26751_to_fp16 = const()[name = tensor("op_26751_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2677_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2677_cast_fp16, y = var_26751_to_fp16)[name = tensor("aw_chunk_2677_cast_fp16")]; tensor var_26753_to_fp16 = const()[name = tensor("op_26753_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2679_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2679_cast_fp16, y = var_26753_to_fp16)[name = tensor("aw_chunk_2679_cast_fp16")]; tensor var_26755_to_fp16 = const()[name = tensor("op_26755_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2681_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2681_cast_fp16, y = var_26755_to_fp16)[name = tensor("aw_chunk_2681_cast_fp16")]; tensor var_26757_to_fp16 = const()[name = tensor("op_26757_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2683_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2683_cast_fp16, y = var_26757_to_fp16)[name = tensor("aw_chunk_2683_cast_fp16")]; tensor var_26759_to_fp16 = const()[name = tensor("op_26759_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2685_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2685_cast_fp16, y = var_26759_to_fp16)[name = tensor("aw_chunk_2685_cast_fp16")]; tensor var_26761_to_fp16 = const()[name = tensor("op_26761_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2687_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2687_cast_fp16, y = var_26761_to_fp16)[name = tensor("aw_chunk_2687_cast_fp16")]; tensor var_26763_to_fp16 = const()[name = tensor("op_26763_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2689_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2689_cast_fp16, y = var_26763_to_fp16)[name = tensor("aw_chunk_2689_cast_fp16")]; tensor var_26765_to_fp16 = const()[name = tensor("op_26765_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2691_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2691_cast_fp16, y = var_26765_to_fp16)[name = tensor("aw_chunk_2691_cast_fp16")]; tensor var_26767_to_fp16 = const()[name = tensor("op_26767_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2693_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2693_cast_fp16, y = var_26767_to_fp16)[name = tensor("aw_chunk_2693_cast_fp16")]; tensor var_26769_to_fp16 = const()[name = tensor("op_26769_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2695_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2695_cast_fp16, y = var_26769_to_fp16)[name = tensor("aw_chunk_2695_cast_fp16")]; tensor var_26771_to_fp16 = const()[name = tensor("op_26771_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2697_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2697_cast_fp16, y = var_26771_to_fp16)[name = tensor("aw_chunk_2697_cast_fp16")]; tensor var_26773_to_fp16 = const()[name = tensor("op_26773_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2699_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2699_cast_fp16, y = var_26773_to_fp16)[name = tensor("aw_chunk_2699_cast_fp16")]; tensor var_26775_to_fp16 = const()[name = tensor("op_26775_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2701_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2701_cast_fp16, y = var_26775_to_fp16)[name = tensor("aw_chunk_2701_cast_fp16")]; tensor var_26777_to_fp16 = const()[name = tensor("op_26777_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2703_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2703_cast_fp16, y = var_26777_to_fp16)[name = tensor("aw_chunk_2703_cast_fp16")]; tensor var_26779_to_fp16 = const()[name = tensor("op_26779_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2705_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2705_cast_fp16, y = var_26779_to_fp16)[name = tensor("aw_chunk_2705_cast_fp16")]; tensor var_26781_to_fp16 = const()[name = tensor("op_26781_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2707_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2707_cast_fp16, y = var_26781_to_fp16)[name = tensor("aw_chunk_2707_cast_fp16")]; tensor var_26783_to_fp16 = const()[name = tensor("op_26783_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2709_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2709_cast_fp16, y = var_26783_to_fp16)[name = tensor("aw_chunk_2709_cast_fp16")]; tensor var_26785_to_fp16 = const()[name = tensor("op_26785_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2711_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2711_cast_fp16, y = var_26785_to_fp16)[name = tensor("aw_chunk_2711_cast_fp16")]; tensor var_26787_to_fp16 = const()[name = tensor("op_26787_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2713_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2713_cast_fp16, y = var_26787_to_fp16)[name = tensor("aw_chunk_2713_cast_fp16")]; tensor var_26789_to_fp16 = const()[name = tensor("op_26789_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2715_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2715_cast_fp16, y = var_26789_to_fp16)[name = tensor("aw_chunk_2715_cast_fp16")]; tensor var_26791_to_fp16 = const()[name = tensor("op_26791_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2717_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2717_cast_fp16, y = var_26791_to_fp16)[name = tensor("aw_chunk_2717_cast_fp16")]; tensor var_26793_to_fp16 = const()[name = tensor("op_26793_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2719_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2719_cast_fp16, y = var_26793_to_fp16)[name = tensor("aw_chunk_2719_cast_fp16")]; tensor var_26795_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2561_cast_fp16)[name = tensor("op_26795_cast_fp16")]; tensor var_26796_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2563_cast_fp16)[name = tensor("op_26796_cast_fp16")]; tensor var_26797_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2565_cast_fp16)[name = tensor("op_26797_cast_fp16")]; tensor var_26798_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2567_cast_fp16)[name = tensor("op_26798_cast_fp16")]; tensor var_26799_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2569_cast_fp16)[name = tensor("op_26799_cast_fp16")]; tensor var_26800_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2571_cast_fp16)[name = tensor("op_26800_cast_fp16")]; tensor var_26801_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2573_cast_fp16)[name = tensor("op_26801_cast_fp16")]; tensor var_26802_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2575_cast_fp16)[name = tensor("op_26802_cast_fp16")]; tensor var_26803_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2577_cast_fp16)[name = tensor("op_26803_cast_fp16")]; tensor var_26804_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2579_cast_fp16)[name = tensor("op_26804_cast_fp16")]; tensor var_26805_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2581_cast_fp16)[name = tensor("op_26805_cast_fp16")]; tensor var_26806_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2583_cast_fp16)[name = tensor("op_26806_cast_fp16")]; tensor var_26807_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2585_cast_fp16)[name = tensor("op_26807_cast_fp16")]; tensor var_26808_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2587_cast_fp16)[name = tensor("op_26808_cast_fp16")]; tensor var_26809_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2589_cast_fp16)[name = tensor("op_26809_cast_fp16")]; tensor var_26810_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2591_cast_fp16)[name = tensor("op_26810_cast_fp16")]; tensor var_26811_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2593_cast_fp16)[name = tensor("op_26811_cast_fp16")]; tensor var_26812_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2595_cast_fp16)[name = tensor("op_26812_cast_fp16")]; tensor var_26813_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2597_cast_fp16)[name = tensor("op_26813_cast_fp16")]; tensor var_26814_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2599_cast_fp16)[name = tensor("op_26814_cast_fp16")]; tensor var_26815_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2601_cast_fp16)[name = tensor("op_26815_cast_fp16")]; tensor var_26816_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2603_cast_fp16)[name = tensor("op_26816_cast_fp16")]; tensor var_26817_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2605_cast_fp16)[name = tensor("op_26817_cast_fp16")]; tensor var_26818_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2607_cast_fp16)[name = tensor("op_26818_cast_fp16")]; tensor var_26819_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2609_cast_fp16)[name = tensor("op_26819_cast_fp16")]; tensor var_26820_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2611_cast_fp16)[name = tensor("op_26820_cast_fp16")]; tensor var_26821_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2613_cast_fp16)[name = tensor("op_26821_cast_fp16")]; tensor var_26822_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2615_cast_fp16)[name = tensor("op_26822_cast_fp16")]; tensor var_26823_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2617_cast_fp16)[name = tensor("op_26823_cast_fp16")]; tensor var_26824_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2619_cast_fp16)[name = tensor("op_26824_cast_fp16")]; tensor var_26825_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2621_cast_fp16)[name = tensor("op_26825_cast_fp16")]; tensor var_26826_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2623_cast_fp16)[name = tensor("op_26826_cast_fp16")]; tensor var_26827_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2625_cast_fp16)[name = tensor("op_26827_cast_fp16")]; tensor var_26828_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2627_cast_fp16)[name = tensor("op_26828_cast_fp16")]; tensor var_26829_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2629_cast_fp16)[name = tensor("op_26829_cast_fp16")]; tensor var_26830_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2631_cast_fp16)[name = tensor("op_26830_cast_fp16")]; tensor var_26831_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2633_cast_fp16)[name = tensor("op_26831_cast_fp16")]; tensor var_26832_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2635_cast_fp16)[name = tensor("op_26832_cast_fp16")]; tensor var_26833_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2637_cast_fp16)[name = tensor("op_26833_cast_fp16")]; tensor var_26834_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2639_cast_fp16)[name = tensor("op_26834_cast_fp16")]; tensor var_26835_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2641_cast_fp16)[name = tensor("op_26835_cast_fp16")]; tensor var_26836_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2643_cast_fp16)[name = tensor("op_26836_cast_fp16")]; tensor var_26837_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2645_cast_fp16)[name = tensor("op_26837_cast_fp16")]; tensor var_26838_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2647_cast_fp16)[name = tensor("op_26838_cast_fp16")]; tensor var_26839_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2649_cast_fp16)[name = tensor("op_26839_cast_fp16")]; tensor var_26840_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2651_cast_fp16)[name = tensor("op_26840_cast_fp16")]; tensor var_26841_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2653_cast_fp16)[name = tensor("op_26841_cast_fp16")]; tensor var_26842_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2655_cast_fp16)[name = tensor("op_26842_cast_fp16")]; tensor var_26843_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2657_cast_fp16)[name = tensor("op_26843_cast_fp16")]; tensor var_26844_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2659_cast_fp16)[name = tensor("op_26844_cast_fp16")]; tensor var_26845_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2661_cast_fp16)[name = tensor("op_26845_cast_fp16")]; tensor var_26846_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2663_cast_fp16)[name = tensor("op_26846_cast_fp16")]; tensor var_26847_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2665_cast_fp16)[name = tensor("op_26847_cast_fp16")]; tensor var_26848_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2667_cast_fp16)[name = tensor("op_26848_cast_fp16")]; tensor var_26849_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2669_cast_fp16)[name = tensor("op_26849_cast_fp16")]; tensor var_26850_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2671_cast_fp16)[name = tensor("op_26850_cast_fp16")]; tensor var_26851_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2673_cast_fp16)[name = tensor("op_26851_cast_fp16")]; tensor var_26852_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2675_cast_fp16)[name = tensor("op_26852_cast_fp16")]; tensor var_26853_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2677_cast_fp16)[name = tensor("op_26853_cast_fp16")]; tensor var_26854_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2679_cast_fp16)[name = tensor("op_26854_cast_fp16")]; tensor var_26855_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2681_cast_fp16)[name = tensor("op_26855_cast_fp16")]; tensor var_26856_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2683_cast_fp16)[name = tensor("op_26856_cast_fp16")]; tensor var_26857_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2685_cast_fp16)[name = tensor("op_26857_cast_fp16")]; tensor var_26858_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2687_cast_fp16)[name = tensor("op_26858_cast_fp16")]; tensor var_26859_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2689_cast_fp16)[name = tensor("op_26859_cast_fp16")]; tensor var_26860_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2691_cast_fp16)[name = tensor("op_26860_cast_fp16")]; tensor var_26861_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2693_cast_fp16)[name = tensor("op_26861_cast_fp16")]; tensor var_26862_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2695_cast_fp16)[name = tensor("op_26862_cast_fp16")]; tensor var_26863_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2697_cast_fp16)[name = tensor("op_26863_cast_fp16")]; tensor var_26864_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2699_cast_fp16)[name = tensor("op_26864_cast_fp16")]; tensor var_26865_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2701_cast_fp16)[name = tensor("op_26865_cast_fp16")]; tensor var_26866_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2703_cast_fp16)[name = tensor("op_26866_cast_fp16")]; tensor var_26867_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2705_cast_fp16)[name = tensor("op_26867_cast_fp16")]; tensor var_26868_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2707_cast_fp16)[name = tensor("op_26868_cast_fp16")]; tensor var_26869_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2709_cast_fp16)[name = tensor("op_26869_cast_fp16")]; tensor var_26870_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2711_cast_fp16)[name = tensor("op_26870_cast_fp16")]; tensor var_26871_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2713_cast_fp16)[name = tensor("op_26871_cast_fp16")]; tensor var_26872_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2715_cast_fp16)[name = tensor("op_26872_cast_fp16")]; tensor var_26873_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2717_cast_fp16)[name = tensor("op_26873_cast_fp16")]; tensor var_26874_cast_fp16 = softmax(axis = var_25593, x = aw_chunk_2719_cast_fp16)[name = tensor("op_26874_cast_fp16")]; tensor var_26876_equation_0 = const()[name = tensor("op_26876_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26876_cast_fp16 = einsum(equation = var_26876_equation_0, values = (var_26396_cast_fp16, var_26795_cast_fp16))[name = tensor("op_26876_cast_fp16")]; tensor var_26878_equation_0 = const()[name = tensor("op_26878_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26878_cast_fp16 = einsum(equation = var_26878_equation_0, values = (var_26396_cast_fp16, var_26796_cast_fp16))[name = tensor("op_26878_cast_fp16")]; tensor var_26880_equation_0 = const()[name = tensor("op_26880_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26880_cast_fp16 = einsum(equation = var_26880_equation_0, values = (var_26396_cast_fp16, var_26797_cast_fp16))[name = tensor("op_26880_cast_fp16")]; tensor var_26882_equation_0 = const()[name = tensor("op_26882_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26882_cast_fp16 = einsum(equation = var_26882_equation_0, values = (var_26396_cast_fp16, var_26798_cast_fp16))[name = tensor("op_26882_cast_fp16")]; tensor var_26884_equation_0 = const()[name = tensor("op_26884_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26884_cast_fp16 = einsum(equation = var_26884_equation_0, values = (var_26400_cast_fp16, var_26799_cast_fp16))[name = tensor("op_26884_cast_fp16")]; tensor var_26886_equation_0 = const()[name = tensor("op_26886_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26886_cast_fp16 = einsum(equation = var_26886_equation_0, values = (var_26400_cast_fp16, var_26800_cast_fp16))[name = tensor("op_26886_cast_fp16")]; tensor var_26888_equation_0 = const()[name = tensor("op_26888_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26888_cast_fp16 = einsum(equation = var_26888_equation_0, values = (var_26400_cast_fp16, var_26801_cast_fp16))[name = tensor("op_26888_cast_fp16")]; tensor var_26890_equation_0 = const()[name = tensor("op_26890_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26890_cast_fp16 = einsum(equation = var_26890_equation_0, values = (var_26400_cast_fp16, var_26802_cast_fp16))[name = tensor("op_26890_cast_fp16")]; tensor var_26892_equation_0 = const()[name = tensor("op_26892_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26892_cast_fp16 = einsum(equation = var_26892_equation_0, values = (var_26404_cast_fp16, var_26803_cast_fp16))[name = tensor("op_26892_cast_fp16")]; tensor var_26894_equation_0 = const()[name = tensor("op_26894_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26894_cast_fp16 = einsum(equation = var_26894_equation_0, values = (var_26404_cast_fp16, var_26804_cast_fp16))[name = tensor("op_26894_cast_fp16")]; tensor var_26896_equation_0 = const()[name = tensor("op_26896_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26896_cast_fp16 = einsum(equation = var_26896_equation_0, values = (var_26404_cast_fp16, var_26805_cast_fp16))[name = tensor("op_26896_cast_fp16")]; tensor var_26898_equation_0 = const()[name = tensor("op_26898_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26898_cast_fp16 = einsum(equation = var_26898_equation_0, values = (var_26404_cast_fp16, var_26806_cast_fp16))[name = tensor("op_26898_cast_fp16")]; tensor var_26900_equation_0 = const()[name = tensor("op_26900_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26900_cast_fp16 = einsum(equation = var_26900_equation_0, values = (var_26408_cast_fp16, var_26807_cast_fp16))[name = tensor("op_26900_cast_fp16")]; tensor var_26902_equation_0 = const()[name = tensor("op_26902_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26902_cast_fp16 = einsum(equation = var_26902_equation_0, values = (var_26408_cast_fp16, var_26808_cast_fp16))[name = tensor("op_26902_cast_fp16")]; tensor var_26904_equation_0 = const()[name = tensor("op_26904_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26904_cast_fp16 = einsum(equation = var_26904_equation_0, values = (var_26408_cast_fp16, var_26809_cast_fp16))[name = tensor("op_26904_cast_fp16")]; tensor var_26906_equation_0 = const()[name = tensor("op_26906_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26906_cast_fp16 = einsum(equation = var_26906_equation_0, values = (var_26408_cast_fp16, var_26810_cast_fp16))[name = tensor("op_26906_cast_fp16")]; tensor var_26908_equation_0 = const()[name = tensor("op_26908_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26908_cast_fp16 = einsum(equation = var_26908_equation_0, values = (var_26412_cast_fp16, var_26811_cast_fp16))[name = tensor("op_26908_cast_fp16")]; tensor var_26910_equation_0 = const()[name = tensor("op_26910_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26910_cast_fp16 = einsum(equation = var_26910_equation_0, values = (var_26412_cast_fp16, var_26812_cast_fp16))[name = tensor("op_26910_cast_fp16")]; tensor var_26912_equation_0 = const()[name = tensor("op_26912_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26912_cast_fp16 = einsum(equation = var_26912_equation_0, values = (var_26412_cast_fp16, var_26813_cast_fp16))[name = tensor("op_26912_cast_fp16")]; tensor var_26914_equation_0 = const()[name = tensor("op_26914_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26914_cast_fp16 = einsum(equation = var_26914_equation_0, values = (var_26412_cast_fp16, var_26814_cast_fp16))[name = tensor("op_26914_cast_fp16")]; tensor var_26916_equation_0 = const()[name = tensor("op_26916_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26916_cast_fp16 = einsum(equation = var_26916_equation_0, values = (var_26416_cast_fp16, var_26815_cast_fp16))[name = tensor("op_26916_cast_fp16")]; tensor var_26918_equation_0 = const()[name = tensor("op_26918_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26918_cast_fp16 = einsum(equation = var_26918_equation_0, values = (var_26416_cast_fp16, var_26816_cast_fp16))[name = tensor("op_26918_cast_fp16")]; tensor var_26920_equation_0 = const()[name = tensor("op_26920_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26920_cast_fp16 = einsum(equation = var_26920_equation_0, values = (var_26416_cast_fp16, var_26817_cast_fp16))[name = tensor("op_26920_cast_fp16")]; tensor var_26922_equation_0 = const()[name = tensor("op_26922_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26922_cast_fp16 = einsum(equation = var_26922_equation_0, values = (var_26416_cast_fp16, var_26818_cast_fp16))[name = tensor("op_26922_cast_fp16")]; tensor var_26924_equation_0 = const()[name = tensor("op_26924_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26924_cast_fp16 = einsum(equation = var_26924_equation_0, values = (var_26420_cast_fp16, var_26819_cast_fp16))[name = tensor("op_26924_cast_fp16")]; tensor var_26926_equation_0 = const()[name = tensor("op_26926_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26926_cast_fp16 = einsum(equation = var_26926_equation_0, values = (var_26420_cast_fp16, var_26820_cast_fp16))[name = tensor("op_26926_cast_fp16")]; tensor var_26928_equation_0 = const()[name = tensor("op_26928_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26928_cast_fp16 = einsum(equation = var_26928_equation_0, values = (var_26420_cast_fp16, var_26821_cast_fp16))[name = tensor("op_26928_cast_fp16")]; tensor var_26930_equation_0 = const()[name = tensor("op_26930_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26930_cast_fp16 = einsum(equation = var_26930_equation_0, values = (var_26420_cast_fp16, var_26822_cast_fp16))[name = tensor("op_26930_cast_fp16")]; tensor var_26932_equation_0 = const()[name = tensor("op_26932_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26932_cast_fp16 = einsum(equation = var_26932_equation_0, values = (var_26424_cast_fp16, var_26823_cast_fp16))[name = tensor("op_26932_cast_fp16")]; tensor var_26934_equation_0 = const()[name = tensor("op_26934_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26934_cast_fp16 = einsum(equation = var_26934_equation_0, values = (var_26424_cast_fp16, var_26824_cast_fp16))[name = tensor("op_26934_cast_fp16")]; tensor var_26936_equation_0 = const()[name = tensor("op_26936_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26936_cast_fp16 = einsum(equation = var_26936_equation_0, values = (var_26424_cast_fp16, var_26825_cast_fp16))[name = tensor("op_26936_cast_fp16")]; tensor var_26938_equation_0 = const()[name = tensor("op_26938_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26938_cast_fp16 = einsum(equation = var_26938_equation_0, values = (var_26424_cast_fp16, var_26826_cast_fp16))[name = tensor("op_26938_cast_fp16")]; tensor var_26940_equation_0 = const()[name = tensor("op_26940_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26940_cast_fp16 = einsum(equation = var_26940_equation_0, values = (var_26428_cast_fp16, var_26827_cast_fp16))[name = tensor("op_26940_cast_fp16")]; tensor var_26942_equation_0 = const()[name = tensor("op_26942_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26942_cast_fp16 = einsum(equation = var_26942_equation_0, values = (var_26428_cast_fp16, var_26828_cast_fp16))[name = tensor("op_26942_cast_fp16")]; tensor var_26944_equation_0 = const()[name = tensor("op_26944_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26944_cast_fp16 = einsum(equation = var_26944_equation_0, values = (var_26428_cast_fp16, var_26829_cast_fp16))[name = tensor("op_26944_cast_fp16")]; tensor var_26946_equation_0 = const()[name = tensor("op_26946_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26946_cast_fp16 = einsum(equation = var_26946_equation_0, values = (var_26428_cast_fp16, var_26830_cast_fp16))[name = tensor("op_26946_cast_fp16")]; tensor var_26948_equation_0 = const()[name = tensor("op_26948_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26948_cast_fp16 = einsum(equation = var_26948_equation_0, values = (var_26432_cast_fp16, var_26831_cast_fp16))[name = tensor("op_26948_cast_fp16")]; tensor var_26950_equation_0 = const()[name = tensor("op_26950_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26950_cast_fp16 = einsum(equation = var_26950_equation_0, values = (var_26432_cast_fp16, var_26832_cast_fp16))[name = tensor("op_26950_cast_fp16")]; tensor var_26952_equation_0 = const()[name = tensor("op_26952_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26952_cast_fp16 = einsum(equation = var_26952_equation_0, values = (var_26432_cast_fp16, var_26833_cast_fp16))[name = tensor("op_26952_cast_fp16")]; tensor var_26954_equation_0 = const()[name = tensor("op_26954_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26954_cast_fp16 = einsum(equation = var_26954_equation_0, values = (var_26432_cast_fp16, var_26834_cast_fp16))[name = tensor("op_26954_cast_fp16")]; tensor var_26956_equation_0 = const()[name = tensor("op_26956_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26956_cast_fp16 = einsum(equation = var_26956_equation_0, values = (var_26436_cast_fp16, var_26835_cast_fp16))[name = tensor("op_26956_cast_fp16")]; tensor var_26958_equation_0 = const()[name = tensor("op_26958_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26958_cast_fp16 = einsum(equation = var_26958_equation_0, values = (var_26436_cast_fp16, var_26836_cast_fp16))[name = tensor("op_26958_cast_fp16")]; tensor var_26960_equation_0 = const()[name = tensor("op_26960_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26960_cast_fp16 = einsum(equation = var_26960_equation_0, values = (var_26436_cast_fp16, var_26837_cast_fp16))[name = tensor("op_26960_cast_fp16")]; tensor var_26962_equation_0 = const()[name = tensor("op_26962_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26962_cast_fp16 = einsum(equation = var_26962_equation_0, values = (var_26436_cast_fp16, var_26838_cast_fp16))[name = tensor("op_26962_cast_fp16")]; tensor var_26964_equation_0 = const()[name = tensor("op_26964_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26964_cast_fp16 = einsum(equation = var_26964_equation_0, values = (var_26440_cast_fp16, var_26839_cast_fp16))[name = tensor("op_26964_cast_fp16")]; tensor var_26966_equation_0 = const()[name = tensor("op_26966_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26966_cast_fp16 = einsum(equation = var_26966_equation_0, values = (var_26440_cast_fp16, var_26840_cast_fp16))[name = tensor("op_26966_cast_fp16")]; tensor var_26968_equation_0 = const()[name = tensor("op_26968_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26968_cast_fp16 = einsum(equation = var_26968_equation_0, values = (var_26440_cast_fp16, var_26841_cast_fp16))[name = tensor("op_26968_cast_fp16")]; tensor var_26970_equation_0 = const()[name = tensor("op_26970_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26970_cast_fp16 = einsum(equation = var_26970_equation_0, values = (var_26440_cast_fp16, var_26842_cast_fp16))[name = tensor("op_26970_cast_fp16")]; tensor var_26972_equation_0 = const()[name = tensor("op_26972_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26972_cast_fp16 = einsum(equation = var_26972_equation_0, values = (var_26444_cast_fp16, var_26843_cast_fp16))[name = tensor("op_26972_cast_fp16")]; tensor var_26974_equation_0 = const()[name = tensor("op_26974_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26974_cast_fp16 = einsum(equation = var_26974_equation_0, values = (var_26444_cast_fp16, var_26844_cast_fp16))[name = tensor("op_26974_cast_fp16")]; tensor var_26976_equation_0 = const()[name = tensor("op_26976_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26976_cast_fp16 = einsum(equation = var_26976_equation_0, values = (var_26444_cast_fp16, var_26845_cast_fp16))[name = tensor("op_26976_cast_fp16")]; tensor var_26978_equation_0 = const()[name = tensor("op_26978_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26978_cast_fp16 = einsum(equation = var_26978_equation_0, values = (var_26444_cast_fp16, var_26846_cast_fp16))[name = tensor("op_26978_cast_fp16")]; tensor var_26980_equation_0 = const()[name = tensor("op_26980_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26980_cast_fp16 = einsum(equation = var_26980_equation_0, values = (var_26448_cast_fp16, var_26847_cast_fp16))[name = tensor("op_26980_cast_fp16")]; tensor var_26982_equation_0 = const()[name = tensor("op_26982_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26982_cast_fp16 = einsum(equation = var_26982_equation_0, values = (var_26448_cast_fp16, var_26848_cast_fp16))[name = tensor("op_26982_cast_fp16")]; tensor var_26984_equation_0 = const()[name = tensor("op_26984_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26984_cast_fp16 = einsum(equation = var_26984_equation_0, values = (var_26448_cast_fp16, var_26849_cast_fp16))[name = tensor("op_26984_cast_fp16")]; tensor var_26986_equation_0 = const()[name = tensor("op_26986_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26986_cast_fp16 = einsum(equation = var_26986_equation_0, values = (var_26448_cast_fp16, var_26850_cast_fp16))[name = tensor("op_26986_cast_fp16")]; tensor var_26988_equation_0 = const()[name = tensor("op_26988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26988_cast_fp16 = einsum(equation = var_26988_equation_0, values = (var_26452_cast_fp16, var_26851_cast_fp16))[name = tensor("op_26988_cast_fp16")]; tensor var_26990_equation_0 = const()[name = tensor("op_26990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26990_cast_fp16 = einsum(equation = var_26990_equation_0, values = (var_26452_cast_fp16, var_26852_cast_fp16))[name = tensor("op_26990_cast_fp16")]; tensor var_26992_equation_0 = const()[name = tensor("op_26992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26992_cast_fp16 = einsum(equation = var_26992_equation_0, values = (var_26452_cast_fp16, var_26853_cast_fp16))[name = tensor("op_26992_cast_fp16")]; tensor var_26994_equation_0 = const()[name = tensor("op_26994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26994_cast_fp16 = einsum(equation = var_26994_equation_0, values = (var_26452_cast_fp16, var_26854_cast_fp16))[name = tensor("op_26994_cast_fp16")]; tensor var_26996_equation_0 = const()[name = tensor("op_26996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26996_cast_fp16 = einsum(equation = var_26996_equation_0, values = (var_26456_cast_fp16, var_26855_cast_fp16))[name = tensor("op_26996_cast_fp16")]; tensor var_26998_equation_0 = const()[name = tensor("op_26998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_26998_cast_fp16 = einsum(equation = var_26998_equation_0, values = (var_26456_cast_fp16, var_26856_cast_fp16))[name = tensor("op_26998_cast_fp16")]; tensor var_27000_equation_0 = const()[name = tensor("op_27000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27000_cast_fp16 = einsum(equation = var_27000_equation_0, values = (var_26456_cast_fp16, var_26857_cast_fp16))[name = tensor("op_27000_cast_fp16")]; tensor var_27002_equation_0 = const()[name = tensor("op_27002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27002_cast_fp16 = einsum(equation = var_27002_equation_0, values = (var_26456_cast_fp16, var_26858_cast_fp16))[name = tensor("op_27002_cast_fp16")]; tensor var_27004_equation_0 = const()[name = tensor("op_27004_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27004_cast_fp16 = einsum(equation = var_27004_equation_0, values = (var_26460_cast_fp16, var_26859_cast_fp16))[name = tensor("op_27004_cast_fp16")]; tensor var_27006_equation_0 = const()[name = tensor("op_27006_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27006_cast_fp16 = einsum(equation = var_27006_equation_0, values = (var_26460_cast_fp16, var_26860_cast_fp16))[name = tensor("op_27006_cast_fp16")]; tensor var_27008_equation_0 = const()[name = tensor("op_27008_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27008_cast_fp16 = einsum(equation = var_27008_equation_0, values = (var_26460_cast_fp16, var_26861_cast_fp16))[name = tensor("op_27008_cast_fp16")]; tensor var_27010_equation_0 = const()[name = tensor("op_27010_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27010_cast_fp16 = einsum(equation = var_27010_equation_0, values = (var_26460_cast_fp16, var_26862_cast_fp16))[name = tensor("op_27010_cast_fp16")]; tensor var_27012_equation_0 = const()[name = tensor("op_27012_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27012_cast_fp16 = einsum(equation = var_27012_equation_0, values = (var_26464_cast_fp16, var_26863_cast_fp16))[name = tensor("op_27012_cast_fp16")]; tensor var_27014_equation_0 = const()[name = tensor("op_27014_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27014_cast_fp16 = einsum(equation = var_27014_equation_0, values = (var_26464_cast_fp16, var_26864_cast_fp16))[name = tensor("op_27014_cast_fp16")]; tensor var_27016_equation_0 = const()[name = tensor("op_27016_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27016_cast_fp16 = einsum(equation = var_27016_equation_0, values = (var_26464_cast_fp16, var_26865_cast_fp16))[name = tensor("op_27016_cast_fp16")]; tensor var_27018_equation_0 = const()[name = tensor("op_27018_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27018_cast_fp16 = einsum(equation = var_27018_equation_0, values = (var_26464_cast_fp16, var_26866_cast_fp16))[name = tensor("op_27018_cast_fp16")]; tensor var_27020_equation_0 = const()[name = tensor("op_27020_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27020_cast_fp16 = einsum(equation = var_27020_equation_0, values = (var_26468_cast_fp16, var_26867_cast_fp16))[name = tensor("op_27020_cast_fp16")]; tensor var_27022_equation_0 = const()[name = tensor("op_27022_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27022_cast_fp16 = einsum(equation = var_27022_equation_0, values = (var_26468_cast_fp16, var_26868_cast_fp16))[name = tensor("op_27022_cast_fp16")]; tensor var_27024_equation_0 = const()[name = tensor("op_27024_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27024_cast_fp16 = einsum(equation = var_27024_equation_0, values = (var_26468_cast_fp16, var_26869_cast_fp16))[name = tensor("op_27024_cast_fp16")]; tensor var_27026_equation_0 = const()[name = tensor("op_27026_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27026_cast_fp16 = einsum(equation = var_27026_equation_0, values = (var_26468_cast_fp16, var_26870_cast_fp16))[name = tensor("op_27026_cast_fp16")]; tensor var_27028_equation_0 = const()[name = tensor("op_27028_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27028_cast_fp16 = einsum(equation = var_27028_equation_0, values = (var_26472_cast_fp16, var_26871_cast_fp16))[name = tensor("op_27028_cast_fp16")]; tensor var_27030_equation_0 = const()[name = tensor("op_27030_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27030_cast_fp16 = einsum(equation = var_27030_equation_0, values = (var_26472_cast_fp16, var_26872_cast_fp16))[name = tensor("op_27030_cast_fp16")]; tensor var_27032_equation_0 = const()[name = tensor("op_27032_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27032_cast_fp16 = einsum(equation = var_27032_equation_0, values = (var_26472_cast_fp16, var_26873_cast_fp16))[name = tensor("op_27032_cast_fp16")]; tensor var_27034_equation_0 = const()[name = tensor("op_27034_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_27034_cast_fp16 = einsum(equation = var_27034_equation_0, values = (var_26472_cast_fp16, var_26874_cast_fp16))[name = tensor("op_27034_cast_fp16")]; tensor var_27036_interleave_0 = const()[name = tensor("op_27036_interleave_0"), val = tensor(false)]; tensor var_27036_cast_fp16 = concat(axis = var_25568, interleave = var_27036_interleave_0, values = (var_26876_cast_fp16, var_26878_cast_fp16, var_26880_cast_fp16, var_26882_cast_fp16))[name = tensor("op_27036_cast_fp16")]; tensor var_27038_interleave_0 = const()[name = tensor("op_27038_interleave_0"), val = tensor(false)]; tensor var_27038_cast_fp16 = concat(axis = var_25568, interleave = var_27038_interleave_0, values = (var_26884_cast_fp16, var_26886_cast_fp16, var_26888_cast_fp16, var_26890_cast_fp16))[name = tensor("op_27038_cast_fp16")]; tensor var_27040_interleave_0 = const()[name = tensor("op_27040_interleave_0"), val = tensor(false)]; tensor var_27040_cast_fp16 = concat(axis = var_25568, interleave = var_27040_interleave_0, values = (var_26892_cast_fp16, var_26894_cast_fp16, var_26896_cast_fp16, var_26898_cast_fp16))[name = tensor("op_27040_cast_fp16")]; tensor var_27042_interleave_0 = const()[name = tensor("op_27042_interleave_0"), val = tensor(false)]; tensor var_27042_cast_fp16 = concat(axis = var_25568, interleave = var_27042_interleave_0, values = (var_26900_cast_fp16, var_26902_cast_fp16, var_26904_cast_fp16, var_26906_cast_fp16))[name = tensor("op_27042_cast_fp16")]; tensor var_27044_interleave_0 = const()[name = tensor("op_27044_interleave_0"), val = tensor(false)]; tensor var_27044_cast_fp16 = concat(axis = var_25568, interleave = var_27044_interleave_0, values = (var_26908_cast_fp16, var_26910_cast_fp16, var_26912_cast_fp16, var_26914_cast_fp16))[name = tensor("op_27044_cast_fp16")]; tensor var_27046_interleave_0 = const()[name = tensor("op_27046_interleave_0"), val = tensor(false)]; tensor var_27046_cast_fp16 = concat(axis = var_25568, interleave = var_27046_interleave_0, values = (var_26916_cast_fp16, var_26918_cast_fp16, var_26920_cast_fp16, var_26922_cast_fp16))[name = tensor("op_27046_cast_fp16")]; tensor var_27048_interleave_0 = const()[name = tensor("op_27048_interleave_0"), val = tensor(false)]; tensor var_27048_cast_fp16 = concat(axis = var_25568, interleave = var_27048_interleave_0, values = (var_26924_cast_fp16, var_26926_cast_fp16, var_26928_cast_fp16, var_26930_cast_fp16))[name = tensor("op_27048_cast_fp16")]; tensor var_27050_interleave_0 = const()[name = tensor("op_27050_interleave_0"), val = tensor(false)]; tensor var_27050_cast_fp16 = concat(axis = var_25568, interleave = var_27050_interleave_0, values = (var_26932_cast_fp16, var_26934_cast_fp16, var_26936_cast_fp16, var_26938_cast_fp16))[name = tensor("op_27050_cast_fp16")]; tensor var_27052_interleave_0 = const()[name = tensor("op_27052_interleave_0"), val = tensor(false)]; tensor var_27052_cast_fp16 = concat(axis = var_25568, interleave = var_27052_interleave_0, values = (var_26940_cast_fp16, var_26942_cast_fp16, var_26944_cast_fp16, var_26946_cast_fp16))[name = tensor("op_27052_cast_fp16")]; tensor var_27054_interleave_0 = const()[name = tensor("op_27054_interleave_0"), val = tensor(false)]; tensor var_27054_cast_fp16 = concat(axis = var_25568, interleave = var_27054_interleave_0, values = (var_26948_cast_fp16, var_26950_cast_fp16, var_26952_cast_fp16, var_26954_cast_fp16))[name = tensor("op_27054_cast_fp16")]; tensor var_27056_interleave_0 = const()[name = tensor("op_27056_interleave_0"), val = tensor(false)]; tensor var_27056_cast_fp16 = concat(axis = var_25568, interleave = var_27056_interleave_0, values = (var_26956_cast_fp16, var_26958_cast_fp16, var_26960_cast_fp16, var_26962_cast_fp16))[name = tensor("op_27056_cast_fp16")]; tensor var_27058_interleave_0 = const()[name = tensor("op_27058_interleave_0"), val = tensor(false)]; tensor var_27058_cast_fp16 = concat(axis = var_25568, interleave = var_27058_interleave_0, values = (var_26964_cast_fp16, var_26966_cast_fp16, var_26968_cast_fp16, var_26970_cast_fp16))[name = tensor("op_27058_cast_fp16")]; tensor var_27060_interleave_0 = const()[name = tensor("op_27060_interleave_0"), val = tensor(false)]; tensor var_27060_cast_fp16 = concat(axis = var_25568, interleave = var_27060_interleave_0, values = (var_26972_cast_fp16, var_26974_cast_fp16, var_26976_cast_fp16, var_26978_cast_fp16))[name = tensor("op_27060_cast_fp16")]; tensor var_27062_interleave_0 = const()[name = tensor("op_27062_interleave_0"), val = tensor(false)]; tensor var_27062_cast_fp16 = concat(axis = var_25568, interleave = var_27062_interleave_0, values = (var_26980_cast_fp16, var_26982_cast_fp16, var_26984_cast_fp16, var_26986_cast_fp16))[name = tensor("op_27062_cast_fp16")]; tensor var_27064_interleave_0 = const()[name = tensor("op_27064_interleave_0"), val = tensor(false)]; tensor var_27064_cast_fp16 = concat(axis = var_25568, interleave = var_27064_interleave_0, values = (var_26988_cast_fp16, var_26990_cast_fp16, var_26992_cast_fp16, var_26994_cast_fp16))[name = tensor("op_27064_cast_fp16")]; tensor var_27066_interleave_0 = const()[name = tensor("op_27066_interleave_0"), val = tensor(false)]; tensor var_27066_cast_fp16 = concat(axis = var_25568, interleave = var_27066_interleave_0, values = (var_26996_cast_fp16, var_26998_cast_fp16, var_27000_cast_fp16, var_27002_cast_fp16))[name = tensor("op_27066_cast_fp16")]; tensor var_27068_interleave_0 = const()[name = tensor("op_27068_interleave_0"), val = tensor(false)]; tensor var_27068_cast_fp16 = concat(axis = var_25568, interleave = var_27068_interleave_0, values = (var_27004_cast_fp16, var_27006_cast_fp16, var_27008_cast_fp16, var_27010_cast_fp16))[name = tensor("op_27068_cast_fp16")]; tensor var_27070_interleave_0 = const()[name = tensor("op_27070_interleave_0"), val = tensor(false)]; tensor var_27070_cast_fp16 = concat(axis = var_25568, interleave = var_27070_interleave_0, values = (var_27012_cast_fp16, var_27014_cast_fp16, var_27016_cast_fp16, var_27018_cast_fp16))[name = tensor("op_27070_cast_fp16")]; tensor var_27072_interleave_0 = const()[name = tensor("op_27072_interleave_0"), val = tensor(false)]; tensor var_27072_cast_fp16 = concat(axis = var_25568, interleave = var_27072_interleave_0, values = (var_27020_cast_fp16, var_27022_cast_fp16, var_27024_cast_fp16, var_27026_cast_fp16))[name = tensor("op_27072_cast_fp16")]; tensor var_27074_interleave_0 = const()[name = tensor("op_27074_interleave_0"), val = tensor(false)]; tensor var_27074_cast_fp16 = concat(axis = var_25568, interleave = var_27074_interleave_0, values = (var_27028_cast_fp16, var_27030_cast_fp16, var_27032_cast_fp16, var_27034_cast_fp16))[name = tensor("op_27074_cast_fp16")]; tensor input_129_interleave_0 = const()[name = tensor("input_129_interleave_0"), val = tensor(false)]; tensor input_129_cast_fp16 = concat(axis = var_25593, interleave = input_129_interleave_0, values = (var_27036_cast_fp16, var_27038_cast_fp16, var_27040_cast_fp16, var_27042_cast_fp16, var_27044_cast_fp16, var_27046_cast_fp16, var_27048_cast_fp16, var_27050_cast_fp16, var_27052_cast_fp16, var_27054_cast_fp16, var_27056_cast_fp16, var_27058_cast_fp16, var_27060_cast_fp16, var_27062_cast_fp16, var_27064_cast_fp16, var_27066_cast_fp16, var_27068_cast_fp16, var_27070_cast_fp16, var_27072_cast_fp16, var_27074_cast_fp16))[name = tensor("input_129_cast_fp16")]; tensor var_27085_pad_type_0 = const()[name = tensor("op_27085_pad_type_0"), val = tensor("valid")]; tensor var_27085_strides_0 = const()[name = tensor("op_27085_strides_0"), val = tensor([1, 1])]; tensor var_27085_pad_0 = const()[name = tensor("op_27085_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27085_dilations_0 = const()[name = tensor("op_27085_dilations_0"), val = tensor([1, 1])]; tensor var_27085_groups_0 = const()[name = tensor("op_27085_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224466752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225286016))), name = tensor("layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_16_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225286144)))]; tensor var_27085_cast_fp16 = conv(bias = layers_16_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_27085_dilations_0, groups = var_27085_groups_0, pad = var_27085_pad_0, pad_type = var_27085_pad_type_0, strides = var_27085_strides_0, weight = layers_16_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_129_cast_fp16)[name = tensor("op_27085_cast_fp16")]; tensor var_27091_pad_type_0 = const()[name = tensor("op_27091_pad_type_0"), val = tensor("valid")]; tensor var_27091_strides_0 = const()[name = tensor("op_27091_strides_0"), val = tensor([1, 1])]; tensor var_27091_pad_0 = const()[name = tensor("op_27091_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27091_dilations_0 = const()[name = tensor("op_27091_dilations_0"), val = tensor([1, 1])]; tensor var_27091_groups_0 = const()[name = tensor("op_27091_groups_0"), val = tensor(1)]; tensor layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225307136))), name = tensor("layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225288768))), shape = tensor([1280, 1280, 1, 1])]; tensor var_27091_cast_fp16 = conv(dilations = var_27091_dilations_0, groups = var_27091_groups_0, pad = var_27091_pad_0, pad_type = var_27091_pad_type_0, strides = var_27091_strides_0, weight = layers_16_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_129_cast_fp16)[name = tensor("op_27091_cast_fp16")]; tensor obj_67_cast_fp16 = add(x = var_27085_cast_fp16, y = var_27091_cast_fp16)[name = tensor("obj_67_cast_fp16")]; tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; tensor out_67_axes_0 = const()[name = tensor("out_67_axes_0"), val = tensor([1])]; tensor var_27102_to_fp16 = const()[name = tensor("op_27102_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_67_cast_fp16 = layer_norm(axes = out_67_axes_0, epsilon = var_27102_to_fp16, x = inputs_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; tensor input_131_gamma_0_to_fp16 = const()[name = tensor("input_131_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225512000)))]; tensor input_131_beta_0_to_fp16 = const()[name = tensor("input_131_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225514624)))]; tensor input_131_epsilon_0_to_fp16 = const()[name = tensor("input_131_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_131_cast_fp16 = batch_norm(beta = input_131_beta_0_to_fp16, epsilon = input_131_epsilon_0_to_fp16, gamma = input_131_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("input_131_cast_fp16")]; tensor var_27120_pad_type_0 = const()[name = tensor("op_27120_pad_type_0"), val = tensor("valid")]; tensor var_27120_strides_0 = const()[name = tensor("op_27120_strides_0"), val = tensor([1, 1])]; tensor var_27120_pad_0 = const()[name = tensor("op_27120_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27120_dilations_0 = const()[name = tensor("op_27120_dilations_0"), val = tensor([1, 1])]; tensor var_27120_groups_0 = const()[name = tensor("op_27120_groups_0"), val = tensor(1)]; tensor layers_16_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(225517248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228794112))), name = tensor("layers_16_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_16_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228794240)))]; tensor var_27120_cast_fp16 = conv(bias = layers_16_fc1_inlier_module_bias_to_fp16, dilations = var_27120_dilations_0, groups = var_27120_groups_0, pad = var_27120_pad_0, pad_type = var_27120_pad_type_0, strides = var_27120_strides_0, weight = layers_16_fc1_inlier_module_weight_to_fp16_palettized, x = input_131_cast_fp16)[name = tensor("op_27120_cast_fp16")]; tensor var_27126_pad_type_0 = const()[name = tensor("op_27126_pad_type_0"), val = tensor("valid")]; tensor var_27126_strides_0 = const()[name = tensor("op_27126_strides_0"), val = tensor([1, 1])]; tensor var_27126_pad_0 = const()[name = tensor("op_27126_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27126_dilations_0 = const()[name = tensor("op_27126_dilations_0"), val = tensor([1, 1])]; tensor var_27126_groups_0 = const()[name = tensor("op_27126_groups_0"), val = tensor(1)]; tensor layers_16_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228879488))), name = tensor("layers_16_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(228804544))), shape = tensor([5120, 1280, 1, 1])]; tensor var_27126_cast_fp16 = conv(dilations = var_27126_dilations_0, groups = var_27126_groups_0, pad = var_27126_pad_0, pad_type = var_27126_pad_type_0, strides = var_27126_strides_0, weight = layers_16_fc1_outlier_module_weight_to_fp16_sparsified, x = input_131_cast_fp16)[name = tensor("op_27126_cast_fp16")]; tensor input_133_cast_fp16 = add(x = var_27120_cast_fp16, y = var_27126_cast_fp16)[name = tensor("input_133_cast_fp16")]; tensor input_135_mode_0 = const()[name = tensor("input_135_mode_0"), val = tensor("EXACT")]; tensor input_135_cast_fp16 = gelu(mode = input_135_mode_0, x = input_133_cast_fp16)[name = tensor("input_135_cast_fp16")]; tensor var_27137_pad_type_0 = const()[name = tensor("op_27137_pad_type_0"), val = tensor("valid")]; tensor var_27137_strides_0 = const()[name = tensor("op_27137_strides_0"), val = tensor([1, 1])]; tensor var_27137_pad_0 = const()[name = tensor("op_27137_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27137_dilations_0 = const()[name = tensor("op_27137_dilations_0"), val = tensor([1, 1])]; tensor var_27137_groups_0 = const()[name = tensor("op_27137_groups_0"), val = tensor(1)]; tensor layers_16_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(229698752))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232975616))), name = tensor("layers_16_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_16_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_16_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232975744)))]; tensor var_27137_cast_fp16 = conv(bias = layers_16_fc2_inlier_module_bias_to_fp16, dilations = var_27137_dilations_0, groups = var_27137_groups_0, pad = var_27137_pad_0, pad_type = var_27137_pad_type_0, strides = var_27137_strides_0, weight = layers_16_fc2_inlier_module_weight_to_fp16_palettized, x = input_135_cast_fp16)[name = tensor("op_27137_cast_fp16")]; tensor var_27143_pad_type_0 = const()[name = tensor("op_27143_pad_type_0"), val = tensor("valid")]; tensor var_27143_strides_0 = const()[name = tensor("op_27143_strides_0"), val = tensor([1, 1])]; tensor var_27143_pad_0 = const()[name = tensor("op_27143_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27143_dilations_0 = const()[name = tensor("op_27143_dilations_0"), val = tensor([1, 1])]; tensor var_27143_groups_0 = const()[name = tensor("op_27143_groups_0"), val = tensor(1)]; tensor layers_16_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233104448))), name = tensor("layers_16_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232978368))), shape = tensor([1280, 5120, 1, 1])]; tensor var_27143_cast_fp16 = conv(dilations = var_27143_dilations_0, groups = var_27143_groups_0, pad = var_27143_pad_0, pad_type = var_27143_pad_type_0, strides = var_27143_strides_0, weight = layers_16_fc2_outlier_module_weight_to_fp16_sparsified, x = input_135_cast_fp16)[name = tensor("op_27143_cast_fp16")]; tensor hidden_states_37_cast_fp16 = add(x = var_27137_cast_fp16, y = var_27143_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = hidden_states_37_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; tensor var_27149 = const()[name = tensor("op_27149"), val = tensor(3)]; tensor var_27174 = const()[name = tensor("op_27174"), val = tensor(1)]; tensor out_69_axes_0 = const()[name = tensor("out_69_axes_0"), val = tensor([1])]; tensor var_27191_to_fp16 = const()[name = tensor("op_27191_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_69_cast_fp16 = layer_norm(axes = out_69_axes_0, epsilon = var_27191_to_fp16, x = inputs_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; tensor obj_69_gamma_0_to_fp16 = const()[name = tensor("obj_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233923712)))]; tensor obj_69_beta_0_to_fp16 = const()[name = tensor("obj_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233926336)))]; tensor obj_69_epsilon_0_to_fp16 = const()[name = tensor("obj_69_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_69_cast_fp16")]; tensor var_27213_pad_type_0 = const()[name = tensor("op_27213_pad_type_0"), val = tensor("valid")]; tensor var_27213_strides_0 = const()[name = tensor("op_27213_strides_0"), val = tensor([1, 1])]; tensor var_27213_pad_0 = const()[name = tensor("op_27213_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27213_dilations_0 = const()[name = tensor("op_27213_dilations_0"), val = tensor([1, 1])]; tensor var_27213_groups_0 = const()[name = tensor("op_27213_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(233928960))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234748224))), name = tensor("layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_17_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234748352)))]; tensor var_27213_cast_fp16 = conv(bias = layers_17_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_27213_dilations_0, groups = var_27213_groups_0, pad = var_27213_pad_0, pad_type = var_27213_pad_type_0, strides = var_27213_strides_0, weight = layers_17_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = tensor("op_27213_cast_fp16")]; tensor var_27219_pad_type_0 = const()[name = tensor("op_27219_pad_type_0"), val = tensor("valid")]; tensor var_27219_strides_0 = const()[name = tensor("op_27219_strides_0"), val = tensor([1, 1])]; tensor var_27219_pad_0 = const()[name = tensor("op_27219_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27219_dilations_0 = const()[name = tensor("op_27219_dilations_0"), val = tensor([1, 1])]; tensor var_27219_groups_0 = const()[name = tensor("op_27219_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234801152))), name = tensor("layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(234750976))), shape = tensor([1280, 1280, 1, 1])]; tensor var_27219_cast_fp16 = conv(dilations = var_27219_dilations_0, groups = var_27219_groups_0, pad = var_27219_pad_0, pad_type = var_27219_pad_type_0, strides = var_27219_strides_0, weight = layers_17_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = tensor("op_27219_cast_fp16")]; tensor query_35_cast_fp16 = add(x = var_27213_cast_fp16, y = var_27219_cast_fp16)[name = tensor("query_35_cast_fp16")]; tensor var_27228_pad_type_0 = const()[name = tensor("op_27228_pad_type_0"), val = tensor("valid")]; tensor var_27228_strides_0 = const()[name = tensor("op_27228_strides_0"), val = tensor([1, 1])]; tensor var_27228_pad_0 = const()[name = tensor("op_27228_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27228_dilations_0 = const()[name = tensor("op_27228_dilations_0"), val = tensor([1, 1])]; tensor var_27228_groups_0 = const()[name = tensor("op_27228_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235006016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235825280))), name = tensor("layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_27228_cast_fp16 = conv(dilations = var_27228_dilations_0, groups = var_27228_groups_0, pad = var_27228_pad_0, pad_type = var_27228_pad_type_0, strides = var_27228_strides_0, weight = layers_17_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = tensor("op_27228_cast_fp16")]; tensor var_27234_pad_type_0 = const()[name = tensor("op_27234_pad_type_0"), val = tensor("valid")]; tensor var_27234_strides_0 = const()[name = tensor("op_27234_strides_0"), val = tensor([1, 1])]; tensor var_27234_pad_0 = const()[name = tensor("op_27234_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27234_dilations_0 = const()[name = tensor("op_27234_dilations_0"), val = tensor([1, 1])]; tensor var_27234_groups_0 = const()[name = tensor("op_27234_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235854656))), name = tensor("layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(235825408))), shape = tensor([1280, 1280, 1, 1])]; tensor var_27234_cast_fp16 = conv(dilations = var_27234_dilations_0, groups = var_27234_groups_0, pad = var_27234_pad_0, pad_type = var_27234_pad_type_0, strides = var_27234_strides_0, weight = layers_17_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = tensor("op_27234_cast_fp16")]; tensor key_35_cast_fp16 = add(x = var_27228_cast_fp16, y = var_27234_cast_fp16)[name = tensor("key_35_cast_fp16")]; tensor var_27244_pad_type_0 = const()[name = tensor("op_27244_pad_type_0"), val = tensor("valid")]; tensor var_27244_strides_0 = const()[name = tensor("op_27244_strides_0"), val = tensor([1, 1])]; tensor var_27244_pad_0 = const()[name = tensor("op_27244_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27244_dilations_0 = const()[name = tensor("op_27244_dilations_0"), val = tensor([1, 1])]; tensor var_27244_groups_0 = const()[name = tensor("op_27244_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236059520))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236878784))), name = tensor("layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_17_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236878912)))]; tensor var_27244_cast_fp16 = conv(bias = layers_17_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_27244_dilations_0, groups = var_27244_groups_0, pad = var_27244_pad_0, pad_type = var_27244_pad_type_0, strides = var_27244_strides_0, weight = layers_17_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_69_cast_fp16)[name = tensor("op_27244_cast_fp16")]; tensor var_27250_pad_type_0 = const()[name = tensor("op_27250_pad_type_0"), val = tensor("valid")]; tensor var_27250_strides_0 = const()[name = tensor("op_27250_strides_0"), val = tensor([1, 1])]; tensor var_27250_pad_0 = const()[name = tensor("op_27250_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_27250_dilations_0 = const()[name = tensor("op_27250_dilations_0"), val = tensor([1, 1])]; tensor var_27250_groups_0 = const()[name = tensor("op_27250_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236897920))), name = tensor("layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(236881536))), shape = tensor([1280, 1280, 1, 1])]; tensor var_27250_cast_fp16 = conv(dilations = var_27250_dilations_0, groups = var_27250_groups_0, pad = var_27250_pad_0, pad_type = var_27250_pad_type_0, strides = var_27250_strides_0, weight = layers_17_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_69_cast_fp16)[name = tensor("op_27250_cast_fp16")]; tensor value_35_cast_fp16 = add(x = var_27244_cast_fp16, y = var_27250_cast_fp16)[name = tensor("value_35_cast_fp16")]; tensor var_27256_begin_0 = const()[name = tensor("op_27256_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27256_end_0 = const()[name = tensor("op_27256_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27256_end_mask_0 = const()[name = tensor("op_27256_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27256_cast_fp16 = slice_by_index(begin = var_27256_begin_0, end = var_27256_end_0, end_mask = var_27256_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27256_cast_fp16")]; tensor var_27260_begin_0 = const()[name = tensor("op_27260_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_27260_end_0 = const()[name = tensor("op_27260_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_27260_end_mask_0 = const()[name = tensor("op_27260_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27260_cast_fp16 = slice_by_index(begin = var_27260_begin_0, end = var_27260_end_0, end_mask = var_27260_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27260_cast_fp16")]; tensor var_27264_begin_0 = const()[name = tensor("op_27264_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_27264_end_0 = const()[name = tensor("op_27264_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_27264_end_mask_0 = const()[name = tensor("op_27264_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27264_cast_fp16 = slice_by_index(begin = var_27264_begin_0, end = var_27264_end_0, end_mask = var_27264_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27264_cast_fp16")]; tensor var_27268_begin_0 = const()[name = tensor("op_27268_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_27268_end_0 = const()[name = tensor("op_27268_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_27268_end_mask_0 = const()[name = tensor("op_27268_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27268_cast_fp16 = slice_by_index(begin = var_27268_begin_0, end = var_27268_end_0, end_mask = var_27268_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27268_cast_fp16")]; tensor var_27272_begin_0 = const()[name = tensor("op_27272_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_27272_end_0 = const()[name = tensor("op_27272_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_27272_end_mask_0 = const()[name = tensor("op_27272_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27272_cast_fp16 = slice_by_index(begin = var_27272_begin_0, end = var_27272_end_0, end_mask = var_27272_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27272_cast_fp16")]; tensor var_27276_begin_0 = const()[name = tensor("op_27276_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_27276_end_0 = const()[name = tensor("op_27276_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_27276_end_mask_0 = const()[name = tensor("op_27276_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27276_cast_fp16 = slice_by_index(begin = var_27276_begin_0, end = var_27276_end_0, end_mask = var_27276_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27276_cast_fp16")]; tensor var_27280_begin_0 = const()[name = tensor("op_27280_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_27280_end_0 = const()[name = tensor("op_27280_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_27280_end_mask_0 = const()[name = tensor("op_27280_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27280_cast_fp16 = slice_by_index(begin = var_27280_begin_0, end = var_27280_end_0, end_mask = var_27280_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27280_cast_fp16")]; tensor var_27284_begin_0 = const()[name = tensor("op_27284_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_27284_end_0 = const()[name = tensor("op_27284_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_27284_end_mask_0 = const()[name = tensor("op_27284_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27284_cast_fp16 = slice_by_index(begin = var_27284_begin_0, end = var_27284_end_0, end_mask = var_27284_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27284_cast_fp16")]; tensor var_27288_begin_0 = const()[name = tensor("op_27288_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_27288_end_0 = const()[name = tensor("op_27288_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_27288_end_mask_0 = const()[name = tensor("op_27288_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27288_cast_fp16 = slice_by_index(begin = var_27288_begin_0, end = var_27288_end_0, end_mask = var_27288_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27288_cast_fp16")]; tensor var_27292_begin_0 = const()[name = tensor("op_27292_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_27292_end_0 = const()[name = tensor("op_27292_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_27292_end_mask_0 = const()[name = tensor("op_27292_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27292_cast_fp16 = slice_by_index(begin = var_27292_begin_0, end = var_27292_end_0, end_mask = var_27292_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27292_cast_fp16")]; tensor var_27296_begin_0 = const()[name = tensor("op_27296_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_27296_end_0 = const()[name = tensor("op_27296_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_27296_end_mask_0 = const()[name = tensor("op_27296_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27296_cast_fp16 = slice_by_index(begin = var_27296_begin_0, end = var_27296_end_0, end_mask = var_27296_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27296_cast_fp16")]; tensor var_27300_begin_0 = const()[name = tensor("op_27300_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_27300_end_0 = const()[name = tensor("op_27300_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_27300_end_mask_0 = const()[name = tensor("op_27300_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27300_cast_fp16 = slice_by_index(begin = var_27300_begin_0, end = var_27300_end_0, end_mask = var_27300_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27300_cast_fp16")]; tensor var_27304_begin_0 = const()[name = tensor("op_27304_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_27304_end_0 = const()[name = tensor("op_27304_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_27304_end_mask_0 = const()[name = tensor("op_27304_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27304_cast_fp16 = slice_by_index(begin = var_27304_begin_0, end = var_27304_end_0, end_mask = var_27304_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27304_cast_fp16")]; tensor var_27308_begin_0 = const()[name = tensor("op_27308_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_27308_end_0 = const()[name = tensor("op_27308_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_27308_end_mask_0 = const()[name = tensor("op_27308_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27308_cast_fp16 = slice_by_index(begin = var_27308_begin_0, end = var_27308_end_0, end_mask = var_27308_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27308_cast_fp16")]; tensor var_27312_begin_0 = const()[name = tensor("op_27312_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_27312_end_0 = const()[name = tensor("op_27312_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_27312_end_mask_0 = const()[name = tensor("op_27312_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27312_cast_fp16 = slice_by_index(begin = var_27312_begin_0, end = var_27312_end_0, end_mask = var_27312_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27312_cast_fp16")]; tensor var_27316_begin_0 = const()[name = tensor("op_27316_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_27316_end_0 = const()[name = tensor("op_27316_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_27316_end_mask_0 = const()[name = tensor("op_27316_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27316_cast_fp16 = slice_by_index(begin = var_27316_begin_0, end = var_27316_end_0, end_mask = var_27316_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27316_cast_fp16")]; tensor var_27320_begin_0 = const()[name = tensor("op_27320_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_27320_end_0 = const()[name = tensor("op_27320_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_27320_end_mask_0 = const()[name = tensor("op_27320_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27320_cast_fp16 = slice_by_index(begin = var_27320_begin_0, end = var_27320_end_0, end_mask = var_27320_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27320_cast_fp16")]; tensor var_27324_begin_0 = const()[name = tensor("op_27324_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_27324_end_0 = const()[name = tensor("op_27324_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_27324_end_mask_0 = const()[name = tensor("op_27324_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27324_cast_fp16 = slice_by_index(begin = var_27324_begin_0, end = var_27324_end_0, end_mask = var_27324_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27324_cast_fp16")]; tensor var_27328_begin_0 = const()[name = tensor("op_27328_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_27328_end_0 = const()[name = tensor("op_27328_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_27328_end_mask_0 = const()[name = tensor("op_27328_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27328_cast_fp16 = slice_by_index(begin = var_27328_begin_0, end = var_27328_end_0, end_mask = var_27328_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27328_cast_fp16")]; tensor var_27332_begin_0 = const()[name = tensor("op_27332_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_27332_end_0 = const()[name = tensor("op_27332_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_27332_end_mask_0 = const()[name = tensor("op_27332_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27332_cast_fp16 = slice_by_index(begin = var_27332_begin_0, end = var_27332_end_0, end_mask = var_27332_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_27332_cast_fp16")]; tensor var_27341_begin_0 = const()[name = tensor("op_27341_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27341_end_0 = const()[name = tensor("op_27341_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27341_end_mask_0 = const()[name = tensor("op_27341_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27341_cast_fp16 = slice_by_index(begin = var_27341_begin_0, end = var_27341_end_0, end_mask = var_27341_end_mask_0, x = var_27256_cast_fp16)[name = tensor("op_27341_cast_fp16")]; tensor var_27348_begin_0 = const()[name = tensor("op_27348_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27348_end_0 = const()[name = tensor("op_27348_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27348_end_mask_0 = const()[name = tensor("op_27348_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27348_cast_fp16 = slice_by_index(begin = var_27348_begin_0, end = var_27348_end_0, end_mask = var_27348_end_mask_0, x = var_27256_cast_fp16)[name = tensor("op_27348_cast_fp16")]; tensor var_27355_begin_0 = const()[name = tensor("op_27355_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27355_end_0 = const()[name = tensor("op_27355_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27355_end_mask_0 = const()[name = tensor("op_27355_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27355_cast_fp16 = slice_by_index(begin = var_27355_begin_0, end = var_27355_end_0, end_mask = var_27355_end_mask_0, x = var_27256_cast_fp16)[name = tensor("op_27355_cast_fp16")]; tensor var_27362_begin_0 = const()[name = tensor("op_27362_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27362_end_0 = const()[name = tensor("op_27362_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27362_end_mask_0 = const()[name = tensor("op_27362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27362_cast_fp16 = slice_by_index(begin = var_27362_begin_0, end = var_27362_end_0, end_mask = var_27362_end_mask_0, x = var_27256_cast_fp16)[name = tensor("op_27362_cast_fp16")]; tensor var_27369_begin_0 = const()[name = tensor("op_27369_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27369_end_0 = const()[name = tensor("op_27369_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27369_end_mask_0 = const()[name = tensor("op_27369_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27369_cast_fp16 = slice_by_index(begin = var_27369_begin_0, end = var_27369_end_0, end_mask = var_27369_end_mask_0, x = var_27260_cast_fp16)[name = tensor("op_27369_cast_fp16")]; tensor var_27376_begin_0 = const()[name = tensor("op_27376_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27376_end_0 = const()[name = tensor("op_27376_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27376_end_mask_0 = const()[name = tensor("op_27376_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27376_cast_fp16 = slice_by_index(begin = var_27376_begin_0, end = var_27376_end_0, end_mask = var_27376_end_mask_0, x = var_27260_cast_fp16)[name = tensor("op_27376_cast_fp16")]; tensor var_27383_begin_0 = const()[name = tensor("op_27383_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27383_end_0 = const()[name = tensor("op_27383_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27383_end_mask_0 = const()[name = tensor("op_27383_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27383_cast_fp16 = slice_by_index(begin = var_27383_begin_0, end = var_27383_end_0, end_mask = var_27383_end_mask_0, x = var_27260_cast_fp16)[name = tensor("op_27383_cast_fp16")]; tensor var_27390_begin_0 = const()[name = tensor("op_27390_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27390_end_0 = const()[name = tensor("op_27390_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27390_end_mask_0 = const()[name = tensor("op_27390_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27390_cast_fp16 = slice_by_index(begin = var_27390_begin_0, end = var_27390_end_0, end_mask = var_27390_end_mask_0, x = var_27260_cast_fp16)[name = tensor("op_27390_cast_fp16")]; tensor var_27397_begin_0 = const()[name = tensor("op_27397_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27397_end_0 = const()[name = tensor("op_27397_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27397_end_mask_0 = const()[name = tensor("op_27397_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27397_cast_fp16 = slice_by_index(begin = var_27397_begin_0, end = var_27397_end_0, end_mask = var_27397_end_mask_0, x = var_27264_cast_fp16)[name = tensor("op_27397_cast_fp16")]; tensor var_27404_begin_0 = const()[name = tensor("op_27404_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27404_end_0 = const()[name = tensor("op_27404_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27404_end_mask_0 = const()[name = tensor("op_27404_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27404_cast_fp16 = slice_by_index(begin = var_27404_begin_0, end = var_27404_end_0, end_mask = var_27404_end_mask_0, x = var_27264_cast_fp16)[name = tensor("op_27404_cast_fp16")]; tensor var_27411_begin_0 = const()[name = tensor("op_27411_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27411_end_0 = const()[name = tensor("op_27411_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27411_end_mask_0 = const()[name = tensor("op_27411_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27411_cast_fp16 = slice_by_index(begin = var_27411_begin_0, end = var_27411_end_0, end_mask = var_27411_end_mask_0, x = var_27264_cast_fp16)[name = tensor("op_27411_cast_fp16")]; tensor var_27418_begin_0 = const()[name = tensor("op_27418_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27418_end_0 = const()[name = tensor("op_27418_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27418_end_mask_0 = const()[name = tensor("op_27418_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27418_cast_fp16 = slice_by_index(begin = var_27418_begin_0, end = var_27418_end_0, end_mask = var_27418_end_mask_0, x = var_27264_cast_fp16)[name = tensor("op_27418_cast_fp16")]; tensor var_27425_begin_0 = const()[name = tensor("op_27425_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27425_end_0 = const()[name = tensor("op_27425_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27425_end_mask_0 = const()[name = tensor("op_27425_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27425_cast_fp16 = slice_by_index(begin = var_27425_begin_0, end = var_27425_end_0, end_mask = var_27425_end_mask_0, x = var_27268_cast_fp16)[name = tensor("op_27425_cast_fp16")]; tensor var_27432_begin_0 = const()[name = tensor("op_27432_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27432_end_0 = const()[name = tensor("op_27432_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27432_end_mask_0 = const()[name = tensor("op_27432_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27432_cast_fp16 = slice_by_index(begin = var_27432_begin_0, end = var_27432_end_0, end_mask = var_27432_end_mask_0, x = var_27268_cast_fp16)[name = tensor("op_27432_cast_fp16")]; tensor var_27439_begin_0 = const()[name = tensor("op_27439_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27439_end_0 = const()[name = tensor("op_27439_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27439_end_mask_0 = const()[name = tensor("op_27439_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27439_cast_fp16 = slice_by_index(begin = var_27439_begin_0, end = var_27439_end_0, end_mask = var_27439_end_mask_0, x = var_27268_cast_fp16)[name = tensor("op_27439_cast_fp16")]; tensor var_27446_begin_0 = const()[name = tensor("op_27446_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27446_end_0 = const()[name = tensor("op_27446_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27446_end_mask_0 = const()[name = tensor("op_27446_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27446_cast_fp16 = slice_by_index(begin = var_27446_begin_0, end = var_27446_end_0, end_mask = var_27446_end_mask_0, x = var_27268_cast_fp16)[name = tensor("op_27446_cast_fp16")]; tensor var_27453_begin_0 = const()[name = tensor("op_27453_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27453_end_0 = const()[name = tensor("op_27453_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27453_end_mask_0 = const()[name = tensor("op_27453_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27453_cast_fp16 = slice_by_index(begin = var_27453_begin_0, end = var_27453_end_0, end_mask = var_27453_end_mask_0, x = var_27272_cast_fp16)[name = tensor("op_27453_cast_fp16")]; tensor var_27460_begin_0 = const()[name = tensor("op_27460_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27460_end_0 = const()[name = tensor("op_27460_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27460_end_mask_0 = const()[name = tensor("op_27460_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27460_cast_fp16 = slice_by_index(begin = var_27460_begin_0, end = var_27460_end_0, end_mask = var_27460_end_mask_0, x = var_27272_cast_fp16)[name = tensor("op_27460_cast_fp16")]; tensor var_27467_begin_0 = const()[name = tensor("op_27467_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27467_end_0 = const()[name = tensor("op_27467_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27467_end_mask_0 = const()[name = tensor("op_27467_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27467_cast_fp16 = slice_by_index(begin = var_27467_begin_0, end = var_27467_end_0, end_mask = var_27467_end_mask_0, x = var_27272_cast_fp16)[name = tensor("op_27467_cast_fp16")]; tensor var_27474_begin_0 = const()[name = tensor("op_27474_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27474_end_0 = const()[name = tensor("op_27474_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27474_end_mask_0 = const()[name = tensor("op_27474_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27474_cast_fp16 = slice_by_index(begin = var_27474_begin_0, end = var_27474_end_0, end_mask = var_27474_end_mask_0, x = var_27272_cast_fp16)[name = tensor("op_27474_cast_fp16")]; tensor var_27481_begin_0 = const()[name = tensor("op_27481_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27481_end_0 = const()[name = tensor("op_27481_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27481_end_mask_0 = const()[name = tensor("op_27481_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27481_cast_fp16 = slice_by_index(begin = var_27481_begin_0, end = var_27481_end_0, end_mask = var_27481_end_mask_0, x = var_27276_cast_fp16)[name = tensor("op_27481_cast_fp16")]; tensor var_27488_begin_0 = const()[name = tensor("op_27488_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27488_end_0 = const()[name = tensor("op_27488_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27488_end_mask_0 = const()[name = tensor("op_27488_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27488_cast_fp16 = slice_by_index(begin = var_27488_begin_0, end = var_27488_end_0, end_mask = var_27488_end_mask_0, x = var_27276_cast_fp16)[name = tensor("op_27488_cast_fp16")]; tensor var_27495_begin_0 = const()[name = tensor("op_27495_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27495_end_0 = const()[name = tensor("op_27495_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27495_end_mask_0 = const()[name = tensor("op_27495_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27495_cast_fp16 = slice_by_index(begin = var_27495_begin_0, end = var_27495_end_0, end_mask = var_27495_end_mask_0, x = var_27276_cast_fp16)[name = tensor("op_27495_cast_fp16")]; tensor var_27502_begin_0 = const()[name = tensor("op_27502_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27502_end_0 = const()[name = tensor("op_27502_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27502_end_mask_0 = const()[name = tensor("op_27502_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27502_cast_fp16 = slice_by_index(begin = var_27502_begin_0, end = var_27502_end_0, end_mask = var_27502_end_mask_0, x = var_27276_cast_fp16)[name = tensor("op_27502_cast_fp16")]; tensor var_27509_begin_0 = const()[name = tensor("op_27509_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27509_end_0 = const()[name = tensor("op_27509_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27509_end_mask_0 = const()[name = tensor("op_27509_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27509_cast_fp16 = slice_by_index(begin = var_27509_begin_0, end = var_27509_end_0, end_mask = var_27509_end_mask_0, x = var_27280_cast_fp16)[name = tensor("op_27509_cast_fp16")]; tensor var_27516_begin_0 = const()[name = tensor("op_27516_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27516_end_0 = const()[name = tensor("op_27516_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27516_end_mask_0 = const()[name = tensor("op_27516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27516_cast_fp16 = slice_by_index(begin = var_27516_begin_0, end = var_27516_end_0, end_mask = var_27516_end_mask_0, x = var_27280_cast_fp16)[name = tensor("op_27516_cast_fp16")]; tensor var_27523_begin_0 = const()[name = tensor("op_27523_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27523_end_0 = const()[name = tensor("op_27523_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27523_end_mask_0 = const()[name = tensor("op_27523_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27523_cast_fp16 = slice_by_index(begin = var_27523_begin_0, end = var_27523_end_0, end_mask = var_27523_end_mask_0, x = var_27280_cast_fp16)[name = tensor("op_27523_cast_fp16")]; tensor var_27530_begin_0 = const()[name = tensor("op_27530_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27530_end_0 = const()[name = tensor("op_27530_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27530_end_mask_0 = const()[name = tensor("op_27530_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27530_cast_fp16 = slice_by_index(begin = var_27530_begin_0, end = var_27530_end_0, end_mask = var_27530_end_mask_0, x = var_27280_cast_fp16)[name = tensor("op_27530_cast_fp16")]; tensor var_27537_begin_0 = const()[name = tensor("op_27537_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27537_end_0 = const()[name = tensor("op_27537_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27537_end_mask_0 = const()[name = tensor("op_27537_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27537_cast_fp16 = slice_by_index(begin = var_27537_begin_0, end = var_27537_end_0, end_mask = var_27537_end_mask_0, x = var_27284_cast_fp16)[name = tensor("op_27537_cast_fp16")]; tensor var_27544_begin_0 = const()[name = tensor("op_27544_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27544_end_0 = const()[name = tensor("op_27544_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27544_end_mask_0 = const()[name = tensor("op_27544_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27544_cast_fp16 = slice_by_index(begin = var_27544_begin_0, end = var_27544_end_0, end_mask = var_27544_end_mask_0, x = var_27284_cast_fp16)[name = tensor("op_27544_cast_fp16")]; tensor var_27551_begin_0 = const()[name = tensor("op_27551_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27551_end_0 = const()[name = tensor("op_27551_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27551_end_mask_0 = const()[name = tensor("op_27551_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27551_cast_fp16 = slice_by_index(begin = var_27551_begin_0, end = var_27551_end_0, end_mask = var_27551_end_mask_0, x = var_27284_cast_fp16)[name = tensor("op_27551_cast_fp16")]; tensor var_27558_begin_0 = const()[name = tensor("op_27558_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27558_end_0 = const()[name = tensor("op_27558_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27558_end_mask_0 = const()[name = tensor("op_27558_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27558_cast_fp16 = slice_by_index(begin = var_27558_begin_0, end = var_27558_end_0, end_mask = var_27558_end_mask_0, x = var_27284_cast_fp16)[name = tensor("op_27558_cast_fp16")]; tensor var_27565_begin_0 = const()[name = tensor("op_27565_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27565_end_0 = const()[name = tensor("op_27565_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27565_end_mask_0 = const()[name = tensor("op_27565_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27565_cast_fp16 = slice_by_index(begin = var_27565_begin_0, end = var_27565_end_0, end_mask = var_27565_end_mask_0, x = var_27288_cast_fp16)[name = tensor("op_27565_cast_fp16")]; tensor var_27572_begin_0 = const()[name = tensor("op_27572_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27572_end_0 = const()[name = tensor("op_27572_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27572_end_mask_0 = const()[name = tensor("op_27572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27572_cast_fp16 = slice_by_index(begin = var_27572_begin_0, end = var_27572_end_0, end_mask = var_27572_end_mask_0, x = var_27288_cast_fp16)[name = tensor("op_27572_cast_fp16")]; tensor var_27579_begin_0 = const()[name = tensor("op_27579_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27579_end_0 = const()[name = tensor("op_27579_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27579_end_mask_0 = const()[name = tensor("op_27579_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27579_cast_fp16 = slice_by_index(begin = var_27579_begin_0, end = var_27579_end_0, end_mask = var_27579_end_mask_0, x = var_27288_cast_fp16)[name = tensor("op_27579_cast_fp16")]; tensor var_27586_begin_0 = const()[name = tensor("op_27586_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27586_end_0 = const()[name = tensor("op_27586_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27586_end_mask_0 = const()[name = tensor("op_27586_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27586_cast_fp16 = slice_by_index(begin = var_27586_begin_0, end = var_27586_end_0, end_mask = var_27586_end_mask_0, x = var_27288_cast_fp16)[name = tensor("op_27586_cast_fp16")]; tensor var_27593_begin_0 = const()[name = tensor("op_27593_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27593_end_0 = const()[name = tensor("op_27593_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27593_end_mask_0 = const()[name = tensor("op_27593_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27593_cast_fp16 = slice_by_index(begin = var_27593_begin_0, end = var_27593_end_0, end_mask = var_27593_end_mask_0, x = var_27292_cast_fp16)[name = tensor("op_27593_cast_fp16")]; tensor var_27600_begin_0 = const()[name = tensor("op_27600_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27600_end_0 = const()[name = tensor("op_27600_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27600_end_mask_0 = const()[name = tensor("op_27600_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27600_cast_fp16 = slice_by_index(begin = var_27600_begin_0, end = var_27600_end_0, end_mask = var_27600_end_mask_0, x = var_27292_cast_fp16)[name = tensor("op_27600_cast_fp16")]; tensor var_27607_begin_0 = const()[name = tensor("op_27607_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27607_end_0 = const()[name = tensor("op_27607_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27607_end_mask_0 = const()[name = tensor("op_27607_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27607_cast_fp16 = slice_by_index(begin = var_27607_begin_0, end = var_27607_end_0, end_mask = var_27607_end_mask_0, x = var_27292_cast_fp16)[name = tensor("op_27607_cast_fp16")]; tensor var_27614_begin_0 = const()[name = tensor("op_27614_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27614_end_0 = const()[name = tensor("op_27614_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27614_end_mask_0 = const()[name = tensor("op_27614_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27614_cast_fp16 = slice_by_index(begin = var_27614_begin_0, end = var_27614_end_0, end_mask = var_27614_end_mask_0, x = var_27292_cast_fp16)[name = tensor("op_27614_cast_fp16")]; tensor var_27621_begin_0 = const()[name = tensor("op_27621_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27621_end_0 = const()[name = tensor("op_27621_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27621_end_mask_0 = const()[name = tensor("op_27621_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27621_cast_fp16 = slice_by_index(begin = var_27621_begin_0, end = var_27621_end_0, end_mask = var_27621_end_mask_0, x = var_27296_cast_fp16)[name = tensor("op_27621_cast_fp16")]; tensor var_27628_begin_0 = const()[name = tensor("op_27628_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27628_end_0 = const()[name = tensor("op_27628_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27628_end_mask_0 = const()[name = tensor("op_27628_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27628_cast_fp16 = slice_by_index(begin = var_27628_begin_0, end = var_27628_end_0, end_mask = var_27628_end_mask_0, x = var_27296_cast_fp16)[name = tensor("op_27628_cast_fp16")]; tensor var_27635_begin_0 = const()[name = tensor("op_27635_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27635_end_0 = const()[name = tensor("op_27635_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27635_end_mask_0 = const()[name = tensor("op_27635_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27635_cast_fp16 = slice_by_index(begin = var_27635_begin_0, end = var_27635_end_0, end_mask = var_27635_end_mask_0, x = var_27296_cast_fp16)[name = tensor("op_27635_cast_fp16")]; tensor var_27642_begin_0 = const()[name = tensor("op_27642_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27642_end_0 = const()[name = tensor("op_27642_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27642_end_mask_0 = const()[name = tensor("op_27642_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27642_cast_fp16 = slice_by_index(begin = var_27642_begin_0, end = var_27642_end_0, end_mask = var_27642_end_mask_0, x = var_27296_cast_fp16)[name = tensor("op_27642_cast_fp16")]; tensor var_27649_begin_0 = const()[name = tensor("op_27649_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27649_end_0 = const()[name = tensor("op_27649_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27649_end_mask_0 = const()[name = tensor("op_27649_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27649_cast_fp16 = slice_by_index(begin = var_27649_begin_0, end = var_27649_end_0, end_mask = var_27649_end_mask_0, x = var_27300_cast_fp16)[name = tensor("op_27649_cast_fp16")]; tensor var_27656_begin_0 = const()[name = tensor("op_27656_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27656_end_0 = const()[name = tensor("op_27656_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27656_end_mask_0 = const()[name = tensor("op_27656_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27656_cast_fp16 = slice_by_index(begin = var_27656_begin_0, end = var_27656_end_0, end_mask = var_27656_end_mask_0, x = var_27300_cast_fp16)[name = tensor("op_27656_cast_fp16")]; tensor var_27663_begin_0 = const()[name = tensor("op_27663_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27663_end_0 = const()[name = tensor("op_27663_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27663_end_mask_0 = const()[name = tensor("op_27663_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27663_cast_fp16 = slice_by_index(begin = var_27663_begin_0, end = var_27663_end_0, end_mask = var_27663_end_mask_0, x = var_27300_cast_fp16)[name = tensor("op_27663_cast_fp16")]; tensor var_27670_begin_0 = const()[name = tensor("op_27670_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27670_end_0 = const()[name = tensor("op_27670_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27670_end_mask_0 = const()[name = tensor("op_27670_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27670_cast_fp16 = slice_by_index(begin = var_27670_begin_0, end = var_27670_end_0, end_mask = var_27670_end_mask_0, x = var_27300_cast_fp16)[name = tensor("op_27670_cast_fp16")]; tensor var_27677_begin_0 = const()[name = tensor("op_27677_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27677_end_0 = const()[name = tensor("op_27677_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27677_end_mask_0 = const()[name = tensor("op_27677_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27677_cast_fp16 = slice_by_index(begin = var_27677_begin_0, end = var_27677_end_0, end_mask = var_27677_end_mask_0, x = var_27304_cast_fp16)[name = tensor("op_27677_cast_fp16")]; tensor var_27684_begin_0 = const()[name = tensor("op_27684_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27684_end_0 = const()[name = tensor("op_27684_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27684_end_mask_0 = const()[name = tensor("op_27684_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27684_cast_fp16 = slice_by_index(begin = var_27684_begin_0, end = var_27684_end_0, end_mask = var_27684_end_mask_0, x = var_27304_cast_fp16)[name = tensor("op_27684_cast_fp16")]; tensor var_27691_begin_0 = const()[name = tensor("op_27691_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27691_end_0 = const()[name = tensor("op_27691_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27691_end_mask_0 = const()[name = tensor("op_27691_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27691_cast_fp16 = slice_by_index(begin = var_27691_begin_0, end = var_27691_end_0, end_mask = var_27691_end_mask_0, x = var_27304_cast_fp16)[name = tensor("op_27691_cast_fp16")]; tensor var_27698_begin_0 = const()[name = tensor("op_27698_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27698_end_0 = const()[name = tensor("op_27698_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27698_end_mask_0 = const()[name = tensor("op_27698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27698_cast_fp16 = slice_by_index(begin = var_27698_begin_0, end = var_27698_end_0, end_mask = var_27698_end_mask_0, x = var_27304_cast_fp16)[name = tensor("op_27698_cast_fp16")]; tensor var_27705_begin_0 = const()[name = tensor("op_27705_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27705_end_0 = const()[name = tensor("op_27705_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27705_end_mask_0 = const()[name = tensor("op_27705_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27705_cast_fp16 = slice_by_index(begin = var_27705_begin_0, end = var_27705_end_0, end_mask = var_27705_end_mask_0, x = var_27308_cast_fp16)[name = tensor("op_27705_cast_fp16")]; tensor var_27712_begin_0 = const()[name = tensor("op_27712_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27712_end_0 = const()[name = tensor("op_27712_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27712_end_mask_0 = const()[name = tensor("op_27712_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27712_cast_fp16 = slice_by_index(begin = var_27712_begin_0, end = var_27712_end_0, end_mask = var_27712_end_mask_0, x = var_27308_cast_fp16)[name = tensor("op_27712_cast_fp16")]; tensor var_27719_begin_0 = const()[name = tensor("op_27719_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27719_end_0 = const()[name = tensor("op_27719_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27719_end_mask_0 = const()[name = tensor("op_27719_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27719_cast_fp16 = slice_by_index(begin = var_27719_begin_0, end = var_27719_end_0, end_mask = var_27719_end_mask_0, x = var_27308_cast_fp16)[name = tensor("op_27719_cast_fp16")]; tensor var_27726_begin_0 = const()[name = tensor("op_27726_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27726_end_0 = const()[name = tensor("op_27726_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27726_end_mask_0 = const()[name = tensor("op_27726_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27726_cast_fp16 = slice_by_index(begin = var_27726_begin_0, end = var_27726_end_0, end_mask = var_27726_end_mask_0, x = var_27308_cast_fp16)[name = tensor("op_27726_cast_fp16")]; tensor var_27733_begin_0 = const()[name = tensor("op_27733_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27733_end_0 = const()[name = tensor("op_27733_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27733_end_mask_0 = const()[name = tensor("op_27733_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27733_cast_fp16 = slice_by_index(begin = var_27733_begin_0, end = var_27733_end_0, end_mask = var_27733_end_mask_0, x = var_27312_cast_fp16)[name = tensor("op_27733_cast_fp16")]; tensor var_27740_begin_0 = const()[name = tensor("op_27740_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27740_end_0 = const()[name = tensor("op_27740_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27740_end_mask_0 = const()[name = tensor("op_27740_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27740_cast_fp16 = slice_by_index(begin = var_27740_begin_0, end = var_27740_end_0, end_mask = var_27740_end_mask_0, x = var_27312_cast_fp16)[name = tensor("op_27740_cast_fp16")]; tensor var_27747_begin_0 = const()[name = tensor("op_27747_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27747_end_0 = const()[name = tensor("op_27747_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27747_end_mask_0 = const()[name = tensor("op_27747_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27747_cast_fp16 = slice_by_index(begin = var_27747_begin_0, end = var_27747_end_0, end_mask = var_27747_end_mask_0, x = var_27312_cast_fp16)[name = tensor("op_27747_cast_fp16")]; tensor var_27754_begin_0 = const()[name = tensor("op_27754_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27754_end_0 = const()[name = tensor("op_27754_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27754_end_mask_0 = const()[name = tensor("op_27754_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27754_cast_fp16 = slice_by_index(begin = var_27754_begin_0, end = var_27754_end_0, end_mask = var_27754_end_mask_0, x = var_27312_cast_fp16)[name = tensor("op_27754_cast_fp16")]; tensor var_27761_begin_0 = const()[name = tensor("op_27761_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27761_end_0 = const()[name = tensor("op_27761_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27761_end_mask_0 = const()[name = tensor("op_27761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27761_cast_fp16 = slice_by_index(begin = var_27761_begin_0, end = var_27761_end_0, end_mask = var_27761_end_mask_0, x = var_27316_cast_fp16)[name = tensor("op_27761_cast_fp16")]; tensor var_27768_begin_0 = const()[name = tensor("op_27768_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27768_end_0 = const()[name = tensor("op_27768_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27768_end_mask_0 = const()[name = tensor("op_27768_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27768_cast_fp16 = slice_by_index(begin = var_27768_begin_0, end = var_27768_end_0, end_mask = var_27768_end_mask_0, x = var_27316_cast_fp16)[name = tensor("op_27768_cast_fp16")]; tensor var_27775_begin_0 = const()[name = tensor("op_27775_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27775_end_0 = const()[name = tensor("op_27775_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27775_end_mask_0 = const()[name = tensor("op_27775_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27775_cast_fp16 = slice_by_index(begin = var_27775_begin_0, end = var_27775_end_0, end_mask = var_27775_end_mask_0, x = var_27316_cast_fp16)[name = tensor("op_27775_cast_fp16")]; tensor var_27782_begin_0 = const()[name = tensor("op_27782_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27782_end_0 = const()[name = tensor("op_27782_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27782_end_mask_0 = const()[name = tensor("op_27782_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27782_cast_fp16 = slice_by_index(begin = var_27782_begin_0, end = var_27782_end_0, end_mask = var_27782_end_mask_0, x = var_27316_cast_fp16)[name = tensor("op_27782_cast_fp16")]; tensor var_27789_begin_0 = const()[name = tensor("op_27789_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27789_end_0 = const()[name = tensor("op_27789_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27789_end_mask_0 = const()[name = tensor("op_27789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27789_cast_fp16 = slice_by_index(begin = var_27789_begin_0, end = var_27789_end_0, end_mask = var_27789_end_mask_0, x = var_27320_cast_fp16)[name = tensor("op_27789_cast_fp16")]; tensor var_27796_begin_0 = const()[name = tensor("op_27796_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27796_end_0 = const()[name = tensor("op_27796_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27796_end_mask_0 = const()[name = tensor("op_27796_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27796_cast_fp16 = slice_by_index(begin = var_27796_begin_0, end = var_27796_end_0, end_mask = var_27796_end_mask_0, x = var_27320_cast_fp16)[name = tensor("op_27796_cast_fp16")]; tensor var_27803_begin_0 = const()[name = tensor("op_27803_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27803_end_0 = const()[name = tensor("op_27803_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27803_end_mask_0 = const()[name = tensor("op_27803_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27803_cast_fp16 = slice_by_index(begin = var_27803_begin_0, end = var_27803_end_0, end_mask = var_27803_end_mask_0, x = var_27320_cast_fp16)[name = tensor("op_27803_cast_fp16")]; tensor var_27810_begin_0 = const()[name = tensor("op_27810_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27810_end_0 = const()[name = tensor("op_27810_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27810_end_mask_0 = const()[name = tensor("op_27810_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27810_cast_fp16 = slice_by_index(begin = var_27810_begin_0, end = var_27810_end_0, end_mask = var_27810_end_mask_0, x = var_27320_cast_fp16)[name = tensor("op_27810_cast_fp16")]; tensor var_27817_begin_0 = const()[name = tensor("op_27817_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27817_end_0 = const()[name = tensor("op_27817_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27817_end_mask_0 = const()[name = tensor("op_27817_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27817_cast_fp16 = slice_by_index(begin = var_27817_begin_0, end = var_27817_end_0, end_mask = var_27817_end_mask_0, x = var_27324_cast_fp16)[name = tensor("op_27817_cast_fp16")]; tensor var_27824_begin_0 = const()[name = tensor("op_27824_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27824_end_0 = const()[name = tensor("op_27824_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27824_end_mask_0 = const()[name = tensor("op_27824_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27824_cast_fp16 = slice_by_index(begin = var_27824_begin_0, end = var_27824_end_0, end_mask = var_27824_end_mask_0, x = var_27324_cast_fp16)[name = tensor("op_27824_cast_fp16")]; tensor var_27831_begin_0 = const()[name = tensor("op_27831_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27831_end_0 = const()[name = tensor("op_27831_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27831_end_mask_0 = const()[name = tensor("op_27831_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27831_cast_fp16 = slice_by_index(begin = var_27831_begin_0, end = var_27831_end_0, end_mask = var_27831_end_mask_0, x = var_27324_cast_fp16)[name = tensor("op_27831_cast_fp16")]; tensor var_27838_begin_0 = const()[name = tensor("op_27838_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27838_end_0 = const()[name = tensor("op_27838_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27838_end_mask_0 = const()[name = tensor("op_27838_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27838_cast_fp16 = slice_by_index(begin = var_27838_begin_0, end = var_27838_end_0, end_mask = var_27838_end_mask_0, x = var_27324_cast_fp16)[name = tensor("op_27838_cast_fp16")]; tensor var_27845_begin_0 = const()[name = tensor("op_27845_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27845_end_0 = const()[name = tensor("op_27845_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27845_end_mask_0 = const()[name = tensor("op_27845_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27845_cast_fp16 = slice_by_index(begin = var_27845_begin_0, end = var_27845_end_0, end_mask = var_27845_end_mask_0, x = var_27328_cast_fp16)[name = tensor("op_27845_cast_fp16")]; tensor var_27852_begin_0 = const()[name = tensor("op_27852_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27852_end_0 = const()[name = tensor("op_27852_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27852_end_mask_0 = const()[name = tensor("op_27852_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27852_cast_fp16 = slice_by_index(begin = var_27852_begin_0, end = var_27852_end_0, end_mask = var_27852_end_mask_0, x = var_27328_cast_fp16)[name = tensor("op_27852_cast_fp16")]; tensor var_27859_begin_0 = const()[name = tensor("op_27859_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27859_end_0 = const()[name = tensor("op_27859_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27859_end_mask_0 = const()[name = tensor("op_27859_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27859_cast_fp16 = slice_by_index(begin = var_27859_begin_0, end = var_27859_end_0, end_mask = var_27859_end_mask_0, x = var_27328_cast_fp16)[name = tensor("op_27859_cast_fp16")]; tensor var_27866_begin_0 = const()[name = tensor("op_27866_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27866_end_0 = const()[name = tensor("op_27866_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27866_end_mask_0 = const()[name = tensor("op_27866_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27866_cast_fp16 = slice_by_index(begin = var_27866_begin_0, end = var_27866_end_0, end_mask = var_27866_end_mask_0, x = var_27328_cast_fp16)[name = tensor("op_27866_cast_fp16")]; tensor var_27873_begin_0 = const()[name = tensor("op_27873_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27873_end_0 = const()[name = tensor("op_27873_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_27873_end_mask_0 = const()[name = tensor("op_27873_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27873_cast_fp16 = slice_by_index(begin = var_27873_begin_0, end = var_27873_end_0, end_mask = var_27873_end_mask_0, x = var_27332_cast_fp16)[name = tensor("op_27873_cast_fp16")]; tensor var_27880_begin_0 = const()[name = tensor("op_27880_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_27880_end_0 = const()[name = tensor("op_27880_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_27880_end_mask_0 = const()[name = tensor("op_27880_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27880_cast_fp16 = slice_by_index(begin = var_27880_begin_0, end = var_27880_end_0, end_mask = var_27880_end_mask_0, x = var_27332_cast_fp16)[name = tensor("op_27880_cast_fp16")]; tensor var_27887_begin_0 = const()[name = tensor("op_27887_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_27887_end_0 = const()[name = tensor("op_27887_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_27887_end_mask_0 = const()[name = tensor("op_27887_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27887_cast_fp16 = slice_by_index(begin = var_27887_begin_0, end = var_27887_end_0, end_mask = var_27887_end_mask_0, x = var_27332_cast_fp16)[name = tensor("op_27887_cast_fp16")]; tensor var_27894_begin_0 = const()[name = tensor("op_27894_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_27894_end_0 = const()[name = tensor("op_27894_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27894_end_mask_0 = const()[name = tensor("op_27894_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27894_cast_fp16 = slice_by_index(begin = var_27894_begin_0, end = var_27894_end_0, end_mask = var_27894_end_mask_0, x = var_27332_cast_fp16)[name = tensor("op_27894_cast_fp16")]; tensor k_35_perm_0 = const()[name = tensor("k_35_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_27899_begin_0 = const()[name = tensor("op_27899_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27899_end_0 = const()[name = tensor("op_27899_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_27899_end_mask_0 = const()[name = tensor("op_27899_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_35_cast_fp16 = transpose(perm = k_35_perm_0, x = key_35_cast_fp16)[name = tensor("transpose_14")]; tensor var_27899_cast_fp16 = slice_by_index(begin = var_27899_begin_0, end = var_27899_end_0, end_mask = var_27899_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27899_cast_fp16")]; tensor var_27903_begin_0 = const()[name = tensor("op_27903_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_27903_end_0 = const()[name = tensor("op_27903_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_27903_end_mask_0 = const()[name = tensor("op_27903_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27903_cast_fp16 = slice_by_index(begin = var_27903_begin_0, end = var_27903_end_0, end_mask = var_27903_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27903_cast_fp16")]; tensor var_27907_begin_0 = const()[name = tensor("op_27907_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_27907_end_0 = const()[name = tensor("op_27907_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_27907_end_mask_0 = const()[name = tensor("op_27907_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27907_cast_fp16 = slice_by_index(begin = var_27907_begin_0, end = var_27907_end_0, end_mask = var_27907_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27907_cast_fp16")]; tensor var_27911_begin_0 = const()[name = tensor("op_27911_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_27911_end_0 = const()[name = tensor("op_27911_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_27911_end_mask_0 = const()[name = tensor("op_27911_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27911_cast_fp16 = slice_by_index(begin = var_27911_begin_0, end = var_27911_end_0, end_mask = var_27911_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27911_cast_fp16")]; tensor var_27915_begin_0 = const()[name = tensor("op_27915_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_27915_end_0 = const()[name = tensor("op_27915_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_27915_end_mask_0 = const()[name = tensor("op_27915_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27915_cast_fp16 = slice_by_index(begin = var_27915_begin_0, end = var_27915_end_0, end_mask = var_27915_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27915_cast_fp16")]; tensor var_27919_begin_0 = const()[name = tensor("op_27919_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_27919_end_0 = const()[name = tensor("op_27919_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_27919_end_mask_0 = const()[name = tensor("op_27919_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27919_cast_fp16 = slice_by_index(begin = var_27919_begin_0, end = var_27919_end_0, end_mask = var_27919_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27919_cast_fp16")]; tensor var_27923_begin_0 = const()[name = tensor("op_27923_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_27923_end_0 = const()[name = tensor("op_27923_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_27923_end_mask_0 = const()[name = tensor("op_27923_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27923_cast_fp16 = slice_by_index(begin = var_27923_begin_0, end = var_27923_end_0, end_mask = var_27923_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27923_cast_fp16")]; tensor var_27927_begin_0 = const()[name = tensor("op_27927_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_27927_end_0 = const()[name = tensor("op_27927_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_27927_end_mask_0 = const()[name = tensor("op_27927_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27927_cast_fp16 = slice_by_index(begin = var_27927_begin_0, end = var_27927_end_0, end_mask = var_27927_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27927_cast_fp16")]; tensor var_27931_begin_0 = const()[name = tensor("op_27931_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_27931_end_0 = const()[name = tensor("op_27931_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_27931_end_mask_0 = const()[name = tensor("op_27931_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27931_cast_fp16 = slice_by_index(begin = var_27931_begin_0, end = var_27931_end_0, end_mask = var_27931_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27931_cast_fp16")]; tensor var_27935_begin_0 = const()[name = tensor("op_27935_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_27935_end_0 = const()[name = tensor("op_27935_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_27935_end_mask_0 = const()[name = tensor("op_27935_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27935_cast_fp16 = slice_by_index(begin = var_27935_begin_0, end = var_27935_end_0, end_mask = var_27935_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27935_cast_fp16")]; tensor var_27939_begin_0 = const()[name = tensor("op_27939_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_27939_end_0 = const()[name = tensor("op_27939_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_27939_end_mask_0 = const()[name = tensor("op_27939_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27939_cast_fp16 = slice_by_index(begin = var_27939_begin_0, end = var_27939_end_0, end_mask = var_27939_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27939_cast_fp16")]; tensor var_27943_begin_0 = const()[name = tensor("op_27943_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_27943_end_0 = const()[name = tensor("op_27943_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_27943_end_mask_0 = const()[name = tensor("op_27943_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27943_cast_fp16 = slice_by_index(begin = var_27943_begin_0, end = var_27943_end_0, end_mask = var_27943_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27943_cast_fp16")]; tensor var_27947_begin_0 = const()[name = tensor("op_27947_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_27947_end_0 = const()[name = tensor("op_27947_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_27947_end_mask_0 = const()[name = tensor("op_27947_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27947_cast_fp16 = slice_by_index(begin = var_27947_begin_0, end = var_27947_end_0, end_mask = var_27947_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27947_cast_fp16")]; tensor var_27951_begin_0 = const()[name = tensor("op_27951_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_27951_end_0 = const()[name = tensor("op_27951_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_27951_end_mask_0 = const()[name = tensor("op_27951_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27951_cast_fp16 = slice_by_index(begin = var_27951_begin_0, end = var_27951_end_0, end_mask = var_27951_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27951_cast_fp16")]; tensor var_27955_begin_0 = const()[name = tensor("op_27955_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_27955_end_0 = const()[name = tensor("op_27955_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_27955_end_mask_0 = const()[name = tensor("op_27955_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27955_cast_fp16 = slice_by_index(begin = var_27955_begin_0, end = var_27955_end_0, end_mask = var_27955_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27955_cast_fp16")]; tensor var_27959_begin_0 = const()[name = tensor("op_27959_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_27959_end_0 = const()[name = tensor("op_27959_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_27959_end_mask_0 = const()[name = tensor("op_27959_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27959_cast_fp16 = slice_by_index(begin = var_27959_begin_0, end = var_27959_end_0, end_mask = var_27959_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27959_cast_fp16")]; tensor var_27963_begin_0 = const()[name = tensor("op_27963_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_27963_end_0 = const()[name = tensor("op_27963_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_27963_end_mask_0 = const()[name = tensor("op_27963_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27963_cast_fp16 = slice_by_index(begin = var_27963_begin_0, end = var_27963_end_0, end_mask = var_27963_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27963_cast_fp16")]; tensor var_27967_begin_0 = const()[name = tensor("op_27967_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_27967_end_0 = const()[name = tensor("op_27967_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_27967_end_mask_0 = const()[name = tensor("op_27967_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27967_cast_fp16 = slice_by_index(begin = var_27967_begin_0, end = var_27967_end_0, end_mask = var_27967_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27967_cast_fp16")]; tensor var_27971_begin_0 = const()[name = tensor("op_27971_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_27971_end_0 = const()[name = tensor("op_27971_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_27971_end_mask_0 = const()[name = tensor("op_27971_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27971_cast_fp16 = slice_by_index(begin = var_27971_begin_0, end = var_27971_end_0, end_mask = var_27971_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27971_cast_fp16")]; tensor var_27975_begin_0 = const()[name = tensor("op_27975_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_27975_end_0 = const()[name = tensor("op_27975_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_27975_end_mask_0 = const()[name = tensor("op_27975_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_27975_cast_fp16 = slice_by_index(begin = var_27975_begin_0, end = var_27975_end_0, end_mask = var_27975_end_mask_0, x = k_35_cast_fp16)[name = tensor("op_27975_cast_fp16")]; tensor var_27977_begin_0 = const()[name = tensor("op_27977_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_27977_end_0 = const()[name = tensor("op_27977_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_27977_end_mask_0 = const()[name = tensor("op_27977_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27977_cast_fp16 = slice_by_index(begin = var_27977_begin_0, end = var_27977_end_0, end_mask = var_27977_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27977_cast_fp16")]; tensor var_27981_begin_0 = const()[name = tensor("op_27981_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_27981_end_0 = const()[name = tensor("op_27981_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_27981_end_mask_0 = const()[name = tensor("op_27981_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27981_cast_fp16 = slice_by_index(begin = var_27981_begin_0, end = var_27981_end_0, end_mask = var_27981_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27981_cast_fp16")]; tensor var_27985_begin_0 = const()[name = tensor("op_27985_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_27985_end_0 = const()[name = tensor("op_27985_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_27985_end_mask_0 = const()[name = tensor("op_27985_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27985_cast_fp16 = slice_by_index(begin = var_27985_begin_0, end = var_27985_end_0, end_mask = var_27985_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27985_cast_fp16")]; tensor var_27989_begin_0 = const()[name = tensor("op_27989_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_27989_end_0 = const()[name = tensor("op_27989_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_27989_end_mask_0 = const()[name = tensor("op_27989_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27989_cast_fp16 = slice_by_index(begin = var_27989_begin_0, end = var_27989_end_0, end_mask = var_27989_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27989_cast_fp16")]; tensor var_27993_begin_0 = const()[name = tensor("op_27993_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_27993_end_0 = const()[name = tensor("op_27993_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_27993_end_mask_0 = const()[name = tensor("op_27993_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27993_cast_fp16 = slice_by_index(begin = var_27993_begin_0, end = var_27993_end_0, end_mask = var_27993_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27993_cast_fp16")]; tensor var_27997_begin_0 = const()[name = tensor("op_27997_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_27997_end_0 = const()[name = tensor("op_27997_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_27997_end_mask_0 = const()[name = tensor("op_27997_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_27997_cast_fp16 = slice_by_index(begin = var_27997_begin_0, end = var_27997_end_0, end_mask = var_27997_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27997_cast_fp16")]; tensor var_28001_begin_0 = const()[name = tensor("op_28001_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_28001_end_0 = const()[name = tensor("op_28001_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_28001_end_mask_0 = const()[name = tensor("op_28001_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28001_cast_fp16 = slice_by_index(begin = var_28001_begin_0, end = var_28001_end_0, end_mask = var_28001_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28001_cast_fp16")]; tensor var_28005_begin_0 = const()[name = tensor("op_28005_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_28005_end_0 = const()[name = tensor("op_28005_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_28005_end_mask_0 = const()[name = tensor("op_28005_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28005_cast_fp16 = slice_by_index(begin = var_28005_begin_0, end = var_28005_end_0, end_mask = var_28005_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28005_cast_fp16")]; tensor var_28009_begin_0 = const()[name = tensor("op_28009_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_28009_end_0 = const()[name = tensor("op_28009_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_28009_end_mask_0 = const()[name = tensor("op_28009_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28009_cast_fp16 = slice_by_index(begin = var_28009_begin_0, end = var_28009_end_0, end_mask = var_28009_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28009_cast_fp16")]; tensor var_28013_begin_0 = const()[name = tensor("op_28013_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_28013_end_0 = const()[name = tensor("op_28013_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_28013_end_mask_0 = const()[name = tensor("op_28013_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28013_cast_fp16 = slice_by_index(begin = var_28013_begin_0, end = var_28013_end_0, end_mask = var_28013_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28013_cast_fp16")]; tensor var_28017_begin_0 = const()[name = tensor("op_28017_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_28017_end_0 = const()[name = tensor("op_28017_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_28017_end_mask_0 = const()[name = tensor("op_28017_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28017_cast_fp16 = slice_by_index(begin = var_28017_begin_0, end = var_28017_end_0, end_mask = var_28017_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28017_cast_fp16")]; tensor var_28021_begin_0 = const()[name = tensor("op_28021_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_28021_end_0 = const()[name = tensor("op_28021_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_28021_end_mask_0 = const()[name = tensor("op_28021_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28021_cast_fp16 = slice_by_index(begin = var_28021_begin_0, end = var_28021_end_0, end_mask = var_28021_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28021_cast_fp16")]; tensor var_28025_begin_0 = const()[name = tensor("op_28025_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_28025_end_0 = const()[name = tensor("op_28025_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_28025_end_mask_0 = const()[name = tensor("op_28025_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28025_cast_fp16 = slice_by_index(begin = var_28025_begin_0, end = var_28025_end_0, end_mask = var_28025_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28025_cast_fp16")]; tensor var_28029_begin_0 = const()[name = tensor("op_28029_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_28029_end_0 = const()[name = tensor("op_28029_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_28029_end_mask_0 = const()[name = tensor("op_28029_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28029_cast_fp16 = slice_by_index(begin = var_28029_begin_0, end = var_28029_end_0, end_mask = var_28029_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28029_cast_fp16")]; tensor var_28033_begin_0 = const()[name = tensor("op_28033_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_28033_end_0 = const()[name = tensor("op_28033_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_28033_end_mask_0 = const()[name = tensor("op_28033_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28033_cast_fp16 = slice_by_index(begin = var_28033_begin_0, end = var_28033_end_0, end_mask = var_28033_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28033_cast_fp16")]; tensor var_28037_begin_0 = const()[name = tensor("op_28037_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_28037_end_0 = const()[name = tensor("op_28037_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_28037_end_mask_0 = const()[name = tensor("op_28037_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28037_cast_fp16 = slice_by_index(begin = var_28037_begin_0, end = var_28037_end_0, end_mask = var_28037_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28037_cast_fp16")]; tensor var_28041_begin_0 = const()[name = tensor("op_28041_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_28041_end_0 = const()[name = tensor("op_28041_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_28041_end_mask_0 = const()[name = tensor("op_28041_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28041_cast_fp16 = slice_by_index(begin = var_28041_begin_0, end = var_28041_end_0, end_mask = var_28041_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28041_cast_fp16")]; tensor var_28045_begin_0 = const()[name = tensor("op_28045_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_28045_end_0 = const()[name = tensor("op_28045_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_28045_end_mask_0 = const()[name = tensor("op_28045_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28045_cast_fp16 = slice_by_index(begin = var_28045_begin_0, end = var_28045_end_0, end_mask = var_28045_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28045_cast_fp16")]; tensor var_28049_begin_0 = const()[name = tensor("op_28049_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_28049_end_0 = const()[name = tensor("op_28049_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_28049_end_mask_0 = const()[name = tensor("op_28049_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28049_cast_fp16 = slice_by_index(begin = var_28049_begin_0, end = var_28049_end_0, end_mask = var_28049_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28049_cast_fp16")]; tensor var_28053_begin_0 = const()[name = tensor("op_28053_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_28053_end_0 = const()[name = tensor("op_28053_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_28053_end_mask_0 = const()[name = tensor("op_28053_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28053_cast_fp16 = slice_by_index(begin = var_28053_begin_0, end = var_28053_end_0, end_mask = var_28053_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_28053_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2721_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2721_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2721_equation_0, values = (var_27899_cast_fp16, var_27341_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2721_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2723_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2723_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2723_equation_0, values = (var_27899_cast_fp16, var_27348_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2723_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2725_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2725_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2725_equation_0, values = (var_27899_cast_fp16, var_27355_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2725_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2727_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2727_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2727_equation_0, values = (var_27899_cast_fp16, var_27362_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2727_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2729_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2729_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2729_equation_0, values = (var_27903_cast_fp16, var_27369_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2729_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2731_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2731_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2731_equation_0, values = (var_27903_cast_fp16, var_27376_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2731_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2733_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2733_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2733_equation_0, values = (var_27903_cast_fp16, var_27383_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2733_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2735_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2735_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2735_equation_0, values = (var_27903_cast_fp16, var_27390_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2735_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2737_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2737_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2737_equation_0, values = (var_27907_cast_fp16, var_27397_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2737_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2739_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2739_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2739_equation_0, values = (var_27907_cast_fp16, var_27404_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2739_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2741_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2741_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2741_equation_0, values = (var_27907_cast_fp16, var_27411_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2741_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2743_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2743_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2743_equation_0, values = (var_27907_cast_fp16, var_27418_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2743_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2745_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2745_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2745_equation_0, values = (var_27911_cast_fp16, var_27425_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2745_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2747_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2747_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2747_equation_0, values = (var_27911_cast_fp16, var_27432_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2747_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2749_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2749_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2749_equation_0, values = (var_27911_cast_fp16, var_27439_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2749_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2751_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2751_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2751_equation_0, values = (var_27911_cast_fp16, var_27446_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2751_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2753_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2753_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2753_equation_0, values = (var_27915_cast_fp16, var_27453_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2753_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2755_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2755_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2755_equation_0, values = (var_27915_cast_fp16, var_27460_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2755_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2757_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2757_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2757_equation_0, values = (var_27915_cast_fp16, var_27467_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2757_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2759_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2759_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2759_equation_0, values = (var_27915_cast_fp16, var_27474_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2759_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2761_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2761_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2761_equation_0, values = (var_27919_cast_fp16, var_27481_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2761_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2763_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2763_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2763_equation_0, values = (var_27919_cast_fp16, var_27488_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2763_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2765_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2765_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2765_equation_0, values = (var_27919_cast_fp16, var_27495_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2765_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2767_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2767_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2767_equation_0, values = (var_27919_cast_fp16, var_27502_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2767_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2769_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2769_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2769_equation_0, values = (var_27923_cast_fp16, var_27509_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2769_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2771_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2771_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2771_equation_0, values = (var_27923_cast_fp16, var_27516_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2771_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2773_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2773_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2773_equation_0, values = (var_27923_cast_fp16, var_27523_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2773_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2775_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2775_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2775_equation_0, values = (var_27923_cast_fp16, var_27530_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2775_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2777_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2777_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2777_equation_0, values = (var_27927_cast_fp16, var_27537_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2777_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2779_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2779_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2779_equation_0, values = (var_27927_cast_fp16, var_27544_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2779_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2781_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2781_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2781_equation_0, values = (var_27927_cast_fp16, var_27551_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2781_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2783_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2783_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2783_equation_0, values = (var_27927_cast_fp16, var_27558_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2783_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2785_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2785_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2785_equation_0, values = (var_27931_cast_fp16, var_27565_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2785_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2787_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2787_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2787_equation_0, values = (var_27931_cast_fp16, var_27572_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2787_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2789_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2789_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2789_equation_0, values = (var_27931_cast_fp16, var_27579_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2789_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2791_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2791_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2791_equation_0, values = (var_27931_cast_fp16, var_27586_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2791_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2793_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2793_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2793_equation_0, values = (var_27935_cast_fp16, var_27593_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2793_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2795_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2795_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2795_equation_0, values = (var_27935_cast_fp16, var_27600_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2795_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2797_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2797_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2797_equation_0, values = (var_27935_cast_fp16, var_27607_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2797_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2799_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2799_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2799_equation_0, values = (var_27935_cast_fp16, var_27614_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2799_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2801_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2801_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2801_equation_0, values = (var_27939_cast_fp16, var_27621_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2801_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2803_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2803_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2803_equation_0, values = (var_27939_cast_fp16, var_27628_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2803_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2805_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2805_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2805_equation_0, values = (var_27939_cast_fp16, var_27635_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2805_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2807_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2807_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2807_equation_0, values = (var_27939_cast_fp16, var_27642_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2807_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2809_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2809_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2809_equation_0, values = (var_27943_cast_fp16, var_27649_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2809_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2811_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2811_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2811_equation_0, values = (var_27943_cast_fp16, var_27656_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2811_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2813_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2813_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2813_equation_0, values = (var_27943_cast_fp16, var_27663_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2813_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2815_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2815_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2815_equation_0, values = (var_27943_cast_fp16, var_27670_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2815_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2817_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2817_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2817_equation_0, values = (var_27947_cast_fp16, var_27677_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2817_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2819_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2819_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2819_equation_0, values = (var_27947_cast_fp16, var_27684_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2819_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2821_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2821_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2821_equation_0, values = (var_27947_cast_fp16, var_27691_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2821_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2823_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2823_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2823_equation_0, values = (var_27947_cast_fp16, var_27698_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2823_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2825_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2825_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2825_equation_0, values = (var_27951_cast_fp16, var_27705_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2825_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2827_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2827_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2827_equation_0, values = (var_27951_cast_fp16, var_27712_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2827_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2829_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2829_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2829_equation_0, values = (var_27951_cast_fp16, var_27719_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2829_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2831_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2831_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2831_equation_0, values = (var_27951_cast_fp16, var_27726_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2831_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2833_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2833_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2833_equation_0, values = (var_27955_cast_fp16, var_27733_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2833_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2835_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2835_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2835_equation_0, values = (var_27955_cast_fp16, var_27740_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2835_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2837_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2837_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2837_equation_0, values = (var_27955_cast_fp16, var_27747_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2837_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2839_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2839_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2839_equation_0, values = (var_27955_cast_fp16, var_27754_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2839_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2841_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2841_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2841_equation_0, values = (var_27959_cast_fp16, var_27761_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2841_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2843_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2843_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2843_equation_0, values = (var_27959_cast_fp16, var_27768_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2843_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2845_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2845_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2845_equation_0, values = (var_27959_cast_fp16, var_27775_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2845_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2847_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2847_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2847_equation_0, values = (var_27959_cast_fp16, var_27782_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2847_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2849_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2849_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2849_equation_0, values = (var_27963_cast_fp16, var_27789_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2849_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2851_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2851_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2851_equation_0, values = (var_27963_cast_fp16, var_27796_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2851_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2853_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2853_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2853_equation_0, values = (var_27963_cast_fp16, var_27803_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2853_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2855_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2855_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2855_equation_0, values = (var_27963_cast_fp16, var_27810_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2855_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2857_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2857_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2857_equation_0, values = (var_27967_cast_fp16, var_27817_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2857_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2859_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2859_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2859_equation_0, values = (var_27967_cast_fp16, var_27824_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2859_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2861_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2861_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2861_equation_0, values = (var_27967_cast_fp16, var_27831_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2861_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2863_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2863_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2863_equation_0, values = (var_27967_cast_fp16, var_27838_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2863_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2865_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2865_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2865_equation_0, values = (var_27971_cast_fp16, var_27845_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2865_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2867_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2867_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2867_equation_0, values = (var_27971_cast_fp16, var_27852_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2867_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2869_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2869_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2869_equation_0, values = (var_27971_cast_fp16, var_27859_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2869_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2871_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2871_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2871_equation_0, values = (var_27971_cast_fp16, var_27866_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2871_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2873_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2873_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2873_equation_0, values = (var_27975_cast_fp16, var_27873_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2873_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2875_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2875_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2875_equation_0, values = (var_27975_cast_fp16, var_27880_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2875_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2877_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2877_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2877_equation_0, values = (var_27975_cast_fp16, var_27887_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2877_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2879_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2879_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2879_equation_0, values = (var_27975_cast_fp16, var_27894_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2879_cast_fp16")]; tensor var_28216_to_fp16 = const()[name = tensor("op_28216_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2721_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2721_cast_fp16, y = var_28216_to_fp16)[name = tensor("aw_chunk_2721_cast_fp16")]; tensor var_28218_to_fp16 = const()[name = tensor("op_28218_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2723_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2723_cast_fp16, y = var_28218_to_fp16)[name = tensor("aw_chunk_2723_cast_fp16")]; tensor var_28220_to_fp16 = const()[name = tensor("op_28220_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2725_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2725_cast_fp16, y = var_28220_to_fp16)[name = tensor("aw_chunk_2725_cast_fp16")]; tensor var_28222_to_fp16 = const()[name = tensor("op_28222_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2727_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2727_cast_fp16, y = var_28222_to_fp16)[name = tensor("aw_chunk_2727_cast_fp16")]; tensor var_28224_to_fp16 = const()[name = tensor("op_28224_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2729_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2729_cast_fp16, y = var_28224_to_fp16)[name = tensor("aw_chunk_2729_cast_fp16")]; tensor var_28226_to_fp16 = const()[name = tensor("op_28226_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2731_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2731_cast_fp16, y = var_28226_to_fp16)[name = tensor("aw_chunk_2731_cast_fp16")]; tensor var_28228_to_fp16 = const()[name = tensor("op_28228_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2733_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2733_cast_fp16, y = var_28228_to_fp16)[name = tensor("aw_chunk_2733_cast_fp16")]; tensor var_28230_to_fp16 = const()[name = tensor("op_28230_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2735_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2735_cast_fp16, y = var_28230_to_fp16)[name = tensor("aw_chunk_2735_cast_fp16")]; tensor var_28232_to_fp16 = const()[name = tensor("op_28232_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2737_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2737_cast_fp16, y = var_28232_to_fp16)[name = tensor("aw_chunk_2737_cast_fp16")]; tensor var_28234_to_fp16 = const()[name = tensor("op_28234_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2739_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2739_cast_fp16, y = var_28234_to_fp16)[name = tensor("aw_chunk_2739_cast_fp16")]; tensor var_28236_to_fp16 = const()[name = tensor("op_28236_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2741_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2741_cast_fp16, y = var_28236_to_fp16)[name = tensor("aw_chunk_2741_cast_fp16")]; tensor var_28238_to_fp16 = const()[name = tensor("op_28238_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2743_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2743_cast_fp16, y = var_28238_to_fp16)[name = tensor("aw_chunk_2743_cast_fp16")]; tensor var_28240_to_fp16 = const()[name = tensor("op_28240_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2745_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2745_cast_fp16, y = var_28240_to_fp16)[name = tensor("aw_chunk_2745_cast_fp16")]; tensor var_28242_to_fp16 = const()[name = tensor("op_28242_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2747_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2747_cast_fp16, y = var_28242_to_fp16)[name = tensor("aw_chunk_2747_cast_fp16")]; tensor var_28244_to_fp16 = const()[name = tensor("op_28244_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2749_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2749_cast_fp16, y = var_28244_to_fp16)[name = tensor("aw_chunk_2749_cast_fp16")]; tensor var_28246_to_fp16 = const()[name = tensor("op_28246_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2751_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2751_cast_fp16, y = var_28246_to_fp16)[name = tensor("aw_chunk_2751_cast_fp16")]; tensor var_28248_to_fp16 = const()[name = tensor("op_28248_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2753_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2753_cast_fp16, y = var_28248_to_fp16)[name = tensor("aw_chunk_2753_cast_fp16")]; tensor var_28250_to_fp16 = const()[name = tensor("op_28250_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2755_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2755_cast_fp16, y = var_28250_to_fp16)[name = tensor("aw_chunk_2755_cast_fp16")]; tensor var_28252_to_fp16 = const()[name = tensor("op_28252_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2757_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2757_cast_fp16, y = var_28252_to_fp16)[name = tensor("aw_chunk_2757_cast_fp16")]; tensor var_28254_to_fp16 = const()[name = tensor("op_28254_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2759_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2759_cast_fp16, y = var_28254_to_fp16)[name = tensor("aw_chunk_2759_cast_fp16")]; tensor var_28256_to_fp16 = const()[name = tensor("op_28256_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2761_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2761_cast_fp16, y = var_28256_to_fp16)[name = tensor("aw_chunk_2761_cast_fp16")]; tensor var_28258_to_fp16 = const()[name = tensor("op_28258_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2763_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2763_cast_fp16, y = var_28258_to_fp16)[name = tensor("aw_chunk_2763_cast_fp16")]; tensor var_28260_to_fp16 = const()[name = tensor("op_28260_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2765_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2765_cast_fp16, y = var_28260_to_fp16)[name = tensor("aw_chunk_2765_cast_fp16")]; tensor var_28262_to_fp16 = const()[name = tensor("op_28262_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2767_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2767_cast_fp16, y = var_28262_to_fp16)[name = tensor("aw_chunk_2767_cast_fp16")]; tensor var_28264_to_fp16 = const()[name = tensor("op_28264_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2769_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2769_cast_fp16, y = var_28264_to_fp16)[name = tensor("aw_chunk_2769_cast_fp16")]; tensor var_28266_to_fp16 = const()[name = tensor("op_28266_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2771_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2771_cast_fp16, y = var_28266_to_fp16)[name = tensor("aw_chunk_2771_cast_fp16")]; tensor var_28268_to_fp16 = const()[name = tensor("op_28268_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2773_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2773_cast_fp16, y = var_28268_to_fp16)[name = tensor("aw_chunk_2773_cast_fp16")]; tensor var_28270_to_fp16 = const()[name = tensor("op_28270_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2775_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2775_cast_fp16, y = var_28270_to_fp16)[name = tensor("aw_chunk_2775_cast_fp16")]; tensor var_28272_to_fp16 = const()[name = tensor("op_28272_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2777_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2777_cast_fp16, y = var_28272_to_fp16)[name = tensor("aw_chunk_2777_cast_fp16")]; tensor var_28274_to_fp16 = const()[name = tensor("op_28274_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2779_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2779_cast_fp16, y = var_28274_to_fp16)[name = tensor("aw_chunk_2779_cast_fp16")]; tensor var_28276_to_fp16 = const()[name = tensor("op_28276_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2781_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2781_cast_fp16, y = var_28276_to_fp16)[name = tensor("aw_chunk_2781_cast_fp16")]; tensor var_28278_to_fp16 = const()[name = tensor("op_28278_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2783_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2783_cast_fp16, y = var_28278_to_fp16)[name = tensor("aw_chunk_2783_cast_fp16")]; tensor var_28280_to_fp16 = const()[name = tensor("op_28280_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2785_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2785_cast_fp16, y = var_28280_to_fp16)[name = tensor("aw_chunk_2785_cast_fp16")]; tensor var_28282_to_fp16 = const()[name = tensor("op_28282_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2787_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2787_cast_fp16, y = var_28282_to_fp16)[name = tensor("aw_chunk_2787_cast_fp16")]; tensor var_28284_to_fp16 = const()[name = tensor("op_28284_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2789_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2789_cast_fp16, y = var_28284_to_fp16)[name = tensor("aw_chunk_2789_cast_fp16")]; tensor var_28286_to_fp16 = const()[name = tensor("op_28286_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2791_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2791_cast_fp16, y = var_28286_to_fp16)[name = tensor("aw_chunk_2791_cast_fp16")]; tensor var_28288_to_fp16 = const()[name = tensor("op_28288_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2793_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2793_cast_fp16, y = var_28288_to_fp16)[name = tensor("aw_chunk_2793_cast_fp16")]; tensor var_28290_to_fp16 = const()[name = tensor("op_28290_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2795_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2795_cast_fp16, y = var_28290_to_fp16)[name = tensor("aw_chunk_2795_cast_fp16")]; tensor var_28292_to_fp16 = const()[name = tensor("op_28292_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2797_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2797_cast_fp16, y = var_28292_to_fp16)[name = tensor("aw_chunk_2797_cast_fp16")]; tensor var_28294_to_fp16 = const()[name = tensor("op_28294_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2799_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2799_cast_fp16, y = var_28294_to_fp16)[name = tensor("aw_chunk_2799_cast_fp16")]; tensor var_28296_to_fp16 = const()[name = tensor("op_28296_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2801_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2801_cast_fp16, y = var_28296_to_fp16)[name = tensor("aw_chunk_2801_cast_fp16")]; tensor var_28298_to_fp16 = const()[name = tensor("op_28298_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2803_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2803_cast_fp16, y = var_28298_to_fp16)[name = tensor("aw_chunk_2803_cast_fp16")]; tensor var_28300_to_fp16 = const()[name = tensor("op_28300_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2805_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2805_cast_fp16, y = var_28300_to_fp16)[name = tensor("aw_chunk_2805_cast_fp16")]; tensor var_28302_to_fp16 = const()[name = tensor("op_28302_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2807_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2807_cast_fp16, y = var_28302_to_fp16)[name = tensor("aw_chunk_2807_cast_fp16")]; tensor var_28304_to_fp16 = const()[name = tensor("op_28304_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2809_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2809_cast_fp16, y = var_28304_to_fp16)[name = tensor("aw_chunk_2809_cast_fp16")]; tensor var_28306_to_fp16 = const()[name = tensor("op_28306_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2811_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2811_cast_fp16, y = var_28306_to_fp16)[name = tensor("aw_chunk_2811_cast_fp16")]; tensor var_28308_to_fp16 = const()[name = tensor("op_28308_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2813_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2813_cast_fp16, y = var_28308_to_fp16)[name = tensor("aw_chunk_2813_cast_fp16")]; tensor var_28310_to_fp16 = const()[name = tensor("op_28310_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2815_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2815_cast_fp16, y = var_28310_to_fp16)[name = tensor("aw_chunk_2815_cast_fp16")]; tensor var_28312_to_fp16 = const()[name = tensor("op_28312_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2817_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2817_cast_fp16, y = var_28312_to_fp16)[name = tensor("aw_chunk_2817_cast_fp16")]; tensor var_28314_to_fp16 = const()[name = tensor("op_28314_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2819_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2819_cast_fp16, y = var_28314_to_fp16)[name = tensor("aw_chunk_2819_cast_fp16")]; tensor var_28316_to_fp16 = const()[name = tensor("op_28316_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2821_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2821_cast_fp16, y = var_28316_to_fp16)[name = tensor("aw_chunk_2821_cast_fp16")]; tensor var_28318_to_fp16 = const()[name = tensor("op_28318_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2823_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2823_cast_fp16, y = var_28318_to_fp16)[name = tensor("aw_chunk_2823_cast_fp16")]; tensor var_28320_to_fp16 = const()[name = tensor("op_28320_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2825_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2825_cast_fp16, y = var_28320_to_fp16)[name = tensor("aw_chunk_2825_cast_fp16")]; tensor var_28322_to_fp16 = const()[name = tensor("op_28322_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2827_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2827_cast_fp16, y = var_28322_to_fp16)[name = tensor("aw_chunk_2827_cast_fp16")]; tensor var_28324_to_fp16 = const()[name = tensor("op_28324_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2829_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2829_cast_fp16, y = var_28324_to_fp16)[name = tensor("aw_chunk_2829_cast_fp16")]; tensor var_28326_to_fp16 = const()[name = tensor("op_28326_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2831_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2831_cast_fp16, y = var_28326_to_fp16)[name = tensor("aw_chunk_2831_cast_fp16")]; tensor var_28328_to_fp16 = const()[name = tensor("op_28328_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2833_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2833_cast_fp16, y = var_28328_to_fp16)[name = tensor("aw_chunk_2833_cast_fp16")]; tensor var_28330_to_fp16 = const()[name = tensor("op_28330_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2835_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2835_cast_fp16, y = var_28330_to_fp16)[name = tensor("aw_chunk_2835_cast_fp16")]; tensor var_28332_to_fp16 = const()[name = tensor("op_28332_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2837_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2837_cast_fp16, y = var_28332_to_fp16)[name = tensor("aw_chunk_2837_cast_fp16")]; tensor var_28334_to_fp16 = const()[name = tensor("op_28334_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2839_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2839_cast_fp16, y = var_28334_to_fp16)[name = tensor("aw_chunk_2839_cast_fp16")]; tensor var_28336_to_fp16 = const()[name = tensor("op_28336_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2841_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2841_cast_fp16, y = var_28336_to_fp16)[name = tensor("aw_chunk_2841_cast_fp16")]; tensor var_28338_to_fp16 = const()[name = tensor("op_28338_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2843_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2843_cast_fp16, y = var_28338_to_fp16)[name = tensor("aw_chunk_2843_cast_fp16")]; tensor var_28340_to_fp16 = const()[name = tensor("op_28340_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2845_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2845_cast_fp16, y = var_28340_to_fp16)[name = tensor("aw_chunk_2845_cast_fp16")]; tensor var_28342_to_fp16 = const()[name = tensor("op_28342_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2847_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2847_cast_fp16, y = var_28342_to_fp16)[name = tensor("aw_chunk_2847_cast_fp16")]; tensor var_28344_to_fp16 = const()[name = tensor("op_28344_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2849_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2849_cast_fp16, y = var_28344_to_fp16)[name = tensor("aw_chunk_2849_cast_fp16")]; tensor var_28346_to_fp16 = const()[name = tensor("op_28346_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2851_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2851_cast_fp16, y = var_28346_to_fp16)[name = tensor("aw_chunk_2851_cast_fp16")]; tensor var_28348_to_fp16 = const()[name = tensor("op_28348_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2853_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2853_cast_fp16, y = var_28348_to_fp16)[name = tensor("aw_chunk_2853_cast_fp16")]; tensor var_28350_to_fp16 = const()[name = tensor("op_28350_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2855_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2855_cast_fp16, y = var_28350_to_fp16)[name = tensor("aw_chunk_2855_cast_fp16")]; tensor var_28352_to_fp16 = const()[name = tensor("op_28352_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2857_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2857_cast_fp16, y = var_28352_to_fp16)[name = tensor("aw_chunk_2857_cast_fp16")]; tensor var_28354_to_fp16 = const()[name = tensor("op_28354_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2859_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2859_cast_fp16, y = var_28354_to_fp16)[name = tensor("aw_chunk_2859_cast_fp16")]; tensor var_28356_to_fp16 = const()[name = tensor("op_28356_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2861_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2861_cast_fp16, y = var_28356_to_fp16)[name = tensor("aw_chunk_2861_cast_fp16")]; tensor var_28358_to_fp16 = const()[name = tensor("op_28358_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2863_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2863_cast_fp16, y = var_28358_to_fp16)[name = tensor("aw_chunk_2863_cast_fp16")]; tensor var_28360_to_fp16 = const()[name = tensor("op_28360_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2865_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2865_cast_fp16, y = var_28360_to_fp16)[name = tensor("aw_chunk_2865_cast_fp16")]; tensor var_28362_to_fp16 = const()[name = tensor("op_28362_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2867_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2867_cast_fp16, y = var_28362_to_fp16)[name = tensor("aw_chunk_2867_cast_fp16")]; tensor var_28364_to_fp16 = const()[name = tensor("op_28364_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2869_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2869_cast_fp16, y = var_28364_to_fp16)[name = tensor("aw_chunk_2869_cast_fp16")]; tensor var_28366_to_fp16 = const()[name = tensor("op_28366_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2871_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2871_cast_fp16, y = var_28366_to_fp16)[name = tensor("aw_chunk_2871_cast_fp16")]; tensor var_28368_to_fp16 = const()[name = tensor("op_28368_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2873_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2873_cast_fp16, y = var_28368_to_fp16)[name = tensor("aw_chunk_2873_cast_fp16")]; tensor var_28370_to_fp16 = const()[name = tensor("op_28370_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2875_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2875_cast_fp16, y = var_28370_to_fp16)[name = tensor("aw_chunk_2875_cast_fp16")]; tensor var_28372_to_fp16 = const()[name = tensor("op_28372_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2877_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2877_cast_fp16, y = var_28372_to_fp16)[name = tensor("aw_chunk_2877_cast_fp16")]; tensor var_28374_to_fp16 = const()[name = tensor("op_28374_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2879_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2879_cast_fp16, y = var_28374_to_fp16)[name = tensor("aw_chunk_2879_cast_fp16")]; tensor var_28376_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2721_cast_fp16)[name = tensor("op_28376_cast_fp16")]; tensor var_28377_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2723_cast_fp16)[name = tensor("op_28377_cast_fp16")]; tensor var_28378_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2725_cast_fp16)[name = tensor("op_28378_cast_fp16")]; tensor var_28379_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2727_cast_fp16)[name = tensor("op_28379_cast_fp16")]; tensor var_28380_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2729_cast_fp16)[name = tensor("op_28380_cast_fp16")]; tensor var_28381_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2731_cast_fp16)[name = tensor("op_28381_cast_fp16")]; tensor var_28382_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2733_cast_fp16)[name = tensor("op_28382_cast_fp16")]; tensor var_28383_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2735_cast_fp16)[name = tensor("op_28383_cast_fp16")]; tensor var_28384_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2737_cast_fp16)[name = tensor("op_28384_cast_fp16")]; tensor var_28385_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2739_cast_fp16)[name = tensor("op_28385_cast_fp16")]; tensor var_28386_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2741_cast_fp16)[name = tensor("op_28386_cast_fp16")]; tensor var_28387_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2743_cast_fp16)[name = tensor("op_28387_cast_fp16")]; tensor var_28388_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2745_cast_fp16)[name = tensor("op_28388_cast_fp16")]; tensor var_28389_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2747_cast_fp16)[name = tensor("op_28389_cast_fp16")]; tensor var_28390_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2749_cast_fp16)[name = tensor("op_28390_cast_fp16")]; tensor var_28391_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2751_cast_fp16)[name = tensor("op_28391_cast_fp16")]; tensor var_28392_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2753_cast_fp16)[name = tensor("op_28392_cast_fp16")]; tensor var_28393_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2755_cast_fp16)[name = tensor("op_28393_cast_fp16")]; tensor var_28394_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2757_cast_fp16)[name = tensor("op_28394_cast_fp16")]; tensor var_28395_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2759_cast_fp16)[name = tensor("op_28395_cast_fp16")]; tensor var_28396_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2761_cast_fp16)[name = tensor("op_28396_cast_fp16")]; tensor var_28397_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2763_cast_fp16)[name = tensor("op_28397_cast_fp16")]; tensor var_28398_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2765_cast_fp16)[name = tensor("op_28398_cast_fp16")]; tensor var_28399_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2767_cast_fp16)[name = tensor("op_28399_cast_fp16")]; tensor var_28400_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2769_cast_fp16)[name = tensor("op_28400_cast_fp16")]; tensor var_28401_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2771_cast_fp16)[name = tensor("op_28401_cast_fp16")]; tensor var_28402_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2773_cast_fp16)[name = tensor("op_28402_cast_fp16")]; tensor var_28403_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2775_cast_fp16)[name = tensor("op_28403_cast_fp16")]; tensor var_28404_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2777_cast_fp16)[name = tensor("op_28404_cast_fp16")]; tensor var_28405_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2779_cast_fp16)[name = tensor("op_28405_cast_fp16")]; tensor var_28406_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2781_cast_fp16)[name = tensor("op_28406_cast_fp16")]; tensor var_28407_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2783_cast_fp16)[name = tensor("op_28407_cast_fp16")]; tensor var_28408_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2785_cast_fp16)[name = tensor("op_28408_cast_fp16")]; tensor var_28409_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2787_cast_fp16)[name = tensor("op_28409_cast_fp16")]; tensor var_28410_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2789_cast_fp16)[name = tensor("op_28410_cast_fp16")]; tensor var_28411_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2791_cast_fp16)[name = tensor("op_28411_cast_fp16")]; tensor var_28412_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2793_cast_fp16)[name = tensor("op_28412_cast_fp16")]; tensor var_28413_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2795_cast_fp16)[name = tensor("op_28413_cast_fp16")]; tensor var_28414_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2797_cast_fp16)[name = tensor("op_28414_cast_fp16")]; tensor var_28415_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2799_cast_fp16)[name = tensor("op_28415_cast_fp16")]; tensor var_28416_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2801_cast_fp16)[name = tensor("op_28416_cast_fp16")]; tensor var_28417_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2803_cast_fp16)[name = tensor("op_28417_cast_fp16")]; tensor var_28418_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2805_cast_fp16)[name = tensor("op_28418_cast_fp16")]; tensor var_28419_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2807_cast_fp16)[name = tensor("op_28419_cast_fp16")]; tensor var_28420_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2809_cast_fp16)[name = tensor("op_28420_cast_fp16")]; tensor var_28421_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2811_cast_fp16)[name = tensor("op_28421_cast_fp16")]; tensor var_28422_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2813_cast_fp16)[name = tensor("op_28422_cast_fp16")]; tensor var_28423_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2815_cast_fp16)[name = tensor("op_28423_cast_fp16")]; tensor var_28424_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2817_cast_fp16)[name = tensor("op_28424_cast_fp16")]; tensor var_28425_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2819_cast_fp16)[name = tensor("op_28425_cast_fp16")]; tensor var_28426_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2821_cast_fp16)[name = tensor("op_28426_cast_fp16")]; tensor var_28427_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2823_cast_fp16)[name = tensor("op_28427_cast_fp16")]; tensor var_28428_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2825_cast_fp16)[name = tensor("op_28428_cast_fp16")]; tensor var_28429_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2827_cast_fp16)[name = tensor("op_28429_cast_fp16")]; tensor var_28430_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2829_cast_fp16)[name = tensor("op_28430_cast_fp16")]; tensor var_28431_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2831_cast_fp16)[name = tensor("op_28431_cast_fp16")]; tensor var_28432_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2833_cast_fp16)[name = tensor("op_28432_cast_fp16")]; tensor var_28433_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2835_cast_fp16)[name = tensor("op_28433_cast_fp16")]; tensor var_28434_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2837_cast_fp16)[name = tensor("op_28434_cast_fp16")]; tensor var_28435_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2839_cast_fp16)[name = tensor("op_28435_cast_fp16")]; tensor var_28436_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2841_cast_fp16)[name = tensor("op_28436_cast_fp16")]; tensor var_28437_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2843_cast_fp16)[name = tensor("op_28437_cast_fp16")]; tensor var_28438_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2845_cast_fp16)[name = tensor("op_28438_cast_fp16")]; tensor var_28439_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2847_cast_fp16)[name = tensor("op_28439_cast_fp16")]; tensor var_28440_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2849_cast_fp16)[name = tensor("op_28440_cast_fp16")]; tensor var_28441_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2851_cast_fp16)[name = tensor("op_28441_cast_fp16")]; tensor var_28442_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2853_cast_fp16)[name = tensor("op_28442_cast_fp16")]; tensor var_28443_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2855_cast_fp16)[name = tensor("op_28443_cast_fp16")]; tensor var_28444_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2857_cast_fp16)[name = tensor("op_28444_cast_fp16")]; tensor var_28445_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2859_cast_fp16)[name = tensor("op_28445_cast_fp16")]; tensor var_28446_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2861_cast_fp16)[name = tensor("op_28446_cast_fp16")]; tensor var_28447_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2863_cast_fp16)[name = tensor("op_28447_cast_fp16")]; tensor var_28448_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2865_cast_fp16)[name = tensor("op_28448_cast_fp16")]; tensor var_28449_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2867_cast_fp16)[name = tensor("op_28449_cast_fp16")]; tensor var_28450_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2869_cast_fp16)[name = tensor("op_28450_cast_fp16")]; tensor var_28451_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2871_cast_fp16)[name = tensor("op_28451_cast_fp16")]; tensor var_28452_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2873_cast_fp16)[name = tensor("op_28452_cast_fp16")]; tensor var_28453_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2875_cast_fp16)[name = tensor("op_28453_cast_fp16")]; tensor var_28454_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2877_cast_fp16)[name = tensor("op_28454_cast_fp16")]; tensor var_28455_cast_fp16 = softmax(axis = var_27174, x = aw_chunk_2879_cast_fp16)[name = tensor("op_28455_cast_fp16")]; tensor var_28457_equation_0 = const()[name = tensor("op_28457_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28457_cast_fp16 = einsum(equation = var_28457_equation_0, values = (var_27977_cast_fp16, var_28376_cast_fp16))[name = tensor("op_28457_cast_fp16")]; tensor var_28459_equation_0 = const()[name = tensor("op_28459_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28459_cast_fp16 = einsum(equation = var_28459_equation_0, values = (var_27977_cast_fp16, var_28377_cast_fp16))[name = tensor("op_28459_cast_fp16")]; tensor var_28461_equation_0 = const()[name = tensor("op_28461_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28461_cast_fp16 = einsum(equation = var_28461_equation_0, values = (var_27977_cast_fp16, var_28378_cast_fp16))[name = tensor("op_28461_cast_fp16")]; tensor var_28463_equation_0 = const()[name = tensor("op_28463_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28463_cast_fp16 = einsum(equation = var_28463_equation_0, values = (var_27977_cast_fp16, var_28379_cast_fp16))[name = tensor("op_28463_cast_fp16")]; tensor var_28465_equation_0 = const()[name = tensor("op_28465_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28465_cast_fp16 = einsum(equation = var_28465_equation_0, values = (var_27981_cast_fp16, var_28380_cast_fp16))[name = tensor("op_28465_cast_fp16")]; tensor var_28467_equation_0 = const()[name = tensor("op_28467_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28467_cast_fp16 = einsum(equation = var_28467_equation_0, values = (var_27981_cast_fp16, var_28381_cast_fp16))[name = tensor("op_28467_cast_fp16")]; tensor var_28469_equation_0 = const()[name = tensor("op_28469_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28469_cast_fp16 = einsum(equation = var_28469_equation_0, values = (var_27981_cast_fp16, var_28382_cast_fp16))[name = tensor("op_28469_cast_fp16")]; tensor var_28471_equation_0 = const()[name = tensor("op_28471_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28471_cast_fp16 = einsum(equation = var_28471_equation_0, values = (var_27981_cast_fp16, var_28383_cast_fp16))[name = tensor("op_28471_cast_fp16")]; tensor var_28473_equation_0 = const()[name = tensor("op_28473_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28473_cast_fp16 = einsum(equation = var_28473_equation_0, values = (var_27985_cast_fp16, var_28384_cast_fp16))[name = tensor("op_28473_cast_fp16")]; tensor var_28475_equation_0 = const()[name = tensor("op_28475_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28475_cast_fp16 = einsum(equation = var_28475_equation_0, values = (var_27985_cast_fp16, var_28385_cast_fp16))[name = tensor("op_28475_cast_fp16")]; tensor var_28477_equation_0 = const()[name = tensor("op_28477_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28477_cast_fp16 = einsum(equation = var_28477_equation_0, values = (var_27985_cast_fp16, var_28386_cast_fp16))[name = tensor("op_28477_cast_fp16")]; tensor var_28479_equation_0 = const()[name = tensor("op_28479_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28479_cast_fp16 = einsum(equation = var_28479_equation_0, values = (var_27985_cast_fp16, var_28387_cast_fp16))[name = tensor("op_28479_cast_fp16")]; tensor var_28481_equation_0 = const()[name = tensor("op_28481_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28481_cast_fp16 = einsum(equation = var_28481_equation_0, values = (var_27989_cast_fp16, var_28388_cast_fp16))[name = tensor("op_28481_cast_fp16")]; tensor var_28483_equation_0 = const()[name = tensor("op_28483_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28483_cast_fp16 = einsum(equation = var_28483_equation_0, values = (var_27989_cast_fp16, var_28389_cast_fp16))[name = tensor("op_28483_cast_fp16")]; tensor var_28485_equation_0 = const()[name = tensor("op_28485_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28485_cast_fp16 = einsum(equation = var_28485_equation_0, values = (var_27989_cast_fp16, var_28390_cast_fp16))[name = tensor("op_28485_cast_fp16")]; tensor var_28487_equation_0 = const()[name = tensor("op_28487_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28487_cast_fp16 = einsum(equation = var_28487_equation_0, values = (var_27989_cast_fp16, var_28391_cast_fp16))[name = tensor("op_28487_cast_fp16")]; tensor var_28489_equation_0 = const()[name = tensor("op_28489_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28489_cast_fp16 = einsum(equation = var_28489_equation_0, values = (var_27993_cast_fp16, var_28392_cast_fp16))[name = tensor("op_28489_cast_fp16")]; tensor var_28491_equation_0 = const()[name = tensor("op_28491_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28491_cast_fp16 = einsum(equation = var_28491_equation_0, values = (var_27993_cast_fp16, var_28393_cast_fp16))[name = tensor("op_28491_cast_fp16")]; tensor var_28493_equation_0 = const()[name = tensor("op_28493_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28493_cast_fp16 = einsum(equation = var_28493_equation_0, values = (var_27993_cast_fp16, var_28394_cast_fp16))[name = tensor("op_28493_cast_fp16")]; tensor var_28495_equation_0 = const()[name = tensor("op_28495_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28495_cast_fp16 = einsum(equation = var_28495_equation_0, values = (var_27993_cast_fp16, var_28395_cast_fp16))[name = tensor("op_28495_cast_fp16")]; tensor var_28497_equation_0 = const()[name = tensor("op_28497_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28497_cast_fp16 = einsum(equation = var_28497_equation_0, values = (var_27997_cast_fp16, var_28396_cast_fp16))[name = tensor("op_28497_cast_fp16")]; tensor var_28499_equation_0 = const()[name = tensor("op_28499_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28499_cast_fp16 = einsum(equation = var_28499_equation_0, values = (var_27997_cast_fp16, var_28397_cast_fp16))[name = tensor("op_28499_cast_fp16")]; tensor var_28501_equation_0 = const()[name = tensor("op_28501_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28501_cast_fp16 = einsum(equation = var_28501_equation_0, values = (var_27997_cast_fp16, var_28398_cast_fp16))[name = tensor("op_28501_cast_fp16")]; tensor var_28503_equation_0 = const()[name = tensor("op_28503_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28503_cast_fp16 = einsum(equation = var_28503_equation_0, values = (var_27997_cast_fp16, var_28399_cast_fp16))[name = tensor("op_28503_cast_fp16")]; tensor var_28505_equation_0 = const()[name = tensor("op_28505_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28505_cast_fp16 = einsum(equation = var_28505_equation_0, values = (var_28001_cast_fp16, var_28400_cast_fp16))[name = tensor("op_28505_cast_fp16")]; tensor var_28507_equation_0 = const()[name = tensor("op_28507_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28507_cast_fp16 = einsum(equation = var_28507_equation_0, values = (var_28001_cast_fp16, var_28401_cast_fp16))[name = tensor("op_28507_cast_fp16")]; tensor var_28509_equation_0 = const()[name = tensor("op_28509_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28509_cast_fp16 = einsum(equation = var_28509_equation_0, values = (var_28001_cast_fp16, var_28402_cast_fp16))[name = tensor("op_28509_cast_fp16")]; tensor var_28511_equation_0 = const()[name = tensor("op_28511_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28511_cast_fp16 = einsum(equation = var_28511_equation_0, values = (var_28001_cast_fp16, var_28403_cast_fp16))[name = tensor("op_28511_cast_fp16")]; tensor var_28513_equation_0 = const()[name = tensor("op_28513_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28513_cast_fp16 = einsum(equation = var_28513_equation_0, values = (var_28005_cast_fp16, var_28404_cast_fp16))[name = tensor("op_28513_cast_fp16")]; tensor var_28515_equation_0 = const()[name = tensor("op_28515_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28515_cast_fp16 = einsum(equation = var_28515_equation_0, values = (var_28005_cast_fp16, var_28405_cast_fp16))[name = tensor("op_28515_cast_fp16")]; tensor var_28517_equation_0 = const()[name = tensor("op_28517_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28517_cast_fp16 = einsum(equation = var_28517_equation_0, values = (var_28005_cast_fp16, var_28406_cast_fp16))[name = tensor("op_28517_cast_fp16")]; tensor var_28519_equation_0 = const()[name = tensor("op_28519_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28519_cast_fp16 = einsum(equation = var_28519_equation_0, values = (var_28005_cast_fp16, var_28407_cast_fp16))[name = tensor("op_28519_cast_fp16")]; tensor var_28521_equation_0 = const()[name = tensor("op_28521_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28521_cast_fp16 = einsum(equation = var_28521_equation_0, values = (var_28009_cast_fp16, var_28408_cast_fp16))[name = tensor("op_28521_cast_fp16")]; tensor var_28523_equation_0 = const()[name = tensor("op_28523_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28523_cast_fp16 = einsum(equation = var_28523_equation_0, values = (var_28009_cast_fp16, var_28409_cast_fp16))[name = tensor("op_28523_cast_fp16")]; tensor var_28525_equation_0 = const()[name = tensor("op_28525_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28525_cast_fp16 = einsum(equation = var_28525_equation_0, values = (var_28009_cast_fp16, var_28410_cast_fp16))[name = tensor("op_28525_cast_fp16")]; tensor var_28527_equation_0 = const()[name = tensor("op_28527_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28527_cast_fp16 = einsum(equation = var_28527_equation_0, values = (var_28009_cast_fp16, var_28411_cast_fp16))[name = tensor("op_28527_cast_fp16")]; tensor var_28529_equation_0 = const()[name = tensor("op_28529_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28529_cast_fp16 = einsum(equation = var_28529_equation_0, values = (var_28013_cast_fp16, var_28412_cast_fp16))[name = tensor("op_28529_cast_fp16")]; tensor var_28531_equation_0 = const()[name = tensor("op_28531_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28531_cast_fp16 = einsum(equation = var_28531_equation_0, values = (var_28013_cast_fp16, var_28413_cast_fp16))[name = tensor("op_28531_cast_fp16")]; tensor var_28533_equation_0 = const()[name = tensor("op_28533_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28533_cast_fp16 = einsum(equation = var_28533_equation_0, values = (var_28013_cast_fp16, var_28414_cast_fp16))[name = tensor("op_28533_cast_fp16")]; tensor var_28535_equation_0 = const()[name = tensor("op_28535_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28535_cast_fp16 = einsum(equation = var_28535_equation_0, values = (var_28013_cast_fp16, var_28415_cast_fp16))[name = tensor("op_28535_cast_fp16")]; tensor var_28537_equation_0 = const()[name = tensor("op_28537_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28537_cast_fp16 = einsum(equation = var_28537_equation_0, values = (var_28017_cast_fp16, var_28416_cast_fp16))[name = tensor("op_28537_cast_fp16")]; tensor var_28539_equation_0 = const()[name = tensor("op_28539_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28539_cast_fp16 = einsum(equation = var_28539_equation_0, values = (var_28017_cast_fp16, var_28417_cast_fp16))[name = tensor("op_28539_cast_fp16")]; tensor var_28541_equation_0 = const()[name = tensor("op_28541_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28541_cast_fp16 = einsum(equation = var_28541_equation_0, values = (var_28017_cast_fp16, var_28418_cast_fp16))[name = tensor("op_28541_cast_fp16")]; tensor var_28543_equation_0 = const()[name = tensor("op_28543_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28543_cast_fp16 = einsum(equation = var_28543_equation_0, values = (var_28017_cast_fp16, var_28419_cast_fp16))[name = tensor("op_28543_cast_fp16")]; tensor var_28545_equation_0 = const()[name = tensor("op_28545_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28545_cast_fp16 = einsum(equation = var_28545_equation_0, values = (var_28021_cast_fp16, var_28420_cast_fp16))[name = tensor("op_28545_cast_fp16")]; tensor var_28547_equation_0 = const()[name = tensor("op_28547_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28547_cast_fp16 = einsum(equation = var_28547_equation_0, values = (var_28021_cast_fp16, var_28421_cast_fp16))[name = tensor("op_28547_cast_fp16")]; tensor var_28549_equation_0 = const()[name = tensor("op_28549_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28549_cast_fp16 = einsum(equation = var_28549_equation_0, values = (var_28021_cast_fp16, var_28422_cast_fp16))[name = tensor("op_28549_cast_fp16")]; tensor var_28551_equation_0 = const()[name = tensor("op_28551_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28551_cast_fp16 = einsum(equation = var_28551_equation_0, values = (var_28021_cast_fp16, var_28423_cast_fp16))[name = tensor("op_28551_cast_fp16")]; tensor var_28553_equation_0 = const()[name = tensor("op_28553_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28553_cast_fp16 = einsum(equation = var_28553_equation_0, values = (var_28025_cast_fp16, var_28424_cast_fp16))[name = tensor("op_28553_cast_fp16")]; tensor var_28555_equation_0 = const()[name = tensor("op_28555_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28555_cast_fp16 = einsum(equation = var_28555_equation_0, values = (var_28025_cast_fp16, var_28425_cast_fp16))[name = tensor("op_28555_cast_fp16")]; tensor var_28557_equation_0 = const()[name = tensor("op_28557_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28557_cast_fp16 = einsum(equation = var_28557_equation_0, values = (var_28025_cast_fp16, var_28426_cast_fp16))[name = tensor("op_28557_cast_fp16")]; tensor var_28559_equation_0 = const()[name = tensor("op_28559_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28559_cast_fp16 = einsum(equation = var_28559_equation_0, values = (var_28025_cast_fp16, var_28427_cast_fp16))[name = tensor("op_28559_cast_fp16")]; tensor var_28561_equation_0 = const()[name = tensor("op_28561_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28561_cast_fp16 = einsum(equation = var_28561_equation_0, values = (var_28029_cast_fp16, var_28428_cast_fp16))[name = tensor("op_28561_cast_fp16")]; tensor var_28563_equation_0 = const()[name = tensor("op_28563_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28563_cast_fp16 = einsum(equation = var_28563_equation_0, values = (var_28029_cast_fp16, var_28429_cast_fp16))[name = tensor("op_28563_cast_fp16")]; tensor var_28565_equation_0 = const()[name = tensor("op_28565_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28565_cast_fp16 = einsum(equation = var_28565_equation_0, values = (var_28029_cast_fp16, var_28430_cast_fp16))[name = tensor("op_28565_cast_fp16")]; tensor var_28567_equation_0 = const()[name = tensor("op_28567_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28567_cast_fp16 = einsum(equation = var_28567_equation_0, values = (var_28029_cast_fp16, var_28431_cast_fp16))[name = tensor("op_28567_cast_fp16")]; tensor var_28569_equation_0 = const()[name = tensor("op_28569_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28569_cast_fp16 = einsum(equation = var_28569_equation_0, values = (var_28033_cast_fp16, var_28432_cast_fp16))[name = tensor("op_28569_cast_fp16")]; tensor var_28571_equation_0 = const()[name = tensor("op_28571_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28571_cast_fp16 = einsum(equation = var_28571_equation_0, values = (var_28033_cast_fp16, var_28433_cast_fp16))[name = tensor("op_28571_cast_fp16")]; tensor var_28573_equation_0 = const()[name = tensor("op_28573_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28573_cast_fp16 = einsum(equation = var_28573_equation_0, values = (var_28033_cast_fp16, var_28434_cast_fp16))[name = tensor("op_28573_cast_fp16")]; tensor var_28575_equation_0 = const()[name = tensor("op_28575_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28575_cast_fp16 = einsum(equation = var_28575_equation_0, values = (var_28033_cast_fp16, var_28435_cast_fp16))[name = tensor("op_28575_cast_fp16")]; tensor var_28577_equation_0 = const()[name = tensor("op_28577_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28577_cast_fp16 = einsum(equation = var_28577_equation_0, values = (var_28037_cast_fp16, var_28436_cast_fp16))[name = tensor("op_28577_cast_fp16")]; tensor var_28579_equation_0 = const()[name = tensor("op_28579_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28579_cast_fp16 = einsum(equation = var_28579_equation_0, values = (var_28037_cast_fp16, var_28437_cast_fp16))[name = tensor("op_28579_cast_fp16")]; tensor var_28581_equation_0 = const()[name = tensor("op_28581_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28581_cast_fp16 = einsum(equation = var_28581_equation_0, values = (var_28037_cast_fp16, var_28438_cast_fp16))[name = tensor("op_28581_cast_fp16")]; tensor var_28583_equation_0 = const()[name = tensor("op_28583_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28583_cast_fp16 = einsum(equation = var_28583_equation_0, values = (var_28037_cast_fp16, var_28439_cast_fp16))[name = tensor("op_28583_cast_fp16")]; tensor var_28585_equation_0 = const()[name = tensor("op_28585_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28585_cast_fp16 = einsum(equation = var_28585_equation_0, values = (var_28041_cast_fp16, var_28440_cast_fp16))[name = tensor("op_28585_cast_fp16")]; tensor var_28587_equation_0 = const()[name = tensor("op_28587_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28587_cast_fp16 = einsum(equation = var_28587_equation_0, values = (var_28041_cast_fp16, var_28441_cast_fp16))[name = tensor("op_28587_cast_fp16")]; tensor var_28589_equation_0 = const()[name = tensor("op_28589_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28589_cast_fp16 = einsum(equation = var_28589_equation_0, values = (var_28041_cast_fp16, var_28442_cast_fp16))[name = tensor("op_28589_cast_fp16")]; tensor var_28591_equation_0 = const()[name = tensor("op_28591_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28591_cast_fp16 = einsum(equation = var_28591_equation_0, values = (var_28041_cast_fp16, var_28443_cast_fp16))[name = tensor("op_28591_cast_fp16")]; tensor var_28593_equation_0 = const()[name = tensor("op_28593_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28593_cast_fp16 = einsum(equation = var_28593_equation_0, values = (var_28045_cast_fp16, var_28444_cast_fp16))[name = tensor("op_28593_cast_fp16")]; tensor var_28595_equation_0 = const()[name = tensor("op_28595_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28595_cast_fp16 = einsum(equation = var_28595_equation_0, values = (var_28045_cast_fp16, var_28445_cast_fp16))[name = tensor("op_28595_cast_fp16")]; tensor var_28597_equation_0 = const()[name = tensor("op_28597_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28597_cast_fp16 = einsum(equation = var_28597_equation_0, values = (var_28045_cast_fp16, var_28446_cast_fp16))[name = tensor("op_28597_cast_fp16")]; tensor var_28599_equation_0 = const()[name = tensor("op_28599_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28599_cast_fp16 = einsum(equation = var_28599_equation_0, values = (var_28045_cast_fp16, var_28447_cast_fp16))[name = tensor("op_28599_cast_fp16")]; tensor var_28601_equation_0 = const()[name = tensor("op_28601_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28601_cast_fp16 = einsum(equation = var_28601_equation_0, values = (var_28049_cast_fp16, var_28448_cast_fp16))[name = tensor("op_28601_cast_fp16")]; tensor var_28603_equation_0 = const()[name = tensor("op_28603_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28603_cast_fp16 = einsum(equation = var_28603_equation_0, values = (var_28049_cast_fp16, var_28449_cast_fp16))[name = tensor("op_28603_cast_fp16")]; tensor var_28605_equation_0 = const()[name = tensor("op_28605_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28605_cast_fp16 = einsum(equation = var_28605_equation_0, values = (var_28049_cast_fp16, var_28450_cast_fp16))[name = tensor("op_28605_cast_fp16")]; tensor var_28607_equation_0 = const()[name = tensor("op_28607_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28607_cast_fp16 = einsum(equation = var_28607_equation_0, values = (var_28049_cast_fp16, var_28451_cast_fp16))[name = tensor("op_28607_cast_fp16")]; tensor var_28609_equation_0 = const()[name = tensor("op_28609_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28609_cast_fp16 = einsum(equation = var_28609_equation_0, values = (var_28053_cast_fp16, var_28452_cast_fp16))[name = tensor("op_28609_cast_fp16")]; tensor var_28611_equation_0 = const()[name = tensor("op_28611_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28611_cast_fp16 = einsum(equation = var_28611_equation_0, values = (var_28053_cast_fp16, var_28453_cast_fp16))[name = tensor("op_28611_cast_fp16")]; tensor var_28613_equation_0 = const()[name = tensor("op_28613_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28613_cast_fp16 = einsum(equation = var_28613_equation_0, values = (var_28053_cast_fp16, var_28454_cast_fp16))[name = tensor("op_28613_cast_fp16")]; tensor var_28615_equation_0 = const()[name = tensor("op_28615_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_28615_cast_fp16 = einsum(equation = var_28615_equation_0, values = (var_28053_cast_fp16, var_28455_cast_fp16))[name = tensor("op_28615_cast_fp16")]; tensor var_28617_interleave_0 = const()[name = tensor("op_28617_interleave_0"), val = tensor(false)]; tensor var_28617_cast_fp16 = concat(axis = var_27149, interleave = var_28617_interleave_0, values = (var_28457_cast_fp16, var_28459_cast_fp16, var_28461_cast_fp16, var_28463_cast_fp16))[name = tensor("op_28617_cast_fp16")]; tensor var_28619_interleave_0 = const()[name = tensor("op_28619_interleave_0"), val = tensor(false)]; tensor var_28619_cast_fp16 = concat(axis = var_27149, interleave = var_28619_interleave_0, values = (var_28465_cast_fp16, var_28467_cast_fp16, var_28469_cast_fp16, var_28471_cast_fp16))[name = tensor("op_28619_cast_fp16")]; tensor var_28621_interleave_0 = const()[name = tensor("op_28621_interleave_0"), val = tensor(false)]; tensor var_28621_cast_fp16 = concat(axis = var_27149, interleave = var_28621_interleave_0, values = (var_28473_cast_fp16, var_28475_cast_fp16, var_28477_cast_fp16, var_28479_cast_fp16))[name = tensor("op_28621_cast_fp16")]; tensor var_28623_interleave_0 = const()[name = tensor("op_28623_interleave_0"), val = tensor(false)]; tensor var_28623_cast_fp16 = concat(axis = var_27149, interleave = var_28623_interleave_0, values = (var_28481_cast_fp16, var_28483_cast_fp16, var_28485_cast_fp16, var_28487_cast_fp16))[name = tensor("op_28623_cast_fp16")]; tensor var_28625_interleave_0 = const()[name = tensor("op_28625_interleave_0"), val = tensor(false)]; tensor var_28625_cast_fp16 = concat(axis = var_27149, interleave = var_28625_interleave_0, values = (var_28489_cast_fp16, var_28491_cast_fp16, var_28493_cast_fp16, var_28495_cast_fp16))[name = tensor("op_28625_cast_fp16")]; tensor var_28627_interleave_0 = const()[name = tensor("op_28627_interleave_0"), val = tensor(false)]; tensor var_28627_cast_fp16 = concat(axis = var_27149, interleave = var_28627_interleave_0, values = (var_28497_cast_fp16, var_28499_cast_fp16, var_28501_cast_fp16, var_28503_cast_fp16))[name = tensor("op_28627_cast_fp16")]; tensor var_28629_interleave_0 = const()[name = tensor("op_28629_interleave_0"), val = tensor(false)]; tensor var_28629_cast_fp16 = concat(axis = var_27149, interleave = var_28629_interleave_0, values = (var_28505_cast_fp16, var_28507_cast_fp16, var_28509_cast_fp16, var_28511_cast_fp16))[name = tensor("op_28629_cast_fp16")]; tensor var_28631_interleave_0 = const()[name = tensor("op_28631_interleave_0"), val = tensor(false)]; tensor var_28631_cast_fp16 = concat(axis = var_27149, interleave = var_28631_interleave_0, values = (var_28513_cast_fp16, var_28515_cast_fp16, var_28517_cast_fp16, var_28519_cast_fp16))[name = tensor("op_28631_cast_fp16")]; tensor var_28633_interleave_0 = const()[name = tensor("op_28633_interleave_0"), val = tensor(false)]; tensor var_28633_cast_fp16 = concat(axis = var_27149, interleave = var_28633_interleave_0, values = (var_28521_cast_fp16, var_28523_cast_fp16, var_28525_cast_fp16, var_28527_cast_fp16))[name = tensor("op_28633_cast_fp16")]; tensor var_28635_interleave_0 = const()[name = tensor("op_28635_interleave_0"), val = tensor(false)]; tensor var_28635_cast_fp16 = concat(axis = var_27149, interleave = var_28635_interleave_0, values = (var_28529_cast_fp16, var_28531_cast_fp16, var_28533_cast_fp16, var_28535_cast_fp16))[name = tensor("op_28635_cast_fp16")]; tensor var_28637_interleave_0 = const()[name = tensor("op_28637_interleave_0"), val = tensor(false)]; tensor var_28637_cast_fp16 = concat(axis = var_27149, interleave = var_28637_interleave_0, values = (var_28537_cast_fp16, var_28539_cast_fp16, var_28541_cast_fp16, var_28543_cast_fp16))[name = tensor("op_28637_cast_fp16")]; tensor var_28639_interleave_0 = const()[name = tensor("op_28639_interleave_0"), val = tensor(false)]; tensor var_28639_cast_fp16 = concat(axis = var_27149, interleave = var_28639_interleave_0, values = (var_28545_cast_fp16, var_28547_cast_fp16, var_28549_cast_fp16, var_28551_cast_fp16))[name = tensor("op_28639_cast_fp16")]; tensor var_28641_interleave_0 = const()[name = tensor("op_28641_interleave_0"), val = tensor(false)]; tensor var_28641_cast_fp16 = concat(axis = var_27149, interleave = var_28641_interleave_0, values = (var_28553_cast_fp16, var_28555_cast_fp16, var_28557_cast_fp16, var_28559_cast_fp16))[name = tensor("op_28641_cast_fp16")]; tensor var_28643_interleave_0 = const()[name = tensor("op_28643_interleave_0"), val = tensor(false)]; tensor var_28643_cast_fp16 = concat(axis = var_27149, interleave = var_28643_interleave_0, values = (var_28561_cast_fp16, var_28563_cast_fp16, var_28565_cast_fp16, var_28567_cast_fp16))[name = tensor("op_28643_cast_fp16")]; tensor var_28645_interleave_0 = const()[name = tensor("op_28645_interleave_0"), val = tensor(false)]; tensor var_28645_cast_fp16 = concat(axis = var_27149, interleave = var_28645_interleave_0, values = (var_28569_cast_fp16, var_28571_cast_fp16, var_28573_cast_fp16, var_28575_cast_fp16))[name = tensor("op_28645_cast_fp16")]; tensor var_28647_interleave_0 = const()[name = tensor("op_28647_interleave_0"), val = tensor(false)]; tensor var_28647_cast_fp16 = concat(axis = var_27149, interleave = var_28647_interleave_0, values = (var_28577_cast_fp16, var_28579_cast_fp16, var_28581_cast_fp16, var_28583_cast_fp16))[name = tensor("op_28647_cast_fp16")]; tensor var_28649_interleave_0 = const()[name = tensor("op_28649_interleave_0"), val = tensor(false)]; tensor var_28649_cast_fp16 = concat(axis = var_27149, interleave = var_28649_interleave_0, values = (var_28585_cast_fp16, var_28587_cast_fp16, var_28589_cast_fp16, var_28591_cast_fp16))[name = tensor("op_28649_cast_fp16")]; tensor var_28651_interleave_0 = const()[name = tensor("op_28651_interleave_0"), val = tensor(false)]; tensor var_28651_cast_fp16 = concat(axis = var_27149, interleave = var_28651_interleave_0, values = (var_28593_cast_fp16, var_28595_cast_fp16, var_28597_cast_fp16, var_28599_cast_fp16))[name = tensor("op_28651_cast_fp16")]; tensor var_28653_interleave_0 = const()[name = tensor("op_28653_interleave_0"), val = tensor(false)]; tensor var_28653_cast_fp16 = concat(axis = var_27149, interleave = var_28653_interleave_0, values = (var_28601_cast_fp16, var_28603_cast_fp16, var_28605_cast_fp16, var_28607_cast_fp16))[name = tensor("op_28653_cast_fp16")]; tensor var_28655_interleave_0 = const()[name = tensor("op_28655_interleave_0"), val = tensor(false)]; tensor var_28655_cast_fp16 = concat(axis = var_27149, interleave = var_28655_interleave_0, values = (var_28609_cast_fp16, var_28611_cast_fp16, var_28613_cast_fp16, var_28615_cast_fp16))[name = tensor("op_28655_cast_fp16")]; tensor input_137_interleave_0 = const()[name = tensor("input_137_interleave_0"), val = tensor(false)]; tensor input_137_cast_fp16 = concat(axis = var_27174, interleave = input_137_interleave_0, values = (var_28617_cast_fp16, var_28619_cast_fp16, var_28621_cast_fp16, var_28623_cast_fp16, var_28625_cast_fp16, var_28627_cast_fp16, var_28629_cast_fp16, var_28631_cast_fp16, var_28633_cast_fp16, var_28635_cast_fp16, var_28637_cast_fp16, var_28639_cast_fp16, var_28641_cast_fp16, var_28643_cast_fp16, var_28645_cast_fp16, var_28647_cast_fp16, var_28649_cast_fp16, var_28651_cast_fp16, var_28653_cast_fp16, var_28655_cast_fp16))[name = tensor("input_137_cast_fp16")]; tensor var_28666_pad_type_0 = const()[name = tensor("op_28666_pad_type_0"), val = tensor("valid")]; tensor var_28666_strides_0 = const()[name = tensor("op_28666_strides_0"), val = tensor([1, 1])]; tensor var_28666_pad_0 = const()[name = tensor("op_28666_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28666_dilations_0 = const()[name = tensor("op_28666_dilations_0"), val = tensor([1, 1])]; tensor var_28666_groups_0 = const()[name = tensor("op_28666_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237102784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237922048))), name = tensor("layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_17_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237922176)))]; tensor var_28666_cast_fp16 = conv(bias = layers_17_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_28666_dilations_0, groups = var_28666_groups_0, pad = var_28666_pad_0, pad_type = var_28666_pad_type_0, strides = var_28666_strides_0, weight = layers_17_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_137_cast_fp16)[name = tensor("op_28666_cast_fp16")]; tensor var_28672_pad_type_0 = const()[name = tensor("op_28672_pad_type_0"), val = tensor("valid")]; tensor var_28672_strides_0 = const()[name = tensor("op_28672_strides_0"), val = tensor([1, 1])]; tensor var_28672_pad_0 = const()[name = tensor("op_28672_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28672_dilations_0 = const()[name = tensor("op_28672_dilations_0"), val = tensor([1, 1])]; tensor var_28672_groups_0 = const()[name = tensor("op_28672_groups_0"), val = tensor(1)]; tensor layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237940736))), name = tensor("layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237924800))), shape = tensor([1280, 1280, 1, 1])]; tensor var_28672_cast_fp16 = conv(dilations = var_28672_dilations_0, groups = var_28672_groups_0, pad = var_28672_pad_0, pad_type = var_28672_pad_type_0, strides = var_28672_strides_0, weight = layers_17_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_137_cast_fp16)[name = tensor("op_28672_cast_fp16")]; tensor obj_71_cast_fp16 = add(x = var_28666_cast_fp16, y = var_28672_cast_fp16)[name = tensor("obj_71_cast_fp16")]; tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_71_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; tensor out_71_axes_0 = const()[name = tensor("out_71_axes_0"), val = tensor([1])]; tensor var_28683_to_fp16 = const()[name = tensor("op_28683_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_71_cast_fp16 = layer_norm(axes = out_71_axes_0, epsilon = var_28683_to_fp16, x = inputs_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; tensor input_139_gamma_0_to_fp16 = const()[name = tensor("input_139_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238145600)))]; tensor input_139_beta_0_to_fp16 = const()[name = tensor("input_139_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238148224)))]; tensor input_139_epsilon_0_to_fp16 = const()[name = tensor("input_139_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_139_cast_fp16 = batch_norm(beta = input_139_beta_0_to_fp16, epsilon = input_139_epsilon_0_to_fp16, gamma = input_139_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_139_cast_fp16")]; tensor var_28701_pad_type_0 = const()[name = tensor("op_28701_pad_type_0"), val = tensor("valid")]; tensor var_28701_strides_0 = const()[name = tensor("op_28701_strides_0"), val = tensor([1, 1])]; tensor var_28701_pad_0 = const()[name = tensor("op_28701_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28701_dilations_0 = const()[name = tensor("op_28701_dilations_0"), val = tensor([1, 1])]; tensor var_28701_groups_0 = const()[name = tensor("op_28701_groups_0"), val = tensor(1)]; tensor layers_17_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(238150848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241427712))), name = tensor("layers_17_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_17_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241427840)))]; tensor var_28701_cast_fp16 = conv(bias = layers_17_fc1_inlier_module_bias_to_fp16, dilations = var_28701_dilations_0, groups = var_28701_groups_0, pad = var_28701_pad_0, pad_type = var_28701_pad_type_0, strides = var_28701_strides_0, weight = layers_17_fc1_inlier_module_weight_to_fp16_palettized, x = input_139_cast_fp16)[name = tensor("op_28701_cast_fp16")]; tensor var_28707_pad_type_0 = const()[name = tensor("op_28707_pad_type_0"), val = tensor("valid")]; tensor var_28707_strides_0 = const()[name = tensor("op_28707_strides_0"), val = tensor([1, 1])]; tensor var_28707_pad_0 = const()[name = tensor("op_28707_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28707_dilations_0 = const()[name = tensor("op_28707_dilations_0"), val = tensor([1, 1])]; tensor var_28707_groups_0 = const()[name = tensor("op_28707_groups_0"), val = tensor(1)]; tensor layers_17_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241516992))), name = tensor("layers_17_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(241438144))), shape = tensor([5120, 1280, 1, 1])]; tensor var_28707_cast_fp16 = conv(dilations = var_28707_dilations_0, groups = var_28707_groups_0, pad = var_28707_pad_0, pad_type = var_28707_pad_type_0, strides = var_28707_strides_0, weight = layers_17_fc1_outlier_module_weight_to_fp16_sparsified, x = input_139_cast_fp16)[name = tensor("op_28707_cast_fp16")]; tensor input_141_cast_fp16 = add(x = var_28701_cast_fp16, y = var_28707_cast_fp16)[name = tensor("input_141_cast_fp16")]; tensor input_143_mode_0 = const()[name = tensor("input_143_mode_0"), val = tensor("EXACT")]; tensor input_143_cast_fp16 = gelu(mode = input_143_mode_0, x = input_141_cast_fp16)[name = tensor("input_143_cast_fp16")]; tensor var_28718_pad_type_0 = const()[name = tensor("op_28718_pad_type_0"), val = tensor("valid")]; tensor var_28718_strides_0 = const()[name = tensor("op_28718_strides_0"), val = tensor([1, 1])]; tensor var_28718_pad_0 = const()[name = tensor("op_28718_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28718_dilations_0 = const()[name = tensor("op_28718_dilations_0"), val = tensor([1, 1])]; tensor var_28718_groups_0 = const()[name = tensor("op_28718_groups_0"), val = tensor(1)]; tensor layers_17_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(242336256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245613120))), name = tensor("layers_17_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_17_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_17_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245613248)))]; tensor var_28718_cast_fp16 = conv(bias = layers_17_fc2_inlier_module_bias_to_fp16, dilations = var_28718_dilations_0, groups = var_28718_groups_0, pad = var_28718_pad_0, pad_type = var_28718_pad_type_0, strides = var_28718_strides_0, weight = layers_17_fc2_inlier_module_weight_to_fp16_palettized, x = input_143_cast_fp16)[name = tensor("op_28718_cast_fp16")]; tensor var_28724_pad_type_0 = const()[name = tensor("op_28724_pad_type_0"), val = tensor("valid")]; tensor var_28724_strides_0 = const()[name = tensor("op_28724_strides_0"), val = tensor([1, 1])]; tensor var_28724_pad_0 = const()[name = tensor("op_28724_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28724_dilations_0 = const()[name = tensor("op_28724_dilations_0"), val = tensor([1, 1])]; tensor var_28724_groups_0 = const()[name = tensor("op_28724_groups_0"), val = tensor(1)]; tensor layers_17_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245717952))), name = tensor("layers_17_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(245615872))), shape = tensor([1280, 5120, 1, 1])]; tensor var_28724_cast_fp16 = conv(dilations = var_28724_dilations_0, groups = var_28724_groups_0, pad = var_28724_pad_0, pad_type = var_28724_pad_type_0, strides = var_28724_strides_0, weight = layers_17_fc2_outlier_module_weight_to_fp16_sparsified, x = input_143_cast_fp16)[name = tensor("op_28724_cast_fp16")]; tensor hidden_states_39_cast_fp16 = add(x = var_28718_cast_fp16, y = var_28724_cast_fp16)[name = tensor("hidden_states_39_cast_fp16")]; tensor inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_39_cast_fp16)[name = tensor("inputs_73_cast_fp16")]; tensor var_28730 = const()[name = tensor("op_28730"), val = tensor(3)]; tensor var_28755 = const()[name = tensor("op_28755"), val = tensor(1)]; tensor out_73_axes_0 = const()[name = tensor("out_73_axes_0"), val = tensor([1])]; tensor var_28772_to_fp16 = const()[name = tensor("op_28772_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_73_cast_fp16 = layer_norm(axes = out_73_axes_0, epsilon = var_28772_to_fp16, x = inputs_73_cast_fp16)[name = tensor("out_73_cast_fp16")]; tensor obj_73_gamma_0_to_fp16 = const()[name = tensor("obj_73_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246537216)))]; tensor obj_73_beta_0_to_fp16 = const()[name = tensor("obj_73_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246539840)))]; tensor obj_73_epsilon_0_to_fp16 = const()[name = tensor("obj_73_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_73_cast_fp16 = batch_norm(beta = obj_73_beta_0_to_fp16, epsilon = obj_73_epsilon_0_to_fp16, gamma = obj_73_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_73_cast_fp16)[name = tensor("obj_73_cast_fp16")]; tensor var_28794_pad_type_0 = const()[name = tensor("op_28794_pad_type_0"), val = tensor("valid")]; tensor var_28794_strides_0 = const()[name = tensor("op_28794_strides_0"), val = tensor([1, 1])]; tensor var_28794_pad_0 = const()[name = tensor("op_28794_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28794_dilations_0 = const()[name = tensor("op_28794_dilations_0"), val = tensor([1, 1])]; tensor var_28794_groups_0 = const()[name = tensor("op_28794_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(246542464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247361728))), name = tensor("layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_18_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247361856)))]; tensor var_28794_cast_fp16 = conv(bias = layers_18_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_28794_dilations_0, groups = var_28794_groups_0, pad = var_28794_pad_0, pad_type = var_28794_pad_type_0, strides = var_28794_strides_0, weight = layers_18_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = tensor("op_28794_cast_fp16")]; tensor var_28800_pad_type_0 = const()[name = tensor("op_28800_pad_type_0"), val = tensor("valid")]; tensor var_28800_strides_0 = const()[name = tensor("op_28800_strides_0"), val = tensor([1, 1])]; tensor var_28800_pad_0 = const()[name = tensor("op_28800_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28800_dilations_0 = const()[name = tensor("op_28800_dilations_0"), val = tensor([1, 1])]; tensor var_28800_groups_0 = const()[name = tensor("op_28800_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247417600))), name = tensor("layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247364480))), shape = tensor([1280, 1280, 1, 1])]; tensor var_28800_cast_fp16 = conv(dilations = var_28800_dilations_0, groups = var_28800_groups_0, pad = var_28800_pad_0, pad_type = var_28800_pad_type_0, strides = var_28800_strides_0, weight = layers_18_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = tensor("op_28800_cast_fp16")]; tensor query_37_cast_fp16 = add(x = var_28794_cast_fp16, y = var_28800_cast_fp16)[name = tensor("query_37_cast_fp16")]; tensor var_28809_pad_type_0 = const()[name = tensor("op_28809_pad_type_0"), val = tensor("valid")]; tensor var_28809_strides_0 = const()[name = tensor("op_28809_strides_0"), val = tensor([1, 1])]; tensor var_28809_pad_0 = const()[name = tensor("op_28809_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28809_dilations_0 = const()[name = tensor("op_28809_dilations_0"), val = tensor([1, 1])]; tensor var_28809_groups_0 = const()[name = tensor("op_28809_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(247622464))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248441728))), name = tensor("layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_28809_cast_fp16 = conv(dilations = var_28809_dilations_0, groups = var_28809_groups_0, pad = var_28809_pad_0, pad_type = var_28809_pad_type_0, strides = var_28809_strides_0, weight = layers_18_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = tensor("op_28809_cast_fp16")]; tensor var_28815_pad_type_0 = const()[name = tensor("op_28815_pad_type_0"), val = tensor("valid")]; tensor var_28815_strides_0 = const()[name = tensor("op_28815_strides_0"), val = tensor([1, 1])]; tensor var_28815_pad_0 = const()[name = tensor("op_28815_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28815_dilations_0 = const()[name = tensor("op_28815_dilations_0"), val = tensor([1, 1])]; tensor var_28815_groups_0 = const()[name = tensor("op_28815_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248472512))), name = tensor("layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248441856))), shape = tensor([1280, 1280, 1, 1])]; tensor var_28815_cast_fp16 = conv(dilations = var_28815_dilations_0, groups = var_28815_groups_0, pad = var_28815_pad_0, pad_type = var_28815_pad_type_0, strides = var_28815_strides_0, weight = layers_18_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = tensor("op_28815_cast_fp16")]; tensor key_37_cast_fp16 = add(x = var_28809_cast_fp16, y = var_28815_cast_fp16)[name = tensor("key_37_cast_fp16")]; tensor var_28825_pad_type_0 = const()[name = tensor("op_28825_pad_type_0"), val = tensor("valid")]; tensor var_28825_strides_0 = const()[name = tensor("op_28825_strides_0"), val = tensor([1, 1])]; tensor var_28825_pad_0 = const()[name = tensor("op_28825_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28825_dilations_0 = const()[name = tensor("op_28825_dilations_0"), val = tensor([1, 1])]; tensor var_28825_groups_0 = const()[name = tensor("op_28825_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(248677376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249496640))), name = tensor("layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_18_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249496768)))]; tensor var_28825_cast_fp16 = conv(bias = layers_18_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_28825_dilations_0, groups = var_28825_groups_0, pad = var_28825_pad_0, pad_type = var_28825_pad_type_0, strides = var_28825_strides_0, weight = layers_18_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_73_cast_fp16)[name = tensor("op_28825_cast_fp16")]; tensor var_28831_pad_type_0 = const()[name = tensor("op_28831_pad_type_0"), val = tensor("valid")]; tensor var_28831_strides_0 = const()[name = tensor("op_28831_strides_0"), val = tensor([1, 1])]; tensor var_28831_pad_0 = const()[name = tensor("op_28831_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_28831_dilations_0 = const()[name = tensor("op_28831_dilations_0"), val = tensor([1, 1])]; tensor var_28831_groups_0 = const()[name = tensor("op_28831_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249515520))), name = tensor("layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249499392))), shape = tensor([1280, 1280, 1, 1])]; tensor var_28831_cast_fp16 = conv(dilations = var_28831_dilations_0, groups = var_28831_groups_0, pad = var_28831_pad_0, pad_type = var_28831_pad_type_0, strides = var_28831_strides_0, weight = layers_18_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_73_cast_fp16)[name = tensor("op_28831_cast_fp16")]; tensor value_37_cast_fp16 = add(x = var_28825_cast_fp16, y = var_28831_cast_fp16)[name = tensor("value_37_cast_fp16")]; tensor var_28837_begin_0 = const()[name = tensor("op_28837_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_28837_end_0 = const()[name = tensor("op_28837_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_28837_end_mask_0 = const()[name = tensor("op_28837_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28837_cast_fp16 = slice_by_index(begin = var_28837_begin_0, end = var_28837_end_0, end_mask = var_28837_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28837_cast_fp16")]; tensor var_28841_begin_0 = const()[name = tensor("op_28841_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_28841_end_0 = const()[name = tensor("op_28841_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_28841_end_mask_0 = const()[name = tensor("op_28841_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28841_cast_fp16 = slice_by_index(begin = var_28841_begin_0, end = var_28841_end_0, end_mask = var_28841_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28841_cast_fp16")]; tensor var_28845_begin_0 = const()[name = tensor("op_28845_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_28845_end_0 = const()[name = tensor("op_28845_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_28845_end_mask_0 = const()[name = tensor("op_28845_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28845_cast_fp16 = slice_by_index(begin = var_28845_begin_0, end = var_28845_end_0, end_mask = var_28845_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28845_cast_fp16")]; tensor var_28849_begin_0 = const()[name = tensor("op_28849_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_28849_end_0 = const()[name = tensor("op_28849_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_28849_end_mask_0 = const()[name = tensor("op_28849_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28849_cast_fp16 = slice_by_index(begin = var_28849_begin_0, end = var_28849_end_0, end_mask = var_28849_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28849_cast_fp16")]; tensor var_28853_begin_0 = const()[name = tensor("op_28853_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_28853_end_0 = const()[name = tensor("op_28853_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_28853_end_mask_0 = const()[name = tensor("op_28853_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28853_cast_fp16 = slice_by_index(begin = var_28853_begin_0, end = var_28853_end_0, end_mask = var_28853_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28853_cast_fp16")]; tensor var_28857_begin_0 = const()[name = tensor("op_28857_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_28857_end_0 = const()[name = tensor("op_28857_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_28857_end_mask_0 = const()[name = tensor("op_28857_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28857_cast_fp16 = slice_by_index(begin = var_28857_begin_0, end = var_28857_end_0, end_mask = var_28857_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28857_cast_fp16")]; tensor var_28861_begin_0 = const()[name = tensor("op_28861_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_28861_end_0 = const()[name = tensor("op_28861_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_28861_end_mask_0 = const()[name = tensor("op_28861_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28861_cast_fp16 = slice_by_index(begin = var_28861_begin_0, end = var_28861_end_0, end_mask = var_28861_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28861_cast_fp16")]; tensor var_28865_begin_0 = const()[name = tensor("op_28865_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_28865_end_0 = const()[name = tensor("op_28865_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_28865_end_mask_0 = const()[name = tensor("op_28865_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28865_cast_fp16 = slice_by_index(begin = var_28865_begin_0, end = var_28865_end_0, end_mask = var_28865_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28865_cast_fp16")]; tensor var_28869_begin_0 = const()[name = tensor("op_28869_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_28869_end_0 = const()[name = tensor("op_28869_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_28869_end_mask_0 = const()[name = tensor("op_28869_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28869_cast_fp16 = slice_by_index(begin = var_28869_begin_0, end = var_28869_end_0, end_mask = var_28869_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28869_cast_fp16")]; tensor var_28873_begin_0 = const()[name = tensor("op_28873_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_28873_end_0 = const()[name = tensor("op_28873_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_28873_end_mask_0 = const()[name = tensor("op_28873_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28873_cast_fp16 = slice_by_index(begin = var_28873_begin_0, end = var_28873_end_0, end_mask = var_28873_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28873_cast_fp16")]; tensor var_28877_begin_0 = const()[name = tensor("op_28877_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_28877_end_0 = const()[name = tensor("op_28877_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_28877_end_mask_0 = const()[name = tensor("op_28877_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28877_cast_fp16 = slice_by_index(begin = var_28877_begin_0, end = var_28877_end_0, end_mask = var_28877_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28877_cast_fp16")]; tensor var_28881_begin_0 = const()[name = tensor("op_28881_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_28881_end_0 = const()[name = tensor("op_28881_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_28881_end_mask_0 = const()[name = tensor("op_28881_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28881_cast_fp16 = slice_by_index(begin = var_28881_begin_0, end = var_28881_end_0, end_mask = var_28881_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28881_cast_fp16")]; tensor var_28885_begin_0 = const()[name = tensor("op_28885_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_28885_end_0 = const()[name = tensor("op_28885_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_28885_end_mask_0 = const()[name = tensor("op_28885_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28885_cast_fp16 = slice_by_index(begin = var_28885_begin_0, end = var_28885_end_0, end_mask = var_28885_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28885_cast_fp16")]; tensor var_28889_begin_0 = const()[name = tensor("op_28889_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_28889_end_0 = const()[name = tensor("op_28889_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_28889_end_mask_0 = const()[name = tensor("op_28889_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28889_cast_fp16 = slice_by_index(begin = var_28889_begin_0, end = var_28889_end_0, end_mask = var_28889_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28889_cast_fp16")]; tensor var_28893_begin_0 = const()[name = tensor("op_28893_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_28893_end_0 = const()[name = tensor("op_28893_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_28893_end_mask_0 = const()[name = tensor("op_28893_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28893_cast_fp16 = slice_by_index(begin = var_28893_begin_0, end = var_28893_end_0, end_mask = var_28893_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28893_cast_fp16")]; tensor var_28897_begin_0 = const()[name = tensor("op_28897_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_28897_end_0 = const()[name = tensor("op_28897_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_28897_end_mask_0 = const()[name = tensor("op_28897_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28897_cast_fp16 = slice_by_index(begin = var_28897_begin_0, end = var_28897_end_0, end_mask = var_28897_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28897_cast_fp16")]; tensor var_28901_begin_0 = const()[name = tensor("op_28901_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_28901_end_0 = const()[name = tensor("op_28901_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_28901_end_mask_0 = const()[name = tensor("op_28901_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28901_cast_fp16 = slice_by_index(begin = var_28901_begin_0, end = var_28901_end_0, end_mask = var_28901_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28901_cast_fp16")]; tensor var_28905_begin_0 = const()[name = tensor("op_28905_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_28905_end_0 = const()[name = tensor("op_28905_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_28905_end_mask_0 = const()[name = tensor("op_28905_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28905_cast_fp16 = slice_by_index(begin = var_28905_begin_0, end = var_28905_end_0, end_mask = var_28905_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28905_cast_fp16")]; tensor var_28909_begin_0 = const()[name = tensor("op_28909_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_28909_end_0 = const()[name = tensor("op_28909_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_28909_end_mask_0 = const()[name = tensor("op_28909_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28909_cast_fp16 = slice_by_index(begin = var_28909_begin_0, end = var_28909_end_0, end_mask = var_28909_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28909_cast_fp16")]; tensor var_28913_begin_0 = const()[name = tensor("op_28913_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_28913_end_0 = const()[name = tensor("op_28913_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_28913_end_mask_0 = const()[name = tensor("op_28913_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_28913_cast_fp16 = slice_by_index(begin = var_28913_begin_0, end = var_28913_end_0, end_mask = var_28913_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_28913_cast_fp16")]; tensor var_28922_begin_0 = const()[name = tensor("op_28922_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_28922_end_0 = const()[name = tensor("op_28922_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_28922_end_mask_0 = const()[name = tensor("op_28922_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28922_cast_fp16 = slice_by_index(begin = var_28922_begin_0, end = var_28922_end_0, end_mask = var_28922_end_mask_0, x = var_28837_cast_fp16)[name = tensor("op_28922_cast_fp16")]; tensor var_28929_begin_0 = const()[name = tensor("op_28929_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_28929_end_0 = const()[name = tensor("op_28929_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_28929_end_mask_0 = const()[name = tensor("op_28929_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28929_cast_fp16 = slice_by_index(begin = var_28929_begin_0, end = var_28929_end_0, end_mask = var_28929_end_mask_0, x = var_28837_cast_fp16)[name = tensor("op_28929_cast_fp16")]; tensor var_28936_begin_0 = const()[name = tensor("op_28936_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_28936_end_0 = const()[name = tensor("op_28936_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_28936_end_mask_0 = const()[name = tensor("op_28936_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28936_cast_fp16 = slice_by_index(begin = var_28936_begin_0, end = var_28936_end_0, end_mask = var_28936_end_mask_0, x = var_28837_cast_fp16)[name = tensor("op_28936_cast_fp16")]; tensor var_28943_begin_0 = const()[name = tensor("op_28943_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_28943_end_0 = const()[name = tensor("op_28943_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_28943_end_mask_0 = const()[name = tensor("op_28943_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28943_cast_fp16 = slice_by_index(begin = var_28943_begin_0, end = var_28943_end_0, end_mask = var_28943_end_mask_0, x = var_28837_cast_fp16)[name = tensor("op_28943_cast_fp16")]; tensor var_28950_begin_0 = const()[name = tensor("op_28950_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_28950_end_0 = const()[name = tensor("op_28950_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_28950_end_mask_0 = const()[name = tensor("op_28950_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28950_cast_fp16 = slice_by_index(begin = var_28950_begin_0, end = var_28950_end_0, end_mask = var_28950_end_mask_0, x = var_28841_cast_fp16)[name = tensor("op_28950_cast_fp16")]; tensor var_28957_begin_0 = const()[name = tensor("op_28957_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_28957_end_0 = const()[name = tensor("op_28957_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_28957_end_mask_0 = const()[name = tensor("op_28957_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28957_cast_fp16 = slice_by_index(begin = var_28957_begin_0, end = var_28957_end_0, end_mask = var_28957_end_mask_0, x = var_28841_cast_fp16)[name = tensor("op_28957_cast_fp16")]; tensor var_28964_begin_0 = const()[name = tensor("op_28964_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_28964_end_0 = const()[name = tensor("op_28964_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_28964_end_mask_0 = const()[name = tensor("op_28964_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28964_cast_fp16 = slice_by_index(begin = var_28964_begin_0, end = var_28964_end_0, end_mask = var_28964_end_mask_0, x = var_28841_cast_fp16)[name = tensor("op_28964_cast_fp16")]; tensor var_28971_begin_0 = const()[name = tensor("op_28971_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_28971_end_0 = const()[name = tensor("op_28971_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_28971_end_mask_0 = const()[name = tensor("op_28971_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28971_cast_fp16 = slice_by_index(begin = var_28971_begin_0, end = var_28971_end_0, end_mask = var_28971_end_mask_0, x = var_28841_cast_fp16)[name = tensor("op_28971_cast_fp16")]; tensor var_28978_begin_0 = const()[name = tensor("op_28978_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_28978_end_0 = const()[name = tensor("op_28978_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_28978_end_mask_0 = const()[name = tensor("op_28978_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28978_cast_fp16 = slice_by_index(begin = var_28978_begin_0, end = var_28978_end_0, end_mask = var_28978_end_mask_0, x = var_28845_cast_fp16)[name = tensor("op_28978_cast_fp16")]; tensor var_28985_begin_0 = const()[name = tensor("op_28985_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_28985_end_0 = const()[name = tensor("op_28985_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_28985_end_mask_0 = const()[name = tensor("op_28985_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28985_cast_fp16 = slice_by_index(begin = var_28985_begin_0, end = var_28985_end_0, end_mask = var_28985_end_mask_0, x = var_28845_cast_fp16)[name = tensor("op_28985_cast_fp16")]; tensor var_28992_begin_0 = const()[name = tensor("op_28992_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_28992_end_0 = const()[name = tensor("op_28992_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_28992_end_mask_0 = const()[name = tensor("op_28992_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28992_cast_fp16 = slice_by_index(begin = var_28992_begin_0, end = var_28992_end_0, end_mask = var_28992_end_mask_0, x = var_28845_cast_fp16)[name = tensor("op_28992_cast_fp16")]; tensor var_28999_begin_0 = const()[name = tensor("op_28999_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_28999_end_0 = const()[name = tensor("op_28999_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_28999_end_mask_0 = const()[name = tensor("op_28999_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_28999_cast_fp16 = slice_by_index(begin = var_28999_begin_0, end = var_28999_end_0, end_mask = var_28999_end_mask_0, x = var_28845_cast_fp16)[name = tensor("op_28999_cast_fp16")]; tensor var_29006_begin_0 = const()[name = tensor("op_29006_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29006_end_0 = const()[name = tensor("op_29006_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29006_end_mask_0 = const()[name = tensor("op_29006_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29006_cast_fp16 = slice_by_index(begin = var_29006_begin_0, end = var_29006_end_0, end_mask = var_29006_end_mask_0, x = var_28849_cast_fp16)[name = tensor("op_29006_cast_fp16")]; tensor var_29013_begin_0 = const()[name = tensor("op_29013_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29013_end_0 = const()[name = tensor("op_29013_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29013_end_mask_0 = const()[name = tensor("op_29013_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29013_cast_fp16 = slice_by_index(begin = var_29013_begin_0, end = var_29013_end_0, end_mask = var_29013_end_mask_0, x = var_28849_cast_fp16)[name = tensor("op_29013_cast_fp16")]; tensor var_29020_begin_0 = const()[name = tensor("op_29020_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29020_end_0 = const()[name = tensor("op_29020_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29020_end_mask_0 = const()[name = tensor("op_29020_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29020_cast_fp16 = slice_by_index(begin = var_29020_begin_0, end = var_29020_end_0, end_mask = var_29020_end_mask_0, x = var_28849_cast_fp16)[name = tensor("op_29020_cast_fp16")]; tensor var_29027_begin_0 = const()[name = tensor("op_29027_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29027_end_0 = const()[name = tensor("op_29027_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29027_end_mask_0 = const()[name = tensor("op_29027_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29027_cast_fp16 = slice_by_index(begin = var_29027_begin_0, end = var_29027_end_0, end_mask = var_29027_end_mask_0, x = var_28849_cast_fp16)[name = tensor("op_29027_cast_fp16")]; tensor var_29034_begin_0 = const()[name = tensor("op_29034_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29034_end_0 = const()[name = tensor("op_29034_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29034_end_mask_0 = const()[name = tensor("op_29034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29034_cast_fp16 = slice_by_index(begin = var_29034_begin_0, end = var_29034_end_0, end_mask = var_29034_end_mask_0, x = var_28853_cast_fp16)[name = tensor("op_29034_cast_fp16")]; tensor var_29041_begin_0 = const()[name = tensor("op_29041_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29041_end_0 = const()[name = tensor("op_29041_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29041_end_mask_0 = const()[name = tensor("op_29041_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29041_cast_fp16 = slice_by_index(begin = var_29041_begin_0, end = var_29041_end_0, end_mask = var_29041_end_mask_0, x = var_28853_cast_fp16)[name = tensor("op_29041_cast_fp16")]; tensor var_29048_begin_0 = const()[name = tensor("op_29048_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29048_end_0 = const()[name = tensor("op_29048_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29048_end_mask_0 = const()[name = tensor("op_29048_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29048_cast_fp16 = slice_by_index(begin = var_29048_begin_0, end = var_29048_end_0, end_mask = var_29048_end_mask_0, x = var_28853_cast_fp16)[name = tensor("op_29048_cast_fp16")]; tensor var_29055_begin_0 = const()[name = tensor("op_29055_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29055_end_0 = const()[name = tensor("op_29055_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29055_end_mask_0 = const()[name = tensor("op_29055_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29055_cast_fp16 = slice_by_index(begin = var_29055_begin_0, end = var_29055_end_0, end_mask = var_29055_end_mask_0, x = var_28853_cast_fp16)[name = tensor("op_29055_cast_fp16")]; tensor var_29062_begin_0 = const()[name = tensor("op_29062_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29062_end_0 = const()[name = tensor("op_29062_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29062_end_mask_0 = const()[name = tensor("op_29062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29062_cast_fp16 = slice_by_index(begin = var_29062_begin_0, end = var_29062_end_0, end_mask = var_29062_end_mask_0, x = var_28857_cast_fp16)[name = tensor("op_29062_cast_fp16")]; tensor var_29069_begin_0 = const()[name = tensor("op_29069_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29069_end_0 = const()[name = tensor("op_29069_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29069_end_mask_0 = const()[name = tensor("op_29069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29069_cast_fp16 = slice_by_index(begin = var_29069_begin_0, end = var_29069_end_0, end_mask = var_29069_end_mask_0, x = var_28857_cast_fp16)[name = tensor("op_29069_cast_fp16")]; tensor var_29076_begin_0 = const()[name = tensor("op_29076_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29076_end_0 = const()[name = tensor("op_29076_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29076_end_mask_0 = const()[name = tensor("op_29076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29076_cast_fp16 = slice_by_index(begin = var_29076_begin_0, end = var_29076_end_0, end_mask = var_29076_end_mask_0, x = var_28857_cast_fp16)[name = tensor("op_29076_cast_fp16")]; tensor var_29083_begin_0 = const()[name = tensor("op_29083_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29083_end_0 = const()[name = tensor("op_29083_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29083_end_mask_0 = const()[name = tensor("op_29083_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29083_cast_fp16 = slice_by_index(begin = var_29083_begin_0, end = var_29083_end_0, end_mask = var_29083_end_mask_0, x = var_28857_cast_fp16)[name = tensor("op_29083_cast_fp16")]; tensor var_29090_begin_0 = const()[name = tensor("op_29090_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29090_end_0 = const()[name = tensor("op_29090_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29090_end_mask_0 = const()[name = tensor("op_29090_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29090_cast_fp16 = slice_by_index(begin = var_29090_begin_0, end = var_29090_end_0, end_mask = var_29090_end_mask_0, x = var_28861_cast_fp16)[name = tensor("op_29090_cast_fp16")]; tensor var_29097_begin_0 = const()[name = tensor("op_29097_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29097_end_0 = const()[name = tensor("op_29097_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29097_end_mask_0 = const()[name = tensor("op_29097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29097_cast_fp16 = slice_by_index(begin = var_29097_begin_0, end = var_29097_end_0, end_mask = var_29097_end_mask_0, x = var_28861_cast_fp16)[name = tensor("op_29097_cast_fp16")]; tensor var_29104_begin_0 = const()[name = tensor("op_29104_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29104_end_0 = const()[name = tensor("op_29104_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29104_end_mask_0 = const()[name = tensor("op_29104_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29104_cast_fp16 = slice_by_index(begin = var_29104_begin_0, end = var_29104_end_0, end_mask = var_29104_end_mask_0, x = var_28861_cast_fp16)[name = tensor("op_29104_cast_fp16")]; tensor var_29111_begin_0 = const()[name = tensor("op_29111_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29111_end_0 = const()[name = tensor("op_29111_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29111_end_mask_0 = const()[name = tensor("op_29111_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29111_cast_fp16 = slice_by_index(begin = var_29111_begin_0, end = var_29111_end_0, end_mask = var_29111_end_mask_0, x = var_28861_cast_fp16)[name = tensor("op_29111_cast_fp16")]; tensor var_29118_begin_0 = const()[name = tensor("op_29118_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29118_end_0 = const()[name = tensor("op_29118_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29118_end_mask_0 = const()[name = tensor("op_29118_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29118_cast_fp16 = slice_by_index(begin = var_29118_begin_0, end = var_29118_end_0, end_mask = var_29118_end_mask_0, x = var_28865_cast_fp16)[name = tensor("op_29118_cast_fp16")]; tensor var_29125_begin_0 = const()[name = tensor("op_29125_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29125_end_0 = const()[name = tensor("op_29125_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29125_end_mask_0 = const()[name = tensor("op_29125_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29125_cast_fp16 = slice_by_index(begin = var_29125_begin_0, end = var_29125_end_0, end_mask = var_29125_end_mask_0, x = var_28865_cast_fp16)[name = tensor("op_29125_cast_fp16")]; tensor var_29132_begin_0 = const()[name = tensor("op_29132_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29132_end_0 = const()[name = tensor("op_29132_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29132_end_mask_0 = const()[name = tensor("op_29132_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29132_cast_fp16 = slice_by_index(begin = var_29132_begin_0, end = var_29132_end_0, end_mask = var_29132_end_mask_0, x = var_28865_cast_fp16)[name = tensor("op_29132_cast_fp16")]; tensor var_29139_begin_0 = const()[name = tensor("op_29139_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29139_end_0 = const()[name = tensor("op_29139_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29139_end_mask_0 = const()[name = tensor("op_29139_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29139_cast_fp16 = slice_by_index(begin = var_29139_begin_0, end = var_29139_end_0, end_mask = var_29139_end_mask_0, x = var_28865_cast_fp16)[name = tensor("op_29139_cast_fp16")]; tensor var_29146_begin_0 = const()[name = tensor("op_29146_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29146_end_0 = const()[name = tensor("op_29146_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29146_end_mask_0 = const()[name = tensor("op_29146_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29146_cast_fp16 = slice_by_index(begin = var_29146_begin_0, end = var_29146_end_0, end_mask = var_29146_end_mask_0, x = var_28869_cast_fp16)[name = tensor("op_29146_cast_fp16")]; tensor var_29153_begin_0 = const()[name = tensor("op_29153_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29153_end_0 = const()[name = tensor("op_29153_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29153_end_mask_0 = const()[name = tensor("op_29153_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29153_cast_fp16 = slice_by_index(begin = var_29153_begin_0, end = var_29153_end_0, end_mask = var_29153_end_mask_0, x = var_28869_cast_fp16)[name = tensor("op_29153_cast_fp16")]; tensor var_29160_begin_0 = const()[name = tensor("op_29160_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29160_end_0 = const()[name = tensor("op_29160_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29160_end_mask_0 = const()[name = tensor("op_29160_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29160_cast_fp16 = slice_by_index(begin = var_29160_begin_0, end = var_29160_end_0, end_mask = var_29160_end_mask_0, x = var_28869_cast_fp16)[name = tensor("op_29160_cast_fp16")]; tensor var_29167_begin_0 = const()[name = tensor("op_29167_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29167_end_0 = const()[name = tensor("op_29167_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29167_end_mask_0 = const()[name = tensor("op_29167_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29167_cast_fp16 = slice_by_index(begin = var_29167_begin_0, end = var_29167_end_0, end_mask = var_29167_end_mask_0, x = var_28869_cast_fp16)[name = tensor("op_29167_cast_fp16")]; tensor var_29174_begin_0 = const()[name = tensor("op_29174_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29174_end_0 = const()[name = tensor("op_29174_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29174_end_mask_0 = const()[name = tensor("op_29174_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29174_cast_fp16 = slice_by_index(begin = var_29174_begin_0, end = var_29174_end_0, end_mask = var_29174_end_mask_0, x = var_28873_cast_fp16)[name = tensor("op_29174_cast_fp16")]; tensor var_29181_begin_0 = const()[name = tensor("op_29181_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29181_end_0 = const()[name = tensor("op_29181_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29181_end_mask_0 = const()[name = tensor("op_29181_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29181_cast_fp16 = slice_by_index(begin = var_29181_begin_0, end = var_29181_end_0, end_mask = var_29181_end_mask_0, x = var_28873_cast_fp16)[name = tensor("op_29181_cast_fp16")]; tensor var_29188_begin_0 = const()[name = tensor("op_29188_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29188_end_0 = const()[name = tensor("op_29188_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29188_end_mask_0 = const()[name = tensor("op_29188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29188_cast_fp16 = slice_by_index(begin = var_29188_begin_0, end = var_29188_end_0, end_mask = var_29188_end_mask_0, x = var_28873_cast_fp16)[name = tensor("op_29188_cast_fp16")]; tensor var_29195_begin_0 = const()[name = tensor("op_29195_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29195_end_0 = const()[name = tensor("op_29195_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29195_end_mask_0 = const()[name = tensor("op_29195_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29195_cast_fp16 = slice_by_index(begin = var_29195_begin_0, end = var_29195_end_0, end_mask = var_29195_end_mask_0, x = var_28873_cast_fp16)[name = tensor("op_29195_cast_fp16")]; tensor var_29202_begin_0 = const()[name = tensor("op_29202_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29202_end_0 = const()[name = tensor("op_29202_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29202_end_mask_0 = const()[name = tensor("op_29202_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29202_cast_fp16 = slice_by_index(begin = var_29202_begin_0, end = var_29202_end_0, end_mask = var_29202_end_mask_0, x = var_28877_cast_fp16)[name = tensor("op_29202_cast_fp16")]; tensor var_29209_begin_0 = const()[name = tensor("op_29209_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29209_end_0 = const()[name = tensor("op_29209_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29209_end_mask_0 = const()[name = tensor("op_29209_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29209_cast_fp16 = slice_by_index(begin = var_29209_begin_0, end = var_29209_end_0, end_mask = var_29209_end_mask_0, x = var_28877_cast_fp16)[name = tensor("op_29209_cast_fp16")]; tensor var_29216_begin_0 = const()[name = tensor("op_29216_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29216_end_0 = const()[name = tensor("op_29216_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29216_end_mask_0 = const()[name = tensor("op_29216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29216_cast_fp16 = slice_by_index(begin = var_29216_begin_0, end = var_29216_end_0, end_mask = var_29216_end_mask_0, x = var_28877_cast_fp16)[name = tensor("op_29216_cast_fp16")]; tensor var_29223_begin_0 = const()[name = tensor("op_29223_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29223_end_0 = const()[name = tensor("op_29223_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29223_end_mask_0 = const()[name = tensor("op_29223_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29223_cast_fp16 = slice_by_index(begin = var_29223_begin_0, end = var_29223_end_0, end_mask = var_29223_end_mask_0, x = var_28877_cast_fp16)[name = tensor("op_29223_cast_fp16")]; tensor var_29230_begin_0 = const()[name = tensor("op_29230_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29230_end_0 = const()[name = tensor("op_29230_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29230_end_mask_0 = const()[name = tensor("op_29230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29230_cast_fp16 = slice_by_index(begin = var_29230_begin_0, end = var_29230_end_0, end_mask = var_29230_end_mask_0, x = var_28881_cast_fp16)[name = tensor("op_29230_cast_fp16")]; tensor var_29237_begin_0 = const()[name = tensor("op_29237_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29237_end_0 = const()[name = tensor("op_29237_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29237_end_mask_0 = const()[name = tensor("op_29237_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29237_cast_fp16 = slice_by_index(begin = var_29237_begin_0, end = var_29237_end_0, end_mask = var_29237_end_mask_0, x = var_28881_cast_fp16)[name = tensor("op_29237_cast_fp16")]; tensor var_29244_begin_0 = const()[name = tensor("op_29244_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29244_end_0 = const()[name = tensor("op_29244_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29244_end_mask_0 = const()[name = tensor("op_29244_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29244_cast_fp16 = slice_by_index(begin = var_29244_begin_0, end = var_29244_end_0, end_mask = var_29244_end_mask_0, x = var_28881_cast_fp16)[name = tensor("op_29244_cast_fp16")]; tensor var_29251_begin_0 = const()[name = tensor("op_29251_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29251_end_0 = const()[name = tensor("op_29251_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29251_end_mask_0 = const()[name = tensor("op_29251_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29251_cast_fp16 = slice_by_index(begin = var_29251_begin_0, end = var_29251_end_0, end_mask = var_29251_end_mask_0, x = var_28881_cast_fp16)[name = tensor("op_29251_cast_fp16")]; tensor var_29258_begin_0 = const()[name = tensor("op_29258_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29258_end_0 = const()[name = tensor("op_29258_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29258_end_mask_0 = const()[name = tensor("op_29258_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29258_cast_fp16 = slice_by_index(begin = var_29258_begin_0, end = var_29258_end_0, end_mask = var_29258_end_mask_0, x = var_28885_cast_fp16)[name = tensor("op_29258_cast_fp16")]; tensor var_29265_begin_0 = const()[name = tensor("op_29265_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29265_end_0 = const()[name = tensor("op_29265_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29265_end_mask_0 = const()[name = tensor("op_29265_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29265_cast_fp16 = slice_by_index(begin = var_29265_begin_0, end = var_29265_end_0, end_mask = var_29265_end_mask_0, x = var_28885_cast_fp16)[name = tensor("op_29265_cast_fp16")]; tensor var_29272_begin_0 = const()[name = tensor("op_29272_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29272_end_0 = const()[name = tensor("op_29272_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29272_end_mask_0 = const()[name = tensor("op_29272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29272_cast_fp16 = slice_by_index(begin = var_29272_begin_0, end = var_29272_end_0, end_mask = var_29272_end_mask_0, x = var_28885_cast_fp16)[name = tensor("op_29272_cast_fp16")]; tensor var_29279_begin_0 = const()[name = tensor("op_29279_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29279_end_0 = const()[name = tensor("op_29279_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29279_end_mask_0 = const()[name = tensor("op_29279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29279_cast_fp16 = slice_by_index(begin = var_29279_begin_0, end = var_29279_end_0, end_mask = var_29279_end_mask_0, x = var_28885_cast_fp16)[name = tensor("op_29279_cast_fp16")]; tensor var_29286_begin_0 = const()[name = tensor("op_29286_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29286_end_0 = const()[name = tensor("op_29286_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29286_end_mask_0 = const()[name = tensor("op_29286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29286_cast_fp16 = slice_by_index(begin = var_29286_begin_0, end = var_29286_end_0, end_mask = var_29286_end_mask_0, x = var_28889_cast_fp16)[name = tensor("op_29286_cast_fp16")]; tensor var_29293_begin_0 = const()[name = tensor("op_29293_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29293_end_0 = const()[name = tensor("op_29293_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29293_end_mask_0 = const()[name = tensor("op_29293_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29293_cast_fp16 = slice_by_index(begin = var_29293_begin_0, end = var_29293_end_0, end_mask = var_29293_end_mask_0, x = var_28889_cast_fp16)[name = tensor("op_29293_cast_fp16")]; tensor var_29300_begin_0 = const()[name = tensor("op_29300_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29300_end_0 = const()[name = tensor("op_29300_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29300_end_mask_0 = const()[name = tensor("op_29300_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29300_cast_fp16 = slice_by_index(begin = var_29300_begin_0, end = var_29300_end_0, end_mask = var_29300_end_mask_0, x = var_28889_cast_fp16)[name = tensor("op_29300_cast_fp16")]; tensor var_29307_begin_0 = const()[name = tensor("op_29307_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29307_end_0 = const()[name = tensor("op_29307_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29307_end_mask_0 = const()[name = tensor("op_29307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29307_cast_fp16 = slice_by_index(begin = var_29307_begin_0, end = var_29307_end_0, end_mask = var_29307_end_mask_0, x = var_28889_cast_fp16)[name = tensor("op_29307_cast_fp16")]; tensor var_29314_begin_0 = const()[name = tensor("op_29314_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29314_end_0 = const()[name = tensor("op_29314_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29314_end_mask_0 = const()[name = tensor("op_29314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29314_cast_fp16 = slice_by_index(begin = var_29314_begin_0, end = var_29314_end_0, end_mask = var_29314_end_mask_0, x = var_28893_cast_fp16)[name = tensor("op_29314_cast_fp16")]; tensor var_29321_begin_0 = const()[name = tensor("op_29321_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29321_end_0 = const()[name = tensor("op_29321_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29321_end_mask_0 = const()[name = tensor("op_29321_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29321_cast_fp16 = slice_by_index(begin = var_29321_begin_0, end = var_29321_end_0, end_mask = var_29321_end_mask_0, x = var_28893_cast_fp16)[name = tensor("op_29321_cast_fp16")]; tensor var_29328_begin_0 = const()[name = tensor("op_29328_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29328_end_0 = const()[name = tensor("op_29328_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29328_end_mask_0 = const()[name = tensor("op_29328_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29328_cast_fp16 = slice_by_index(begin = var_29328_begin_0, end = var_29328_end_0, end_mask = var_29328_end_mask_0, x = var_28893_cast_fp16)[name = tensor("op_29328_cast_fp16")]; tensor var_29335_begin_0 = const()[name = tensor("op_29335_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29335_end_0 = const()[name = tensor("op_29335_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29335_end_mask_0 = const()[name = tensor("op_29335_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29335_cast_fp16 = slice_by_index(begin = var_29335_begin_0, end = var_29335_end_0, end_mask = var_29335_end_mask_0, x = var_28893_cast_fp16)[name = tensor("op_29335_cast_fp16")]; tensor var_29342_begin_0 = const()[name = tensor("op_29342_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29342_end_0 = const()[name = tensor("op_29342_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29342_end_mask_0 = const()[name = tensor("op_29342_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29342_cast_fp16 = slice_by_index(begin = var_29342_begin_0, end = var_29342_end_0, end_mask = var_29342_end_mask_0, x = var_28897_cast_fp16)[name = tensor("op_29342_cast_fp16")]; tensor var_29349_begin_0 = const()[name = tensor("op_29349_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29349_end_0 = const()[name = tensor("op_29349_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29349_end_mask_0 = const()[name = tensor("op_29349_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29349_cast_fp16 = slice_by_index(begin = var_29349_begin_0, end = var_29349_end_0, end_mask = var_29349_end_mask_0, x = var_28897_cast_fp16)[name = tensor("op_29349_cast_fp16")]; tensor var_29356_begin_0 = const()[name = tensor("op_29356_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29356_end_0 = const()[name = tensor("op_29356_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29356_end_mask_0 = const()[name = tensor("op_29356_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29356_cast_fp16 = slice_by_index(begin = var_29356_begin_0, end = var_29356_end_0, end_mask = var_29356_end_mask_0, x = var_28897_cast_fp16)[name = tensor("op_29356_cast_fp16")]; tensor var_29363_begin_0 = const()[name = tensor("op_29363_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29363_end_0 = const()[name = tensor("op_29363_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29363_end_mask_0 = const()[name = tensor("op_29363_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29363_cast_fp16 = slice_by_index(begin = var_29363_begin_0, end = var_29363_end_0, end_mask = var_29363_end_mask_0, x = var_28897_cast_fp16)[name = tensor("op_29363_cast_fp16")]; tensor var_29370_begin_0 = const()[name = tensor("op_29370_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29370_end_0 = const()[name = tensor("op_29370_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29370_end_mask_0 = const()[name = tensor("op_29370_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29370_cast_fp16 = slice_by_index(begin = var_29370_begin_0, end = var_29370_end_0, end_mask = var_29370_end_mask_0, x = var_28901_cast_fp16)[name = tensor("op_29370_cast_fp16")]; tensor var_29377_begin_0 = const()[name = tensor("op_29377_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29377_end_0 = const()[name = tensor("op_29377_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29377_end_mask_0 = const()[name = tensor("op_29377_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29377_cast_fp16 = slice_by_index(begin = var_29377_begin_0, end = var_29377_end_0, end_mask = var_29377_end_mask_0, x = var_28901_cast_fp16)[name = tensor("op_29377_cast_fp16")]; tensor var_29384_begin_0 = const()[name = tensor("op_29384_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29384_end_0 = const()[name = tensor("op_29384_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29384_end_mask_0 = const()[name = tensor("op_29384_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29384_cast_fp16 = slice_by_index(begin = var_29384_begin_0, end = var_29384_end_0, end_mask = var_29384_end_mask_0, x = var_28901_cast_fp16)[name = tensor("op_29384_cast_fp16")]; tensor var_29391_begin_0 = const()[name = tensor("op_29391_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29391_end_0 = const()[name = tensor("op_29391_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29391_end_mask_0 = const()[name = tensor("op_29391_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29391_cast_fp16 = slice_by_index(begin = var_29391_begin_0, end = var_29391_end_0, end_mask = var_29391_end_mask_0, x = var_28901_cast_fp16)[name = tensor("op_29391_cast_fp16")]; tensor var_29398_begin_0 = const()[name = tensor("op_29398_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29398_end_0 = const()[name = tensor("op_29398_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29398_end_mask_0 = const()[name = tensor("op_29398_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29398_cast_fp16 = slice_by_index(begin = var_29398_begin_0, end = var_29398_end_0, end_mask = var_29398_end_mask_0, x = var_28905_cast_fp16)[name = tensor("op_29398_cast_fp16")]; tensor var_29405_begin_0 = const()[name = tensor("op_29405_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29405_end_0 = const()[name = tensor("op_29405_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29405_end_mask_0 = const()[name = tensor("op_29405_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29405_cast_fp16 = slice_by_index(begin = var_29405_begin_0, end = var_29405_end_0, end_mask = var_29405_end_mask_0, x = var_28905_cast_fp16)[name = tensor("op_29405_cast_fp16")]; tensor var_29412_begin_0 = const()[name = tensor("op_29412_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29412_end_0 = const()[name = tensor("op_29412_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29412_end_mask_0 = const()[name = tensor("op_29412_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29412_cast_fp16 = slice_by_index(begin = var_29412_begin_0, end = var_29412_end_0, end_mask = var_29412_end_mask_0, x = var_28905_cast_fp16)[name = tensor("op_29412_cast_fp16")]; tensor var_29419_begin_0 = const()[name = tensor("op_29419_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29419_end_0 = const()[name = tensor("op_29419_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29419_end_mask_0 = const()[name = tensor("op_29419_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29419_cast_fp16 = slice_by_index(begin = var_29419_begin_0, end = var_29419_end_0, end_mask = var_29419_end_mask_0, x = var_28905_cast_fp16)[name = tensor("op_29419_cast_fp16")]; tensor var_29426_begin_0 = const()[name = tensor("op_29426_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29426_end_0 = const()[name = tensor("op_29426_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29426_end_mask_0 = const()[name = tensor("op_29426_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29426_cast_fp16 = slice_by_index(begin = var_29426_begin_0, end = var_29426_end_0, end_mask = var_29426_end_mask_0, x = var_28909_cast_fp16)[name = tensor("op_29426_cast_fp16")]; tensor var_29433_begin_0 = const()[name = tensor("op_29433_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29433_end_0 = const()[name = tensor("op_29433_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29433_end_mask_0 = const()[name = tensor("op_29433_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29433_cast_fp16 = slice_by_index(begin = var_29433_begin_0, end = var_29433_end_0, end_mask = var_29433_end_mask_0, x = var_28909_cast_fp16)[name = tensor("op_29433_cast_fp16")]; tensor var_29440_begin_0 = const()[name = tensor("op_29440_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29440_end_0 = const()[name = tensor("op_29440_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29440_end_mask_0 = const()[name = tensor("op_29440_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29440_cast_fp16 = slice_by_index(begin = var_29440_begin_0, end = var_29440_end_0, end_mask = var_29440_end_mask_0, x = var_28909_cast_fp16)[name = tensor("op_29440_cast_fp16")]; tensor var_29447_begin_0 = const()[name = tensor("op_29447_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29447_end_0 = const()[name = tensor("op_29447_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29447_end_mask_0 = const()[name = tensor("op_29447_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29447_cast_fp16 = slice_by_index(begin = var_29447_begin_0, end = var_29447_end_0, end_mask = var_29447_end_mask_0, x = var_28909_cast_fp16)[name = tensor("op_29447_cast_fp16")]; tensor var_29454_begin_0 = const()[name = tensor("op_29454_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29454_end_0 = const()[name = tensor("op_29454_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_29454_end_mask_0 = const()[name = tensor("op_29454_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29454_cast_fp16 = slice_by_index(begin = var_29454_begin_0, end = var_29454_end_0, end_mask = var_29454_end_mask_0, x = var_28913_cast_fp16)[name = tensor("op_29454_cast_fp16")]; tensor var_29461_begin_0 = const()[name = tensor("op_29461_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_29461_end_0 = const()[name = tensor("op_29461_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_29461_end_mask_0 = const()[name = tensor("op_29461_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29461_cast_fp16 = slice_by_index(begin = var_29461_begin_0, end = var_29461_end_0, end_mask = var_29461_end_mask_0, x = var_28913_cast_fp16)[name = tensor("op_29461_cast_fp16")]; tensor var_29468_begin_0 = const()[name = tensor("op_29468_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_29468_end_0 = const()[name = tensor("op_29468_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_29468_end_mask_0 = const()[name = tensor("op_29468_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29468_cast_fp16 = slice_by_index(begin = var_29468_begin_0, end = var_29468_end_0, end_mask = var_29468_end_mask_0, x = var_28913_cast_fp16)[name = tensor("op_29468_cast_fp16")]; tensor var_29475_begin_0 = const()[name = tensor("op_29475_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_29475_end_0 = const()[name = tensor("op_29475_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29475_end_mask_0 = const()[name = tensor("op_29475_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29475_cast_fp16 = slice_by_index(begin = var_29475_begin_0, end = var_29475_end_0, end_mask = var_29475_end_mask_0, x = var_28913_cast_fp16)[name = tensor("op_29475_cast_fp16")]; tensor k_37_perm_0 = const()[name = tensor("k_37_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_29480_begin_0 = const()[name = tensor("op_29480_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29480_end_0 = const()[name = tensor("op_29480_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_29480_end_mask_0 = const()[name = tensor("op_29480_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = key_37_cast_fp16)[name = tensor("transpose_13")]; tensor var_29480_cast_fp16 = slice_by_index(begin = var_29480_begin_0, end = var_29480_end_0, end_mask = var_29480_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29480_cast_fp16")]; tensor var_29484_begin_0 = const()[name = tensor("op_29484_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_29484_end_0 = const()[name = tensor("op_29484_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_29484_end_mask_0 = const()[name = tensor("op_29484_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29484_cast_fp16 = slice_by_index(begin = var_29484_begin_0, end = var_29484_end_0, end_mask = var_29484_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29484_cast_fp16")]; tensor var_29488_begin_0 = const()[name = tensor("op_29488_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_29488_end_0 = const()[name = tensor("op_29488_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_29488_end_mask_0 = const()[name = tensor("op_29488_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29488_cast_fp16 = slice_by_index(begin = var_29488_begin_0, end = var_29488_end_0, end_mask = var_29488_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29488_cast_fp16")]; tensor var_29492_begin_0 = const()[name = tensor("op_29492_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_29492_end_0 = const()[name = tensor("op_29492_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_29492_end_mask_0 = const()[name = tensor("op_29492_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29492_cast_fp16 = slice_by_index(begin = var_29492_begin_0, end = var_29492_end_0, end_mask = var_29492_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29492_cast_fp16")]; tensor var_29496_begin_0 = const()[name = tensor("op_29496_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_29496_end_0 = const()[name = tensor("op_29496_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_29496_end_mask_0 = const()[name = tensor("op_29496_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29496_cast_fp16 = slice_by_index(begin = var_29496_begin_0, end = var_29496_end_0, end_mask = var_29496_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29496_cast_fp16")]; tensor var_29500_begin_0 = const()[name = tensor("op_29500_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_29500_end_0 = const()[name = tensor("op_29500_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_29500_end_mask_0 = const()[name = tensor("op_29500_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29500_cast_fp16 = slice_by_index(begin = var_29500_begin_0, end = var_29500_end_0, end_mask = var_29500_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29500_cast_fp16")]; tensor var_29504_begin_0 = const()[name = tensor("op_29504_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_29504_end_0 = const()[name = tensor("op_29504_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_29504_end_mask_0 = const()[name = tensor("op_29504_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29504_cast_fp16 = slice_by_index(begin = var_29504_begin_0, end = var_29504_end_0, end_mask = var_29504_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29504_cast_fp16")]; tensor var_29508_begin_0 = const()[name = tensor("op_29508_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_29508_end_0 = const()[name = tensor("op_29508_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_29508_end_mask_0 = const()[name = tensor("op_29508_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29508_cast_fp16 = slice_by_index(begin = var_29508_begin_0, end = var_29508_end_0, end_mask = var_29508_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29508_cast_fp16")]; tensor var_29512_begin_0 = const()[name = tensor("op_29512_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_29512_end_0 = const()[name = tensor("op_29512_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_29512_end_mask_0 = const()[name = tensor("op_29512_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29512_cast_fp16 = slice_by_index(begin = var_29512_begin_0, end = var_29512_end_0, end_mask = var_29512_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29512_cast_fp16")]; tensor var_29516_begin_0 = const()[name = tensor("op_29516_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_29516_end_0 = const()[name = tensor("op_29516_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_29516_end_mask_0 = const()[name = tensor("op_29516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29516_cast_fp16 = slice_by_index(begin = var_29516_begin_0, end = var_29516_end_0, end_mask = var_29516_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29516_cast_fp16")]; tensor var_29520_begin_0 = const()[name = tensor("op_29520_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_29520_end_0 = const()[name = tensor("op_29520_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_29520_end_mask_0 = const()[name = tensor("op_29520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29520_cast_fp16 = slice_by_index(begin = var_29520_begin_0, end = var_29520_end_0, end_mask = var_29520_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29520_cast_fp16")]; tensor var_29524_begin_0 = const()[name = tensor("op_29524_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_29524_end_0 = const()[name = tensor("op_29524_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_29524_end_mask_0 = const()[name = tensor("op_29524_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29524_cast_fp16 = slice_by_index(begin = var_29524_begin_0, end = var_29524_end_0, end_mask = var_29524_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29524_cast_fp16")]; tensor var_29528_begin_0 = const()[name = tensor("op_29528_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_29528_end_0 = const()[name = tensor("op_29528_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_29528_end_mask_0 = const()[name = tensor("op_29528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29528_cast_fp16 = slice_by_index(begin = var_29528_begin_0, end = var_29528_end_0, end_mask = var_29528_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29528_cast_fp16")]; tensor var_29532_begin_0 = const()[name = tensor("op_29532_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_29532_end_0 = const()[name = tensor("op_29532_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_29532_end_mask_0 = const()[name = tensor("op_29532_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29532_cast_fp16 = slice_by_index(begin = var_29532_begin_0, end = var_29532_end_0, end_mask = var_29532_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29532_cast_fp16")]; tensor var_29536_begin_0 = const()[name = tensor("op_29536_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_29536_end_0 = const()[name = tensor("op_29536_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_29536_end_mask_0 = const()[name = tensor("op_29536_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29536_cast_fp16 = slice_by_index(begin = var_29536_begin_0, end = var_29536_end_0, end_mask = var_29536_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29536_cast_fp16")]; tensor var_29540_begin_0 = const()[name = tensor("op_29540_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_29540_end_0 = const()[name = tensor("op_29540_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_29540_end_mask_0 = const()[name = tensor("op_29540_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29540_cast_fp16 = slice_by_index(begin = var_29540_begin_0, end = var_29540_end_0, end_mask = var_29540_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29540_cast_fp16")]; tensor var_29544_begin_0 = const()[name = tensor("op_29544_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_29544_end_0 = const()[name = tensor("op_29544_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_29544_end_mask_0 = const()[name = tensor("op_29544_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29544_cast_fp16 = slice_by_index(begin = var_29544_begin_0, end = var_29544_end_0, end_mask = var_29544_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29544_cast_fp16")]; tensor var_29548_begin_0 = const()[name = tensor("op_29548_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_29548_end_0 = const()[name = tensor("op_29548_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_29548_end_mask_0 = const()[name = tensor("op_29548_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29548_cast_fp16 = slice_by_index(begin = var_29548_begin_0, end = var_29548_end_0, end_mask = var_29548_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29548_cast_fp16")]; tensor var_29552_begin_0 = const()[name = tensor("op_29552_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_29552_end_0 = const()[name = tensor("op_29552_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_29552_end_mask_0 = const()[name = tensor("op_29552_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29552_cast_fp16 = slice_by_index(begin = var_29552_begin_0, end = var_29552_end_0, end_mask = var_29552_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29552_cast_fp16")]; tensor var_29556_begin_0 = const()[name = tensor("op_29556_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_29556_end_0 = const()[name = tensor("op_29556_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_29556_end_mask_0 = const()[name = tensor("op_29556_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_29556_cast_fp16 = slice_by_index(begin = var_29556_begin_0, end = var_29556_end_0, end_mask = var_29556_end_mask_0, x = k_37_cast_fp16)[name = tensor("op_29556_cast_fp16")]; tensor var_29558_begin_0 = const()[name = tensor("op_29558_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_29558_end_0 = const()[name = tensor("op_29558_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_29558_end_mask_0 = const()[name = tensor("op_29558_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29558_cast_fp16 = slice_by_index(begin = var_29558_begin_0, end = var_29558_end_0, end_mask = var_29558_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29558_cast_fp16")]; tensor var_29562_begin_0 = const()[name = tensor("op_29562_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_29562_end_0 = const()[name = tensor("op_29562_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_29562_end_mask_0 = const()[name = tensor("op_29562_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29562_cast_fp16 = slice_by_index(begin = var_29562_begin_0, end = var_29562_end_0, end_mask = var_29562_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29562_cast_fp16")]; tensor var_29566_begin_0 = const()[name = tensor("op_29566_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_29566_end_0 = const()[name = tensor("op_29566_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_29566_end_mask_0 = const()[name = tensor("op_29566_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29566_cast_fp16 = slice_by_index(begin = var_29566_begin_0, end = var_29566_end_0, end_mask = var_29566_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29566_cast_fp16")]; tensor var_29570_begin_0 = const()[name = tensor("op_29570_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_29570_end_0 = const()[name = tensor("op_29570_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_29570_end_mask_0 = const()[name = tensor("op_29570_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29570_cast_fp16 = slice_by_index(begin = var_29570_begin_0, end = var_29570_end_0, end_mask = var_29570_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29570_cast_fp16")]; tensor var_29574_begin_0 = const()[name = tensor("op_29574_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_29574_end_0 = const()[name = tensor("op_29574_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_29574_end_mask_0 = const()[name = tensor("op_29574_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29574_cast_fp16 = slice_by_index(begin = var_29574_begin_0, end = var_29574_end_0, end_mask = var_29574_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29574_cast_fp16")]; tensor var_29578_begin_0 = const()[name = tensor("op_29578_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_29578_end_0 = const()[name = tensor("op_29578_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_29578_end_mask_0 = const()[name = tensor("op_29578_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29578_cast_fp16 = slice_by_index(begin = var_29578_begin_0, end = var_29578_end_0, end_mask = var_29578_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29578_cast_fp16")]; tensor var_29582_begin_0 = const()[name = tensor("op_29582_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_29582_end_0 = const()[name = tensor("op_29582_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_29582_end_mask_0 = const()[name = tensor("op_29582_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29582_cast_fp16 = slice_by_index(begin = var_29582_begin_0, end = var_29582_end_0, end_mask = var_29582_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29582_cast_fp16")]; tensor var_29586_begin_0 = const()[name = tensor("op_29586_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_29586_end_0 = const()[name = tensor("op_29586_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_29586_end_mask_0 = const()[name = tensor("op_29586_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29586_cast_fp16 = slice_by_index(begin = var_29586_begin_0, end = var_29586_end_0, end_mask = var_29586_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29586_cast_fp16")]; tensor var_29590_begin_0 = const()[name = tensor("op_29590_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_29590_end_0 = const()[name = tensor("op_29590_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_29590_end_mask_0 = const()[name = tensor("op_29590_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29590_cast_fp16 = slice_by_index(begin = var_29590_begin_0, end = var_29590_end_0, end_mask = var_29590_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29590_cast_fp16")]; tensor var_29594_begin_0 = const()[name = tensor("op_29594_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_29594_end_0 = const()[name = tensor("op_29594_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_29594_end_mask_0 = const()[name = tensor("op_29594_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29594_cast_fp16 = slice_by_index(begin = var_29594_begin_0, end = var_29594_end_0, end_mask = var_29594_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29594_cast_fp16")]; tensor var_29598_begin_0 = const()[name = tensor("op_29598_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_29598_end_0 = const()[name = tensor("op_29598_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_29598_end_mask_0 = const()[name = tensor("op_29598_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29598_cast_fp16 = slice_by_index(begin = var_29598_begin_0, end = var_29598_end_0, end_mask = var_29598_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29598_cast_fp16")]; tensor var_29602_begin_0 = const()[name = tensor("op_29602_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_29602_end_0 = const()[name = tensor("op_29602_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_29602_end_mask_0 = const()[name = tensor("op_29602_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29602_cast_fp16 = slice_by_index(begin = var_29602_begin_0, end = var_29602_end_0, end_mask = var_29602_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29602_cast_fp16")]; tensor var_29606_begin_0 = const()[name = tensor("op_29606_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_29606_end_0 = const()[name = tensor("op_29606_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_29606_end_mask_0 = const()[name = tensor("op_29606_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29606_cast_fp16 = slice_by_index(begin = var_29606_begin_0, end = var_29606_end_0, end_mask = var_29606_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29606_cast_fp16")]; tensor var_29610_begin_0 = const()[name = tensor("op_29610_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_29610_end_0 = const()[name = tensor("op_29610_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_29610_end_mask_0 = const()[name = tensor("op_29610_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29610_cast_fp16 = slice_by_index(begin = var_29610_begin_0, end = var_29610_end_0, end_mask = var_29610_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29610_cast_fp16")]; tensor var_29614_begin_0 = const()[name = tensor("op_29614_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_29614_end_0 = const()[name = tensor("op_29614_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_29614_end_mask_0 = const()[name = tensor("op_29614_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29614_cast_fp16 = slice_by_index(begin = var_29614_begin_0, end = var_29614_end_0, end_mask = var_29614_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29614_cast_fp16")]; tensor var_29618_begin_0 = const()[name = tensor("op_29618_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_29618_end_0 = const()[name = tensor("op_29618_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_29618_end_mask_0 = const()[name = tensor("op_29618_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29618_cast_fp16 = slice_by_index(begin = var_29618_begin_0, end = var_29618_end_0, end_mask = var_29618_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29618_cast_fp16")]; tensor var_29622_begin_0 = const()[name = tensor("op_29622_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_29622_end_0 = const()[name = tensor("op_29622_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_29622_end_mask_0 = const()[name = tensor("op_29622_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29622_cast_fp16 = slice_by_index(begin = var_29622_begin_0, end = var_29622_end_0, end_mask = var_29622_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29622_cast_fp16")]; tensor var_29626_begin_0 = const()[name = tensor("op_29626_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_29626_end_0 = const()[name = tensor("op_29626_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_29626_end_mask_0 = const()[name = tensor("op_29626_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29626_cast_fp16 = slice_by_index(begin = var_29626_begin_0, end = var_29626_end_0, end_mask = var_29626_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29626_cast_fp16")]; tensor var_29630_begin_0 = const()[name = tensor("op_29630_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_29630_end_0 = const()[name = tensor("op_29630_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_29630_end_mask_0 = const()[name = tensor("op_29630_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29630_cast_fp16 = slice_by_index(begin = var_29630_begin_0, end = var_29630_end_0, end_mask = var_29630_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29630_cast_fp16")]; tensor var_29634_begin_0 = const()[name = tensor("op_29634_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_29634_end_0 = const()[name = tensor("op_29634_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_29634_end_mask_0 = const()[name = tensor("op_29634_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_29634_cast_fp16 = slice_by_index(begin = var_29634_begin_0, end = var_29634_end_0, end_mask = var_29634_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_29634_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2881_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2881_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2881_equation_0, values = (var_29480_cast_fp16, var_28922_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2881_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2883_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2883_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2883_equation_0, values = (var_29480_cast_fp16, var_28929_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2883_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2885_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2885_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2885_equation_0, values = (var_29480_cast_fp16, var_28936_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2885_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2887_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2887_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2887_equation_0, values = (var_29480_cast_fp16, var_28943_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2887_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2889_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2889_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2889_equation_0, values = (var_29484_cast_fp16, var_28950_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2889_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2891_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2891_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2891_equation_0, values = (var_29484_cast_fp16, var_28957_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2891_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2893_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2893_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2893_equation_0, values = (var_29484_cast_fp16, var_28964_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2893_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2895_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2895_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2895_equation_0, values = (var_29484_cast_fp16, var_28971_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2895_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2897_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2897_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2897_equation_0, values = (var_29488_cast_fp16, var_28978_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2897_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2899_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2899_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2899_equation_0, values = (var_29488_cast_fp16, var_28985_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2899_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2901_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2901_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2901_equation_0, values = (var_29488_cast_fp16, var_28992_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2901_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2903_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2903_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2903_equation_0, values = (var_29488_cast_fp16, var_28999_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2903_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2905_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2905_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2905_equation_0, values = (var_29492_cast_fp16, var_29006_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2905_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2907_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2907_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2907_equation_0, values = (var_29492_cast_fp16, var_29013_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2907_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2909_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2909_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2909_equation_0, values = (var_29492_cast_fp16, var_29020_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2909_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2911_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2911_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2911_equation_0, values = (var_29492_cast_fp16, var_29027_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2911_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2913_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2913_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2913_equation_0, values = (var_29496_cast_fp16, var_29034_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2913_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2915_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2915_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2915_equation_0, values = (var_29496_cast_fp16, var_29041_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2915_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2917_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2917_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2917_equation_0, values = (var_29496_cast_fp16, var_29048_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2917_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2919_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2919_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2919_equation_0, values = (var_29496_cast_fp16, var_29055_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2919_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2921_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2921_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2921_equation_0, values = (var_29500_cast_fp16, var_29062_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2921_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2923_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2923_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2923_equation_0, values = (var_29500_cast_fp16, var_29069_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2923_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2925_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2925_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2925_equation_0, values = (var_29500_cast_fp16, var_29076_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2925_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2927_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2927_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2927_equation_0, values = (var_29500_cast_fp16, var_29083_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2927_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2929_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2929_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2929_equation_0, values = (var_29504_cast_fp16, var_29090_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2929_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2931_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2931_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2931_equation_0, values = (var_29504_cast_fp16, var_29097_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2931_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2933_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2933_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2933_equation_0, values = (var_29504_cast_fp16, var_29104_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2933_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2935_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2935_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2935_equation_0, values = (var_29504_cast_fp16, var_29111_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2935_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2937_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2937_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2937_equation_0, values = (var_29508_cast_fp16, var_29118_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2937_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2939_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2939_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2939_equation_0, values = (var_29508_cast_fp16, var_29125_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2939_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2941_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2941_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2941_equation_0, values = (var_29508_cast_fp16, var_29132_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2941_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2943_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2943_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2943_equation_0, values = (var_29508_cast_fp16, var_29139_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2943_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2945_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2945_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2945_equation_0, values = (var_29512_cast_fp16, var_29146_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2945_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2947_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2947_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2947_equation_0, values = (var_29512_cast_fp16, var_29153_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2947_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2949_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2949_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2949_equation_0, values = (var_29512_cast_fp16, var_29160_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2949_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2951_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2951_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2951_equation_0, values = (var_29512_cast_fp16, var_29167_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2951_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2953_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2953_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2953_equation_0, values = (var_29516_cast_fp16, var_29174_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2953_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2955_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2955_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2955_equation_0, values = (var_29516_cast_fp16, var_29181_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2955_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2957_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2957_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2957_equation_0, values = (var_29516_cast_fp16, var_29188_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2957_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2959_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2959_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2959_equation_0, values = (var_29516_cast_fp16, var_29195_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2959_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2961_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2961_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2961_equation_0, values = (var_29520_cast_fp16, var_29202_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2961_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2963_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2963_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2963_equation_0, values = (var_29520_cast_fp16, var_29209_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2963_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2965_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2965_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2965_equation_0, values = (var_29520_cast_fp16, var_29216_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2965_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2967_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2967_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2967_equation_0, values = (var_29520_cast_fp16, var_29223_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2967_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2969_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2969_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2969_equation_0, values = (var_29524_cast_fp16, var_29230_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2969_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2971_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2971_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2971_equation_0, values = (var_29524_cast_fp16, var_29237_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2971_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2973_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2973_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2973_equation_0, values = (var_29524_cast_fp16, var_29244_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2973_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2975_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2975_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2975_equation_0, values = (var_29524_cast_fp16, var_29251_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2975_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2977_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2977_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2977_equation_0, values = (var_29528_cast_fp16, var_29258_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2977_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2979_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2979_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2979_equation_0, values = (var_29528_cast_fp16, var_29265_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2979_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2981_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2981_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2981_equation_0, values = (var_29528_cast_fp16, var_29272_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2981_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2983_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2983_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2983_equation_0, values = (var_29528_cast_fp16, var_29279_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2983_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2985_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2985_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2985_equation_0, values = (var_29532_cast_fp16, var_29286_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2985_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2987_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2987_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2987_equation_0, values = (var_29532_cast_fp16, var_29293_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2987_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2989_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2989_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2989_equation_0, values = (var_29532_cast_fp16, var_29300_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2989_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2991_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2991_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2991_equation_0, values = (var_29532_cast_fp16, var_29307_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2991_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2993_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2993_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2993_equation_0, values = (var_29536_cast_fp16, var_29314_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2993_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2995_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2995_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2995_equation_0, values = (var_29536_cast_fp16, var_29321_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2995_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2997_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2997_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2997_equation_0, values = (var_29536_cast_fp16, var_29328_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2997_cast_fp16")]; tensor _SplitHeadsQ__mh_w_2999_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_2999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_2999_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_2999_equation_0, values = (var_29536_cast_fp16, var_29335_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_2999_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3001_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3001_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3001_equation_0, values = (var_29540_cast_fp16, var_29342_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3001_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3003_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3003_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3003_equation_0, values = (var_29540_cast_fp16, var_29349_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3003_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3005_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3005_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3005_equation_0, values = (var_29540_cast_fp16, var_29356_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3005_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3007_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3007_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3007_equation_0, values = (var_29540_cast_fp16, var_29363_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3007_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3009_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3009_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3009_equation_0, values = (var_29544_cast_fp16, var_29370_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3009_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3011_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3011_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3011_equation_0, values = (var_29544_cast_fp16, var_29377_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3011_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3013_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3013_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3013_equation_0, values = (var_29544_cast_fp16, var_29384_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3013_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3015_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3015_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3015_equation_0, values = (var_29544_cast_fp16, var_29391_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3015_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3017_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3017_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3017_equation_0, values = (var_29548_cast_fp16, var_29398_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3017_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3019_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3019_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3019_equation_0, values = (var_29548_cast_fp16, var_29405_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3019_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3021_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3021_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3021_equation_0, values = (var_29548_cast_fp16, var_29412_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3021_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3023_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3023_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3023_equation_0, values = (var_29548_cast_fp16, var_29419_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3023_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3025_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3025_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3025_equation_0, values = (var_29552_cast_fp16, var_29426_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3025_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3027_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3027_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3027_equation_0, values = (var_29552_cast_fp16, var_29433_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3027_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3029_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3029_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3029_equation_0, values = (var_29552_cast_fp16, var_29440_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3029_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3031_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3031_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3031_equation_0, values = (var_29552_cast_fp16, var_29447_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3031_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3033_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3033_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3033_equation_0, values = (var_29556_cast_fp16, var_29454_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3033_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3035_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3035_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3035_equation_0, values = (var_29556_cast_fp16, var_29461_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3035_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3037_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3037_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3037_equation_0, values = (var_29556_cast_fp16, var_29468_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3037_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3039_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3039_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3039_equation_0, values = (var_29556_cast_fp16, var_29475_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3039_cast_fp16")]; tensor var_29797_to_fp16 = const()[name = tensor("op_29797_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2881_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2881_cast_fp16, y = var_29797_to_fp16)[name = tensor("aw_chunk_2881_cast_fp16")]; tensor var_29799_to_fp16 = const()[name = tensor("op_29799_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2883_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2883_cast_fp16, y = var_29799_to_fp16)[name = tensor("aw_chunk_2883_cast_fp16")]; tensor var_29801_to_fp16 = const()[name = tensor("op_29801_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2885_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2885_cast_fp16, y = var_29801_to_fp16)[name = tensor("aw_chunk_2885_cast_fp16")]; tensor var_29803_to_fp16 = const()[name = tensor("op_29803_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2887_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2887_cast_fp16, y = var_29803_to_fp16)[name = tensor("aw_chunk_2887_cast_fp16")]; tensor var_29805_to_fp16 = const()[name = tensor("op_29805_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2889_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2889_cast_fp16, y = var_29805_to_fp16)[name = tensor("aw_chunk_2889_cast_fp16")]; tensor var_29807_to_fp16 = const()[name = tensor("op_29807_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2891_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2891_cast_fp16, y = var_29807_to_fp16)[name = tensor("aw_chunk_2891_cast_fp16")]; tensor var_29809_to_fp16 = const()[name = tensor("op_29809_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2893_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2893_cast_fp16, y = var_29809_to_fp16)[name = tensor("aw_chunk_2893_cast_fp16")]; tensor var_29811_to_fp16 = const()[name = tensor("op_29811_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2895_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2895_cast_fp16, y = var_29811_to_fp16)[name = tensor("aw_chunk_2895_cast_fp16")]; tensor var_29813_to_fp16 = const()[name = tensor("op_29813_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2897_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2897_cast_fp16, y = var_29813_to_fp16)[name = tensor("aw_chunk_2897_cast_fp16")]; tensor var_29815_to_fp16 = const()[name = tensor("op_29815_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2899_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2899_cast_fp16, y = var_29815_to_fp16)[name = tensor("aw_chunk_2899_cast_fp16")]; tensor var_29817_to_fp16 = const()[name = tensor("op_29817_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2901_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2901_cast_fp16, y = var_29817_to_fp16)[name = tensor("aw_chunk_2901_cast_fp16")]; tensor var_29819_to_fp16 = const()[name = tensor("op_29819_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2903_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2903_cast_fp16, y = var_29819_to_fp16)[name = tensor("aw_chunk_2903_cast_fp16")]; tensor var_29821_to_fp16 = const()[name = tensor("op_29821_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2905_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2905_cast_fp16, y = var_29821_to_fp16)[name = tensor("aw_chunk_2905_cast_fp16")]; tensor var_29823_to_fp16 = const()[name = tensor("op_29823_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2907_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2907_cast_fp16, y = var_29823_to_fp16)[name = tensor("aw_chunk_2907_cast_fp16")]; tensor var_29825_to_fp16 = const()[name = tensor("op_29825_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2909_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2909_cast_fp16, y = var_29825_to_fp16)[name = tensor("aw_chunk_2909_cast_fp16")]; tensor var_29827_to_fp16 = const()[name = tensor("op_29827_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2911_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2911_cast_fp16, y = var_29827_to_fp16)[name = tensor("aw_chunk_2911_cast_fp16")]; tensor var_29829_to_fp16 = const()[name = tensor("op_29829_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2913_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2913_cast_fp16, y = var_29829_to_fp16)[name = tensor("aw_chunk_2913_cast_fp16")]; tensor var_29831_to_fp16 = const()[name = tensor("op_29831_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2915_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2915_cast_fp16, y = var_29831_to_fp16)[name = tensor("aw_chunk_2915_cast_fp16")]; tensor var_29833_to_fp16 = const()[name = tensor("op_29833_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2917_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2917_cast_fp16, y = var_29833_to_fp16)[name = tensor("aw_chunk_2917_cast_fp16")]; tensor var_29835_to_fp16 = const()[name = tensor("op_29835_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2919_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2919_cast_fp16, y = var_29835_to_fp16)[name = tensor("aw_chunk_2919_cast_fp16")]; tensor var_29837_to_fp16 = const()[name = tensor("op_29837_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2921_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2921_cast_fp16, y = var_29837_to_fp16)[name = tensor("aw_chunk_2921_cast_fp16")]; tensor var_29839_to_fp16 = const()[name = tensor("op_29839_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2923_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2923_cast_fp16, y = var_29839_to_fp16)[name = tensor("aw_chunk_2923_cast_fp16")]; tensor var_29841_to_fp16 = const()[name = tensor("op_29841_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2925_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2925_cast_fp16, y = var_29841_to_fp16)[name = tensor("aw_chunk_2925_cast_fp16")]; tensor var_29843_to_fp16 = const()[name = tensor("op_29843_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2927_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2927_cast_fp16, y = var_29843_to_fp16)[name = tensor("aw_chunk_2927_cast_fp16")]; tensor var_29845_to_fp16 = const()[name = tensor("op_29845_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2929_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2929_cast_fp16, y = var_29845_to_fp16)[name = tensor("aw_chunk_2929_cast_fp16")]; tensor var_29847_to_fp16 = const()[name = tensor("op_29847_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2931_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2931_cast_fp16, y = var_29847_to_fp16)[name = tensor("aw_chunk_2931_cast_fp16")]; tensor var_29849_to_fp16 = const()[name = tensor("op_29849_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2933_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2933_cast_fp16, y = var_29849_to_fp16)[name = tensor("aw_chunk_2933_cast_fp16")]; tensor var_29851_to_fp16 = const()[name = tensor("op_29851_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2935_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2935_cast_fp16, y = var_29851_to_fp16)[name = tensor("aw_chunk_2935_cast_fp16")]; tensor var_29853_to_fp16 = const()[name = tensor("op_29853_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2937_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2937_cast_fp16, y = var_29853_to_fp16)[name = tensor("aw_chunk_2937_cast_fp16")]; tensor var_29855_to_fp16 = const()[name = tensor("op_29855_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2939_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2939_cast_fp16, y = var_29855_to_fp16)[name = tensor("aw_chunk_2939_cast_fp16")]; tensor var_29857_to_fp16 = const()[name = tensor("op_29857_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2941_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2941_cast_fp16, y = var_29857_to_fp16)[name = tensor("aw_chunk_2941_cast_fp16")]; tensor var_29859_to_fp16 = const()[name = tensor("op_29859_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2943_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2943_cast_fp16, y = var_29859_to_fp16)[name = tensor("aw_chunk_2943_cast_fp16")]; tensor var_29861_to_fp16 = const()[name = tensor("op_29861_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2945_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2945_cast_fp16, y = var_29861_to_fp16)[name = tensor("aw_chunk_2945_cast_fp16")]; tensor var_29863_to_fp16 = const()[name = tensor("op_29863_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2947_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2947_cast_fp16, y = var_29863_to_fp16)[name = tensor("aw_chunk_2947_cast_fp16")]; tensor var_29865_to_fp16 = const()[name = tensor("op_29865_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2949_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2949_cast_fp16, y = var_29865_to_fp16)[name = tensor("aw_chunk_2949_cast_fp16")]; tensor var_29867_to_fp16 = const()[name = tensor("op_29867_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2951_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2951_cast_fp16, y = var_29867_to_fp16)[name = tensor("aw_chunk_2951_cast_fp16")]; tensor var_29869_to_fp16 = const()[name = tensor("op_29869_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2953_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2953_cast_fp16, y = var_29869_to_fp16)[name = tensor("aw_chunk_2953_cast_fp16")]; tensor var_29871_to_fp16 = const()[name = tensor("op_29871_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2955_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2955_cast_fp16, y = var_29871_to_fp16)[name = tensor("aw_chunk_2955_cast_fp16")]; tensor var_29873_to_fp16 = const()[name = tensor("op_29873_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2957_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2957_cast_fp16, y = var_29873_to_fp16)[name = tensor("aw_chunk_2957_cast_fp16")]; tensor var_29875_to_fp16 = const()[name = tensor("op_29875_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2959_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2959_cast_fp16, y = var_29875_to_fp16)[name = tensor("aw_chunk_2959_cast_fp16")]; tensor var_29877_to_fp16 = const()[name = tensor("op_29877_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2961_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2961_cast_fp16, y = var_29877_to_fp16)[name = tensor("aw_chunk_2961_cast_fp16")]; tensor var_29879_to_fp16 = const()[name = tensor("op_29879_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2963_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2963_cast_fp16, y = var_29879_to_fp16)[name = tensor("aw_chunk_2963_cast_fp16")]; tensor var_29881_to_fp16 = const()[name = tensor("op_29881_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2965_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2965_cast_fp16, y = var_29881_to_fp16)[name = tensor("aw_chunk_2965_cast_fp16")]; tensor var_29883_to_fp16 = const()[name = tensor("op_29883_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2967_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2967_cast_fp16, y = var_29883_to_fp16)[name = tensor("aw_chunk_2967_cast_fp16")]; tensor var_29885_to_fp16 = const()[name = tensor("op_29885_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2969_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2969_cast_fp16, y = var_29885_to_fp16)[name = tensor("aw_chunk_2969_cast_fp16")]; tensor var_29887_to_fp16 = const()[name = tensor("op_29887_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2971_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2971_cast_fp16, y = var_29887_to_fp16)[name = tensor("aw_chunk_2971_cast_fp16")]; tensor var_29889_to_fp16 = const()[name = tensor("op_29889_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2973_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2973_cast_fp16, y = var_29889_to_fp16)[name = tensor("aw_chunk_2973_cast_fp16")]; tensor var_29891_to_fp16 = const()[name = tensor("op_29891_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2975_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2975_cast_fp16, y = var_29891_to_fp16)[name = tensor("aw_chunk_2975_cast_fp16")]; tensor var_29893_to_fp16 = const()[name = tensor("op_29893_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2977_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2977_cast_fp16, y = var_29893_to_fp16)[name = tensor("aw_chunk_2977_cast_fp16")]; tensor var_29895_to_fp16 = const()[name = tensor("op_29895_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2979_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2979_cast_fp16, y = var_29895_to_fp16)[name = tensor("aw_chunk_2979_cast_fp16")]; tensor var_29897_to_fp16 = const()[name = tensor("op_29897_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2981_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2981_cast_fp16, y = var_29897_to_fp16)[name = tensor("aw_chunk_2981_cast_fp16")]; tensor var_29899_to_fp16 = const()[name = tensor("op_29899_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2983_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2983_cast_fp16, y = var_29899_to_fp16)[name = tensor("aw_chunk_2983_cast_fp16")]; tensor var_29901_to_fp16 = const()[name = tensor("op_29901_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2985_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2985_cast_fp16, y = var_29901_to_fp16)[name = tensor("aw_chunk_2985_cast_fp16")]; tensor var_29903_to_fp16 = const()[name = tensor("op_29903_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2987_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2987_cast_fp16, y = var_29903_to_fp16)[name = tensor("aw_chunk_2987_cast_fp16")]; tensor var_29905_to_fp16 = const()[name = tensor("op_29905_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2989_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2989_cast_fp16, y = var_29905_to_fp16)[name = tensor("aw_chunk_2989_cast_fp16")]; tensor var_29907_to_fp16 = const()[name = tensor("op_29907_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2991_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2991_cast_fp16, y = var_29907_to_fp16)[name = tensor("aw_chunk_2991_cast_fp16")]; tensor var_29909_to_fp16 = const()[name = tensor("op_29909_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2993_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2993_cast_fp16, y = var_29909_to_fp16)[name = tensor("aw_chunk_2993_cast_fp16")]; tensor var_29911_to_fp16 = const()[name = tensor("op_29911_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2995_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2995_cast_fp16, y = var_29911_to_fp16)[name = tensor("aw_chunk_2995_cast_fp16")]; tensor var_29913_to_fp16 = const()[name = tensor("op_29913_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2997_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2997_cast_fp16, y = var_29913_to_fp16)[name = tensor("aw_chunk_2997_cast_fp16")]; tensor var_29915_to_fp16 = const()[name = tensor("op_29915_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_2999_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_2999_cast_fp16, y = var_29915_to_fp16)[name = tensor("aw_chunk_2999_cast_fp16")]; tensor var_29917_to_fp16 = const()[name = tensor("op_29917_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3001_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3001_cast_fp16, y = var_29917_to_fp16)[name = tensor("aw_chunk_3001_cast_fp16")]; tensor var_29919_to_fp16 = const()[name = tensor("op_29919_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3003_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3003_cast_fp16, y = var_29919_to_fp16)[name = tensor("aw_chunk_3003_cast_fp16")]; tensor var_29921_to_fp16 = const()[name = tensor("op_29921_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3005_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3005_cast_fp16, y = var_29921_to_fp16)[name = tensor("aw_chunk_3005_cast_fp16")]; tensor var_29923_to_fp16 = const()[name = tensor("op_29923_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3007_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3007_cast_fp16, y = var_29923_to_fp16)[name = tensor("aw_chunk_3007_cast_fp16")]; tensor var_29925_to_fp16 = const()[name = tensor("op_29925_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3009_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3009_cast_fp16, y = var_29925_to_fp16)[name = tensor("aw_chunk_3009_cast_fp16")]; tensor var_29927_to_fp16 = const()[name = tensor("op_29927_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3011_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3011_cast_fp16, y = var_29927_to_fp16)[name = tensor("aw_chunk_3011_cast_fp16")]; tensor var_29929_to_fp16 = const()[name = tensor("op_29929_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3013_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3013_cast_fp16, y = var_29929_to_fp16)[name = tensor("aw_chunk_3013_cast_fp16")]; tensor var_29931_to_fp16 = const()[name = tensor("op_29931_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3015_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3015_cast_fp16, y = var_29931_to_fp16)[name = tensor("aw_chunk_3015_cast_fp16")]; tensor var_29933_to_fp16 = const()[name = tensor("op_29933_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3017_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3017_cast_fp16, y = var_29933_to_fp16)[name = tensor("aw_chunk_3017_cast_fp16")]; tensor var_29935_to_fp16 = const()[name = tensor("op_29935_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3019_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3019_cast_fp16, y = var_29935_to_fp16)[name = tensor("aw_chunk_3019_cast_fp16")]; tensor var_29937_to_fp16 = const()[name = tensor("op_29937_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3021_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3021_cast_fp16, y = var_29937_to_fp16)[name = tensor("aw_chunk_3021_cast_fp16")]; tensor var_29939_to_fp16 = const()[name = tensor("op_29939_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3023_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3023_cast_fp16, y = var_29939_to_fp16)[name = tensor("aw_chunk_3023_cast_fp16")]; tensor var_29941_to_fp16 = const()[name = tensor("op_29941_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3025_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3025_cast_fp16, y = var_29941_to_fp16)[name = tensor("aw_chunk_3025_cast_fp16")]; tensor var_29943_to_fp16 = const()[name = tensor("op_29943_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3027_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3027_cast_fp16, y = var_29943_to_fp16)[name = tensor("aw_chunk_3027_cast_fp16")]; tensor var_29945_to_fp16 = const()[name = tensor("op_29945_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3029_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3029_cast_fp16, y = var_29945_to_fp16)[name = tensor("aw_chunk_3029_cast_fp16")]; tensor var_29947_to_fp16 = const()[name = tensor("op_29947_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3031_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3031_cast_fp16, y = var_29947_to_fp16)[name = tensor("aw_chunk_3031_cast_fp16")]; tensor var_29949_to_fp16 = const()[name = tensor("op_29949_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3033_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3033_cast_fp16, y = var_29949_to_fp16)[name = tensor("aw_chunk_3033_cast_fp16")]; tensor var_29951_to_fp16 = const()[name = tensor("op_29951_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3035_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3035_cast_fp16, y = var_29951_to_fp16)[name = tensor("aw_chunk_3035_cast_fp16")]; tensor var_29953_to_fp16 = const()[name = tensor("op_29953_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3037_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3037_cast_fp16, y = var_29953_to_fp16)[name = tensor("aw_chunk_3037_cast_fp16")]; tensor var_29955_to_fp16 = const()[name = tensor("op_29955_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3039_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3039_cast_fp16, y = var_29955_to_fp16)[name = tensor("aw_chunk_3039_cast_fp16")]; tensor var_29957_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2881_cast_fp16)[name = tensor("op_29957_cast_fp16")]; tensor var_29958_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2883_cast_fp16)[name = tensor("op_29958_cast_fp16")]; tensor var_29959_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2885_cast_fp16)[name = tensor("op_29959_cast_fp16")]; tensor var_29960_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2887_cast_fp16)[name = tensor("op_29960_cast_fp16")]; tensor var_29961_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2889_cast_fp16)[name = tensor("op_29961_cast_fp16")]; tensor var_29962_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2891_cast_fp16)[name = tensor("op_29962_cast_fp16")]; tensor var_29963_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2893_cast_fp16)[name = tensor("op_29963_cast_fp16")]; tensor var_29964_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2895_cast_fp16)[name = tensor("op_29964_cast_fp16")]; tensor var_29965_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2897_cast_fp16)[name = tensor("op_29965_cast_fp16")]; tensor var_29966_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2899_cast_fp16)[name = tensor("op_29966_cast_fp16")]; tensor var_29967_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2901_cast_fp16)[name = tensor("op_29967_cast_fp16")]; tensor var_29968_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2903_cast_fp16)[name = tensor("op_29968_cast_fp16")]; tensor var_29969_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2905_cast_fp16)[name = tensor("op_29969_cast_fp16")]; tensor var_29970_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2907_cast_fp16)[name = tensor("op_29970_cast_fp16")]; tensor var_29971_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2909_cast_fp16)[name = tensor("op_29971_cast_fp16")]; tensor var_29972_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2911_cast_fp16)[name = tensor("op_29972_cast_fp16")]; tensor var_29973_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2913_cast_fp16)[name = tensor("op_29973_cast_fp16")]; tensor var_29974_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2915_cast_fp16)[name = tensor("op_29974_cast_fp16")]; tensor var_29975_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2917_cast_fp16)[name = tensor("op_29975_cast_fp16")]; tensor var_29976_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2919_cast_fp16)[name = tensor("op_29976_cast_fp16")]; tensor var_29977_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2921_cast_fp16)[name = tensor("op_29977_cast_fp16")]; tensor var_29978_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2923_cast_fp16)[name = tensor("op_29978_cast_fp16")]; tensor var_29979_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2925_cast_fp16)[name = tensor("op_29979_cast_fp16")]; tensor var_29980_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2927_cast_fp16)[name = tensor("op_29980_cast_fp16")]; tensor var_29981_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2929_cast_fp16)[name = tensor("op_29981_cast_fp16")]; tensor var_29982_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2931_cast_fp16)[name = tensor("op_29982_cast_fp16")]; tensor var_29983_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2933_cast_fp16)[name = tensor("op_29983_cast_fp16")]; tensor var_29984_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2935_cast_fp16)[name = tensor("op_29984_cast_fp16")]; tensor var_29985_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2937_cast_fp16)[name = tensor("op_29985_cast_fp16")]; tensor var_29986_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2939_cast_fp16)[name = tensor("op_29986_cast_fp16")]; tensor var_29987_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2941_cast_fp16)[name = tensor("op_29987_cast_fp16")]; tensor var_29988_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2943_cast_fp16)[name = tensor("op_29988_cast_fp16")]; tensor var_29989_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2945_cast_fp16)[name = tensor("op_29989_cast_fp16")]; tensor var_29990_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2947_cast_fp16)[name = tensor("op_29990_cast_fp16")]; tensor var_29991_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2949_cast_fp16)[name = tensor("op_29991_cast_fp16")]; tensor var_29992_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2951_cast_fp16)[name = tensor("op_29992_cast_fp16")]; tensor var_29993_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2953_cast_fp16)[name = tensor("op_29993_cast_fp16")]; tensor var_29994_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2955_cast_fp16)[name = tensor("op_29994_cast_fp16")]; tensor var_29995_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2957_cast_fp16)[name = tensor("op_29995_cast_fp16")]; tensor var_29996_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2959_cast_fp16)[name = tensor("op_29996_cast_fp16")]; tensor var_29997_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2961_cast_fp16)[name = tensor("op_29997_cast_fp16")]; tensor var_29998_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2963_cast_fp16)[name = tensor("op_29998_cast_fp16")]; tensor var_29999_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2965_cast_fp16)[name = tensor("op_29999_cast_fp16")]; tensor var_30000_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2967_cast_fp16)[name = tensor("op_30000_cast_fp16")]; tensor var_30001_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2969_cast_fp16)[name = tensor("op_30001_cast_fp16")]; tensor var_30002_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2971_cast_fp16)[name = tensor("op_30002_cast_fp16")]; tensor var_30003_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2973_cast_fp16)[name = tensor("op_30003_cast_fp16")]; tensor var_30004_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2975_cast_fp16)[name = tensor("op_30004_cast_fp16")]; tensor var_30005_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2977_cast_fp16)[name = tensor("op_30005_cast_fp16")]; tensor var_30006_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2979_cast_fp16)[name = tensor("op_30006_cast_fp16")]; tensor var_30007_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2981_cast_fp16)[name = tensor("op_30007_cast_fp16")]; tensor var_30008_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2983_cast_fp16)[name = tensor("op_30008_cast_fp16")]; tensor var_30009_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2985_cast_fp16)[name = tensor("op_30009_cast_fp16")]; tensor var_30010_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2987_cast_fp16)[name = tensor("op_30010_cast_fp16")]; tensor var_30011_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2989_cast_fp16)[name = tensor("op_30011_cast_fp16")]; tensor var_30012_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2991_cast_fp16)[name = tensor("op_30012_cast_fp16")]; tensor var_30013_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2993_cast_fp16)[name = tensor("op_30013_cast_fp16")]; tensor var_30014_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2995_cast_fp16)[name = tensor("op_30014_cast_fp16")]; tensor var_30015_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2997_cast_fp16)[name = tensor("op_30015_cast_fp16")]; tensor var_30016_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_2999_cast_fp16)[name = tensor("op_30016_cast_fp16")]; tensor var_30017_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3001_cast_fp16)[name = tensor("op_30017_cast_fp16")]; tensor var_30018_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3003_cast_fp16)[name = tensor("op_30018_cast_fp16")]; tensor var_30019_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3005_cast_fp16)[name = tensor("op_30019_cast_fp16")]; tensor var_30020_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3007_cast_fp16)[name = tensor("op_30020_cast_fp16")]; tensor var_30021_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3009_cast_fp16)[name = tensor("op_30021_cast_fp16")]; tensor var_30022_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3011_cast_fp16)[name = tensor("op_30022_cast_fp16")]; tensor var_30023_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3013_cast_fp16)[name = tensor("op_30023_cast_fp16")]; tensor var_30024_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3015_cast_fp16)[name = tensor("op_30024_cast_fp16")]; tensor var_30025_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3017_cast_fp16)[name = tensor("op_30025_cast_fp16")]; tensor var_30026_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3019_cast_fp16)[name = tensor("op_30026_cast_fp16")]; tensor var_30027_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3021_cast_fp16)[name = tensor("op_30027_cast_fp16")]; tensor var_30028_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3023_cast_fp16)[name = tensor("op_30028_cast_fp16")]; tensor var_30029_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3025_cast_fp16)[name = tensor("op_30029_cast_fp16")]; tensor var_30030_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3027_cast_fp16)[name = tensor("op_30030_cast_fp16")]; tensor var_30031_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3029_cast_fp16)[name = tensor("op_30031_cast_fp16")]; tensor var_30032_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3031_cast_fp16)[name = tensor("op_30032_cast_fp16")]; tensor var_30033_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3033_cast_fp16)[name = tensor("op_30033_cast_fp16")]; tensor var_30034_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3035_cast_fp16)[name = tensor("op_30034_cast_fp16")]; tensor var_30035_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3037_cast_fp16)[name = tensor("op_30035_cast_fp16")]; tensor var_30036_cast_fp16 = softmax(axis = var_28755, x = aw_chunk_3039_cast_fp16)[name = tensor("op_30036_cast_fp16")]; tensor var_30038_equation_0 = const()[name = tensor("op_30038_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30038_cast_fp16 = einsum(equation = var_30038_equation_0, values = (var_29558_cast_fp16, var_29957_cast_fp16))[name = tensor("op_30038_cast_fp16")]; tensor var_30040_equation_0 = const()[name = tensor("op_30040_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30040_cast_fp16 = einsum(equation = var_30040_equation_0, values = (var_29558_cast_fp16, var_29958_cast_fp16))[name = tensor("op_30040_cast_fp16")]; tensor var_30042_equation_0 = const()[name = tensor("op_30042_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30042_cast_fp16 = einsum(equation = var_30042_equation_0, values = (var_29558_cast_fp16, var_29959_cast_fp16))[name = tensor("op_30042_cast_fp16")]; tensor var_30044_equation_0 = const()[name = tensor("op_30044_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30044_cast_fp16 = einsum(equation = var_30044_equation_0, values = (var_29558_cast_fp16, var_29960_cast_fp16))[name = tensor("op_30044_cast_fp16")]; tensor var_30046_equation_0 = const()[name = tensor("op_30046_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30046_cast_fp16 = einsum(equation = var_30046_equation_0, values = (var_29562_cast_fp16, var_29961_cast_fp16))[name = tensor("op_30046_cast_fp16")]; tensor var_30048_equation_0 = const()[name = tensor("op_30048_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30048_cast_fp16 = einsum(equation = var_30048_equation_0, values = (var_29562_cast_fp16, var_29962_cast_fp16))[name = tensor("op_30048_cast_fp16")]; tensor var_30050_equation_0 = const()[name = tensor("op_30050_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30050_cast_fp16 = einsum(equation = var_30050_equation_0, values = (var_29562_cast_fp16, var_29963_cast_fp16))[name = tensor("op_30050_cast_fp16")]; tensor var_30052_equation_0 = const()[name = tensor("op_30052_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30052_cast_fp16 = einsum(equation = var_30052_equation_0, values = (var_29562_cast_fp16, var_29964_cast_fp16))[name = tensor("op_30052_cast_fp16")]; tensor var_30054_equation_0 = const()[name = tensor("op_30054_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30054_cast_fp16 = einsum(equation = var_30054_equation_0, values = (var_29566_cast_fp16, var_29965_cast_fp16))[name = tensor("op_30054_cast_fp16")]; tensor var_30056_equation_0 = const()[name = tensor("op_30056_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30056_cast_fp16 = einsum(equation = var_30056_equation_0, values = (var_29566_cast_fp16, var_29966_cast_fp16))[name = tensor("op_30056_cast_fp16")]; tensor var_30058_equation_0 = const()[name = tensor("op_30058_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30058_cast_fp16 = einsum(equation = var_30058_equation_0, values = (var_29566_cast_fp16, var_29967_cast_fp16))[name = tensor("op_30058_cast_fp16")]; tensor var_30060_equation_0 = const()[name = tensor("op_30060_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30060_cast_fp16 = einsum(equation = var_30060_equation_0, values = (var_29566_cast_fp16, var_29968_cast_fp16))[name = tensor("op_30060_cast_fp16")]; tensor var_30062_equation_0 = const()[name = tensor("op_30062_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30062_cast_fp16 = einsum(equation = var_30062_equation_0, values = (var_29570_cast_fp16, var_29969_cast_fp16))[name = tensor("op_30062_cast_fp16")]; tensor var_30064_equation_0 = const()[name = tensor("op_30064_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30064_cast_fp16 = einsum(equation = var_30064_equation_0, values = (var_29570_cast_fp16, var_29970_cast_fp16))[name = tensor("op_30064_cast_fp16")]; tensor var_30066_equation_0 = const()[name = tensor("op_30066_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30066_cast_fp16 = einsum(equation = var_30066_equation_0, values = (var_29570_cast_fp16, var_29971_cast_fp16))[name = tensor("op_30066_cast_fp16")]; tensor var_30068_equation_0 = const()[name = tensor("op_30068_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30068_cast_fp16 = einsum(equation = var_30068_equation_0, values = (var_29570_cast_fp16, var_29972_cast_fp16))[name = tensor("op_30068_cast_fp16")]; tensor var_30070_equation_0 = const()[name = tensor("op_30070_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30070_cast_fp16 = einsum(equation = var_30070_equation_0, values = (var_29574_cast_fp16, var_29973_cast_fp16))[name = tensor("op_30070_cast_fp16")]; tensor var_30072_equation_0 = const()[name = tensor("op_30072_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30072_cast_fp16 = einsum(equation = var_30072_equation_0, values = (var_29574_cast_fp16, var_29974_cast_fp16))[name = tensor("op_30072_cast_fp16")]; tensor var_30074_equation_0 = const()[name = tensor("op_30074_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30074_cast_fp16 = einsum(equation = var_30074_equation_0, values = (var_29574_cast_fp16, var_29975_cast_fp16))[name = tensor("op_30074_cast_fp16")]; tensor var_30076_equation_0 = const()[name = tensor("op_30076_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30076_cast_fp16 = einsum(equation = var_30076_equation_0, values = (var_29574_cast_fp16, var_29976_cast_fp16))[name = tensor("op_30076_cast_fp16")]; tensor var_30078_equation_0 = const()[name = tensor("op_30078_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30078_cast_fp16 = einsum(equation = var_30078_equation_0, values = (var_29578_cast_fp16, var_29977_cast_fp16))[name = tensor("op_30078_cast_fp16")]; tensor var_30080_equation_0 = const()[name = tensor("op_30080_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30080_cast_fp16 = einsum(equation = var_30080_equation_0, values = (var_29578_cast_fp16, var_29978_cast_fp16))[name = tensor("op_30080_cast_fp16")]; tensor var_30082_equation_0 = const()[name = tensor("op_30082_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30082_cast_fp16 = einsum(equation = var_30082_equation_0, values = (var_29578_cast_fp16, var_29979_cast_fp16))[name = tensor("op_30082_cast_fp16")]; tensor var_30084_equation_0 = const()[name = tensor("op_30084_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30084_cast_fp16 = einsum(equation = var_30084_equation_0, values = (var_29578_cast_fp16, var_29980_cast_fp16))[name = tensor("op_30084_cast_fp16")]; tensor var_30086_equation_0 = const()[name = tensor("op_30086_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30086_cast_fp16 = einsum(equation = var_30086_equation_0, values = (var_29582_cast_fp16, var_29981_cast_fp16))[name = tensor("op_30086_cast_fp16")]; tensor var_30088_equation_0 = const()[name = tensor("op_30088_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30088_cast_fp16 = einsum(equation = var_30088_equation_0, values = (var_29582_cast_fp16, var_29982_cast_fp16))[name = tensor("op_30088_cast_fp16")]; tensor var_30090_equation_0 = const()[name = tensor("op_30090_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30090_cast_fp16 = einsum(equation = var_30090_equation_0, values = (var_29582_cast_fp16, var_29983_cast_fp16))[name = tensor("op_30090_cast_fp16")]; tensor var_30092_equation_0 = const()[name = tensor("op_30092_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30092_cast_fp16 = einsum(equation = var_30092_equation_0, values = (var_29582_cast_fp16, var_29984_cast_fp16))[name = tensor("op_30092_cast_fp16")]; tensor var_30094_equation_0 = const()[name = tensor("op_30094_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30094_cast_fp16 = einsum(equation = var_30094_equation_0, values = (var_29586_cast_fp16, var_29985_cast_fp16))[name = tensor("op_30094_cast_fp16")]; tensor var_30096_equation_0 = const()[name = tensor("op_30096_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30096_cast_fp16 = einsum(equation = var_30096_equation_0, values = (var_29586_cast_fp16, var_29986_cast_fp16))[name = tensor("op_30096_cast_fp16")]; tensor var_30098_equation_0 = const()[name = tensor("op_30098_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30098_cast_fp16 = einsum(equation = var_30098_equation_0, values = (var_29586_cast_fp16, var_29987_cast_fp16))[name = tensor("op_30098_cast_fp16")]; tensor var_30100_equation_0 = const()[name = tensor("op_30100_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30100_cast_fp16 = einsum(equation = var_30100_equation_0, values = (var_29586_cast_fp16, var_29988_cast_fp16))[name = tensor("op_30100_cast_fp16")]; tensor var_30102_equation_0 = const()[name = tensor("op_30102_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30102_cast_fp16 = einsum(equation = var_30102_equation_0, values = (var_29590_cast_fp16, var_29989_cast_fp16))[name = tensor("op_30102_cast_fp16")]; tensor var_30104_equation_0 = const()[name = tensor("op_30104_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30104_cast_fp16 = einsum(equation = var_30104_equation_0, values = (var_29590_cast_fp16, var_29990_cast_fp16))[name = tensor("op_30104_cast_fp16")]; tensor var_30106_equation_0 = const()[name = tensor("op_30106_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30106_cast_fp16 = einsum(equation = var_30106_equation_0, values = (var_29590_cast_fp16, var_29991_cast_fp16))[name = tensor("op_30106_cast_fp16")]; tensor var_30108_equation_0 = const()[name = tensor("op_30108_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30108_cast_fp16 = einsum(equation = var_30108_equation_0, values = (var_29590_cast_fp16, var_29992_cast_fp16))[name = tensor("op_30108_cast_fp16")]; tensor var_30110_equation_0 = const()[name = tensor("op_30110_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30110_cast_fp16 = einsum(equation = var_30110_equation_0, values = (var_29594_cast_fp16, var_29993_cast_fp16))[name = tensor("op_30110_cast_fp16")]; tensor var_30112_equation_0 = const()[name = tensor("op_30112_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30112_cast_fp16 = einsum(equation = var_30112_equation_0, values = (var_29594_cast_fp16, var_29994_cast_fp16))[name = tensor("op_30112_cast_fp16")]; tensor var_30114_equation_0 = const()[name = tensor("op_30114_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30114_cast_fp16 = einsum(equation = var_30114_equation_0, values = (var_29594_cast_fp16, var_29995_cast_fp16))[name = tensor("op_30114_cast_fp16")]; tensor var_30116_equation_0 = const()[name = tensor("op_30116_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30116_cast_fp16 = einsum(equation = var_30116_equation_0, values = (var_29594_cast_fp16, var_29996_cast_fp16))[name = tensor("op_30116_cast_fp16")]; tensor var_30118_equation_0 = const()[name = tensor("op_30118_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30118_cast_fp16 = einsum(equation = var_30118_equation_0, values = (var_29598_cast_fp16, var_29997_cast_fp16))[name = tensor("op_30118_cast_fp16")]; tensor var_30120_equation_0 = const()[name = tensor("op_30120_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30120_cast_fp16 = einsum(equation = var_30120_equation_0, values = (var_29598_cast_fp16, var_29998_cast_fp16))[name = tensor("op_30120_cast_fp16")]; tensor var_30122_equation_0 = const()[name = tensor("op_30122_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30122_cast_fp16 = einsum(equation = var_30122_equation_0, values = (var_29598_cast_fp16, var_29999_cast_fp16))[name = tensor("op_30122_cast_fp16")]; tensor var_30124_equation_0 = const()[name = tensor("op_30124_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30124_cast_fp16 = einsum(equation = var_30124_equation_0, values = (var_29598_cast_fp16, var_30000_cast_fp16))[name = tensor("op_30124_cast_fp16")]; tensor var_30126_equation_0 = const()[name = tensor("op_30126_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30126_cast_fp16 = einsum(equation = var_30126_equation_0, values = (var_29602_cast_fp16, var_30001_cast_fp16))[name = tensor("op_30126_cast_fp16")]; tensor var_30128_equation_0 = const()[name = tensor("op_30128_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30128_cast_fp16 = einsum(equation = var_30128_equation_0, values = (var_29602_cast_fp16, var_30002_cast_fp16))[name = tensor("op_30128_cast_fp16")]; tensor var_30130_equation_0 = const()[name = tensor("op_30130_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30130_cast_fp16 = einsum(equation = var_30130_equation_0, values = (var_29602_cast_fp16, var_30003_cast_fp16))[name = tensor("op_30130_cast_fp16")]; tensor var_30132_equation_0 = const()[name = tensor("op_30132_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30132_cast_fp16 = einsum(equation = var_30132_equation_0, values = (var_29602_cast_fp16, var_30004_cast_fp16))[name = tensor("op_30132_cast_fp16")]; tensor var_30134_equation_0 = const()[name = tensor("op_30134_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30134_cast_fp16 = einsum(equation = var_30134_equation_0, values = (var_29606_cast_fp16, var_30005_cast_fp16))[name = tensor("op_30134_cast_fp16")]; tensor var_30136_equation_0 = const()[name = tensor("op_30136_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30136_cast_fp16 = einsum(equation = var_30136_equation_0, values = (var_29606_cast_fp16, var_30006_cast_fp16))[name = tensor("op_30136_cast_fp16")]; tensor var_30138_equation_0 = const()[name = tensor("op_30138_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30138_cast_fp16 = einsum(equation = var_30138_equation_0, values = (var_29606_cast_fp16, var_30007_cast_fp16))[name = tensor("op_30138_cast_fp16")]; tensor var_30140_equation_0 = const()[name = tensor("op_30140_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30140_cast_fp16 = einsum(equation = var_30140_equation_0, values = (var_29606_cast_fp16, var_30008_cast_fp16))[name = tensor("op_30140_cast_fp16")]; tensor var_30142_equation_0 = const()[name = tensor("op_30142_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30142_cast_fp16 = einsum(equation = var_30142_equation_0, values = (var_29610_cast_fp16, var_30009_cast_fp16))[name = tensor("op_30142_cast_fp16")]; tensor var_30144_equation_0 = const()[name = tensor("op_30144_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30144_cast_fp16 = einsum(equation = var_30144_equation_0, values = (var_29610_cast_fp16, var_30010_cast_fp16))[name = tensor("op_30144_cast_fp16")]; tensor var_30146_equation_0 = const()[name = tensor("op_30146_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30146_cast_fp16 = einsum(equation = var_30146_equation_0, values = (var_29610_cast_fp16, var_30011_cast_fp16))[name = tensor("op_30146_cast_fp16")]; tensor var_30148_equation_0 = const()[name = tensor("op_30148_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30148_cast_fp16 = einsum(equation = var_30148_equation_0, values = (var_29610_cast_fp16, var_30012_cast_fp16))[name = tensor("op_30148_cast_fp16")]; tensor var_30150_equation_0 = const()[name = tensor("op_30150_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30150_cast_fp16 = einsum(equation = var_30150_equation_0, values = (var_29614_cast_fp16, var_30013_cast_fp16))[name = tensor("op_30150_cast_fp16")]; tensor var_30152_equation_0 = const()[name = tensor("op_30152_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30152_cast_fp16 = einsum(equation = var_30152_equation_0, values = (var_29614_cast_fp16, var_30014_cast_fp16))[name = tensor("op_30152_cast_fp16")]; tensor var_30154_equation_0 = const()[name = tensor("op_30154_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30154_cast_fp16 = einsum(equation = var_30154_equation_0, values = (var_29614_cast_fp16, var_30015_cast_fp16))[name = tensor("op_30154_cast_fp16")]; tensor var_30156_equation_0 = const()[name = tensor("op_30156_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30156_cast_fp16 = einsum(equation = var_30156_equation_0, values = (var_29614_cast_fp16, var_30016_cast_fp16))[name = tensor("op_30156_cast_fp16")]; tensor var_30158_equation_0 = const()[name = tensor("op_30158_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30158_cast_fp16 = einsum(equation = var_30158_equation_0, values = (var_29618_cast_fp16, var_30017_cast_fp16))[name = tensor("op_30158_cast_fp16")]; tensor var_30160_equation_0 = const()[name = tensor("op_30160_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30160_cast_fp16 = einsum(equation = var_30160_equation_0, values = (var_29618_cast_fp16, var_30018_cast_fp16))[name = tensor("op_30160_cast_fp16")]; tensor var_30162_equation_0 = const()[name = tensor("op_30162_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30162_cast_fp16 = einsum(equation = var_30162_equation_0, values = (var_29618_cast_fp16, var_30019_cast_fp16))[name = tensor("op_30162_cast_fp16")]; tensor var_30164_equation_0 = const()[name = tensor("op_30164_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30164_cast_fp16 = einsum(equation = var_30164_equation_0, values = (var_29618_cast_fp16, var_30020_cast_fp16))[name = tensor("op_30164_cast_fp16")]; tensor var_30166_equation_0 = const()[name = tensor("op_30166_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30166_cast_fp16 = einsum(equation = var_30166_equation_0, values = (var_29622_cast_fp16, var_30021_cast_fp16))[name = tensor("op_30166_cast_fp16")]; tensor var_30168_equation_0 = const()[name = tensor("op_30168_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30168_cast_fp16 = einsum(equation = var_30168_equation_0, values = (var_29622_cast_fp16, var_30022_cast_fp16))[name = tensor("op_30168_cast_fp16")]; tensor var_30170_equation_0 = const()[name = tensor("op_30170_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30170_cast_fp16 = einsum(equation = var_30170_equation_0, values = (var_29622_cast_fp16, var_30023_cast_fp16))[name = tensor("op_30170_cast_fp16")]; tensor var_30172_equation_0 = const()[name = tensor("op_30172_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30172_cast_fp16 = einsum(equation = var_30172_equation_0, values = (var_29622_cast_fp16, var_30024_cast_fp16))[name = tensor("op_30172_cast_fp16")]; tensor var_30174_equation_0 = const()[name = tensor("op_30174_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30174_cast_fp16 = einsum(equation = var_30174_equation_0, values = (var_29626_cast_fp16, var_30025_cast_fp16))[name = tensor("op_30174_cast_fp16")]; tensor var_30176_equation_0 = const()[name = tensor("op_30176_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30176_cast_fp16 = einsum(equation = var_30176_equation_0, values = (var_29626_cast_fp16, var_30026_cast_fp16))[name = tensor("op_30176_cast_fp16")]; tensor var_30178_equation_0 = const()[name = tensor("op_30178_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30178_cast_fp16 = einsum(equation = var_30178_equation_0, values = (var_29626_cast_fp16, var_30027_cast_fp16))[name = tensor("op_30178_cast_fp16")]; tensor var_30180_equation_0 = const()[name = tensor("op_30180_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30180_cast_fp16 = einsum(equation = var_30180_equation_0, values = (var_29626_cast_fp16, var_30028_cast_fp16))[name = tensor("op_30180_cast_fp16")]; tensor var_30182_equation_0 = const()[name = tensor("op_30182_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30182_cast_fp16 = einsum(equation = var_30182_equation_0, values = (var_29630_cast_fp16, var_30029_cast_fp16))[name = tensor("op_30182_cast_fp16")]; tensor var_30184_equation_0 = const()[name = tensor("op_30184_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30184_cast_fp16 = einsum(equation = var_30184_equation_0, values = (var_29630_cast_fp16, var_30030_cast_fp16))[name = tensor("op_30184_cast_fp16")]; tensor var_30186_equation_0 = const()[name = tensor("op_30186_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30186_cast_fp16 = einsum(equation = var_30186_equation_0, values = (var_29630_cast_fp16, var_30031_cast_fp16))[name = tensor("op_30186_cast_fp16")]; tensor var_30188_equation_0 = const()[name = tensor("op_30188_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30188_cast_fp16 = einsum(equation = var_30188_equation_0, values = (var_29630_cast_fp16, var_30032_cast_fp16))[name = tensor("op_30188_cast_fp16")]; tensor var_30190_equation_0 = const()[name = tensor("op_30190_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30190_cast_fp16 = einsum(equation = var_30190_equation_0, values = (var_29634_cast_fp16, var_30033_cast_fp16))[name = tensor("op_30190_cast_fp16")]; tensor var_30192_equation_0 = const()[name = tensor("op_30192_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30192_cast_fp16 = einsum(equation = var_30192_equation_0, values = (var_29634_cast_fp16, var_30034_cast_fp16))[name = tensor("op_30192_cast_fp16")]; tensor var_30194_equation_0 = const()[name = tensor("op_30194_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30194_cast_fp16 = einsum(equation = var_30194_equation_0, values = (var_29634_cast_fp16, var_30035_cast_fp16))[name = tensor("op_30194_cast_fp16")]; tensor var_30196_equation_0 = const()[name = tensor("op_30196_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_30196_cast_fp16 = einsum(equation = var_30196_equation_0, values = (var_29634_cast_fp16, var_30036_cast_fp16))[name = tensor("op_30196_cast_fp16")]; tensor var_30198_interleave_0 = const()[name = tensor("op_30198_interleave_0"), val = tensor(false)]; tensor var_30198_cast_fp16 = concat(axis = var_28730, interleave = var_30198_interleave_0, values = (var_30038_cast_fp16, var_30040_cast_fp16, var_30042_cast_fp16, var_30044_cast_fp16))[name = tensor("op_30198_cast_fp16")]; tensor var_30200_interleave_0 = const()[name = tensor("op_30200_interleave_0"), val = tensor(false)]; tensor var_30200_cast_fp16 = concat(axis = var_28730, interleave = var_30200_interleave_0, values = (var_30046_cast_fp16, var_30048_cast_fp16, var_30050_cast_fp16, var_30052_cast_fp16))[name = tensor("op_30200_cast_fp16")]; tensor var_30202_interleave_0 = const()[name = tensor("op_30202_interleave_0"), val = tensor(false)]; tensor var_30202_cast_fp16 = concat(axis = var_28730, interleave = var_30202_interleave_0, values = (var_30054_cast_fp16, var_30056_cast_fp16, var_30058_cast_fp16, var_30060_cast_fp16))[name = tensor("op_30202_cast_fp16")]; tensor var_30204_interleave_0 = const()[name = tensor("op_30204_interleave_0"), val = tensor(false)]; tensor var_30204_cast_fp16 = concat(axis = var_28730, interleave = var_30204_interleave_0, values = (var_30062_cast_fp16, var_30064_cast_fp16, var_30066_cast_fp16, var_30068_cast_fp16))[name = tensor("op_30204_cast_fp16")]; tensor var_30206_interleave_0 = const()[name = tensor("op_30206_interleave_0"), val = tensor(false)]; tensor var_30206_cast_fp16 = concat(axis = var_28730, interleave = var_30206_interleave_0, values = (var_30070_cast_fp16, var_30072_cast_fp16, var_30074_cast_fp16, var_30076_cast_fp16))[name = tensor("op_30206_cast_fp16")]; tensor var_30208_interleave_0 = const()[name = tensor("op_30208_interleave_0"), val = tensor(false)]; tensor var_30208_cast_fp16 = concat(axis = var_28730, interleave = var_30208_interleave_0, values = (var_30078_cast_fp16, var_30080_cast_fp16, var_30082_cast_fp16, var_30084_cast_fp16))[name = tensor("op_30208_cast_fp16")]; tensor var_30210_interleave_0 = const()[name = tensor("op_30210_interleave_0"), val = tensor(false)]; tensor var_30210_cast_fp16 = concat(axis = var_28730, interleave = var_30210_interleave_0, values = (var_30086_cast_fp16, var_30088_cast_fp16, var_30090_cast_fp16, var_30092_cast_fp16))[name = tensor("op_30210_cast_fp16")]; tensor var_30212_interleave_0 = const()[name = tensor("op_30212_interleave_0"), val = tensor(false)]; tensor var_30212_cast_fp16 = concat(axis = var_28730, interleave = var_30212_interleave_0, values = (var_30094_cast_fp16, var_30096_cast_fp16, var_30098_cast_fp16, var_30100_cast_fp16))[name = tensor("op_30212_cast_fp16")]; tensor var_30214_interleave_0 = const()[name = tensor("op_30214_interleave_0"), val = tensor(false)]; tensor var_30214_cast_fp16 = concat(axis = var_28730, interleave = var_30214_interleave_0, values = (var_30102_cast_fp16, var_30104_cast_fp16, var_30106_cast_fp16, var_30108_cast_fp16))[name = tensor("op_30214_cast_fp16")]; tensor var_30216_interleave_0 = const()[name = tensor("op_30216_interleave_0"), val = tensor(false)]; tensor var_30216_cast_fp16 = concat(axis = var_28730, interleave = var_30216_interleave_0, values = (var_30110_cast_fp16, var_30112_cast_fp16, var_30114_cast_fp16, var_30116_cast_fp16))[name = tensor("op_30216_cast_fp16")]; tensor var_30218_interleave_0 = const()[name = tensor("op_30218_interleave_0"), val = tensor(false)]; tensor var_30218_cast_fp16 = concat(axis = var_28730, interleave = var_30218_interleave_0, values = (var_30118_cast_fp16, var_30120_cast_fp16, var_30122_cast_fp16, var_30124_cast_fp16))[name = tensor("op_30218_cast_fp16")]; tensor var_30220_interleave_0 = const()[name = tensor("op_30220_interleave_0"), val = tensor(false)]; tensor var_30220_cast_fp16 = concat(axis = var_28730, interleave = var_30220_interleave_0, values = (var_30126_cast_fp16, var_30128_cast_fp16, var_30130_cast_fp16, var_30132_cast_fp16))[name = tensor("op_30220_cast_fp16")]; tensor var_30222_interleave_0 = const()[name = tensor("op_30222_interleave_0"), val = tensor(false)]; tensor var_30222_cast_fp16 = concat(axis = var_28730, interleave = var_30222_interleave_0, values = (var_30134_cast_fp16, var_30136_cast_fp16, var_30138_cast_fp16, var_30140_cast_fp16))[name = tensor("op_30222_cast_fp16")]; tensor var_30224_interleave_0 = const()[name = tensor("op_30224_interleave_0"), val = tensor(false)]; tensor var_30224_cast_fp16 = concat(axis = var_28730, interleave = var_30224_interleave_0, values = (var_30142_cast_fp16, var_30144_cast_fp16, var_30146_cast_fp16, var_30148_cast_fp16))[name = tensor("op_30224_cast_fp16")]; tensor var_30226_interleave_0 = const()[name = tensor("op_30226_interleave_0"), val = tensor(false)]; tensor var_30226_cast_fp16 = concat(axis = var_28730, interleave = var_30226_interleave_0, values = (var_30150_cast_fp16, var_30152_cast_fp16, var_30154_cast_fp16, var_30156_cast_fp16))[name = tensor("op_30226_cast_fp16")]; tensor var_30228_interleave_0 = const()[name = tensor("op_30228_interleave_0"), val = tensor(false)]; tensor var_30228_cast_fp16 = concat(axis = var_28730, interleave = var_30228_interleave_0, values = (var_30158_cast_fp16, var_30160_cast_fp16, var_30162_cast_fp16, var_30164_cast_fp16))[name = tensor("op_30228_cast_fp16")]; tensor var_30230_interleave_0 = const()[name = tensor("op_30230_interleave_0"), val = tensor(false)]; tensor var_30230_cast_fp16 = concat(axis = var_28730, interleave = var_30230_interleave_0, values = (var_30166_cast_fp16, var_30168_cast_fp16, var_30170_cast_fp16, var_30172_cast_fp16))[name = tensor("op_30230_cast_fp16")]; tensor var_30232_interleave_0 = const()[name = tensor("op_30232_interleave_0"), val = tensor(false)]; tensor var_30232_cast_fp16 = concat(axis = var_28730, interleave = var_30232_interleave_0, values = (var_30174_cast_fp16, var_30176_cast_fp16, var_30178_cast_fp16, var_30180_cast_fp16))[name = tensor("op_30232_cast_fp16")]; tensor var_30234_interleave_0 = const()[name = tensor("op_30234_interleave_0"), val = tensor(false)]; tensor var_30234_cast_fp16 = concat(axis = var_28730, interleave = var_30234_interleave_0, values = (var_30182_cast_fp16, var_30184_cast_fp16, var_30186_cast_fp16, var_30188_cast_fp16))[name = tensor("op_30234_cast_fp16")]; tensor var_30236_interleave_0 = const()[name = tensor("op_30236_interleave_0"), val = tensor(false)]; tensor var_30236_cast_fp16 = concat(axis = var_28730, interleave = var_30236_interleave_0, values = (var_30190_cast_fp16, var_30192_cast_fp16, var_30194_cast_fp16, var_30196_cast_fp16))[name = tensor("op_30236_cast_fp16")]; tensor input_145_interleave_0 = const()[name = tensor("input_145_interleave_0"), val = tensor(false)]; tensor input_145_cast_fp16 = concat(axis = var_28755, interleave = input_145_interleave_0, values = (var_30198_cast_fp16, var_30200_cast_fp16, var_30202_cast_fp16, var_30204_cast_fp16, var_30206_cast_fp16, var_30208_cast_fp16, var_30210_cast_fp16, var_30212_cast_fp16, var_30214_cast_fp16, var_30216_cast_fp16, var_30218_cast_fp16, var_30220_cast_fp16, var_30222_cast_fp16, var_30224_cast_fp16, var_30226_cast_fp16, var_30228_cast_fp16, var_30230_cast_fp16, var_30232_cast_fp16, var_30234_cast_fp16, var_30236_cast_fp16))[name = tensor("input_145_cast_fp16")]; tensor var_30247_pad_type_0 = const()[name = tensor("op_30247_pad_type_0"), val = tensor("valid")]; tensor var_30247_strides_0 = const()[name = tensor("op_30247_strides_0"), val = tensor([1, 1])]; tensor var_30247_pad_0 = const()[name = tensor("op_30247_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30247_dilations_0 = const()[name = tensor("op_30247_dilations_0"), val = tensor([1, 1])]; tensor var_30247_groups_0 = const()[name = tensor("op_30247_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(249720384))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250539648))), name = tensor("layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_18_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250539776)))]; tensor var_30247_cast_fp16 = conv(bias = layers_18_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_30247_dilations_0, groups = var_30247_groups_0, pad = var_30247_pad_0, pad_type = var_30247_pad_type_0, strides = var_30247_strides_0, weight = layers_18_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_145_cast_fp16)[name = tensor("op_30247_cast_fp16")]; tensor var_30253_pad_type_0 = const()[name = tensor("op_30253_pad_type_0"), val = tensor("valid")]; tensor var_30253_strides_0 = const()[name = tensor("op_30253_strides_0"), val = tensor([1, 1])]; tensor var_30253_pad_0 = const()[name = tensor("op_30253_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30253_dilations_0 = const()[name = tensor("op_30253_dilations_0"), val = tensor([1, 1])]; tensor var_30253_groups_0 = const()[name = tensor("op_30253_groups_0"), val = tensor(1)]; tensor layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250558144))), name = tensor("layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250542400))), shape = tensor([1280, 1280, 1, 1])]; tensor var_30253_cast_fp16 = conv(dilations = var_30253_dilations_0, groups = var_30253_groups_0, pad = var_30253_pad_0, pad_type = var_30253_pad_type_0, strides = var_30253_strides_0, weight = layers_18_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_145_cast_fp16)[name = tensor("op_30253_cast_fp16")]; tensor obj_75_cast_fp16 = add(x = var_30247_cast_fp16, y = var_30253_cast_fp16)[name = tensor("obj_75_cast_fp16")]; tensor inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = obj_75_cast_fp16)[name = tensor("inputs_75_cast_fp16")]; tensor out_75_axes_0 = const()[name = tensor("out_75_axes_0"), val = tensor([1])]; tensor var_30264_to_fp16 = const()[name = tensor("op_30264_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_75_cast_fp16 = layer_norm(axes = out_75_axes_0, epsilon = var_30264_to_fp16, x = inputs_75_cast_fp16)[name = tensor("out_75_cast_fp16")]; tensor input_147_gamma_0_to_fp16 = const()[name = tensor("input_147_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250763008)))]; tensor input_147_beta_0_to_fp16 = const()[name = tensor("input_147_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250765632)))]; tensor input_147_epsilon_0_to_fp16 = const()[name = tensor("input_147_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_147_cast_fp16 = batch_norm(beta = input_147_beta_0_to_fp16, epsilon = input_147_epsilon_0_to_fp16, gamma = input_147_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_75_cast_fp16)[name = tensor("input_147_cast_fp16")]; tensor var_30282_pad_type_0 = const()[name = tensor("op_30282_pad_type_0"), val = tensor("valid")]; tensor var_30282_strides_0 = const()[name = tensor("op_30282_strides_0"), val = tensor([1, 1])]; tensor var_30282_pad_0 = const()[name = tensor("op_30282_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30282_dilations_0 = const()[name = tensor("op_30282_dilations_0"), val = tensor([1, 1])]; tensor var_30282_groups_0 = const()[name = tensor("op_30282_groups_0"), val = tensor(1)]; tensor layers_18_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250768256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254045120))), name = tensor("layers_18_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_18_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254045248)))]; tensor var_30282_cast_fp16 = conv(bias = layers_18_fc1_inlier_module_bias_to_fp16, dilations = var_30282_dilations_0, groups = var_30282_groups_0, pad = var_30282_pad_0, pad_type = var_30282_pad_type_0, strides = var_30282_strides_0, weight = layers_18_fc1_inlier_module_weight_to_fp16_palettized, x = input_147_cast_fp16)[name = tensor("op_30282_cast_fp16")]; tensor var_30288_pad_type_0 = const()[name = tensor("op_30288_pad_type_0"), val = tensor("valid")]; tensor var_30288_strides_0 = const()[name = tensor("op_30288_strides_0"), val = tensor([1, 1])]; tensor var_30288_pad_0 = const()[name = tensor("op_30288_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30288_dilations_0 = const()[name = tensor("op_30288_dilations_0"), val = tensor([1, 1])]; tensor var_30288_groups_0 = const()[name = tensor("op_30288_groups_0"), val = tensor(1)]; tensor layers_18_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254130816))), name = tensor("layers_18_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254055552))), shape = tensor([5120, 1280, 1, 1])]; tensor var_30288_cast_fp16 = conv(dilations = var_30288_dilations_0, groups = var_30288_groups_0, pad = var_30288_pad_0, pad_type = var_30288_pad_type_0, strides = var_30288_strides_0, weight = layers_18_fc1_outlier_module_weight_to_fp16_sparsified, x = input_147_cast_fp16)[name = tensor("op_30288_cast_fp16")]; tensor input_149_cast_fp16 = add(x = var_30282_cast_fp16, y = var_30288_cast_fp16)[name = tensor("input_149_cast_fp16")]; tensor input_151_mode_0 = const()[name = tensor("input_151_mode_0"), val = tensor("EXACT")]; tensor input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor("input_151_cast_fp16")]; tensor var_30299_pad_type_0 = const()[name = tensor("op_30299_pad_type_0"), val = tensor("valid")]; tensor var_30299_strides_0 = const()[name = tensor("op_30299_strides_0"), val = tensor([1, 1])]; tensor var_30299_pad_0 = const()[name = tensor("op_30299_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30299_dilations_0 = const()[name = tensor("op_30299_dilations_0"), val = tensor([1, 1])]; tensor var_30299_groups_0 = const()[name = tensor("op_30299_groups_0"), val = tensor(1)]; tensor layers_18_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(254950080))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258226944))), name = tensor("layers_18_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_18_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_18_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258227072)))]; tensor var_30299_cast_fp16 = conv(bias = layers_18_fc2_inlier_module_bias_to_fp16, dilations = var_30299_dilations_0, groups = var_30299_groups_0, pad = var_30299_pad_0, pad_type = var_30299_pad_type_0, strides = var_30299_strides_0, weight = layers_18_fc2_inlier_module_weight_to_fp16_palettized, x = input_151_cast_fp16)[name = tensor("op_30299_cast_fp16")]; tensor var_30305_pad_type_0 = const()[name = tensor("op_30305_pad_type_0"), val = tensor("valid")]; tensor var_30305_strides_0 = const()[name = tensor("op_30305_strides_0"), val = tensor([1, 1])]; tensor var_30305_pad_0 = const()[name = tensor("op_30305_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30305_dilations_0 = const()[name = tensor("op_30305_dilations_0"), val = tensor([1, 1])]; tensor var_30305_groups_0 = const()[name = tensor("op_30305_groups_0"), val = tensor(1)]; tensor layers_18_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258318976))), name = tensor("layers_18_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(258229696))), shape = tensor([1280, 5120, 1, 1])]; tensor var_30305_cast_fp16 = conv(dilations = var_30305_dilations_0, groups = var_30305_groups_0, pad = var_30305_pad_0, pad_type = var_30305_pad_type_0, strides = var_30305_strides_0, weight = layers_18_fc2_outlier_module_weight_to_fp16_sparsified, x = input_151_cast_fp16)[name = tensor("op_30305_cast_fp16")]; tensor hidden_states_41_cast_fp16 = add(x = var_30299_cast_fp16, y = var_30305_cast_fp16)[name = tensor("hidden_states_41_cast_fp16")]; tensor inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = hidden_states_41_cast_fp16)[name = tensor("inputs_77_cast_fp16")]; tensor var_30311 = const()[name = tensor("op_30311"), val = tensor(3)]; tensor var_30336 = const()[name = tensor("op_30336"), val = tensor(1)]; tensor out_77_axes_0 = const()[name = tensor("out_77_axes_0"), val = tensor([1])]; tensor var_30353_to_fp16 = const()[name = tensor("op_30353_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_77_cast_fp16 = layer_norm(axes = out_77_axes_0, epsilon = var_30353_to_fp16, x = inputs_77_cast_fp16)[name = tensor("out_77_cast_fp16")]; tensor obj_77_gamma_0_to_fp16 = const()[name = tensor("obj_77_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259138240)))]; tensor obj_77_beta_0_to_fp16 = const()[name = tensor("obj_77_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259140864)))]; tensor obj_77_epsilon_0_to_fp16 = const()[name = tensor("obj_77_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_77_cast_fp16 = batch_norm(beta = obj_77_beta_0_to_fp16, epsilon = obj_77_epsilon_0_to_fp16, gamma = obj_77_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_77_cast_fp16)[name = tensor("obj_77_cast_fp16")]; tensor var_30375_pad_type_0 = const()[name = tensor("op_30375_pad_type_0"), val = tensor("valid")]; tensor var_30375_strides_0 = const()[name = tensor("op_30375_strides_0"), val = tensor([1, 1])]; tensor var_30375_pad_0 = const()[name = tensor("op_30375_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30375_dilations_0 = const()[name = tensor("op_30375_dilations_0"), val = tensor([1, 1])]; tensor var_30375_groups_0 = const()[name = tensor("op_30375_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259143488))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259962752))), name = tensor("layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_19_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259962880)))]; tensor var_30375_cast_fp16 = conv(bias = layers_19_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_30375_dilations_0, groups = var_30375_groups_0, pad = var_30375_pad_0, pad_type = var_30375_pad_type_0, strides = var_30375_strides_0, weight = layers_19_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = tensor("op_30375_cast_fp16")]; tensor var_30381_pad_type_0 = const()[name = tensor("op_30381_pad_type_0"), val = tensor("valid")]; tensor var_30381_strides_0 = const()[name = tensor("op_30381_strides_0"), val = tensor([1, 1])]; tensor var_30381_pad_0 = const()[name = tensor("op_30381_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30381_dilations_0 = const()[name = tensor("op_30381_dilations_0"), val = tensor([1, 1])]; tensor var_30381_groups_0 = const()[name = tensor("op_30381_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260020736))), name = tensor("layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(259965504))), shape = tensor([1280, 1280, 1, 1])]; tensor var_30381_cast_fp16 = conv(dilations = var_30381_dilations_0, groups = var_30381_groups_0, pad = var_30381_pad_0, pad_type = var_30381_pad_type_0, strides = var_30381_strides_0, weight = layers_19_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = tensor("op_30381_cast_fp16")]; tensor query_39_cast_fp16 = add(x = var_30375_cast_fp16, y = var_30381_cast_fp16)[name = tensor("query_39_cast_fp16")]; tensor var_30390_pad_type_0 = const()[name = tensor("op_30390_pad_type_0"), val = tensor("valid")]; tensor var_30390_strides_0 = const()[name = tensor("op_30390_strides_0"), val = tensor([1, 1])]; tensor var_30390_pad_0 = const()[name = tensor("op_30390_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30390_dilations_0 = const()[name = tensor("op_30390_dilations_0"), val = tensor([1, 1])]; tensor var_30390_groups_0 = const()[name = tensor("op_30390_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260225600))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261044864))), name = tensor("layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_30390_cast_fp16 = conv(dilations = var_30390_dilations_0, groups = var_30390_groups_0, pad = var_30390_pad_0, pad_type = var_30390_pad_type_0, strides = var_30390_strides_0, weight = layers_19_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = tensor("op_30390_cast_fp16")]; tensor var_30396_pad_type_0 = const()[name = tensor("op_30396_pad_type_0"), val = tensor("valid")]; tensor var_30396_strides_0 = const()[name = tensor("op_30396_strides_0"), val = tensor([1, 1])]; tensor var_30396_pad_0 = const()[name = tensor("op_30396_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30396_dilations_0 = const()[name = tensor("op_30396_dilations_0"), val = tensor([1, 1])]; tensor var_30396_groups_0 = const()[name = tensor("op_30396_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261079936))), name = tensor("layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261044992))), shape = tensor([1280, 1280, 1, 1])]; tensor var_30396_cast_fp16 = conv(dilations = var_30396_dilations_0, groups = var_30396_groups_0, pad = var_30396_pad_0, pad_type = var_30396_pad_type_0, strides = var_30396_strides_0, weight = layers_19_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = tensor("op_30396_cast_fp16")]; tensor key_39_cast_fp16 = add(x = var_30390_cast_fp16, y = var_30396_cast_fp16)[name = tensor("key_39_cast_fp16")]; tensor var_30406_pad_type_0 = const()[name = tensor("op_30406_pad_type_0"), val = tensor("valid")]; tensor var_30406_strides_0 = const()[name = tensor("op_30406_strides_0"), val = tensor([1, 1])]; tensor var_30406_pad_0 = const()[name = tensor("op_30406_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30406_dilations_0 = const()[name = tensor("op_30406_dilations_0"), val = tensor([1, 1])]; tensor var_30406_groups_0 = const()[name = tensor("op_30406_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(261284800))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262104064))), name = tensor("layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_19_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262104192)))]; tensor var_30406_cast_fp16 = conv(bias = layers_19_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_30406_dilations_0, groups = var_30406_groups_0, pad = var_30406_pad_0, pad_type = var_30406_pad_type_0, strides = var_30406_strides_0, weight = layers_19_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_77_cast_fp16)[name = tensor("op_30406_cast_fp16")]; tensor var_30412_pad_type_0 = const()[name = tensor("op_30412_pad_type_0"), val = tensor("valid")]; tensor var_30412_strides_0 = const()[name = tensor("op_30412_strides_0"), val = tensor([1, 1])]; tensor var_30412_pad_0 = const()[name = tensor("op_30412_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_30412_dilations_0 = const()[name = tensor("op_30412_dilations_0"), val = tensor([1, 1])]; tensor var_30412_groups_0 = const()[name = tensor("op_30412_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262121984))), name = tensor("layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262106816))), shape = tensor([1280, 1280, 1, 1])]; tensor var_30412_cast_fp16 = conv(dilations = var_30412_dilations_0, groups = var_30412_groups_0, pad = var_30412_pad_0, pad_type = var_30412_pad_type_0, strides = var_30412_strides_0, weight = layers_19_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_77_cast_fp16)[name = tensor("op_30412_cast_fp16")]; tensor value_39_cast_fp16 = add(x = var_30406_cast_fp16, y = var_30412_cast_fp16)[name = tensor("value_39_cast_fp16")]; tensor var_30418_begin_0 = const()[name = tensor("op_30418_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30418_end_0 = const()[name = tensor("op_30418_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30418_end_mask_0 = const()[name = tensor("op_30418_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30418_cast_fp16 = slice_by_index(begin = var_30418_begin_0, end = var_30418_end_0, end_mask = var_30418_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30418_cast_fp16")]; tensor var_30422_begin_0 = const()[name = tensor("op_30422_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_30422_end_0 = const()[name = tensor("op_30422_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_30422_end_mask_0 = const()[name = tensor("op_30422_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30422_cast_fp16 = slice_by_index(begin = var_30422_begin_0, end = var_30422_end_0, end_mask = var_30422_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30422_cast_fp16")]; tensor var_30426_begin_0 = const()[name = tensor("op_30426_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_30426_end_0 = const()[name = tensor("op_30426_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_30426_end_mask_0 = const()[name = tensor("op_30426_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30426_cast_fp16 = slice_by_index(begin = var_30426_begin_0, end = var_30426_end_0, end_mask = var_30426_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30426_cast_fp16")]; tensor var_30430_begin_0 = const()[name = tensor("op_30430_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_30430_end_0 = const()[name = tensor("op_30430_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_30430_end_mask_0 = const()[name = tensor("op_30430_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30430_cast_fp16 = slice_by_index(begin = var_30430_begin_0, end = var_30430_end_0, end_mask = var_30430_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30430_cast_fp16")]; tensor var_30434_begin_0 = const()[name = tensor("op_30434_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_30434_end_0 = const()[name = tensor("op_30434_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_30434_end_mask_0 = const()[name = tensor("op_30434_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30434_cast_fp16 = slice_by_index(begin = var_30434_begin_0, end = var_30434_end_0, end_mask = var_30434_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30434_cast_fp16")]; tensor var_30438_begin_0 = const()[name = tensor("op_30438_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_30438_end_0 = const()[name = tensor("op_30438_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_30438_end_mask_0 = const()[name = tensor("op_30438_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30438_cast_fp16 = slice_by_index(begin = var_30438_begin_0, end = var_30438_end_0, end_mask = var_30438_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30438_cast_fp16")]; tensor var_30442_begin_0 = const()[name = tensor("op_30442_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_30442_end_0 = const()[name = tensor("op_30442_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_30442_end_mask_0 = const()[name = tensor("op_30442_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30442_cast_fp16 = slice_by_index(begin = var_30442_begin_0, end = var_30442_end_0, end_mask = var_30442_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30442_cast_fp16")]; tensor var_30446_begin_0 = const()[name = tensor("op_30446_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_30446_end_0 = const()[name = tensor("op_30446_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_30446_end_mask_0 = const()[name = tensor("op_30446_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30446_cast_fp16 = slice_by_index(begin = var_30446_begin_0, end = var_30446_end_0, end_mask = var_30446_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30446_cast_fp16")]; tensor var_30450_begin_0 = const()[name = tensor("op_30450_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_30450_end_0 = const()[name = tensor("op_30450_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_30450_end_mask_0 = const()[name = tensor("op_30450_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30450_cast_fp16 = slice_by_index(begin = var_30450_begin_0, end = var_30450_end_0, end_mask = var_30450_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30450_cast_fp16")]; tensor var_30454_begin_0 = const()[name = tensor("op_30454_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_30454_end_0 = const()[name = tensor("op_30454_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_30454_end_mask_0 = const()[name = tensor("op_30454_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30454_cast_fp16 = slice_by_index(begin = var_30454_begin_0, end = var_30454_end_0, end_mask = var_30454_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30454_cast_fp16")]; tensor var_30458_begin_0 = const()[name = tensor("op_30458_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_30458_end_0 = const()[name = tensor("op_30458_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_30458_end_mask_0 = const()[name = tensor("op_30458_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30458_cast_fp16 = slice_by_index(begin = var_30458_begin_0, end = var_30458_end_0, end_mask = var_30458_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30458_cast_fp16")]; tensor var_30462_begin_0 = const()[name = tensor("op_30462_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_30462_end_0 = const()[name = tensor("op_30462_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_30462_end_mask_0 = const()[name = tensor("op_30462_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30462_cast_fp16 = slice_by_index(begin = var_30462_begin_0, end = var_30462_end_0, end_mask = var_30462_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30462_cast_fp16")]; tensor var_30466_begin_0 = const()[name = tensor("op_30466_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_30466_end_0 = const()[name = tensor("op_30466_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_30466_end_mask_0 = const()[name = tensor("op_30466_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30466_cast_fp16 = slice_by_index(begin = var_30466_begin_0, end = var_30466_end_0, end_mask = var_30466_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30466_cast_fp16")]; tensor var_30470_begin_0 = const()[name = tensor("op_30470_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_30470_end_0 = const()[name = tensor("op_30470_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_30470_end_mask_0 = const()[name = tensor("op_30470_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30470_cast_fp16 = slice_by_index(begin = var_30470_begin_0, end = var_30470_end_0, end_mask = var_30470_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30470_cast_fp16")]; tensor var_30474_begin_0 = const()[name = tensor("op_30474_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_30474_end_0 = const()[name = tensor("op_30474_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_30474_end_mask_0 = const()[name = tensor("op_30474_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30474_cast_fp16 = slice_by_index(begin = var_30474_begin_0, end = var_30474_end_0, end_mask = var_30474_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30474_cast_fp16")]; tensor var_30478_begin_0 = const()[name = tensor("op_30478_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_30478_end_0 = const()[name = tensor("op_30478_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_30478_end_mask_0 = const()[name = tensor("op_30478_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30478_cast_fp16 = slice_by_index(begin = var_30478_begin_0, end = var_30478_end_0, end_mask = var_30478_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30478_cast_fp16")]; tensor var_30482_begin_0 = const()[name = tensor("op_30482_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_30482_end_0 = const()[name = tensor("op_30482_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_30482_end_mask_0 = const()[name = tensor("op_30482_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30482_cast_fp16 = slice_by_index(begin = var_30482_begin_0, end = var_30482_end_0, end_mask = var_30482_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30482_cast_fp16")]; tensor var_30486_begin_0 = const()[name = tensor("op_30486_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_30486_end_0 = const()[name = tensor("op_30486_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_30486_end_mask_0 = const()[name = tensor("op_30486_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30486_cast_fp16 = slice_by_index(begin = var_30486_begin_0, end = var_30486_end_0, end_mask = var_30486_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30486_cast_fp16")]; tensor var_30490_begin_0 = const()[name = tensor("op_30490_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_30490_end_0 = const()[name = tensor("op_30490_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_30490_end_mask_0 = const()[name = tensor("op_30490_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30490_cast_fp16 = slice_by_index(begin = var_30490_begin_0, end = var_30490_end_0, end_mask = var_30490_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30490_cast_fp16")]; tensor var_30494_begin_0 = const()[name = tensor("op_30494_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_30494_end_0 = const()[name = tensor("op_30494_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_30494_end_mask_0 = const()[name = tensor("op_30494_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_30494_cast_fp16 = slice_by_index(begin = var_30494_begin_0, end = var_30494_end_0, end_mask = var_30494_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_30494_cast_fp16")]; tensor var_30503_begin_0 = const()[name = tensor("op_30503_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30503_end_0 = const()[name = tensor("op_30503_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30503_end_mask_0 = const()[name = tensor("op_30503_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30503_cast_fp16 = slice_by_index(begin = var_30503_begin_0, end = var_30503_end_0, end_mask = var_30503_end_mask_0, x = var_30418_cast_fp16)[name = tensor("op_30503_cast_fp16")]; tensor var_30510_begin_0 = const()[name = tensor("op_30510_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30510_end_0 = const()[name = tensor("op_30510_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30510_end_mask_0 = const()[name = tensor("op_30510_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30510_cast_fp16 = slice_by_index(begin = var_30510_begin_0, end = var_30510_end_0, end_mask = var_30510_end_mask_0, x = var_30418_cast_fp16)[name = tensor("op_30510_cast_fp16")]; tensor var_30517_begin_0 = const()[name = tensor("op_30517_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30517_end_0 = const()[name = tensor("op_30517_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30517_end_mask_0 = const()[name = tensor("op_30517_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30517_cast_fp16 = slice_by_index(begin = var_30517_begin_0, end = var_30517_end_0, end_mask = var_30517_end_mask_0, x = var_30418_cast_fp16)[name = tensor("op_30517_cast_fp16")]; tensor var_30524_begin_0 = const()[name = tensor("op_30524_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30524_end_0 = const()[name = tensor("op_30524_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30524_end_mask_0 = const()[name = tensor("op_30524_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30524_cast_fp16 = slice_by_index(begin = var_30524_begin_0, end = var_30524_end_0, end_mask = var_30524_end_mask_0, x = var_30418_cast_fp16)[name = tensor("op_30524_cast_fp16")]; tensor var_30531_begin_0 = const()[name = tensor("op_30531_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30531_end_0 = const()[name = tensor("op_30531_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30531_end_mask_0 = const()[name = tensor("op_30531_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30531_cast_fp16 = slice_by_index(begin = var_30531_begin_0, end = var_30531_end_0, end_mask = var_30531_end_mask_0, x = var_30422_cast_fp16)[name = tensor("op_30531_cast_fp16")]; tensor var_30538_begin_0 = const()[name = tensor("op_30538_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30538_end_0 = const()[name = tensor("op_30538_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30538_end_mask_0 = const()[name = tensor("op_30538_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30538_cast_fp16 = slice_by_index(begin = var_30538_begin_0, end = var_30538_end_0, end_mask = var_30538_end_mask_0, x = var_30422_cast_fp16)[name = tensor("op_30538_cast_fp16")]; tensor var_30545_begin_0 = const()[name = tensor("op_30545_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30545_end_0 = const()[name = tensor("op_30545_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30545_end_mask_0 = const()[name = tensor("op_30545_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30545_cast_fp16 = slice_by_index(begin = var_30545_begin_0, end = var_30545_end_0, end_mask = var_30545_end_mask_0, x = var_30422_cast_fp16)[name = tensor("op_30545_cast_fp16")]; tensor var_30552_begin_0 = const()[name = tensor("op_30552_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30552_end_0 = const()[name = tensor("op_30552_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30552_end_mask_0 = const()[name = tensor("op_30552_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30552_cast_fp16 = slice_by_index(begin = var_30552_begin_0, end = var_30552_end_0, end_mask = var_30552_end_mask_0, x = var_30422_cast_fp16)[name = tensor("op_30552_cast_fp16")]; tensor var_30559_begin_0 = const()[name = tensor("op_30559_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30559_end_0 = const()[name = tensor("op_30559_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30559_end_mask_0 = const()[name = tensor("op_30559_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30559_cast_fp16 = slice_by_index(begin = var_30559_begin_0, end = var_30559_end_0, end_mask = var_30559_end_mask_0, x = var_30426_cast_fp16)[name = tensor("op_30559_cast_fp16")]; tensor var_30566_begin_0 = const()[name = tensor("op_30566_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30566_end_0 = const()[name = tensor("op_30566_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30566_end_mask_0 = const()[name = tensor("op_30566_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30566_cast_fp16 = slice_by_index(begin = var_30566_begin_0, end = var_30566_end_0, end_mask = var_30566_end_mask_0, x = var_30426_cast_fp16)[name = tensor("op_30566_cast_fp16")]; tensor var_30573_begin_0 = const()[name = tensor("op_30573_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30573_end_0 = const()[name = tensor("op_30573_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30573_end_mask_0 = const()[name = tensor("op_30573_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30573_cast_fp16 = slice_by_index(begin = var_30573_begin_0, end = var_30573_end_0, end_mask = var_30573_end_mask_0, x = var_30426_cast_fp16)[name = tensor("op_30573_cast_fp16")]; tensor var_30580_begin_0 = const()[name = tensor("op_30580_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30580_end_0 = const()[name = tensor("op_30580_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30580_end_mask_0 = const()[name = tensor("op_30580_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30580_cast_fp16 = slice_by_index(begin = var_30580_begin_0, end = var_30580_end_0, end_mask = var_30580_end_mask_0, x = var_30426_cast_fp16)[name = tensor("op_30580_cast_fp16")]; tensor var_30587_begin_0 = const()[name = tensor("op_30587_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30587_end_0 = const()[name = tensor("op_30587_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30587_end_mask_0 = const()[name = tensor("op_30587_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30587_cast_fp16 = slice_by_index(begin = var_30587_begin_0, end = var_30587_end_0, end_mask = var_30587_end_mask_0, x = var_30430_cast_fp16)[name = tensor("op_30587_cast_fp16")]; tensor var_30594_begin_0 = const()[name = tensor("op_30594_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30594_end_0 = const()[name = tensor("op_30594_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30594_end_mask_0 = const()[name = tensor("op_30594_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30594_cast_fp16 = slice_by_index(begin = var_30594_begin_0, end = var_30594_end_0, end_mask = var_30594_end_mask_0, x = var_30430_cast_fp16)[name = tensor("op_30594_cast_fp16")]; tensor var_30601_begin_0 = const()[name = tensor("op_30601_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30601_end_0 = const()[name = tensor("op_30601_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30601_end_mask_0 = const()[name = tensor("op_30601_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30601_cast_fp16 = slice_by_index(begin = var_30601_begin_0, end = var_30601_end_0, end_mask = var_30601_end_mask_0, x = var_30430_cast_fp16)[name = tensor("op_30601_cast_fp16")]; tensor var_30608_begin_0 = const()[name = tensor("op_30608_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30608_end_0 = const()[name = tensor("op_30608_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30608_end_mask_0 = const()[name = tensor("op_30608_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30608_cast_fp16 = slice_by_index(begin = var_30608_begin_0, end = var_30608_end_0, end_mask = var_30608_end_mask_0, x = var_30430_cast_fp16)[name = tensor("op_30608_cast_fp16")]; tensor var_30615_begin_0 = const()[name = tensor("op_30615_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30615_end_0 = const()[name = tensor("op_30615_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30615_end_mask_0 = const()[name = tensor("op_30615_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30615_cast_fp16 = slice_by_index(begin = var_30615_begin_0, end = var_30615_end_0, end_mask = var_30615_end_mask_0, x = var_30434_cast_fp16)[name = tensor("op_30615_cast_fp16")]; tensor var_30622_begin_0 = const()[name = tensor("op_30622_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30622_end_0 = const()[name = tensor("op_30622_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30622_end_mask_0 = const()[name = tensor("op_30622_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30622_cast_fp16 = slice_by_index(begin = var_30622_begin_0, end = var_30622_end_0, end_mask = var_30622_end_mask_0, x = var_30434_cast_fp16)[name = tensor("op_30622_cast_fp16")]; tensor var_30629_begin_0 = const()[name = tensor("op_30629_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30629_end_0 = const()[name = tensor("op_30629_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30629_end_mask_0 = const()[name = tensor("op_30629_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30629_cast_fp16 = slice_by_index(begin = var_30629_begin_0, end = var_30629_end_0, end_mask = var_30629_end_mask_0, x = var_30434_cast_fp16)[name = tensor("op_30629_cast_fp16")]; tensor var_30636_begin_0 = const()[name = tensor("op_30636_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30636_end_0 = const()[name = tensor("op_30636_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30636_end_mask_0 = const()[name = tensor("op_30636_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30636_cast_fp16 = slice_by_index(begin = var_30636_begin_0, end = var_30636_end_0, end_mask = var_30636_end_mask_0, x = var_30434_cast_fp16)[name = tensor("op_30636_cast_fp16")]; tensor var_30643_begin_0 = const()[name = tensor("op_30643_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30643_end_0 = const()[name = tensor("op_30643_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30643_end_mask_0 = const()[name = tensor("op_30643_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30643_cast_fp16 = slice_by_index(begin = var_30643_begin_0, end = var_30643_end_0, end_mask = var_30643_end_mask_0, x = var_30438_cast_fp16)[name = tensor("op_30643_cast_fp16")]; tensor var_30650_begin_0 = const()[name = tensor("op_30650_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30650_end_0 = const()[name = tensor("op_30650_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30650_end_mask_0 = const()[name = tensor("op_30650_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30650_cast_fp16 = slice_by_index(begin = var_30650_begin_0, end = var_30650_end_0, end_mask = var_30650_end_mask_0, x = var_30438_cast_fp16)[name = tensor("op_30650_cast_fp16")]; tensor var_30657_begin_0 = const()[name = tensor("op_30657_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30657_end_0 = const()[name = tensor("op_30657_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30657_end_mask_0 = const()[name = tensor("op_30657_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30657_cast_fp16 = slice_by_index(begin = var_30657_begin_0, end = var_30657_end_0, end_mask = var_30657_end_mask_0, x = var_30438_cast_fp16)[name = tensor("op_30657_cast_fp16")]; tensor var_30664_begin_0 = const()[name = tensor("op_30664_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30664_end_0 = const()[name = tensor("op_30664_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30664_end_mask_0 = const()[name = tensor("op_30664_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30664_cast_fp16 = slice_by_index(begin = var_30664_begin_0, end = var_30664_end_0, end_mask = var_30664_end_mask_0, x = var_30438_cast_fp16)[name = tensor("op_30664_cast_fp16")]; tensor var_30671_begin_0 = const()[name = tensor("op_30671_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30671_end_0 = const()[name = tensor("op_30671_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30671_end_mask_0 = const()[name = tensor("op_30671_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30671_cast_fp16 = slice_by_index(begin = var_30671_begin_0, end = var_30671_end_0, end_mask = var_30671_end_mask_0, x = var_30442_cast_fp16)[name = tensor("op_30671_cast_fp16")]; tensor var_30678_begin_0 = const()[name = tensor("op_30678_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30678_end_0 = const()[name = tensor("op_30678_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30678_end_mask_0 = const()[name = tensor("op_30678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30678_cast_fp16 = slice_by_index(begin = var_30678_begin_0, end = var_30678_end_0, end_mask = var_30678_end_mask_0, x = var_30442_cast_fp16)[name = tensor("op_30678_cast_fp16")]; tensor var_30685_begin_0 = const()[name = tensor("op_30685_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30685_end_0 = const()[name = tensor("op_30685_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30685_end_mask_0 = const()[name = tensor("op_30685_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30685_cast_fp16 = slice_by_index(begin = var_30685_begin_0, end = var_30685_end_0, end_mask = var_30685_end_mask_0, x = var_30442_cast_fp16)[name = tensor("op_30685_cast_fp16")]; tensor var_30692_begin_0 = const()[name = tensor("op_30692_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30692_end_0 = const()[name = tensor("op_30692_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30692_end_mask_0 = const()[name = tensor("op_30692_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30692_cast_fp16 = slice_by_index(begin = var_30692_begin_0, end = var_30692_end_0, end_mask = var_30692_end_mask_0, x = var_30442_cast_fp16)[name = tensor("op_30692_cast_fp16")]; tensor var_30699_begin_0 = const()[name = tensor("op_30699_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30699_end_0 = const()[name = tensor("op_30699_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30699_end_mask_0 = const()[name = tensor("op_30699_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30699_cast_fp16 = slice_by_index(begin = var_30699_begin_0, end = var_30699_end_0, end_mask = var_30699_end_mask_0, x = var_30446_cast_fp16)[name = tensor("op_30699_cast_fp16")]; tensor var_30706_begin_0 = const()[name = tensor("op_30706_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30706_end_0 = const()[name = tensor("op_30706_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30706_end_mask_0 = const()[name = tensor("op_30706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30706_cast_fp16 = slice_by_index(begin = var_30706_begin_0, end = var_30706_end_0, end_mask = var_30706_end_mask_0, x = var_30446_cast_fp16)[name = tensor("op_30706_cast_fp16")]; tensor var_30713_begin_0 = const()[name = tensor("op_30713_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30713_end_0 = const()[name = tensor("op_30713_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30713_end_mask_0 = const()[name = tensor("op_30713_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30713_cast_fp16 = slice_by_index(begin = var_30713_begin_0, end = var_30713_end_0, end_mask = var_30713_end_mask_0, x = var_30446_cast_fp16)[name = tensor("op_30713_cast_fp16")]; tensor var_30720_begin_0 = const()[name = tensor("op_30720_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30720_end_0 = const()[name = tensor("op_30720_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30720_end_mask_0 = const()[name = tensor("op_30720_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30720_cast_fp16 = slice_by_index(begin = var_30720_begin_0, end = var_30720_end_0, end_mask = var_30720_end_mask_0, x = var_30446_cast_fp16)[name = tensor("op_30720_cast_fp16")]; tensor var_30727_begin_0 = const()[name = tensor("op_30727_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30727_end_0 = const()[name = tensor("op_30727_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30727_end_mask_0 = const()[name = tensor("op_30727_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30727_cast_fp16 = slice_by_index(begin = var_30727_begin_0, end = var_30727_end_0, end_mask = var_30727_end_mask_0, x = var_30450_cast_fp16)[name = tensor("op_30727_cast_fp16")]; tensor var_30734_begin_0 = const()[name = tensor("op_30734_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30734_end_0 = const()[name = tensor("op_30734_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30734_end_mask_0 = const()[name = tensor("op_30734_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30734_cast_fp16 = slice_by_index(begin = var_30734_begin_0, end = var_30734_end_0, end_mask = var_30734_end_mask_0, x = var_30450_cast_fp16)[name = tensor("op_30734_cast_fp16")]; tensor var_30741_begin_0 = const()[name = tensor("op_30741_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30741_end_0 = const()[name = tensor("op_30741_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30741_end_mask_0 = const()[name = tensor("op_30741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30741_cast_fp16 = slice_by_index(begin = var_30741_begin_0, end = var_30741_end_0, end_mask = var_30741_end_mask_0, x = var_30450_cast_fp16)[name = tensor("op_30741_cast_fp16")]; tensor var_30748_begin_0 = const()[name = tensor("op_30748_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30748_end_0 = const()[name = tensor("op_30748_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30748_end_mask_0 = const()[name = tensor("op_30748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30748_cast_fp16 = slice_by_index(begin = var_30748_begin_0, end = var_30748_end_0, end_mask = var_30748_end_mask_0, x = var_30450_cast_fp16)[name = tensor("op_30748_cast_fp16")]; tensor var_30755_begin_0 = const()[name = tensor("op_30755_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30755_end_0 = const()[name = tensor("op_30755_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30755_end_mask_0 = const()[name = tensor("op_30755_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30755_cast_fp16 = slice_by_index(begin = var_30755_begin_0, end = var_30755_end_0, end_mask = var_30755_end_mask_0, x = var_30454_cast_fp16)[name = tensor("op_30755_cast_fp16")]; tensor var_30762_begin_0 = const()[name = tensor("op_30762_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30762_end_0 = const()[name = tensor("op_30762_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30762_end_mask_0 = const()[name = tensor("op_30762_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30762_cast_fp16 = slice_by_index(begin = var_30762_begin_0, end = var_30762_end_0, end_mask = var_30762_end_mask_0, x = var_30454_cast_fp16)[name = tensor("op_30762_cast_fp16")]; tensor var_30769_begin_0 = const()[name = tensor("op_30769_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30769_end_0 = const()[name = tensor("op_30769_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30769_end_mask_0 = const()[name = tensor("op_30769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30769_cast_fp16 = slice_by_index(begin = var_30769_begin_0, end = var_30769_end_0, end_mask = var_30769_end_mask_0, x = var_30454_cast_fp16)[name = tensor("op_30769_cast_fp16")]; tensor var_30776_begin_0 = const()[name = tensor("op_30776_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30776_end_0 = const()[name = tensor("op_30776_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30776_end_mask_0 = const()[name = tensor("op_30776_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30776_cast_fp16 = slice_by_index(begin = var_30776_begin_0, end = var_30776_end_0, end_mask = var_30776_end_mask_0, x = var_30454_cast_fp16)[name = tensor("op_30776_cast_fp16")]; tensor var_30783_begin_0 = const()[name = tensor("op_30783_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30783_end_0 = const()[name = tensor("op_30783_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30783_end_mask_0 = const()[name = tensor("op_30783_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30783_cast_fp16 = slice_by_index(begin = var_30783_begin_0, end = var_30783_end_0, end_mask = var_30783_end_mask_0, x = var_30458_cast_fp16)[name = tensor("op_30783_cast_fp16")]; tensor var_30790_begin_0 = const()[name = tensor("op_30790_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30790_end_0 = const()[name = tensor("op_30790_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30790_end_mask_0 = const()[name = tensor("op_30790_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30790_cast_fp16 = slice_by_index(begin = var_30790_begin_0, end = var_30790_end_0, end_mask = var_30790_end_mask_0, x = var_30458_cast_fp16)[name = tensor("op_30790_cast_fp16")]; tensor var_30797_begin_0 = const()[name = tensor("op_30797_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30797_end_0 = const()[name = tensor("op_30797_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30797_end_mask_0 = const()[name = tensor("op_30797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30797_cast_fp16 = slice_by_index(begin = var_30797_begin_0, end = var_30797_end_0, end_mask = var_30797_end_mask_0, x = var_30458_cast_fp16)[name = tensor("op_30797_cast_fp16")]; tensor var_30804_begin_0 = const()[name = tensor("op_30804_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30804_end_0 = const()[name = tensor("op_30804_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30804_end_mask_0 = const()[name = tensor("op_30804_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30804_cast_fp16 = slice_by_index(begin = var_30804_begin_0, end = var_30804_end_0, end_mask = var_30804_end_mask_0, x = var_30458_cast_fp16)[name = tensor("op_30804_cast_fp16")]; tensor var_30811_begin_0 = const()[name = tensor("op_30811_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30811_end_0 = const()[name = tensor("op_30811_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30811_end_mask_0 = const()[name = tensor("op_30811_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30811_cast_fp16 = slice_by_index(begin = var_30811_begin_0, end = var_30811_end_0, end_mask = var_30811_end_mask_0, x = var_30462_cast_fp16)[name = tensor("op_30811_cast_fp16")]; tensor var_30818_begin_0 = const()[name = tensor("op_30818_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30818_end_0 = const()[name = tensor("op_30818_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30818_end_mask_0 = const()[name = tensor("op_30818_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30818_cast_fp16 = slice_by_index(begin = var_30818_begin_0, end = var_30818_end_0, end_mask = var_30818_end_mask_0, x = var_30462_cast_fp16)[name = tensor("op_30818_cast_fp16")]; tensor var_30825_begin_0 = const()[name = tensor("op_30825_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30825_end_0 = const()[name = tensor("op_30825_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30825_end_mask_0 = const()[name = tensor("op_30825_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30825_cast_fp16 = slice_by_index(begin = var_30825_begin_0, end = var_30825_end_0, end_mask = var_30825_end_mask_0, x = var_30462_cast_fp16)[name = tensor("op_30825_cast_fp16")]; tensor var_30832_begin_0 = const()[name = tensor("op_30832_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30832_end_0 = const()[name = tensor("op_30832_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30832_end_mask_0 = const()[name = tensor("op_30832_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30832_cast_fp16 = slice_by_index(begin = var_30832_begin_0, end = var_30832_end_0, end_mask = var_30832_end_mask_0, x = var_30462_cast_fp16)[name = tensor("op_30832_cast_fp16")]; tensor var_30839_begin_0 = const()[name = tensor("op_30839_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30839_end_0 = const()[name = tensor("op_30839_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30839_end_mask_0 = const()[name = tensor("op_30839_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30839_cast_fp16 = slice_by_index(begin = var_30839_begin_0, end = var_30839_end_0, end_mask = var_30839_end_mask_0, x = var_30466_cast_fp16)[name = tensor("op_30839_cast_fp16")]; tensor var_30846_begin_0 = const()[name = tensor("op_30846_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30846_end_0 = const()[name = tensor("op_30846_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30846_end_mask_0 = const()[name = tensor("op_30846_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30846_cast_fp16 = slice_by_index(begin = var_30846_begin_0, end = var_30846_end_0, end_mask = var_30846_end_mask_0, x = var_30466_cast_fp16)[name = tensor("op_30846_cast_fp16")]; tensor var_30853_begin_0 = const()[name = tensor("op_30853_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30853_end_0 = const()[name = tensor("op_30853_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30853_end_mask_0 = const()[name = tensor("op_30853_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30853_cast_fp16 = slice_by_index(begin = var_30853_begin_0, end = var_30853_end_0, end_mask = var_30853_end_mask_0, x = var_30466_cast_fp16)[name = tensor("op_30853_cast_fp16")]; tensor var_30860_begin_0 = const()[name = tensor("op_30860_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30860_end_0 = const()[name = tensor("op_30860_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30860_end_mask_0 = const()[name = tensor("op_30860_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30860_cast_fp16 = slice_by_index(begin = var_30860_begin_0, end = var_30860_end_0, end_mask = var_30860_end_mask_0, x = var_30466_cast_fp16)[name = tensor("op_30860_cast_fp16")]; tensor var_30867_begin_0 = const()[name = tensor("op_30867_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30867_end_0 = const()[name = tensor("op_30867_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30867_end_mask_0 = const()[name = tensor("op_30867_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30867_cast_fp16 = slice_by_index(begin = var_30867_begin_0, end = var_30867_end_0, end_mask = var_30867_end_mask_0, x = var_30470_cast_fp16)[name = tensor("op_30867_cast_fp16")]; tensor var_30874_begin_0 = const()[name = tensor("op_30874_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30874_end_0 = const()[name = tensor("op_30874_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30874_end_mask_0 = const()[name = tensor("op_30874_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30874_cast_fp16 = slice_by_index(begin = var_30874_begin_0, end = var_30874_end_0, end_mask = var_30874_end_mask_0, x = var_30470_cast_fp16)[name = tensor("op_30874_cast_fp16")]; tensor var_30881_begin_0 = const()[name = tensor("op_30881_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30881_end_0 = const()[name = tensor("op_30881_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30881_end_mask_0 = const()[name = tensor("op_30881_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30881_cast_fp16 = slice_by_index(begin = var_30881_begin_0, end = var_30881_end_0, end_mask = var_30881_end_mask_0, x = var_30470_cast_fp16)[name = tensor("op_30881_cast_fp16")]; tensor var_30888_begin_0 = const()[name = tensor("op_30888_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30888_end_0 = const()[name = tensor("op_30888_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30888_end_mask_0 = const()[name = tensor("op_30888_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30888_cast_fp16 = slice_by_index(begin = var_30888_begin_0, end = var_30888_end_0, end_mask = var_30888_end_mask_0, x = var_30470_cast_fp16)[name = tensor("op_30888_cast_fp16")]; tensor var_30895_begin_0 = const()[name = tensor("op_30895_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30895_end_0 = const()[name = tensor("op_30895_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30895_end_mask_0 = const()[name = tensor("op_30895_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30895_cast_fp16 = slice_by_index(begin = var_30895_begin_0, end = var_30895_end_0, end_mask = var_30895_end_mask_0, x = var_30474_cast_fp16)[name = tensor("op_30895_cast_fp16")]; tensor var_30902_begin_0 = const()[name = tensor("op_30902_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30902_end_0 = const()[name = tensor("op_30902_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30902_end_mask_0 = const()[name = tensor("op_30902_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30902_cast_fp16 = slice_by_index(begin = var_30902_begin_0, end = var_30902_end_0, end_mask = var_30902_end_mask_0, x = var_30474_cast_fp16)[name = tensor("op_30902_cast_fp16")]; tensor var_30909_begin_0 = const()[name = tensor("op_30909_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30909_end_0 = const()[name = tensor("op_30909_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30909_end_mask_0 = const()[name = tensor("op_30909_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30909_cast_fp16 = slice_by_index(begin = var_30909_begin_0, end = var_30909_end_0, end_mask = var_30909_end_mask_0, x = var_30474_cast_fp16)[name = tensor("op_30909_cast_fp16")]; tensor var_30916_begin_0 = const()[name = tensor("op_30916_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30916_end_0 = const()[name = tensor("op_30916_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30916_end_mask_0 = const()[name = tensor("op_30916_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30916_cast_fp16 = slice_by_index(begin = var_30916_begin_0, end = var_30916_end_0, end_mask = var_30916_end_mask_0, x = var_30474_cast_fp16)[name = tensor("op_30916_cast_fp16")]; tensor var_30923_begin_0 = const()[name = tensor("op_30923_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30923_end_0 = const()[name = tensor("op_30923_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30923_end_mask_0 = const()[name = tensor("op_30923_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30923_cast_fp16 = slice_by_index(begin = var_30923_begin_0, end = var_30923_end_0, end_mask = var_30923_end_mask_0, x = var_30478_cast_fp16)[name = tensor("op_30923_cast_fp16")]; tensor var_30930_begin_0 = const()[name = tensor("op_30930_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30930_end_0 = const()[name = tensor("op_30930_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30930_end_mask_0 = const()[name = tensor("op_30930_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30930_cast_fp16 = slice_by_index(begin = var_30930_begin_0, end = var_30930_end_0, end_mask = var_30930_end_mask_0, x = var_30478_cast_fp16)[name = tensor("op_30930_cast_fp16")]; tensor var_30937_begin_0 = const()[name = tensor("op_30937_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30937_end_0 = const()[name = tensor("op_30937_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30937_end_mask_0 = const()[name = tensor("op_30937_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30937_cast_fp16 = slice_by_index(begin = var_30937_begin_0, end = var_30937_end_0, end_mask = var_30937_end_mask_0, x = var_30478_cast_fp16)[name = tensor("op_30937_cast_fp16")]; tensor var_30944_begin_0 = const()[name = tensor("op_30944_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30944_end_0 = const()[name = tensor("op_30944_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30944_end_mask_0 = const()[name = tensor("op_30944_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30944_cast_fp16 = slice_by_index(begin = var_30944_begin_0, end = var_30944_end_0, end_mask = var_30944_end_mask_0, x = var_30478_cast_fp16)[name = tensor("op_30944_cast_fp16")]; tensor var_30951_begin_0 = const()[name = tensor("op_30951_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30951_end_0 = const()[name = tensor("op_30951_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30951_end_mask_0 = const()[name = tensor("op_30951_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30951_cast_fp16 = slice_by_index(begin = var_30951_begin_0, end = var_30951_end_0, end_mask = var_30951_end_mask_0, x = var_30482_cast_fp16)[name = tensor("op_30951_cast_fp16")]; tensor var_30958_begin_0 = const()[name = tensor("op_30958_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30958_end_0 = const()[name = tensor("op_30958_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30958_end_mask_0 = const()[name = tensor("op_30958_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30958_cast_fp16 = slice_by_index(begin = var_30958_begin_0, end = var_30958_end_0, end_mask = var_30958_end_mask_0, x = var_30482_cast_fp16)[name = tensor("op_30958_cast_fp16")]; tensor var_30965_begin_0 = const()[name = tensor("op_30965_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30965_end_0 = const()[name = tensor("op_30965_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30965_end_mask_0 = const()[name = tensor("op_30965_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30965_cast_fp16 = slice_by_index(begin = var_30965_begin_0, end = var_30965_end_0, end_mask = var_30965_end_mask_0, x = var_30482_cast_fp16)[name = tensor("op_30965_cast_fp16")]; tensor var_30972_begin_0 = const()[name = tensor("op_30972_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_30972_end_0 = const()[name = tensor("op_30972_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_30972_end_mask_0 = const()[name = tensor("op_30972_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30972_cast_fp16 = slice_by_index(begin = var_30972_begin_0, end = var_30972_end_0, end_mask = var_30972_end_mask_0, x = var_30482_cast_fp16)[name = tensor("op_30972_cast_fp16")]; tensor var_30979_begin_0 = const()[name = tensor("op_30979_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_30979_end_0 = const()[name = tensor("op_30979_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_30979_end_mask_0 = const()[name = tensor("op_30979_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30979_cast_fp16 = slice_by_index(begin = var_30979_begin_0, end = var_30979_end_0, end_mask = var_30979_end_mask_0, x = var_30486_cast_fp16)[name = tensor("op_30979_cast_fp16")]; tensor var_30986_begin_0 = const()[name = tensor("op_30986_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_30986_end_0 = const()[name = tensor("op_30986_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_30986_end_mask_0 = const()[name = tensor("op_30986_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30986_cast_fp16 = slice_by_index(begin = var_30986_begin_0, end = var_30986_end_0, end_mask = var_30986_end_mask_0, x = var_30486_cast_fp16)[name = tensor("op_30986_cast_fp16")]; tensor var_30993_begin_0 = const()[name = tensor("op_30993_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_30993_end_0 = const()[name = tensor("op_30993_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_30993_end_mask_0 = const()[name = tensor("op_30993_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_30993_cast_fp16 = slice_by_index(begin = var_30993_begin_0, end = var_30993_end_0, end_mask = var_30993_end_mask_0, x = var_30486_cast_fp16)[name = tensor("op_30993_cast_fp16")]; tensor var_31000_begin_0 = const()[name = tensor("op_31000_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_31000_end_0 = const()[name = tensor("op_31000_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_31000_end_mask_0 = const()[name = tensor("op_31000_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31000_cast_fp16 = slice_by_index(begin = var_31000_begin_0, end = var_31000_end_0, end_mask = var_31000_end_mask_0, x = var_30486_cast_fp16)[name = tensor("op_31000_cast_fp16")]; tensor var_31007_begin_0 = const()[name = tensor("op_31007_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_31007_end_0 = const()[name = tensor("op_31007_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_31007_end_mask_0 = const()[name = tensor("op_31007_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31007_cast_fp16 = slice_by_index(begin = var_31007_begin_0, end = var_31007_end_0, end_mask = var_31007_end_mask_0, x = var_30490_cast_fp16)[name = tensor("op_31007_cast_fp16")]; tensor var_31014_begin_0 = const()[name = tensor("op_31014_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_31014_end_0 = const()[name = tensor("op_31014_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_31014_end_mask_0 = const()[name = tensor("op_31014_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31014_cast_fp16 = slice_by_index(begin = var_31014_begin_0, end = var_31014_end_0, end_mask = var_31014_end_mask_0, x = var_30490_cast_fp16)[name = tensor("op_31014_cast_fp16")]; tensor var_31021_begin_0 = const()[name = tensor("op_31021_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_31021_end_0 = const()[name = tensor("op_31021_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_31021_end_mask_0 = const()[name = tensor("op_31021_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31021_cast_fp16 = slice_by_index(begin = var_31021_begin_0, end = var_31021_end_0, end_mask = var_31021_end_mask_0, x = var_30490_cast_fp16)[name = tensor("op_31021_cast_fp16")]; tensor var_31028_begin_0 = const()[name = tensor("op_31028_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_31028_end_0 = const()[name = tensor("op_31028_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_31028_end_mask_0 = const()[name = tensor("op_31028_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31028_cast_fp16 = slice_by_index(begin = var_31028_begin_0, end = var_31028_end_0, end_mask = var_31028_end_mask_0, x = var_30490_cast_fp16)[name = tensor("op_31028_cast_fp16")]; tensor var_31035_begin_0 = const()[name = tensor("op_31035_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_31035_end_0 = const()[name = tensor("op_31035_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_31035_end_mask_0 = const()[name = tensor("op_31035_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31035_cast_fp16 = slice_by_index(begin = var_31035_begin_0, end = var_31035_end_0, end_mask = var_31035_end_mask_0, x = var_30494_cast_fp16)[name = tensor("op_31035_cast_fp16")]; tensor var_31042_begin_0 = const()[name = tensor("op_31042_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_31042_end_0 = const()[name = tensor("op_31042_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_31042_end_mask_0 = const()[name = tensor("op_31042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31042_cast_fp16 = slice_by_index(begin = var_31042_begin_0, end = var_31042_end_0, end_mask = var_31042_end_mask_0, x = var_30494_cast_fp16)[name = tensor("op_31042_cast_fp16")]; tensor var_31049_begin_0 = const()[name = tensor("op_31049_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_31049_end_0 = const()[name = tensor("op_31049_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_31049_end_mask_0 = const()[name = tensor("op_31049_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31049_cast_fp16 = slice_by_index(begin = var_31049_begin_0, end = var_31049_end_0, end_mask = var_31049_end_mask_0, x = var_30494_cast_fp16)[name = tensor("op_31049_cast_fp16")]; tensor var_31056_begin_0 = const()[name = tensor("op_31056_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_31056_end_0 = const()[name = tensor("op_31056_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_31056_end_mask_0 = const()[name = tensor("op_31056_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31056_cast_fp16 = slice_by_index(begin = var_31056_begin_0, end = var_31056_end_0, end_mask = var_31056_end_mask_0, x = var_30494_cast_fp16)[name = tensor("op_31056_cast_fp16")]; tensor k_39_perm_0 = const()[name = tensor("k_39_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_31061_begin_0 = const()[name = tensor("op_31061_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_31061_end_0 = const()[name = tensor("op_31061_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_31061_end_mask_0 = const()[name = tensor("op_31061_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_39_cast_fp16 = transpose(perm = k_39_perm_0, x = key_39_cast_fp16)[name = tensor("transpose_12")]; tensor var_31061_cast_fp16 = slice_by_index(begin = var_31061_begin_0, end = var_31061_end_0, end_mask = var_31061_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31061_cast_fp16")]; tensor var_31065_begin_0 = const()[name = tensor("op_31065_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_31065_end_0 = const()[name = tensor("op_31065_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_31065_end_mask_0 = const()[name = tensor("op_31065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31065_cast_fp16 = slice_by_index(begin = var_31065_begin_0, end = var_31065_end_0, end_mask = var_31065_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31065_cast_fp16")]; tensor var_31069_begin_0 = const()[name = tensor("op_31069_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_31069_end_0 = const()[name = tensor("op_31069_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_31069_end_mask_0 = const()[name = tensor("op_31069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31069_cast_fp16 = slice_by_index(begin = var_31069_begin_0, end = var_31069_end_0, end_mask = var_31069_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31069_cast_fp16")]; tensor var_31073_begin_0 = const()[name = tensor("op_31073_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_31073_end_0 = const()[name = tensor("op_31073_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_31073_end_mask_0 = const()[name = tensor("op_31073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31073_cast_fp16 = slice_by_index(begin = var_31073_begin_0, end = var_31073_end_0, end_mask = var_31073_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31073_cast_fp16")]; tensor var_31077_begin_0 = const()[name = tensor("op_31077_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_31077_end_0 = const()[name = tensor("op_31077_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_31077_end_mask_0 = const()[name = tensor("op_31077_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31077_cast_fp16 = slice_by_index(begin = var_31077_begin_0, end = var_31077_end_0, end_mask = var_31077_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31077_cast_fp16")]; tensor var_31081_begin_0 = const()[name = tensor("op_31081_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_31081_end_0 = const()[name = tensor("op_31081_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_31081_end_mask_0 = const()[name = tensor("op_31081_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31081_cast_fp16 = slice_by_index(begin = var_31081_begin_0, end = var_31081_end_0, end_mask = var_31081_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31081_cast_fp16")]; tensor var_31085_begin_0 = const()[name = tensor("op_31085_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_31085_end_0 = const()[name = tensor("op_31085_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_31085_end_mask_0 = const()[name = tensor("op_31085_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31085_cast_fp16 = slice_by_index(begin = var_31085_begin_0, end = var_31085_end_0, end_mask = var_31085_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31085_cast_fp16")]; tensor var_31089_begin_0 = const()[name = tensor("op_31089_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_31089_end_0 = const()[name = tensor("op_31089_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_31089_end_mask_0 = const()[name = tensor("op_31089_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31089_cast_fp16 = slice_by_index(begin = var_31089_begin_0, end = var_31089_end_0, end_mask = var_31089_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31089_cast_fp16")]; tensor var_31093_begin_0 = const()[name = tensor("op_31093_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_31093_end_0 = const()[name = tensor("op_31093_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_31093_end_mask_0 = const()[name = tensor("op_31093_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31093_cast_fp16 = slice_by_index(begin = var_31093_begin_0, end = var_31093_end_0, end_mask = var_31093_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31093_cast_fp16")]; tensor var_31097_begin_0 = const()[name = tensor("op_31097_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_31097_end_0 = const()[name = tensor("op_31097_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_31097_end_mask_0 = const()[name = tensor("op_31097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31097_cast_fp16 = slice_by_index(begin = var_31097_begin_0, end = var_31097_end_0, end_mask = var_31097_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31097_cast_fp16")]; tensor var_31101_begin_0 = const()[name = tensor("op_31101_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_31101_end_0 = const()[name = tensor("op_31101_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_31101_end_mask_0 = const()[name = tensor("op_31101_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31101_cast_fp16 = slice_by_index(begin = var_31101_begin_0, end = var_31101_end_0, end_mask = var_31101_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31101_cast_fp16")]; tensor var_31105_begin_0 = const()[name = tensor("op_31105_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_31105_end_0 = const()[name = tensor("op_31105_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_31105_end_mask_0 = const()[name = tensor("op_31105_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31105_cast_fp16 = slice_by_index(begin = var_31105_begin_0, end = var_31105_end_0, end_mask = var_31105_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31105_cast_fp16")]; tensor var_31109_begin_0 = const()[name = tensor("op_31109_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_31109_end_0 = const()[name = tensor("op_31109_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_31109_end_mask_0 = const()[name = tensor("op_31109_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31109_cast_fp16 = slice_by_index(begin = var_31109_begin_0, end = var_31109_end_0, end_mask = var_31109_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31109_cast_fp16")]; tensor var_31113_begin_0 = const()[name = tensor("op_31113_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_31113_end_0 = const()[name = tensor("op_31113_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_31113_end_mask_0 = const()[name = tensor("op_31113_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31113_cast_fp16 = slice_by_index(begin = var_31113_begin_0, end = var_31113_end_0, end_mask = var_31113_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31113_cast_fp16")]; tensor var_31117_begin_0 = const()[name = tensor("op_31117_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_31117_end_0 = const()[name = tensor("op_31117_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_31117_end_mask_0 = const()[name = tensor("op_31117_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31117_cast_fp16 = slice_by_index(begin = var_31117_begin_0, end = var_31117_end_0, end_mask = var_31117_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31117_cast_fp16")]; tensor var_31121_begin_0 = const()[name = tensor("op_31121_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_31121_end_0 = const()[name = tensor("op_31121_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_31121_end_mask_0 = const()[name = tensor("op_31121_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31121_cast_fp16 = slice_by_index(begin = var_31121_begin_0, end = var_31121_end_0, end_mask = var_31121_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31121_cast_fp16")]; tensor var_31125_begin_0 = const()[name = tensor("op_31125_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_31125_end_0 = const()[name = tensor("op_31125_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_31125_end_mask_0 = const()[name = tensor("op_31125_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31125_cast_fp16 = slice_by_index(begin = var_31125_begin_0, end = var_31125_end_0, end_mask = var_31125_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31125_cast_fp16")]; tensor var_31129_begin_0 = const()[name = tensor("op_31129_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_31129_end_0 = const()[name = tensor("op_31129_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_31129_end_mask_0 = const()[name = tensor("op_31129_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31129_cast_fp16 = slice_by_index(begin = var_31129_begin_0, end = var_31129_end_0, end_mask = var_31129_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31129_cast_fp16")]; tensor var_31133_begin_0 = const()[name = tensor("op_31133_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_31133_end_0 = const()[name = tensor("op_31133_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_31133_end_mask_0 = const()[name = tensor("op_31133_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31133_cast_fp16 = slice_by_index(begin = var_31133_begin_0, end = var_31133_end_0, end_mask = var_31133_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31133_cast_fp16")]; tensor var_31137_begin_0 = const()[name = tensor("op_31137_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_31137_end_0 = const()[name = tensor("op_31137_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_31137_end_mask_0 = const()[name = tensor("op_31137_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_31137_cast_fp16 = slice_by_index(begin = var_31137_begin_0, end = var_31137_end_0, end_mask = var_31137_end_mask_0, x = k_39_cast_fp16)[name = tensor("op_31137_cast_fp16")]; tensor var_31139_begin_0 = const()[name = tensor("op_31139_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_31139_end_0 = const()[name = tensor("op_31139_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_31139_end_mask_0 = const()[name = tensor("op_31139_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31139_cast_fp16 = slice_by_index(begin = var_31139_begin_0, end = var_31139_end_0, end_mask = var_31139_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31139_cast_fp16")]; tensor var_31143_begin_0 = const()[name = tensor("op_31143_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_31143_end_0 = const()[name = tensor("op_31143_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_31143_end_mask_0 = const()[name = tensor("op_31143_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31143_cast_fp16 = slice_by_index(begin = var_31143_begin_0, end = var_31143_end_0, end_mask = var_31143_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31143_cast_fp16")]; tensor var_31147_begin_0 = const()[name = tensor("op_31147_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_31147_end_0 = const()[name = tensor("op_31147_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_31147_end_mask_0 = const()[name = tensor("op_31147_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31147_cast_fp16 = slice_by_index(begin = var_31147_begin_0, end = var_31147_end_0, end_mask = var_31147_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31147_cast_fp16")]; tensor var_31151_begin_0 = const()[name = tensor("op_31151_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_31151_end_0 = const()[name = tensor("op_31151_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_31151_end_mask_0 = const()[name = tensor("op_31151_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31151_cast_fp16 = slice_by_index(begin = var_31151_begin_0, end = var_31151_end_0, end_mask = var_31151_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31151_cast_fp16")]; tensor var_31155_begin_0 = const()[name = tensor("op_31155_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_31155_end_0 = const()[name = tensor("op_31155_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_31155_end_mask_0 = const()[name = tensor("op_31155_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31155_cast_fp16 = slice_by_index(begin = var_31155_begin_0, end = var_31155_end_0, end_mask = var_31155_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31155_cast_fp16")]; tensor var_31159_begin_0 = const()[name = tensor("op_31159_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_31159_end_0 = const()[name = tensor("op_31159_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_31159_end_mask_0 = const()[name = tensor("op_31159_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31159_cast_fp16 = slice_by_index(begin = var_31159_begin_0, end = var_31159_end_0, end_mask = var_31159_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31159_cast_fp16")]; tensor var_31163_begin_0 = const()[name = tensor("op_31163_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_31163_end_0 = const()[name = tensor("op_31163_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_31163_end_mask_0 = const()[name = tensor("op_31163_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31163_cast_fp16 = slice_by_index(begin = var_31163_begin_0, end = var_31163_end_0, end_mask = var_31163_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31163_cast_fp16")]; tensor var_31167_begin_0 = const()[name = tensor("op_31167_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_31167_end_0 = const()[name = tensor("op_31167_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_31167_end_mask_0 = const()[name = tensor("op_31167_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31167_cast_fp16 = slice_by_index(begin = var_31167_begin_0, end = var_31167_end_0, end_mask = var_31167_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31167_cast_fp16")]; tensor var_31171_begin_0 = const()[name = tensor("op_31171_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_31171_end_0 = const()[name = tensor("op_31171_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_31171_end_mask_0 = const()[name = tensor("op_31171_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31171_cast_fp16 = slice_by_index(begin = var_31171_begin_0, end = var_31171_end_0, end_mask = var_31171_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31171_cast_fp16")]; tensor var_31175_begin_0 = const()[name = tensor("op_31175_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_31175_end_0 = const()[name = tensor("op_31175_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_31175_end_mask_0 = const()[name = tensor("op_31175_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31175_cast_fp16 = slice_by_index(begin = var_31175_begin_0, end = var_31175_end_0, end_mask = var_31175_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31175_cast_fp16")]; tensor var_31179_begin_0 = const()[name = tensor("op_31179_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_31179_end_0 = const()[name = tensor("op_31179_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_31179_end_mask_0 = const()[name = tensor("op_31179_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31179_cast_fp16 = slice_by_index(begin = var_31179_begin_0, end = var_31179_end_0, end_mask = var_31179_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31179_cast_fp16")]; tensor var_31183_begin_0 = const()[name = tensor("op_31183_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_31183_end_0 = const()[name = tensor("op_31183_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_31183_end_mask_0 = const()[name = tensor("op_31183_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31183_cast_fp16 = slice_by_index(begin = var_31183_begin_0, end = var_31183_end_0, end_mask = var_31183_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31183_cast_fp16")]; tensor var_31187_begin_0 = const()[name = tensor("op_31187_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_31187_end_0 = const()[name = tensor("op_31187_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_31187_end_mask_0 = const()[name = tensor("op_31187_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31187_cast_fp16 = slice_by_index(begin = var_31187_begin_0, end = var_31187_end_0, end_mask = var_31187_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31187_cast_fp16")]; tensor var_31191_begin_0 = const()[name = tensor("op_31191_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_31191_end_0 = const()[name = tensor("op_31191_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_31191_end_mask_0 = const()[name = tensor("op_31191_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31191_cast_fp16 = slice_by_index(begin = var_31191_begin_0, end = var_31191_end_0, end_mask = var_31191_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31191_cast_fp16")]; tensor var_31195_begin_0 = const()[name = tensor("op_31195_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_31195_end_0 = const()[name = tensor("op_31195_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_31195_end_mask_0 = const()[name = tensor("op_31195_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31195_cast_fp16 = slice_by_index(begin = var_31195_begin_0, end = var_31195_end_0, end_mask = var_31195_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31195_cast_fp16")]; tensor var_31199_begin_0 = const()[name = tensor("op_31199_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_31199_end_0 = const()[name = tensor("op_31199_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_31199_end_mask_0 = const()[name = tensor("op_31199_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31199_cast_fp16 = slice_by_index(begin = var_31199_begin_0, end = var_31199_end_0, end_mask = var_31199_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31199_cast_fp16")]; tensor var_31203_begin_0 = const()[name = tensor("op_31203_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_31203_end_0 = const()[name = tensor("op_31203_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_31203_end_mask_0 = const()[name = tensor("op_31203_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31203_cast_fp16 = slice_by_index(begin = var_31203_begin_0, end = var_31203_end_0, end_mask = var_31203_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31203_cast_fp16")]; tensor var_31207_begin_0 = const()[name = tensor("op_31207_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_31207_end_0 = const()[name = tensor("op_31207_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_31207_end_mask_0 = const()[name = tensor("op_31207_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31207_cast_fp16 = slice_by_index(begin = var_31207_begin_0, end = var_31207_end_0, end_mask = var_31207_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31207_cast_fp16")]; tensor var_31211_begin_0 = const()[name = tensor("op_31211_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_31211_end_0 = const()[name = tensor("op_31211_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_31211_end_mask_0 = const()[name = tensor("op_31211_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31211_cast_fp16 = slice_by_index(begin = var_31211_begin_0, end = var_31211_end_0, end_mask = var_31211_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31211_cast_fp16")]; tensor var_31215_begin_0 = const()[name = tensor("op_31215_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_31215_end_0 = const()[name = tensor("op_31215_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_31215_end_mask_0 = const()[name = tensor("op_31215_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31215_cast_fp16 = slice_by_index(begin = var_31215_begin_0, end = var_31215_end_0, end_mask = var_31215_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_31215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3041_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3041_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3041_equation_0, values = (var_31061_cast_fp16, var_30503_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3041_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3043_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3043_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3043_equation_0, values = (var_31061_cast_fp16, var_30510_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3043_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3045_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3045_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3045_equation_0, values = (var_31061_cast_fp16, var_30517_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3045_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3047_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3047_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3047_equation_0, values = (var_31061_cast_fp16, var_30524_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3047_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3049_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3049_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3049_equation_0, values = (var_31065_cast_fp16, var_30531_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3049_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3051_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3051_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3051_equation_0, values = (var_31065_cast_fp16, var_30538_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3051_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3053_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3053_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3053_equation_0, values = (var_31065_cast_fp16, var_30545_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3053_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3055_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3055_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3055_equation_0, values = (var_31065_cast_fp16, var_30552_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3055_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3057_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3057_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3057_equation_0, values = (var_31069_cast_fp16, var_30559_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3057_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3059_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3059_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3059_equation_0, values = (var_31069_cast_fp16, var_30566_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3059_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3061_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3061_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3061_equation_0, values = (var_31069_cast_fp16, var_30573_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3061_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3063_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3063_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3063_equation_0, values = (var_31069_cast_fp16, var_30580_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3063_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3065_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3065_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3065_equation_0, values = (var_31073_cast_fp16, var_30587_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3065_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3067_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3067_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3067_equation_0, values = (var_31073_cast_fp16, var_30594_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3067_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3069_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3069_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3069_equation_0, values = (var_31073_cast_fp16, var_30601_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3069_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3071_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3071_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3071_equation_0, values = (var_31073_cast_fp16, var_30608_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3071_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3073_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3073_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3073_equation_0, values = (var_31077_cast_fp16, var_30615_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3073_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3075_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3075_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3075_equation_0, values = (var_31077_cast_fp16, var_30622_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3075_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3077_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3077_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3077_equation_0, values = (var_31077_cast_fp16, var_30629_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3077_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3079_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3079_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3079_equation_0, values = (var_31077_cast_fp16, var_30636_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3079_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3081_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3081_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3081_equation_0, values = (var_31081_cast_fp16, var_30643_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3081_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3083_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3083_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3083_equation_0, values = (var_31081_cast_fp16, var_30650_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3083_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3085_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3085_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3085_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3085_equation_0, values = (var_31081_cast_fp16, var_30657_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3085_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3087_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3087_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3087_equation_0, values = (var_31081_cast_fp16, var_30664_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3087_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3089_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3089_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3089_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3089_equation_0, values = (var_31085_cast_fp16, var_30671_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3089_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3091_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3091_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3091_equation_0, values = (var_31085_cast_fp16, var_30678_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3091_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3093_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3093_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3093_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3093_equation_0, values = (var_31085_cast_fp16, var_30685_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3093_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3095_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3095_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3095_equation_0, values = (var_31085_cast_fp16, var_30692_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3095_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3097_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3097_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3097_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3097_equation_0, values = (var_31089_cast_fp16, var_30699_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3097_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3099_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3099_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3099_equation_0, values = (var_31089_cast_fp16, var_30706_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3099_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3101_equation_0, values = (var_31089_cast_fp16, var_30713_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3103_equation_0, values = (var_31089_cast_fp16, var_30720_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3105_equation_0, values = (var_31093_cast_fp16, var_30727_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3107_equation_0, values = (var_31093_cast_fp16, var_30734_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3109_equation_0, values = (var_31093_cast_fp16, var_30741_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3111_equation_0, values = (var_31093_cast_fp16, var_30748_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3113_equation_0, values = (var_31097_cast_fp16, var_30755_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3115_equation_0, values = (var_31097_cast_fp16, var_30762_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3117_equation_0, values = (var_31097_cast_fp16, var_30769_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3119_equation_0, values = (var_31097_cast_fp16, var_30776_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3121_equation_0, values = (var_31101_cast_fp16, var_30783_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3123_equation_0, values = (var_31101_cast_fp16, var_30790_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3125_equation_0, values = (var_31101_cast_fp16, var_30797_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3127_equation_0, values = (var_31101_cast_fp16, var_30804_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3129_equation_0, values = (var_31105_cast_fp16, var_30811_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3131_equation_0, values = (var_31105_cast_fp16, var_30818_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3133_equation_0, values = (var_31105_cast_fp16, var_30825_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3135_equation_0, values = (var_31105_cast_fp16, var_30832_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3137_equation_0, values = (var_31109_cast_fp16, var_30839_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3139_equation_0, values = (var_31109_cast_fp16, var_30846_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3141_equation_0, values = (var_31109_cast_fp16, var_30853_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3143_equation_0, values = (var_31109_cast_fp16, var_30860_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3143_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3145_equation_0, values = (var_31113_cast_fp16, var_30867_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3147_equation_0, values = (var_31113_cast_fp16, var_30874_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3149_equation_0, values = (var_31113_cast_fp16, var_30881_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3151_equation_0, values = (var_31113_cast_fp16, var_30888_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3153_equation_0, values = (var_31117_cast_fp16, var_30895_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3155_equation_0, values = (var_31117_cast_fp16, var_30902_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3157_equation_0, values = (var_31117_cast_fp16, var_30909_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3159_equation_0, values = (var_31117_cast_fp16, var_30916_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3159_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3161_equation_0, values = (var_31121_cast_fp16, var_30923_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3163_equation_0, values = (var_31121_cast_fp16, var_30930_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3165_equation_0, values = (var_31121_cast_fp16, var_30937_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3167_equation_0, values = (var_31121_cast_fp16, var_30944_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3169_equation_0, values = (var_31125_cast_fp16, var_30951_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3171_equation_0, values = (var_31125_cast_fp16, var_30958_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3173_equation_0, values = (var_31125_cast_fp16, var_30965_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3175_equation_0, values = (var_31125_cast_fp16, var_30972_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3177_equation_0, values = (var_31129_cast_fp16, var_30979_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3179_equation_0, values = (var_31129_cast_fp16, var_30986_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3181_equation_0, values = (var_31129_cast_fp16, var_30993_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3183_equation_0, values = (var_31129_cast_fp16, var_31000_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3185_equation_0, values = (var_31133_cast_fp16, var_31007_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3187_equation_0, values = (var_31133_cast_fp16, var_31014_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3189_equation_0, values = (var_31133_cast_fp16, var_31021_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3191_equation_0, values = (var_31133_cast_fp16, var_31028_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3193_equation_0, values = (var_31137_cast_fp16, var_31035_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3195_equation_0, values = (var_31137_cast_fp16, var_31042_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3197_equation_0, values = (var_31137_cast_fp16, var_31049_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3199_equation_0, values = (var_31137_cast_fp16, var_31056_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3199_cast_fp16")]; tensor var_31378_to_fp16 = const()[name = tensor("op_31378_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3041_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3041_cast_fp16, y = var_31378_to_fp16)[name = tensor("aw_chunk_3041_cast_fp16")]; tensor var_31380_to_fp16 = const()[name = tensor("op_31380_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3043_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3043_cast_fp16, y = var_31380_to_fp16)[name = tensor("aw_chunk_3043_cast_fp16")]; tensor var_31382_to_fp16 = const()[name = tensor("op_31382_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3045_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3045_cast_fp16, y = var_31382_to_fp16)[name = tensor("aw_chunk_3045_cast_fp16")]; tensor var_31384_to_fp16 = const()[name = tensor("op_31384_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3047_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3047_cast_fp16, y = var_31384_to_fp16)[name = tensor("aw_chunk_3047_cast_fp16")]; tensor var_31386_to_fp16 = const()[name = tensor("op_31386_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3049_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3049_cast_fp16, y = var_31386_to_fp16)[name = tensor("aw_chunk_3049_cast_fp16")]; tensor var_31388_to_fp16 = const()[name = tensor("op_31388_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3051_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3051_cast_fp16, y = var_31388_to_fp16)[name = tensor("aw_chunk_3051_cast_fp16")]; tensor var_31390_to_fp16 = const()[name = tensor("op_31390_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3053_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3053_cast_fp16, y = var_31390_to_fp16)[name = tensor("aw_chunk_3053_cast_fp16")]; tensor var_31392_to_fp16 = const()[name = tensor("op_31392_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3055_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3055_cast_fp16, y = var_31392_to_fp16)[name = tensor("aw_chunk_3055_cast_fp16")]; tensor var_31394_to_fp16 = const()[name = tensor("op_31394_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3057_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3057_cast_fp16, y = var_31394_to_fp16)[name = tensor("aw_chunk_3057_cast_fp16")]; tensor var_31396_to_fp16 = const()[name = tensor("op_31396_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3059_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3059_cast_fp16, y = var_31396_to_fp16)[name = tensor("aw_chunk_3059_cast_fp16")]; tensor var_31398_to_fp16 = const()[name = tensor("op_31398_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3061_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3061_cast_fp16, y = var_31398_to_fp16)[name = tensor("aw_chunk_3061_cast_fp16")]; tensor var_31400_to_fp16 = const()[name = tensor("op_31400_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3063_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3063_cast_fp16, y = var_31400_to_fp16)[name = tensor("aw_chunk_3063_cast_fp16")]; tensor var_31402_to_fp16 = const()[name = tensor("op_31402_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3065_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3065_cast_fp16, y = var_31402_to_fp16)[name = tensor("aw_chunk_3065_cast_fp16")]; tensor var_31404_to_fp16 = const()[name = tensor("op_31404_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3067_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3067_cast_fp16, y = var_31404_to_fp16)[name = tensor("aw_chunk_3067_cast_fp16")]; tensor var_31406_to_fp16 = const()[name = tensor("op_31406_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3069_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3069_cast_fp16, y = var_31406_to_fp16)[name = tensor("aw_chunk_3069_cast_fp16")]; tensor var_31408_to_fp16 = const()[name = tensor("op_31408_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3071_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3071_cast_fp16, y = var_31408_to_fp16)[name = tensor("aw_chunk_3071_cast_fp16")]; tensor var_31410_to_fp16 = const()[name = tensor("op_31410_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3073_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3073_cast_fp16, y = var_31410_to_fp16)[name = tensor("aw_chunk_3073_cast_fp16")]; tensor var_31412_to_fp16 = const()[name = tensor("op_31412_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3075_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3075_cast_fp16, y = var_31412_to_fp16)[name = tensor("aw_chunk_3075_cast_fp16")]; tensor var_31414_to_fp16 = const()[name = tensor("op_31414_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3077_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3077_cast_fp16, y = var_31414_to_fp16)[name = tensor("aw_chunk_3077_cast_fp16")]; tensor var_31416_to_fp16 = const()[name = tensor("op_31416_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3079_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3079_cast_fp16, y = var_31416_to_fp16)[name = tensor("aw_chunk_3079_cast_fp16")]; tensor var_31418_to_fp16 = const()[name = tensor("op_31418_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3081_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3081_cast_fp16, y = var_31418_to_fp16)[name = tensor("aw_chunk_3081_cast_fp16")]; tensor var_31420_to_fp16 = const()[name = tensor("op_31420_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3083_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3083_cast_fp16, y = var_31420_to_fp16)[name = tensor("aw_chunk_3083_cast_fp16")]; tensor var_31422_to_fp16 = const()[name = tensor("op_31422_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3085_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3085_cast_fp16, y = var_31422_to_fp16)[name = tensor("aw_chunk_3085_cast_fp16")]; tensor var_31424_to_fp16 = const()[name = tensor("op_31424_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3087_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3087_cast_fp16, y = var_31424_to_fp16)[name = tensor("aw_chunk_3087_cast_fp16")]; tensor var_31426_to_fp16 = const()[name = tensor("op_31426_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3089_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3089_cast_fp16, y = var_31426_to_fp16)[name = tensor("aw_chunk_3089_cast_fp16")]; tensor var_31428_to_fp16 = const()[name = tensor("op_31428_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3091_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3091_cast_fp16, y = var_31428_to_fp16)[name = tensor("aw_chunk_3091_cast_fp16")]; tensor var_31430_to_fp16 = const()[name = tensor("op_31430_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3093_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3093_cast_fp16, y = var_31430_to_fp16)[name = tensor("aw_chunk_3093_cast_fp16")]; tensor var_31432_to_fp16 = const()[name = tensor("op_31432_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3095_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3095_cast_fp16, y = var_31432_to_fp16)[name = tensor("aw_chunk_3095_cast_fp16")]; tensor var_31434_to_fp16 = const()[name = tensor("op_31434_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3097_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3097_cast_fp16, y = var_31434_to_fp16)[name = tensor("aw_chunk_3097_cast_fp16")]; tensor var_31436_to_fp16 = const()[name = tensor("op_31436_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3099_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3099_cast_fp16, y = var_31436_to_fp16)[name = tensor("aw_chunk_3099_cast_fp16")]; tensor var_31438_to_fp16 = const()[name = tensor("op_31438_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3101_cast_fp16, y = var_31438_to_fp16)[name = tensor("aw_chunk_3101_cast_fp16")]; tensor var_31440_to_fp16 = const()[name = tensor("op_31440_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3103_cast_fp16, y = var_31440_to_fp16)[name = tensor("aw_chunk_3103_cast_fp16")]; tensor var_31442_to_fp16 = const()[name = tensor("op_31442_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3105_cast_fp16, y = var_31442_to_fp16)[name = tensor("aw_chunk_3105_cast_fp16")]; tensor var_31444_to_fp16 = const()[name = tensor("op_31444_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3107_cast_fp16, y = var_31444_to_fp16)[name = tensor("aw_chunk_3107_cast_fp16")]; tensor var_31446_to_fp16 = const()[name = tensor("op_31446_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3109_cast_fp16, y = var_31446_to_fp16)[name = tensor("aw_chunk_3109_cast_fp16")]; tensor var_31448_to_fp16 = const()[name = tensor("op_31448_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3111_cast_fp16, y = var_31448_to_fp16)[name = tensor("aw_chunk_3111_cast_fp16")]; tensor var_31450_to_fp16 = const()[name = tensor("op_31450_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3113_cast_fp16, y = var_31450_to_fp16)[name = tensor("aw_chunk_3113_cast_fp16")]; tensor var_31452_to_fp16 = const()[name = tensor("op_31452_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3115_cast_fp16, y = var_31452_to_fp16)[name = tensor("aw_chunk_3115_cast_fp16")]; tensor var_31454_to_fp16 = const()[name = tensor("op_31454_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3117_cast_fp16, y = var_31454_to_fp16)[name = tensor("aw_chunk_3117_cast_fp16")]; tensor var_31456_to_fp16 = const()[name = tensor("op_31456_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3119_cast_fp16, y = var_31456_to_fp16)[name = tensor("aw_chunk_3119_cast_fp16")]; tensor var_31458_to_fp16 = const()[name = tensor("op_31458_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3121_cast_fp16, y = var_31458_to_fp16)[name = tensor("aw_chunk_3121_cast_fp16")]; tensor var_31460_to_fp16 = const()[name = tensor("op_31460_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3123_cast_fp16, y = var_31460_to_fp16)[name = tensor("aw_chunk_3123_cast_fp16")]; tensor var_31462_to_fp16 = const()[name = tensor("op_31462_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3125_cast_fp16, y = var_31462_to_fp16)[name = tensor("aw_chunk_3125_cast_fp16")]; tensor var_31464_to_fp16 = const()[name = tensor("op_31464_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3127_cast_fp16, y = var_31464_to_fp16)[name = tensor("aw_chunk_3127_cast_fp16")]; tensor var_31466_to_fp16 = const()[name = tensor("op_31466_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3129_cast_fp16, y = var_31466_to_fp16)[name = tensor("aw_chunk_3129_cast_fp16")]; tensor var_31468_to_fp16 = const()[name = tensor("op_31468_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3131_cast_fp16, y = var_31468_to_fp16)[name = tensor("aw_chunk_3131_cast_fp16")]; tensor var_31470_to_fp16 = const()[name = tensor("op_31470_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3133_cast_fp16, y = var_31470_to_fp16)[name = tensor("aw_chunk_3133_cast_fp16")]; tensor var_31472_to_fp16 = const()[name = tensor("op_31472_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3135_cast_fp16, y = var_31472_to_fp16)[name = tensor("aw_chunk_3135_cast_fp16")]; tensor var_31474_to_fp16 = const()[name = tensor("op_31474_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3137_cast_fp16, y = var_31474_to_fp16)[name = tensor("aw_chunk_3137_cast_fp16")]; tensor var_31476_to_fp16 = const()[name = tensor("op_31476_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3139_cast_fp16, y = var_31476_to_fp16)[name = tensor("aw_chunk_3139_cast_fp16")]; tensor var_31478_to_fp16 = const()[name = tensor("op_31478_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3141_cast_fp16, y = var_31478_to_fp16)[name = tensor("aw_chunk_3141_cast_fp16")]; tensor var_31480_to_fp16 = const()[name = tensor("op_31480_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3143_cast_fp16, y = var_31480_to_fp16)[name = tensor("aw_chunk_3143_cast_fp16")]; tensor var_31482_to_fp16 = const()[name = tensor("op_31482_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3145_cast_fp16, y = var_31482_to_fp16)[name = tensor("aw_chunk_3145_cast_fp16")]; tensor var_31484_to_fp16 = const()[name = tensor("op_31484_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3147_cast_fp16, y = var_31484_to_fp16)[name = tensor("aw_chunk_3147_cast_fp16")]; tensor var_31486_to_fp16 = const()[name = tensor("op_31486_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3149_cast_fp16, y = var_31486_to_fp16)[name = tensor("aw_chunk_3149_cast_fp16")]; tensor var_31488_to_fp16 = const()[name = tensor("op_31488_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3151_cast_fp16, y = var_31488_to_fp16)[name = tensor("aw_chunk_3151_cast_fp16")]; tensor var_31490_to_fp16 = const()[name = tensor("op_31490_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3153_cast_fp16, y = var_31490_to_fp16)[name = tensor("aw_chunk_3153_cast_fp16")]; tensor var_31492_to_fp16 = const()[name = tensor("op_31492_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3155_cast_fp16, y = var_31492_to_fp16)[name = tensor("aw_chunk_3155_cast_fp16")]; tensor var_31494_to_fp16 = const()[name = tensor("op_31494_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3157_cast_fp16, y = var_31494_to_fp16)[name = tensor("aw_chunk_3157_cast_fp16")]; tensor var_31496_to_fp16 = const()[name = tensor("op_31496_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3159_cast_fp16, y = var_31496_to_fp16)[name = tensor("aw_chunk_3159_cast_fp16")]; tensor var_31498_to_fp16 = const()[name = tensor("op_31498_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3161_cast_fp16, y = var_31498_to_fp16)[name = tensor("aw_chunk_3161_cast_fp16")]; tensor var_31500_to_fp16 = const()[name = tensor("op_31500_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3163_cast_fp16, y = var_31500_to_fp16)[name = tensor("aw_chunk_3163_cast_fp16")]; tensor var_31502_to_fp16 = const()[name = tensor("op_31502_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3165_cast_fp16, y = var_31502_to_fp16)[name = tensor("aw_chunk_3165_cast_fp16")]; tensor var_31504_to_fp16 = const()[name = tensor("op_31504_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3167_cast_fp16, y = var_31504_to_fp16)[name = tensor("aw_chunk_3167_cast_fp16")]; tensor var_31506_to_fp16 = const()[name = tensor("op_31506_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3169_cast_fp16, y = var_31506_to_fp16)[name = tensor("aw_chunk_3169_cast_fp16")]; tensor var_31508_to_fp16 = const()[name = tensor("op_31508_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3171_cast_fp16, y = var_31508_to_fp16)[name = tensor("aw_chunk_3171_cast_fp16")]; tensor var_31510_to_fp16 = const()[name = tensor("op_31510_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3173_cast_fp16, y = var_31510_to_fp16)[name = tensor("aw_chunk_3173_cast_fp16")]; tensor var_31512_to_fp16 = const()[name = tensor("op_31512_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3175_cast_fp16, y = var_31512_to_fp16)[name = tensor("aw_chunk_3175_cast_fp16")]; tensor var_31514_to_fp16 = const()[name = tensor("op_31514_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3177_cast_fp16, y = var_31514_to_fp16)[name = tensor("aw_chunk_3177_cast_fp16")]; tensor var_31516_to_fp16 = const()[name = tensor("op_31516_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3179_cast_fp16, y = var_31516_to_fp16)[name = tensor("aw_chunk_3179_cast_fp16")]; tensor var_31518_to_fp16 = const()[name = tensor("op_31518_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3181_cast_fp16, y = var_31518_to_fp16)[name = tensor("aw_chunk_3181_cast_fp16")]; tensor var_31520_to_fp16 = const()[name = tensor("op_31520_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3183_cast_fp16, y = var_31520_to_fp16)[name = tensor("aw_chunk_3183_cast_fp16")]; tensor var_31522_to_fp16 = const()[name = tensor("op_31522_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3185_cast_fp16, y = var_31522_to_fp16)[name = tensor("aw_chunk_3185_cast_fp16")]; tensor var_31524_to_fp16 = const()[name = tensor("op_31524_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3187_cast_fp16, y = var_31524_to_fp16)[name = tensor("aw_chunk_3187_cast_fp16")]; tensor var_31526_to_fp16 = const()[name = tensor("op_31526_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3189_cast_fp16, y = var_31526_to_fp16)[name = tensor("aw_chunk_3189_cast_fp16")]; tensor var_31528_to_fp16 = const()[name = tensor("op_31528_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3191_cast_fp16, y = var_31528_to_fp16)[name = tensor("aw_chunk_3191_cast_fp16")]; tensor var_31530_to_fp16 = const()[name = tensor("op_31530_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3193_cast_fp16, y = var_31530_to_fp16)[name = tensor("aw_chunk_3193_cast_fp16")]; tensor var_31532_to_fp16 = const()[name = tensor("op_31532_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3195_cast_fp16, y = var_31532_to_fp16)[name = tensor("aw_chunk_3195_cast_fp16")]; tensor var_31534_to_fp16 = const()[name = tensor("op_31534_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3197_cast_fp16, y = var_31534_to_fp16)[name = tensor("aw_chunk_3197_cast_fp16")]; tensor var_31536_to_fp16 = const()[name = tensor("op_31536_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3199_cast_fp16, y = var_31536_to_fp16)[name = tensor("aw_chunk_3199_cast_fp16")]; tensor var_31538_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3041_cast_fp16)[name = tensor("op_31538_cast_fp16")]; tensor var_31539_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3043_cast_fp16)[name = tensor("op_31539_cast_fp16")]; tensor var_31540_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3045_cast_fp16)[name = tensor("op_31540_cast_fp16")]; tensor var_31541_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3047_cast_fp16)[name = tensor("op_31541_cast_fp16")]; tensor var_31542_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3049_cast_fp16)[name = tensor("op_31542_cast_fp16")]; tensor var_31543_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3051_cast_fp16)[name = tensor("op_31543_cast_fp16")]; tensor var_31544_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3053_cast_fp16)[name = tensor("op_31544_cast_fp16")]; tensor var_31545_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3055_cast_fp16)[name = tensor("op_31545_cast_fp16")]; tensor var_31546_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3057_cast_fp16)[name = tensor("op_31546_cast_fp16")]; tensor var_31547_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3059_cast_fp16)[name = tensor("op_31547_cast_fp16")]; tensor var_31548_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3061_cast_fp16)[name = tensor("op_31548_cast_fp16")]; tensor var_31549_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3063_cast_fp16)[name = tensor("op_31549_cast_fp16")]; tensor var_31550_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3065_cast_fp16)[name = tensor("op_31550_cast_fp16")]; tensor var_31551_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3067_cast_fp16)[name = tensor("op_31551_cast_fp16")]; tensor var_31552_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3069_cast_fp16)[name = tensor("op_31552_cast_fp16")]; tensor var_31553_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3071_cast_fp16)[name = tensor("op_31553_cast_fp16")]; tensor var_31554_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3073_cast_fp16)[name = tensor("op_31554_cast_fp16")]; tensor var_31555_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3075_cast_fp16)[name = tensor("op_31555_cast_fp16")]; tensor var_31556_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3077_cast_fp16)[name = tensor("op_31556_cast_fp16")]; tensor var_31557_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3079_cast_fp16)[name = tensor("op_31557_cast_fp16")]; tensor var_31558_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3081_cast_fp16)[name = tensor("op_31558_cast_fp16")]; tensor var_31559_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3083_cast_fp16)[name = tensor("op_31559_cast_fp16")]; tensor var_31560_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3085_cast_fp16)[name = tensor("op_31560_cast_fp16")]; tensor var_31561_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3087_cast_fp16)[name = tensor("op_31561_cast_fp16")]; tensor var_31562_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3089_cast_fp16)[name = tensor("op_31562_cast_fp16")]; tensor var_31563_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3091_cast_fp16)[name = tensor("op_31563_cast_fp16")]; tensor var_31564_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3093_cast_fp16)[name = tensor("op_31564_cast_fp16")]; tensor var_31565_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3095_cast_fp16)[name = tensor("op_31565_cast_fp16")]; tensor var_31566_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3097_cast_fp16)[name = tensor("op_31566_cast_fp16")]; tensor var_31567_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3099_cast_fp16)[name = tensor("op_31567_cast_fp16")]; tensor var_31568_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3101_cast_fp16)[name = tensor("op_31568_cast_fp16")]; tensor var_31569_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3103_cast_fp16)[name = tensor("op_31569_cast_fp16")]; tensor var_31570_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3105_cast_fp16)[name = tensor("op_31570_cast_fp16")]; tensor var_31571_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3107_cast_fp16)[name = tensor("op_31571_cast_fp16")]; tensor var_31572_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3109_cast_fp16)[name = tensor("op_31572_cast_fp16")]; tensor var_31573_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3111_cast_fp16)[name = tensor("op_31573_cast_fp16")]; tensor var_31574_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3113_cast_fp16)[name = tensor("op_31574_cast_fp16")]; tensor var_31575_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3115_cast_fp16)[name = tensor("op_31575_cast_fp16")]; tensor var_31576_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3117_cast_fp16)[name = tensor("op_31576_cast_fp16")]; tensor var_31577_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3119_cast_fp16)[name = tensor("op_31577_cast_fp16")]; tensor var_31578_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3121_cast_fp16)[name = tensor("op_31578_cast_fp16")]; tensor var_31579_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3123_cast_fp16)[name = tensor("op_31579_cast_fp16")]; tensor var_31580_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3125_cast_fp16)[name = tensor("op_31580_cast_fp16")]; tensor var_31581_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3127_cast_fp16)[name = tensor("op_31581_cast_fp16")]; tensor var_31582_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3129_cast_fp16)[name = tensor("op_31582_cast_fp16")]; tensor var_31583_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3131_cast_fp16)[name = tensor("op_31583_cast_fp16")]; tensor var_31584_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3133_cast_fp16)[name = tensor("op_31584_cast_fp16")]; tensor var_31585_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3135_cast_fp16)[name = tensor("op_31585_cast_fp16")]; tensor var_31586_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3137_cast_fp16)[name = tensor("op_31586_cast_fp16")]; tensor var_31587_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3139_cast_fp16)[name = tensor("op_31587_cast_fp16")]; tensor var_31588_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3141_cast_fp16)[name = tensor("op_31588_cast_fp16")]; tensor var_31589_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3143_cast_fp16)[name = tensor("op_31589_cast_fp16")]; tensor var_31590_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3145_cast_fp16)[name = tensor("op_31590_cast_fp16")]; tensor var_31591_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3147_cast_fp16)[name = tensor("op_31591_cast_fp16")]; tensor var_31592_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3149_cast_fp16)[name = tensor("op_31592_cast_fp16")]; tensor var_31593_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3151_cast_fp16)[name = tensor("op_31593_cast_fp16")]; tensor var_31594_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3153_cast_fp16)[name = tensor("op_31594_cast_fp16")]; tensor var_31595_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3155_cast_fp16)[name = tensor("op_31595_cast_fp16")]; tensor var_31596_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3157_cast_fp16)[name = tensor("op_31596_cast_fp16")]; tensor var_31597_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3159_cast_fp16)[name = tensor("op_31597_cast_fp16")]; tensor var_31598_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3161_cast_fp16)[name = tensor("op_31598_cast_fp16")]; tensor var_31599_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3163_cast_fp16)[name = tensor("op_31599_cast_fp16")]; tensor var_31600_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3165_cast_fp16)[name = tensor("op_31600_cast_fp16")]; tensor var_31601_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3167_cast_fp16)[name = tensor("op_31601_cast_fp16")]; tensor var_31602_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3169_cast_fp16)[name = tensor("op_31602_cast_fp16")]; tensor var_31603_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3171_cast_fp16)[name = tensor("op_31603_cast_fp16")]; tensor var_31604_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3173_cast_fp16)[name = tensor("op_31604_cast_fp16")]; tensor var_31605_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3175_cast_fp16)[name = tensor("op_31605_cast_fp16")]; tensor var_31606_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3177_cast_fp16)[name = tensor("op_31606_cast_fp16")]; tensor var_31607_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3179_cast_fp16)[name = tensor("op_31607_cast_fp16")]; tensor var_31608_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3181_cast_fp16)[name = tensor("op_31608_cast_fp16")]; tensor var_31609_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3183_cast_fp16)[name = tensor("op_31609_cast_fp16")]; tensor var_31610_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3185_cast_fp16)[name = tensor("op_31610_cast_fp16")]; tensor var_31611_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3187_cast_fp16)[name = tensor("op_31611_cast_fp16")]; tensor var_31612_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3189_cast_fp16)[name = tensor("op_31612_cast_fp16")]; tensor var_31613_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3191_cast_fp16)[name = tensor("op_31613_cast_fp16")]; tensor var_31614_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3193_cast_fp16)[name = tensor("op_31614_cast_fp16")]; tensor var_31615_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3195_cast_fp16)[name = tensor("op_31615_cast_fp16")]; tensor var_31616_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3197_cast_fp16)[name = tensor("op_31616_cast_fp16")]; tensor var_31617_cast_fp16 = softmax(axis = var_30336, x = aw_chunk_3199_cast_fp16)[name = tensor("op_31617_cast_fp16")]; tensor var_31619_equation_0 = const()[name = tensor("op_31619_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31619_cast_fp16 = einsum(equation = var_31619_equation_0, values = (var_31139_cast_fp16, var_31538_cast_fp16))[name = tensor("op_31619_cast_fp16")]; tensor var_31621_equation_0 = const()[name = tensor("op_31621_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31621_cast_fp16 = einsum(equation = var_31621_equation_0, values = (var_31139_cast_fp16, var_31539_cast_fp16))[name = tensor("op_31621_cast_fp16")]; tensor var_31623_equation_0 = const()[name = tensor("op_31623_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31623_cast_fp16 = einsum(equation = var_31623_equation_0, values = (var_31139_cast_fp16, var_31540_cast_fp16))[name = tensor("op_31623_cast_fp16")]; tensor var_31625_equation_0 = const()[name = tensor("op_31625_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31625_cast_fp16 = einsum(equation = var_31625_equation_0, values = (var_31139_cast_fp16, var_31541_cast_fp16))[name = tensor("op_31625_cast_fp16")]; tensor var_31627_equation_0 = const()[name = tensor("op_31627_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31627_cast_fp16 = einsum(equation = var_31627_equation_0, values = (var_31143_cast_fp16, var_31542_cast_fp16))[name = tensor("op_31627_cast_fp16")]; tensor var_31629_equation_0 = const()[name = tensor("op_31629_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31629_cast_fp16 = einsum(equation = var_31629_equation_0, values = (var_31143_cast_fp16, var_31543_cast_fp16))[name = tensor("op_31629_cast_fp16")]; tensor var_31631_equation_0 = const()[name = tensor("op_31631_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31631_cast_fp16 = einsum(equation = var_31631_equation_0, values = (var_31143_cast_fp16, var_31544_cast_fp16))[name = tensor("op_31631_cast_fp16")]; tensor var_31633_equation_0 = const()[name = tensor("op_31633_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31633_cast_fp16 = einsum(equation = var_31633_equation_0, values = (var_31143_cast_fp16, var_31545_cast_fp16))[name = tensor("op_31633_cast_fp16")]; tensor var_31635_equation_0 = const()[name = tensor("op_31635_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31635_cast_fp16 = einsum(equation = var_31635_equation_0, values = (var_31147_cast_fp16, var_31546_cast_fp16))[name = tensor("op_31635_cast_fp16")]; tensor var_31637_equation_0 = const()[name = tensor("op_31637_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31637_cast_fp16 = einsum(equation = var_31637_equation_0, values = (var_31147_cast_fp16, var_31547_cast_fp16))[name = tensor("op_31637_cast_fp16")]; tensor var_31639_equation_0 = const()[name = tensor("op_31639_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31639_cast_fp16 = einsum(equation = var_31639_equation_0, values = (var_31147_cast_fp16, var_31548_cast_fp16))[name = tensor("op_31639_cast_fp16")]; tensor var_31641_equation_0 = const()[name = tensor("op_31641_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31641_cast_fp16 = einsum(equation = var_31641_equation_0, values = (var_31147_cast_fp16, var_31549_cast_fp16))[name = tensor("op_31641_cast_fp16")]; tensor var_31643_equation_0 = const()[name = tensor("op_31643_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31643_cast_fp16 = einsum(equation = var_31643_equation_0, values = (var_31151_cast_fp16, var_31550_cast_fp16))[name = tensor("op_31643_cast_fp16")]; tensor var_31645_equation_0 = const()[name = tensor("op_31645_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31645_cast_fp16 = einsum(equation = var_31645_equation_0, values = (var_31151_cast_fp16, var_31551_cast_fp16))[name = tensor("op_31645_cast_fp16")]; tensor var_31647_equation_0 = const()[name = tensor("op_31647_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31647_cast_fp16 = einsum(equation = var_31647_equation_0, values = (var_31151_cast_fp16, var_31552_cast_fp16))[name = tensor("op_31647_cast_fp16")]; tensor var_31649_equation_0 = const()[name = tensor("op_31649_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31649_cast_fp16 = einsum(equation = var_31649_equation_0, values = (var_31151_cast_fp16, var_31553_cast_fp16))[name = tensor("op_31649_cast_fp16")]; tensor var_31651_equation_0 = const()[name = tensor("op_31651_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31651_cast_fp16 = einsum(equation = var_31651_equation_0, values = (var_31155_cast_fp16, var_31554_cast_fp16))[name = tensor("op_31651_cast_fp16")]; tensor var_31653_equation_0 = const()[name = tensor("op_31653_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31653_cast_fp16 = einsum(equation = var_31653_equation_0, values = (var_31155_cast_fp16, var_31555_cast_fp16))[name = tensor("op_31653_cast_fp16")]; tensor var_31655_equation_0 = const()[name = tensor("op_31655_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31655_cast_fp16 = einsum(equation = var_31655_equation_0, values = (var_31155_cast_fp16, var_31556_cast_fp16))[name = tensor("op_31655_cast_fp16")]; tensor var_31657_equation_0 = const()[name = tensor("op_31657_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31657_cast_fp16 = einsum(equation = var_31657_equation_0, values = (var_31155_cast_fp16, var_31557_cast_fp16))[name = tensor("op_31657_cast_fp16")]; tensor var_31659_equation_0 = const()[name = tensor("op_31659_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31659_cast_fp16 = einsum(equation = var_31659_equation_0, values = (var_31159_cast_fp16, var_31558_cast_fp16))[name = tensor("op_31659_cast_fp16")]; tensor var_31661_equation_0 = const()[name = tensor("op_31661_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31661_cast_fp16 = einsum(equation = var_31661_equation_0, values = (var_31159_cast_fp16, var_31559_cast_fp16))[name = tensor("op_31661_cast_fp16")]; tensor var_31663_equation_0 = const()[name = tensor("op_31663_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31663_cast_fp16 = einsum(equation = var_31663_equation_0, values = (var_31159_cast_fp16, var_31560_cast_fp16))[name = tensor("op_31663_cast_fp16")]; tensor var_31665_equation_0 = const()[name = tensor("op_31665_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31665_cast_fp16 = einsum(equation = var_31665_equation_0, values = (var_31159_cast_fp16, var_31561_cast_fp16))[name = tensor("op_31665_cast_fp16")]; tensor var_31667_equation_0 = const()[name = tensor("op_31667_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31667_cast_fp16 = einsum(equation = var_31667_equation_0, values = (var_31163_cast_fp16, var_31562_cast_fp16))[name = tensor("op_31667_cast_fp16")]; tensor var_31669_equation_0 = const()[name = tensor("op_31669_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31669_cast_fp16 = einsum(equation = var_31669_equation_0, values = (var_31163_cast_fp16, var_31563_cast_fp16))[name = tensor("op_31669_cast_fp16")]; tensor var_31671_equation_0 = const()[name = tensor("op_31671_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31671_cast_fp16 = einsum(equation = var_31671_equation_0, values = (var_31163_cast_fp16, var_31564_cast_fp16))[name = tensor("op_31671_cast_fp16")]; tensor var_31673_equation_0 = const()[name = tensor("op_31673_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31673_cast_fp16 = einsum(equation = var_31673_equation_0, values = (var_31163_cast_fp16, var_31565_cast_fp16))[name = tensor("op_31673_cast_fp16")]; tensor var_31675_equation_0 = const()[name = tensor("op_31675_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31675_cast_fp16 = einsum(equation = var_31675_equation_0, values = (var_31167_cast_fp16, var_31566_cast_fp16))[name = tensor("op_31675_cast_fp16")]; tensor var_31677_equation_0 = const()[name = tensor("op_31677_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31677_cast_fp16 = einsum(equation = var_31677_equation_0, values = (var_31167_cast_fp16, var_31567_cast_fp16))[name = tensor("op_31677_cast_fp16")]; tensor var_31679_equation_0 = const()[name = tensor("op_31679_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31679_cast_fp16 = einsum(equation = var_31679_equation_0, values = (var_31167_cast_fp16, var_31568_cast_fp16))[name = tensor("op_31679_cast_fp16")]; tensor var_31681_equation_0 = const()[name = tensor("op_31681_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31681_cast_fp16 = einsum(equation = var_31681_equation_0, values = (var_31167_cast_fp16, var_31569_cast_fp16))[name = tensor("op_31681_cast_fp16")]; tensor var_31683_equation_0 = const()[name = tensor("op_31683_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31683_cast_fp16 = einsum(equation = var_31683_equation_0, values = (var_31171_cast_fp16, var_31570_cast_fp16))[name = tensor("op_31683_cast_fp16")]; tensor var_31685_equation_0 = const()[name = tensor("op_31685_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31685_cast_fp16 = einsum(equation = var_31685_equation_0, values = (var_31171_cast_fp16, var_31571_cast_fp16))[name = tensor("op_31685_cast_fp16")]; tensor var_31687_equation_0 = const()[name = tensor("op_31687_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31687_cast_fp16 = einsum(equation = var_31687_equation_0, values = (var_31171_cast_fp16, var_31572_cast_fp16))[name = tensor("op_31687_cast_fp16")]; tensor var_31689_equation_0 = const()[name = tensor("op_31689_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31689_cast_fp16 = einsum(equation = var_31689_equation_0, values = (var_31171_cast_fp16, var_31573_cast_fp16))[name = tensor("op_31689_cast_fp16")]; tensor var_31691_equation_0 = const()[name = tensor("op_31691_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31691_cast_fp16 = einsum(equation = var_31691_equation_0, values = (var_31175_cast_fp16, var_31574_cast_fp16))[name = tensor("op_31691_cast_fp16")]; tensor var_31693_equation_0 = const()[name = tensor("op_31693_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31693_cast_fp16 = einsum(equation = var_31693_equation_0, values = (var_31175_cast_fp16, var_31575_cast_fp16))[name = tensor("op_31693_cast_fp16")]; tensor var_31695_equation_0 = const()[name = tensor("op_31695_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31695_cast_fp16 = einsum(equation = var_31695_equation_0, values = (var_31175_cast_fp16, var_31576_cast_fp16))[name = tensor("op_31695_cast_fp16")]; tensor var_31697_equation_0 = const()[name = tensor("op_31697_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31697_cast_fp16 = einsum(equation = var_31697_equation_0, values = (var_31175_cast_fp16, var_31577_cast_fp16))[name = tensor("op_31697_cast_fp16")]; tensor var_31699_equation_0 = const()[name = tensor("op_31699_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31699_cast_fp16 = einsum(equation = var_31699_equation_0, values = (var_31179_cast_fp16, var_31578_cast_fp16))[name = tensor("op_31699_cast_fp16")]; tensor var_31701_equation_0 = const()[name = tensor("op_31701_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31701_cast_fp16 = einsum(equation = var_31701_equation_0, values = (var_31179_cast_fp16, var_31579_cast_fp16))[name = tensor("op_31701_cast_fp16")]; tensor var_31703_equation_0 = const()[name = tensor("op_31703_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31703_cast_fp16 = einsum(equation = var_31703_equation_0, values = (var_31179_cast_fp16, var_31580_cast_fp16))[name = tensor("op_31703_cast_fp16")]; tensor var_31705_equation_0 = const()[name = tensor("op_31705_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31705_cast_fp16 = einsum(equation = var_31705_equation_0, values = (var_31179_cast_fp16, var_31581_cast_fp16))[name = tensor("op_31705_cast_fp16")]; tensor var_31707_equation_0 = const()[name = tensor("op_31707_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31707_cast_fp16 = einsum(equation = var_31707_equation_0, values = (var_31183_cast_fp16, var_31582_cast_fp16))[name = tensor("op_31707_cast_fp16")]; tensor var_31709_equation_0 = const()[name = tensor("op_31709_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31709_cast_fp16 = einsum(equation = var_31709_equation_0, values = (var_31183_cast_fp16, var_31583_cast_fp16))[name = tensor("op_31709_cast_fp16")]; tensor var_31711_equation_0 = const()[name = tensor("op_31711_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31711_cast_fp16 = einsum(equation = var_31711_equation_0, values = (var_31183_cast_fp16, var_31584_cast_fp16))[name = tensor("op_31711_cast_fp16")]; tensor var_31713_equation_0 = const()[name = tensor("op_31713_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31713_cast_fp16 = einsum(equation = var_31713_equation_0, values = (var_31183_cast_fp16, var_31585_cast_fp16))[name = tensor("op_31713_cast_fp16")]; tensor var_31715_equation_0 = const()[name = tensor("op_31715_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31715_cast_fp16 = einsum(equation = var_31715_equation_0, values = (var_31187_cast_fp16, var_31586_cast_fp16))[name = tensor("op_31715_cast_fp16")]; tensor var_31717_equation_0 = const()[name = tensor("op_31717_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31717_cast_fp16 = einsum(equation = var_31717_equation_0, values = (var_31187_cast_fp16, var_31587_cast_fp16))[name = tensor("op_31717_cast_fp16")]; tensor var_31719_equation_0 = const()[name = tensor("op_31719_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31719_cast_fp16 = einsum(equation = var_31719_equation_0, values = (var_31187_cast_fp16, var_31588_cast_fp16))[name = tensor("op_31719_cast_fp16")]; tensor var_31721_equation_0 = const()[name = tensor("op_31721_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31721_cast_fp16 = einsum(equation = var_31721_equation_0, values = (var_31187_cast_fp16, var_31589_cast_fp16))[name = tensor("op_31721_cast_fp16")]; tensor var_31723_equation_0 = const()[name = tensor("op_31723_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31723_cast_fp16 = einsum(equation = var_31723_equation_0, values = (var_31191_cast_fp16, var_31590_cast_fp16))[name = tensor("op_31723_cast_fp16")]; tensor var_31725_equation_0 = const()[name = tensor("op_31725_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31725_cast_fp16 = einsum(equation = var_31725_equation_0, values = (var_31191_cast_fp16, var_31591_cast_fp16))[name = tensor("op_31725_cast_fp16")]; tensor var_31727_equation_0 = const()[name = tensor("op_31727_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31727_cast_fp16 = einsum(equation = var_31727_equation_0, values = (var_31191_cast_fp16, var_31592_cast_fp16))[name = tensor("op_31727_cast_fp16")]; tensor var_31729_equation_0 = const()[name = tensor("op_31729_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31729_cast_fp16 = einsum(equation = var_31729_equation_0, values = (var_31191_cast_fp16, var_31593_cast_fp16))[name = tensor("op_31729_cast_fp16")]; tensor var_31731_equation_0 = const()[name = tensor("op_31731_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31731_cast_fp16 = einsum(equation = var_31731_equation_0, values = (var_31195_cast_fp16, var_31594_cast_fp16))[name = tensor("op_31731_cast_fp16")]; tensor var_31733_equation_0 = const()[name = tensor("op_31733_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31733_cast_fp16 = einsum(equation = var_31733_equation_0, values = (var_31195_cast_fp16, var_31595_cast_fp16))[name = tensor("op_31733_cast_fp16")]; tensor var_31735_equation_0 = const()[name = tensor("op_31735_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31735_cast_fp16 = einsum(equation = var_31735_equation_0, values = (var_31195_cast_fp16, var_31596_cast_fp16))[name = tensor("op_31735_cast_fp16")]; tensor var_31737_equation_0 = const()[name = tensor("op_31737_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31737_cast_fp16 = einsum(equation = var_31737_equation_0, values = (var_31195_cast_fp16, var_31597_cast_fp16))[name = tensor("op_31737_cast_fp16")]; tensor var_31739_equation_0 = const()[name = tensor("op_31739_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31739_cast_fp16 = einsum(equation = var_31739_equation_0, values = (var_31199_cast_fp16, var_31598_cast_fp16))[name = tensor("op_31739_cast_fp16")]; tensor var_31741_equation_0 = const()[name = tensor("op_31741_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31741_cast_fp16 = einsum(equation = var_31741_equation_0, values = (var_31199_cast_fp16, var_31599_cast_fp16))[name = tensor("op_31741_cast_fp16")]; tensor var_31743_equation_0 = const()[name = tensor("op_31743_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31743_cast_fp16 = einsum(equation = var_31743_equation_0, values = (var_31199_cast_fp16, var_31600_cast_fp16))[name = tensor("op_31743_cast_fp16")]; tensor var_31745_equation_0 = const()[name = tensor("op_31745_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31745_cast_fp16 = einsum(equation = var_31745_equation_0, values = (var_31199_cast_fp16, var_31601_cast_fp16))[name = tensor("op_31745_cast_fp16")]; tensor var_31747_equation_0 = const()[name = tensor("op_31747_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31747_cast_fp16 = einsum(equation = var_31747_equation_0, values = (var_31203_cast_fp16, var_31602_cast_fp16))[name = tensor("op_31747_cast_fp16")]; tensor var_31749_equation_0 = const()[name = tensor("op_31749_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31749_cast_fp16 = einsum(equation = var_31749_equation_0, values = (var_31203_cast_fp16, var_31603_cast_fp16))[name = tensor("op_31749_cast_fp16")]; tensor var_31751_equation_0 = const()[name = tensor("op_31751_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31751_cast_fp16 = einsum(equation = var_31751_equation_0, values = (var_31203_cast_fp16, var_31604_cast_fp16))[name = tensor("op_31751_cast_fp16")]; tensor var_31753_equation_0 = const()[name = tensor("op_31753_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31753_cast_fp16 = einsum(equation = var_31753_equation_0, values = (var_31203_cast_fp16, var_31605_cast_fp16))[name = tensor("op_31753_cast_fp16")]; tensor var_31755_equation_0 = const()[name = tensor("op_31755_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31755_cast_fp16 = einsum(equation = var_31755_equation_0, values = (var_31207_cast_fp16, var_31606_cast_fp16))[name = tensor("op_31755_cast_fp16")]; tensor var_31757_equation_0 = const()[name = tensor("op_31757_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31757_cast_fp16 = einsum(equation = var_31757_equation_0, values = (var_31207_cast_fp16, var_31607_cast_fp16))[name = tensor("op_31757_cast_fp16")]; tensor var_31759_equation_0 = const()[name = tensor("op_31759_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31759_cast_fp16 = einsum(equation = var_31759_equation_0, values = (var_31207_cast_fp16, var_31608_cast_fp16))[name = tensor("op_31759_cast_fp16")]; tensor var_31761_equation_0 = const()[name = tensor("op_31761_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31761_cast_fp16 = einsum(equation = var_31761_equation_0, values = (var_31207_cast_fp16, var_31609_cast_fp16))[name = tensor("op_31761_cast_fp16")]; tensor var_31763_equation_0 = const()[name = tensor("op_31763_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31763_cast_fp16 = einsum(equation = var_31763_equation_0, values = (var_31211_cast_fp16, var_31610_cast_fp16))[name = tensor("op_31763_cast_fp16")]; tensor var_31765_equation_0 = const()[name = tensor("op_31765_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31765_cast_fp16 = einsum(equation = var_31765_equation_0, values = (var_31211_cast_fp16, var_31611_cast_fp16))[name = tensor("op_31765_cast_fp16")]; tensor var_31767_equation_0 = const()[name = tensor("op_31767_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31767_cast_fp16 = einsum(equation = var_31767_equation_0, values = (var_31211_cast_fp16, var_31612_cast_fp16))[name = tensor("op_31767_cast_fp16")]; tensor var_31769_equation_0 = const()[name = tensor("op_31769_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31769_cast_fp16 = einsum(equation = var_31769_equation_0, values = (var_31211_cast_fp16, var_31613_cast_fp16))[name = tensor("op_31769_cast_fp16")]; tensor var_31771_equation_0 = const()[name = tensor("op_31771_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31771_cast_fp16 = einsum(equation = var_31771_equation_0, values = (var_31215_cast_fp16, var_31614_cast_fp16))[name = tensor("op_31771_cast_fp16")]; tensor var_31773_equation_0 = const()[name = tensor("op_31773_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31773_cast_fp16 = einsum(equation = var_31773_equation_0, values = (var_31215_cast_fp16, var_31615_cast_fp16))[name = tensor("op_31773_cast_fp16")]; tensor var_31775_equation_0 = const()[name = tensor("op_31775_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31775_cast_fp16 = einsum(equation = var_31775_equation_0, values = (var_31215_cast_fp16, var_31616_cast_fp16))[name = tensor("op_31775_cast_fp16")]; tensor var_31777_equation_0 = const()[name = tensor("op_31777_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_31777_cast_fp16 = einsum(equation = var_31777_equation_0, values = (var_31215_cast_fp16, var_31617_cast_fp16))[name = tensor("op_31777_cast_fp16")]; tensor var_31779_interleave_0 = const()[name = tensor("op_31779_interleave_0"), val = tensor(false)]; tensor var_31779_cast_fp16 = concat(axis = var_30311, interleave = var_31779_interleave_0, values = (var_31619_cast_fp16, var_31621_cast_fp16, var_31623_cast_fp16, var_31625_cast_fp16))[name = tensor("op_31779_cast_fp16")]; tensor var_31781_interleave_0 = const()[name = tensor("op_31781_interleave_0"), val = tensor(false)]; tensor var_31781_cast_fp16 = concat(axis = var_30311, interleave = var_31781_interleave_0, values = (var_31627_cast_fp16, var_31629_cast_fp16, var_31631_cast_fp16, var_31633_cast_fp16))[name = tensor("op_31781_cast_fp16")]; tensor var_31783_interleave_0 = const()[name = tensor("op_31783_interleave_0"), val = tensor(false)]; tensor var_31783_cast_fp16 = concat(axis = var_30311, interleave = var_31783_interleave_0, values = (var_31635_cast_fp16, var_31637_cast_fp16, var_31639_cast_fp16, var_31641_cast_fp16))[name = tensor("op_31783_cast_fp16")]; tensor var_31785_interleave_0 = const()[name = tensor("op_31785_interleave_0"), val = tensor(false)]; tensor var_31785_cast_fp16 = concat(axis = var_30311, interleave = var_31785_interleave_0, values = (var_31643_cast_fp16, var_31645_cast_fp16, var_31647_cast_fp16, var_31649_cast_fp16))[name = tensor("op_31785_cast_fp16")]; tensor var_31787_interleave_0 = const()[name = tensor("op_31787_interleave_0"), val = tensor(false)]; tensor var_31787_cast_fp16 = concat(axis = var_30311, interleave = var_31787_interleave_0, values = (var_31651_cast_fp16, var_31653_cast_fp16, var_31655_cast_fp16, var_31657_cast_fp16))[name = tensor("op_31787_cast_fp16")]; tensor var_31789_interleave_0 = const()[name = tensor("op_31789_interleave_0"), val = tensor(false)]; tensor var_31789_cast_fp16 = concat(axis = var_30311, interleave = var_31789_interleave_0, values = (var_31659_cast_fp16, var_31661_cast_fp16, var_31663_cast_fp16, var_31665_cast_fp16))[name = tensor("op_31789_cast_fp16")]; tensor var_31791_interleave_0 = const()[name = tensor("op_31791_interleave_0"), val = tensor(false)]; tensor var_31791_cast_fp16 = concat(axis = var_30311, interleave = var_31791_interleave_0, values = (var_31667_cast_fp16, var_31669_cast_fp16, var_31671_cast_fp16, var_31673_cast_fp16))[name = tensor("op_31791_cast_fp16")]; tensor var_31793_interleave_0 = const()[name = tensor("op_31793_interleave_0"), val = tensor(false)]; tensor var_31793_cast_fp16 = concat(axis = var_30311, interleave = var_31793_interleave_0, values = (var_31675_cast_fp16, var_31677_cast_fp16, var_31679_cast_fp16, var_31681_cast_fp16))[name = tensor("op_31793_cast_fp16")]; tensor var_31795_interleave_0 = const()[name = tensor("op_31795_interleave_0"), val = tensor(false)]; tensor var_31795_cast_fp16 = concat(axis = var_30311, interleave = var_31795_interleave_0, values = (var_31683_cast_fp16, var_31685_cast_fp16, var_31687_cast_fp16, var_31689_cast_fp16))[name = tensor("op_31795_cast_fp16")]; tensor var_31797_interleave_0 = const()[name = tensor("op_31797_interleave_0"), val = tensor(false)]; tensor var_31797_cast_fp16 = concat(axis = var_30311, interleave = var_31797_interleave_0, values = (var_31691_cast_fp16, var_31693_cast_fp16, var_31695_cast_fp16, var_31697_cast_fp16))[name = tensor("op_31797_cast_fp16")]; tensor var_31799_interleave_0 = const()[name = tensor("op_31799_interleave_0"), val = tensor(false)]; tensor var_31799_cast_fp16 = concat(axis = var_30311, interleave = var_31799_interleave_0, values = (var_31699_cast_fp16, var_31701_cast_fp16, var_31703_cast_fp16, var_31705_cast_fp16))[name = tensor("op_31799_cast_fp16")]; tensor var_31801_interleave_0 = const()[name = tensor("op_31801_interleave_0"), val = tensor(false)]; tensor var_31801_cast_fp16 = concat(axis = var_30311, interleave = var_31801_interleave_0, values = (var_31707_cast_fp16, var_31709_cast_fp16, var_31711_cast_fp16, var_31713_cast_fp16))[name = tensor("op_31801_cast_fp16")]; tensor var_31803_interleave_0 = const()[name = tensor("op_31803_interleave_0"), val = tensor(false)]; tensor var_31803_cast_fp16 = concat(axis = var_30311, interleave = var_31803_interleave_0, values = (var_31715_cast_fp16, var_31717_cast_fp16, var_31719_cast_fp16, var_31721_cast_fp16))[name = tensor("op_31803_cast_fp16")]; tensor var_31805_interleave_0 = const()[name = tensor("op_31805_interleave_0"), val = tensor(false)]; tensor var_31805_cast_fp16 = concat(axis = var_30311, interleave = var_31805_interleave_0, values = (var_31723_cast_fp16, var_31725_cast_fp16, var_31727_cast_fp16, var_31729_cast_fp16))[name = tensor("op_31805_cast_fp16")]; tensor var_31807_interleave_0 = const()[name = tensor("op_31807_interleave_0"), val = tensor(false)]; tensor var_31807_cast_fp16 = concat(axis = var_30311, interleave = var_31807_interleave_0, values = (var_31731_cast_fp16, var_31733_cast_fp16, var_31735_cast_fp16, var_31737_cast_fp16))[name = tensor("op_31807_cast_fp16")]; tensor var_31809_interleave_0 = const()[name = tensor("op_31809_interleave_0"), val = tensor(false)]; tensor var_31809_cast_fp16 = concat(axis = var_30311, interleave = var_31809_interleave_0, values = (var_31739_cast_fp16, var_31741_cast_fp16, var_31743_cast_fp16, var_31745_cast_fp16))[name = tensor("op_31809_cast_fp16")]; tensor var_31811_interleave_0 = const()[name = tensor("op_31811_interleave_0"), val = tensor(false)]; tensor var_31811_cast_fp16 = concat(axis = var_30311, interleave = var_31811_interleave_0, values = (var_31747_cast_fp16, var_31749_cast_fp16, var_31751_cast_fp16, var_31753_cast_fp16))[name = tensor("op_31811_cast_fp16")]; tensor var_31813_interleave_0 = const()[name = tensor("op_31813_interleave_0"), val = tensor(false)]; tensor var_31813_cast_fp16 = concat(axis = var_30311, interleave = var_31813_interleave_0, values = (var_31755_cast_fp16, var_31757_cast_fp16, var_31759_cast_fp16, var_31761_cast_fp16))[name = tensor("op_31813_cast_fp16")]; tensor var_31815_interleave_0 = const()[name = tensor("op_31815_interleave_0"), val = tensor(false)]; tensor var_31815_cast_fp16 = concat(axis = var_30311, interleave = var_31815_interleave_0, values = (var_31763_cast_fp16, var_31765_cast_fp16, var_31767_cast_fp16, var_31769_cast_fp16))[name = tensor("op_31815_cast_fp16")]; tensor var_31817_interleave_0 = const()[name = tensor("op_31817_interleave_0"), val = tensor(false)]; tensor var_31817_cast_fp16 = concat(axis = var_30311, interleave = var_31817_interleave_0, values = (var_31771_cast_fp16, var_31773_cast_fp16, var_31775_cast_fp16, var_31777_cast_fp16))[name = tensor("op_31817_cast_fp16")]; tensor input_153_interleave_0 = const()[name = tensor("input_153_interleave_0"), val = tensor(false)]; tensor input_153_cast_fp16 = concat(axis = var_30336, interleave = input_153_interleave_0, values = (var_31779_cast_fp16, var_31781_cast_fp16, var_31783_cast_fp16, var_31785_cast_fp16, var_31787_cast_fp16, var_31789_cast_fp16, var_31791_cast_fp16, var_31793_cast_fp16, var_31795_cast_fp16, var_31797_cast_fp16, var_31799_cast_fp16, var_31801_cast_fp16, var_31803_cast_fp16, var_31805_cast_fp16, var_31807_cast_fp16, var_31809_cast_fp16, var_31811_cast_fp16, var_31813_cast_fp16, var_31815_cast_fp16, var_31817_cast_fp16))[name = tensor("input_153_cast_fp16")]; tensor var_31828_pad_type_0 = const()[name = tensor("op_31828_pad_type_0"), val = tensor("valid")]; tensor var_31828_strides_0 = const()[name = tensor("op_31828_strides_0"), val = tensor([1, 1])]; tensor var_31828_pad_0 = const()[name = tensor("op_31828_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31828_dilations_0 = const()[name = tensor("op_31828_dilations_0"), val = tensor([1, 1])]; tensor var_31828_groups_0 = const()[name = tensor("op_31828_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(262326848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263146112))), name = tensor("layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_19_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263146240)))]; tensor var_31828_cast_fp16 = conv(bias = layers_19_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_31828_dilations_0, groups = var_31828_groups_0, pad = var_31828_pad_0, pad_type = var_31828_pad_type_0, strides = var_31828_strides_0, weight = layers_19_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_153_cast_fp16)[name = tensor("op_31828_cast_fp16")]; tensor var_31834_pad_type_0 = const()[name = tensor("op_31834_pad_type_0"), val = tensor("valid")]; tensor var_31834_strides_0 = const()[name = tensor("op_31834_strides_0"), val = tensor([1, 1])]; tensor var_31834_pad_0 = const()[name = tensor("op_31834_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31834_dilations_0 = const()[name = tensor("op_31834_dilations_0"), val = tensor([1, 1])]; tensor var_31834_groups_0 = const()[name = tensor("op_31834_groups_0"), val = tensor(1)]; tensor layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263162496))), name = tensor("layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263148864))), shape = tensor([1280, 1280, 1, 1])]; tensor var_31834_cast_fp16 = conv(dilations = var_31834_dilations_0, groups = var_31834_groups_0, pad = var_31834_pad_0, pad_type = var_31834_pad_type_0, strides = var_31834_strides_0, weight = layers_19_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_153_cast_fp16)[name = tensor("op_31834_cast_fp16")]; tensor obj_79_cast_fp16 = add(x = var_31828_cast_fp16, y = var_31834_cast_fp16)[name = tensor("obj_79_cast_fp16")]; tensor inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = obj_79_cast_fp16)[name = tensor("inputs_79_cast_fp16")]; tensor out_79_axes_0 = const()[name = tensor("out_79_axes_0"), val = tensor([1])]; tensor var_31845_to_fp16 = const()[name = tensor("op_31845_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_79_cast_fp16 = layer_norm(axes = out_79_axes_0, epsilon = var_31845_to_fp16, x = inputs_79_cast_fp16)[name = tensor("out_79_cast_fp16")]; tensor input_155_gamma_0_to_fp16 = const()[name = tensor("input_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263367360)))]; tensor input_155_beta_0_to_fp16 = const()[name = tensor("input_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263369984)))]; tensor input_155_epsilon_0_to_fp16 = const()[name = tensor("input_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_155_cast_fp16 = batch_norm(beta = input_155_beta_0_to_fp16, epsilon = input_155_epsilon_0_to_fp16, gamma = input_155_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_79_cast_fp16)[name = tensor("input_155_cast_fp16")]; tensor var_31863_pad_type_0 = const()[name = tensor("op_31863_pad_type_0"), val = tensor("valid")]; tensor var_31863_strides_0 = const()[name = tensor("op_31863_strides_0"), val = tensor([1, 1])]; tensor var_31863_pad_0 = const()[name = tensor("op_31863_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31863_dilations_0 = const()[name = tensor("op_31863_dilations_0"), val = tensor([1, 1])]; tensor var_31863_groups_0 = const()[name = tensor("op_31863_groups_0"), val = tensor(1)]; tensor layers_19_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263372608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266649472))), name = tensor("layers_19_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_19_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266649600)))]; tensor var_31863_cast_fp16 = conv(bias = layers_19_fc1_inlier_module_bias_to_fp16, dilations = var_31863_dilations_0, groups = var_31863_groups_0, pad = var_31863_pad_0, pad_type = var_31863_pad_type_0, strides = var_31863_strides_0, weight = layers_19_fc1_inlier_module_weight_to_fp16_palettized, x = input_155_cast_fp16)[name = tensor("op_31863_cast_fp16")]; tensor var_31869_pad_type_0 = const()[name = tensor("op_31869_pad_type_0"), val = tensor("valid")]; tensor var_31869_strides_0 = const()[name = tensor("op_31869_strides_0"), val = tensor([1, 1])]; tensor var_31869_pad_0 = const()[name = tensor("op_31869_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31869_dilations_0 = const()[name = tensor("op_31869_dilations_0"), val = tensor([1, 1])]; tensor var_31869_groups_0 = const()[name = tensor("op_31869_groups_0"), val = tensor(1)]; tensor layers_19_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266736192))), name = tensor("layers_19_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(266659904))), shape = tensor([5120, 1280, 1, 1])]; tensor var_31869_cast_fp16 = conv(dilations = var_31869_dilations_0, groups = var_31869_groups_0, pad = var_31869_pad_0, pad_type = var_31869_pad_type_0, strides = var_31869_strides_0, weight = layers_19_fc1_outlier_module_weight_to_fp16_sparsified, x = input_155_cast_fp16)[name = tensor("op_31869_cast_fp16")]; tensor input_157_cast_fp16 = add(x = var_31863_cast_fp16, y = var_31869_cast_fp16)[name = tensor("input_157_cast_fp16")]; tensor input_159_mode_0 = const()[name = tensor("input_159_mode_0"), val = tensor("EXACT")]; tensor input_159_cast_fp16 = gelu(mode = input_159_mode_0, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; tensor var_31880_pad_type_0 = const()[name = tensor("op_31880_pad_type_0"), val = tensor("valid")]; tensor var_31880_strides_0 = const()[name = tensor("op_31880_strides_0"), val = tensor([1, 1])]; tensor var_31880_pad_0 = const()[name = tensor("op_31880_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31880_dilations_0 = const()[name = tensor("op_31880_dilations_0"), val = tensor([1, 1])]; tensor var_31880_groups_0 = const()[name = tensor("op_31880_groups_0"), val = tensor(1)]; tensor layers_19_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(267555456))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270832320))), name = tensor("layers_19_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_19_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_19_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270832448)))]; tensor var_31880_cast_fp16 = conv(bias = layers_19_fc2_inlier_module_bias_to_fp16, dilations = var_31880_dilations_0, groups = var_31880_groups_0, pad = var_31880_pad_0, pad_type = var_31880_pad_type_0, strides = var_31880_strides_0, weight = layers_19_fc2_inlier_module_weight_to_fp16_palettized, x = input_159_cast_fp16)[name = tensor("op_31880_cast_fp16")]; tensor var_31886_pad_type_0 = const()[name = tensor("op_31886_pad_type_0"), val = tensor("valid")]; tensor var_31886_strides_0 = const()[name = tensor("op_31886_strides_0"), val = tensor([1, 1])]; tensor var_31886_pad_0 = const()[name = tensor("op_31886_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31886_dilations_0 = const()[name = tensor("op_31886_dilations_0"), val = tensor([1, 1])]; tensor var_31886_groups_0 = const()[name = tensor("op_31886_groups_0"), val = tensor(1)]; tensor layers_19_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270911040))), name = tensor("layers_19_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(270835072))), shape = tensor([1280, 5120, 1, 1])]; tensor var_31886_cast_fp16 = conv(dilations = var_31886_dilations_0, groups = var_31886_groups_0, pad = var_31886_pad_0, pad_type = var_31886_pad_type_0, strides = var_31886_strides_0, weight = layers_19_fc2_outlier_module_weight_to_fp16_sparsified, x = input_159_cast_fp16)[name = tensor("op_31886_cast_fp16")]; tensor hidden_states_43_cast_fp16 = add(x = var_31880_cast_fp16, y = var_31886_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; tensor inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = hidden_states_43_cast_fp16)[name = tensor("inputs_81_cast_fp16")]; tensor var_31892 = const()[name = tensor("op_31892"), val = tensor(3)]; tensor var_31917 = const()[name = tensor("op_31917"), val = tensor(1)]; tensor out_81_axes_0 = const()[name = tensor("out_81_axes_0"), val = tensor([1])]; tensor var_31934_to_fp16 = const()[name = tensor("op_31934_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_81_cast_fp16 = layer_norm(axes = out_81_axes_0, epsilon = var_31934_to_fp16, x = inputs_81_cast_fp16)[name = tensor("out_81_cast_fp16")]; tensor obj_81_gamma_0_to_fp16 = const()[name = tensor("obj_81_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271730304)))]; tensor obj_81_beta_0_to_fp16 = const()[name = tensor("obj_81_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271732928)))]; tensor obj_81_epsilon_0_to_fp16 = const()[name = tensor("obj_81_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_81_cast_fp16 = batch_norm(beta = obj_81_beta_0_to_fp16, epsilon = obj_81_epsilon_0_to_fp16, gamma = obj_81_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_81_cast_fp16)[name = tensor("obj_81_cast_fp16")]; tensor var_31956_pad_type_0 = const()[name = tensor("op_31956_pad_type_0"), val = tensor("valid")]; tensor var_31956_strides_0 = const()[name = tensor("op_31956_strides_0"), val = tensor([1, 1])]; tensor var_31956_pad_0 = const()[name = tensor("op_31956_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31956_dilations_0 = const()[name = tensor("op_31956_dilations_0"), val = tensor([1, 1])]; tensor var_31956_groups_0 = const()[name = tensor("op_31956_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(271735552))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272554816))), name = tensor("layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_20_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272554944)))]; tensor var_31956_cast_fp16 = conv(bias = layers_20_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_31956_dilations_0, groups = var_31956_groups_0, pad = var_31956_pad_0, pad_type = var_31956_pad_type_0, strides = var_31956_strides_0, weight = layers_20_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = tensor("op_31956_cast_fp16")]; tensor var_31962_pad_type_0 = const()[name = tensor("op_31962_pad_type_0"), val = tensor("valid")]; tensor var_31962_strides_0 = const()[name = tensor("op_31962_strides_0"), val = tensor([1, 1])]; tensor var_31962_pad_0 = const()[name = tensor("op_31962_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31962_dilations_0 = const()[name = tensor("op_31962_dilations_0"), val = tensor([1, 1])]; tensor var_31962_groups_0 = const()[name = tensor("op_31962_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272594048))), name = tensor("layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272557568))), shape = tensor([1280, 1280, 1, 1])]; tensor var_31962_cast_fp16 = conv(dilations = var_31962_dilations_0, groups = var_31962_groups_0, pad = var_31962_pad_0, pad_type = var_31962_pad_type_0, strides = var_31962_strides_0, weight = layers_20_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = tensor("op_31962_cast_fp16")]; tensor query_41_cast_fp16 = add(x = var_31956_cast_fp16, y = var_31962_cast_fp16)[name = tensor("query_41_cast_fp16")]; tensor var_31971_pad_type_0 = const()[name = tensor("op_31971_pad_type_0"), val = tensor("valid")]; tensor var_31971_strides_0 = const()[name = tensor("op_31971_strides_0"), val = tensor([1, 1])]; tensor var_31971_pad_0 = const()[name = tensor("op_31971_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31971_dilations_0 = const()[name = tensor("op_31971_dilations_0"), val = tensor([1, 1])]; tensor var_31971_groups_0 = const()[name = tensor("op_31971_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(272798912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273618176))), name = tensor("layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_31971_cast_fp16 = conv(dilations = var_31971_dilations_0, groups = var_31971_groups_0, pad = var_31971_pad_0, pad_type = var_31971_pad_type_0, strides = var_31971_strides_0, weight = layers_20_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = tensor("op_31971_cast_fp16")]; tensor var_31977_pad_type_0 = const()[name = tensor("op_31977_pad_type_0"), val = tensor("valid")]; tensor var_31977_strides_0 = const()[name = tensor("op_31977_strides_0"), val = tensor([1, 1])]; tensor var_31977_pad_0 = const()[name = tensor("op_31977_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31977_dilations_0 = const()[name = tensor("op_31977_dilations_0"), val = tensor([1, 1])]; tensor var_31977_groups_0 = const()[name = tensor("op_31977_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273644416))), name = tensor("layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273618304))), shape = tensor([1280, 1280, 1, 1])]; tensor var_31977_cast_fp16 = conv(dilations = var_31977_dilations_0, groups = var_31977_groups_0, pad = var_31977_pad_0, pad_type = var_31977_pad_type_0, strides = var_31977_strides_0, weight = layers_20_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = tensor("op_31977_cast_fp16")]; tensor key_41_cast_fp16 = add(x = var_31971_cast_fp16, y = var_31977_cast_fp16)[name = tensor("key_41_cast_fp16")]; tensor var_31987_pad_type_0 = const()[name = tensor("op_31987_pad_type_0"), val = tensor("valid")]; tensor var_31987_strides_0 = const()[name = tensor("op_31987_strides_0"), val = tensor([1, 1])]; tensor var_31987_pad_0 = const()[name = tensor("op_31987_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31987_dilations_0 = const()[name = tensor("op_31987_dilations_0"), val = tensor([1, 1])]; tensor var_31987_groups_0 = const()[name = tensor("op_31987_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(273849280))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274668544))), name = tensor("layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_20_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274668672)))]; tensor var_31987_cast_fp16 = conv(bias = layers_20_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_31987_dilations_0, groups = var_31987_groups_0, pad = var_31987_pad_0, pad_type = var_31987_pad_type_0, strides = var_31987_strides_0, weight = layers_20_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_81_cast_fp16)[name = tensor("op_31987_cast_fp16")]; tensor var_31993_pad_type_0 = const()[name = tensor("op_31993_pad_type_0"), val = tensor("valid")]; tensor var_31993_strides_0 = const()[name = tensor("op_31993_strides_0"), val = tensor([1, 1])]; tensor var_31993_pad_0 = const()[name = tensor("op_31993_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_31993_dilations_0 = const()[name = tensor("op_31993_dilations_0"), val = tensor([1, 1])]; tensor var_31993_groups_0 = const()[name = tensor("op_31993_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274685312))), name = tensor("layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274671296))), shape = tensor([1280, 1280, 1, 1])]; tensor var_31993_cast_fp16 = conv(dilations = var_31993_dilations_0, groups = var_31993_groups_0, pad = var_31993_pad_0, pad_type = var_31993_pad_type_0, strides = var_31993_strides_0, weight = layers_20_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_81_cast_fp16)[name = tensor("op_31993_cast_fp16")]; tensor value_41_cast_fp16 = add(x = var_31987_cast_fp16, y = var_31993_cast_fp16)[name = tensor("value_41_cast_fp16")]; tensor var_31999_begin_0 = const()[name = tensor("op_31999_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_31999_end_0 = const()[name = tensor("op_31999_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_31999_end_mask_0 = const()[name = tensor("op_31999_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_31999_cast_fp16 = slice_by_index(begin = var_31999_begin_0, end = var_31999_end_0, end_mask = var_31999_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_31999_cast_fp16")]; tensor var_32003_begin_0 = const()[name = tensor("op_32003_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_32003_end_0 = const()[name = tensor("op_32003_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_32003_end_mask_0 = const()[name = tensor("op_32003_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32003_cast_fp16 = slice_by_index(begin = var_32003_begin_0, end = var_32003_end_0, end_mask = var_32003_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32003_cast_fp16")]; tensor var_32007_begin_0 = const()[name = tensor("op_32007_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_32007_end_0 = const()[name = tensor("op_32007_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_32007_end_mask_0 = const()[name = tensor("op_32007_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32007_cast_fp16 = slice_by_index(begin = var_32007_begin_0, end = var_32007_end_0, end_mask = var_32007_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32007_cast_fp16")]; tensor var_32011_begin_0 = const()[name = tensor("op_32011_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_32011_end_0 = const()[name = tensor("op_32011_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_32011_end_mask_0 = const()[name = tensor("op_32011_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32011_cast_fp16 = slice_by_index(begin = var_32011_begin_0, end = var_32011_end_0, end_mask = var_32011_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32011_cast_fp16")]; tensor var_32015_begin_0 = const()[name = tensor("op_32015_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_32015_end_0 = const()[name = tensor("op_32015_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_32015_end_mask_0 = const()[name = tensor("op_32015_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32015_cast_fp16 = slice_by_index(begin = var_32015_begin_0, end = var_32015_end_0, end_mask = var_32015_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32015_cast_fp16")]; tensor var_32019_begin_0 = const()[name = tensor("op_32019_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_32019_end_0 = const()[name = tensor("op_32019_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_32019_end_mask_0 = const()[name = tensor("op_32019_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32019_cast_fp16 = slice_by_index(begin = var_32019_begin_0, end = var_32019_end_0, end_mask = var_32019_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32019_cast_fp16")]; tensor var_32023_begin_0 = const()[name = tensor("op_32023_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_32023_end_0 = const()[name = tensor("op_32023_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_32023_end_mask_0 = const()[name = tensor("op_32023_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32023_cast_fp16 = slice_by_index(begin = var_32023_begin_0, end = var_32023_end_0, end_mask = var_32023_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32023_cast_fp16")]; tensor var_32027_begin_0 = const()[name = tensor("op_32027_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_32027_end_0 = const()[name = tensor("op_32027_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_32027_end_mask_0 = const()[name = tensor("op_32027_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32027_cast_fp16 = slice_by_index(begin = var_32027_begin_0, end = var_32027_end_0, end_mask = var_32027_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32027_cast_fp16")]; tensor var_32031_begin_0 = const()[name = tensor("op_32031_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_32031_end_0 = const()[name = tensor("op_32031_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_32031_end_mask_0 = const()[name = tensor("op_32031_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32031_cast_fp16 = slice_by_index(begin = var_32031_begin_0, end = var_32031_end_0, end_mask = var_32031_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32031_cast_fp16")]; tensor var_32035_begin_0 = const()[name = tensor("op_32035_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_32035_end_0 = const()[name = tensor("op_32035_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_32035_end_mask_0 = const()[name = tensor("op_32035_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32035_cast_fp16 = slice_by_index(begin = var_32035_begin_0, end = var_32035_end_0, end_mask = var_32035_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32035_cast_fp16")]; tensor var_32039_begin_0 = const()[name = tensor("op_32039_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_32039_end_0 = const()[name = tensor("op_32039_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_32039_end_mask_0 = const()[name = tensor("op_32039_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32039_cast_fp16 = slice_by_index(begin = var_32039_begin_0, end = var_32039_end_0, end_mask = var_32039_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32039_cast_fp16")]; tensor var_32043_begin_0 = const()[name = tensor("op_32043_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_32043_end_0 = const()[name = tensor("op_32043_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_32043_end_mask_0 = const()[name = tensor("op_32043_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32043_cast_fp16 = slice_by_index(begin = var_32043_begin_0, end = var_32043_end_0, end_mask = var_32043_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32043_cast_fp16")]; tensor var_32047_begin_0 = const()[name = tensor("op_32047_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_32047_end_0 = const()[name = tensor("op_32047_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_32047_end_mask_0 = const()[name = tensor("op_32047_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32047_cast_fp16 = slice_by_index(begin = var_32047_begin_0, end = var_32047_end_0, end_mask = var_32047_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32047_cast_fp16")]; tensor var_32051_begin_0 = const()[name = tensor("op_32051_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_32051_end_0 = const()[name = tensor("op_32051_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_32051_end_mask_0 = const()[name = tensor("op_32051_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32051_cast_fp16 = slice_by_index(begin = var_32051_begin_0, end = var_32051_end_0, end_mask = var_32051_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32051_cast_fp16")]; tensor var_32055_begin_0 = const()[name = tensor("op_32055_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_32055_end_0 = const()[name = tensor("op_32055_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_32055_end_mask_0 = const()[name = tensor("op_32055_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32055_cast_fp16 = slice_by_index(begin = var_32055_begin_0, end = var_32055_end_0, end_mask = var_32055_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32055_cast_fp16")]; tensor var_32059_begin_0 = const()[name = tensor("op_32059_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_32059_end_0 = const()[name = tensor("op_32059_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_32059_end_mask_0 = const()[name = tensor("op_32059_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32059_cast_fp16 = slice_by_index(begin = var_32059_begin_0, end = var_32059_end_0, end_mask = var_32059_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32059_cast_fp16")]; tensor var_32063_begin_0 = const()[name = tensor("op_32063_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_32063_end_0 = const()[name = tensor("op_32063_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_32063_end_mask_0 = const()[name = tensor("op_32063_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32063_cast_fp16 = slice_by_index(begin = var_32063_begin_0, end = var_32063_end_0, end_mask = var_32063_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32063_cast_fp16")]; tensor var_32067_begin_0 = const()[name = tensor("op_32067_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_32067_end_0 = const()[name = tensor("op_32067_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_32067_end_mask_0 = const()[name = tensor("op_32067_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32067_cast_fp16 = slice_by_index(begin = var_32067_begin_0, end = var_32067_end_0, end_mask = var_32067_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32067_cast_fp16")]; tensor var_32071_begin_0 = const()[name = tensor("op_32071_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_32071_end_0 = const()[name = tensor("op_32071_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_32071_end_mask_0 = const()[name = tensor("op_32071_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32071_cast_fp16 = slice_by_index(begin = var_32071_begin_0, end = var_32071_end_0, end_mask = var_32071_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32071_cast_fp16")]; tensor var_32075_begin_0 = const()[name = tensor("op_32075_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_32075_end_0 = const()[name = tensor("op_32075_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_32075_end_mask_0 = const()[name = tensor("op_32075_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32075_cast_fp16 = slice_by_index(begin = var_32075_begin_0, end = var_32075_end_0, end_mask = var_32075_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_32075_cast_fp16")]; tensor var_32084_begin_0 = const()[name = tensor("op_32084_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32084_end_0 = const()[name = tensor("op_32084_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32084_end_mask_0 = const()[name = tensor("op_32084_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32084_cast_fp16 = slice_by_index(begin = var_32084_begin_0, end = var_32084_end_0, end_mask = var_32084_end_mask_0, x = var_31999_cast_fp16)[name = tensor("op_32084_cast_fp16")]; tensor var_32091_begin_0 = const()[name = tensor("op_32091_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32091_end_0 = const()[name = tensor("op_32091_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32091_end_mask_0 = const()[name = tensor("op_32091_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32091_cast_fp16 = slice_by_index(begin = var_32091_begin_0, end = var_32091_end_0, end_mask = var_32091_end_mask_0, x = var_31999_cast_fp16)[name = tensor("op_32091_cast_fp16")]; tensor var_32098_begin_0 = const()[name = tensor("op_32098_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32098_end_0 = const()[name = tensor("op_32098_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32098_end_mask_0 = const()[name = tensor("op_32098_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32098_cast_fp16 = slice_by_index(begin = var_32098_begin_0, end = var_32098_end_0, end_mask = var_32098_end_mask_0, x = var_31999_cast_fp16)[name = tensor("op_32098_cast_fp16")]; tensor var_32105_begin_0 = const()[name = tensor("op_32105_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32105_end_0 = const()[name = tensor("op_32105_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32105_end_mask_0 = const()[name = tensor("op_32105_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32105_cast_fp16 = slice_by_index(begin = var_32105_begin_0, end = var_32105_end_0, end_mask = var_32105_end_mask_0, x = var_31999_cast_fp16)[name = tensor("op_32105_cast_fp16")]; tensor var_32112_begin_0 = const()[name = tensor("op_32112_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32112_end_0 = const()[name = tensor("op_32112_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32112_end_mask_0 = const()[name = tensor("op_32112_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32112_cast_fp16 = slice_by_index(begin = var_32112_begin_0, end = var_32112_end_0, end_mask = var_32112_end_mask_0, x = var_32003_cast_fp16)[name = tensor("op_32112_cast_fp16")]; tensor var_32119_begin_0 = const()[name = tensor("op_32119_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32119_end_0 = const()[name = tensor("op_32119_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32119_end_mask_0 = const()[name = tensor("op_32119_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32119_cast_fp16 = slice_by_index(begin = var_32119_begin_0, end = var_32119_end_0, end_mask = var_32119_end_mask_0, x = var_32003_cast_fp16)[name = tensor("op_32119_cast_fp16")]; tensor var_32126_begin_0 = const()[name = tensor("op_32126_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32126_end_0 = const()[name = tensor("op_32126_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32126_end_mask_0 = const()[name = tensor("op_32126_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32126_cast_fp16 = slice_by_index(begin = var_32126_begin_0, end = var_32126_end_0, end_mask = var_32126_end_mask_0, x = var_32003_cast_fp16)[name = tensor("op_32126_cast_fp16")]; tensor var_32133_begin_0 = const()[name = tensor("op_32133_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32133_end_0 = const()[name = tensor("op_32133_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32133_end_mask_0 = const()[name = tensor("op_32133_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32133_cast_fp16 = slice_by_index(begin = var_32133_begin_0, end = var_32133_end_0, end_mask = var_32133_end_mask_0, x = var_32003_cast_fp16)[name = tensor("op_32133_cast_fp16")]; tensor var_32140_begin_0 = const()[name = tensor("op_32140_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32140_end_0 = const()[name = tensor("op_32140_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32140_end_mask_0 = const()[name = tensor("op_32140_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32140_cast_fp16 = slice_by_index(begin = var_32140_begin_0, end = var_32140_end_0, end_mask = var_32140_end_mask_0, x = var_32007_cast_fp16)[name = tensor("op_32140_cast_fp16")]; tensor var_32147_begin_0 = const()[name = tensor("op_32147_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32147_end_0 = const()[name = tensor("op_32147_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32147_end_mask_0 = const()[name = tensor("op_32147_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32147_cast_fp16 = slice_by_index(begin = var_32147_begin_0, end = var_32147_end_0, end_mask = var_32147_end_mask_0, x = var_32007_cast_fp16)[name = tensor("op_32147_cast_fp16")]; tensor var_32154_begin_0 = const()[name = tensor("op_32154_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32154_end_0 = const()[name = tensor("op_32154_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32154_end_mask_0 = const()[name = tensor("op_32154_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32154_cast_fp16 = slice_by_index(begin = var_32154_begin_0, end = var_32154_end_0, end_mask = var_32154_end_mask_0, x = var_32007_cast_fp16)[name = tensor("op_32154_cast_fp16")]; tensor var_32161_begin_0 = const()[name = tensor("op_32161_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32161_end_0 = const()[name = tensor("op_32161_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32161_end_mask_0 = const()[name = tensor("op_32161_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32161_cast_fp16 = slice_by_index(begin = var_32161_begin_0, end = var_32161_end_0, end_mask = var_32161_end_mask_0, x = var_32007_cast_fp16)[name = tensor("op_32161_cast_fp16")]; tensor var_32168_begin_0 = const()[name = tensor("op_32168_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32168_end_0 = const()[name = tensor("op_32168_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32168_end_mask_0 = const()[name = tensor("op_32168_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32168_cast_fp16 = slice_by_index(begin = var_32168_begin_0, end = var_32168_end_0, end_mask = var_32168_end_mask_0, x = var_32011_cast_fp16)[name = tensor("op_32168_cast_fp16")]; tensor var_32175_begin_0 = const()[name = tensor("op_32175_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32175_end_0 = const()[name = tensor("op_32175_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32175_end_mask_0 = const()[name = tensor("op_32175_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32175_cast_fp16 = slice_by_index(begin = var_32175_begin_0, end = var_32175_end_0, end_mask = var_32175_end_mask_0, x = var_32011_cast_fp16)[name = tensor("op_32175_cast_fp16")]; tensor var_32182_begin_0 = const()[name = tensor("op_32182_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32182_end_0 = const()[name = tensor("op_32182_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32182_end_mask_0 = const()[name = tensor("op_32182_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32182_cast_fp16 = slice_by_index(begin = var_32182_begin_0, end = var_32182_end_0, end_mask = var_32182_end_mask_0, x = var_32011_cast_fp16)[name = tensor("op_32182_cast_fp16")]; tensor var_32189_begin_0 = const()[name = tensor("op_32189_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32189_end_0 = const()[name = tensor("op_32189_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32189_end_mask_0 = const()[name = tensor("op_32189_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32189_cast_fp16 = slice_by_index(begin = var_32189_begin_0, end = var_32189_end_0, end_mask = var_32189_end_mask_0, x = var_32011_cast_fp16)[name = tensor("op_32189_cast_fp16")]; tensor var_32196_begin_0 = const()[name = tensor("op_32196_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32196_end_0 = const()[name = tensor("op_32196_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32196_end_mask_0 = const()[name = tensor("op_32196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32196_cast_fp16 = slice_by_index(begin = var_32196_begin_0, end = var_32196_end_0, end_mask = var_32196_end_mask_0, x = var_32015_cast_fp16)[name = tensor("op_32196_cast_fp16")]; tensor var_32203_begin_0 = const()[name = tensor("op_32203_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32203_end_0 = const()[name = tensor("op_32203_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32203_end_mask_0 = const()[name = tensor("op_32203_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32203_cast_fp16 = slice_by_index(begin = var_32203_begin_0, end = var_32203_end_0, end_mask = var_32203_end_mask_0, x = var_32015_cast_fp16)[name = tensor("op_32203_cast_fp16")]; tensor var_32210_begin_0 = const()[name = tensor("op_32210_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32210_end_0 = const()[name = tensor("op_32210_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32210_end_mask_0 = const()[name = tensor("op_32210_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32210_cast_fp16 = slice_by_index(begin = var_32210_begin_0, end = var_32210_end_0, end_mask = var_32210_end_mask_0, x = var_32015_cast_fp16)[name = tensor("op_32210_cast_fp16")]; tensor var_32217_begin_0 = const()[name = tensor("op_32217_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32217_end_0 = const()[name = tensor("op_32217_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32217_end_mask_0 = const()[name = tensor("op_32217_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32217_cast_fp16 = slice_by_index(begin = var_32217_begin_0, end = var_32217_end_0, end_mask = var_32217_end_mask_0, x = var_32015_cast_fp16)[name = tensor("op_32217_cast_fp16")]; tensor var_32224_begin_0 = const()[name = tensor("op_32224_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32224_end_0 = const()[name = tensor("op_32224_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32224_end_mask_0 = const()[name = tensor("op_32224_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32224_cast_fp16 = slice_by_index(begin = var_32224_begin_0, end = var_32224_end_0, end_mask = var_32224_end_mask_0, x = var_32019_cast_fp16)[name = tensor("op_32224_cast_fp16")]; tensor var_32231_begin_0 = const()[name = tensor("op_32231_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32231_end_0 = const()[name = tensor("op_32231_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32231_end_mask_0 = const()[name = tensor("op_32231_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32231_cast_fp16 = slice_by_index(begin = var_32231_begin_0, end = var_32231_end_0, end_mask = var_32231_end_mask_0, x = var_32019_cast_fp16)[name = tensor("op_32231_cast_fp16")]; tensor var_32238_begin_0 = const()[name = tensor("op_32238_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32238_end_0 = const()[name = tensor("op_32238_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32238_end_mask_0 = const()[name = tensor("op_32238_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32238_cast_fp16 = slice_by_index(begin = var_32238_begin_0, end = var_32238_end_0, end_mask = var_32238_end_mask_0, x = var_32019_cast_fp16)[name = tensor("op_32238_cast_fp16")]; tensor var_32245_begin_0 = const()[name = tensor("op_32245_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32245_end_0 = const()[name = tensor("op_32245_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32245_end_mask_0 = const()[name = tensor("op_32245_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32245_cast_fp16 = slice_by_index(begin = var_32245_begin_0, end = var_32245_end_0, end_mask = var_32245_end_mask_0, x = var_32019_cast_fp16)[name = tensor("op_32245_cast_fp16")]; tensor var_32252_begin_0 = const()[name = tensor("op_32252_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32252_end_0 = const()[name = tensor("op_32252_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32252_end_mask_0 = const()[name = tensor("op_32252_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32252_cast_fp16 = slice_by_index(begin = var_32252_begin_0, end = var_32252_end_0, end_mask = var_32252_end_mask_0, x = var_32023_cast_fp16)[name = tensor("op_32252_cast_fp16")]; tensor var_32259_begin_0 = const()[name = tensor("op_32259_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32259_end_0 = const()[name = tensor("op_32259_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32259_end_mask_0 = const()[name = tensor("op_32259_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32259_cast_fp16 = slice_by_index(begin = var_32259_begin_0, end = var_32259_end_0, end_mask = var_32259_end_mask_0, x = var_32023_cast_fp16)[name = tensor("op_32259_cast_fp16")]; tensor var_32266_begin_0 = const()[name = tensor("op_32266_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32266_end_0 = const()[name = tensor("op_32266_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32266_end_mask_0 = const()[name = tensor("op_32266_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32266_cast_fp16 = slice_by_index(begin = var_32266_begin_0, end = var_32266_end_0, end_mask = var_32266_end_mask_0, x = var_32023_cast_fp16)[name = tensor("op_32266_cast_fp16")]; tensor var_32273_begin_0 = const()[name = tensor("op_32273_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32273_end_0 = const()[name = tensor("op_32273_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32273_end_mask_0 = const()[name = tensor("op_32273_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32273_cast_fp16 = slice_by_index(begin = var_32273_begin_0, end = var_32273_end_0, end_mask = var_32273_end_mask_0, x = var_32023_cast_fp16)[name = tensor("op_32273_cast_fp16")]; tensor var_32280_begin_0 = const()[name = tensor("op_32280_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32280_end_0 = const()[name = tensor("op_32280_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32280_end_mask_0 = const()[name = tensor("op_32280_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32280_cast_fp16 = slice_by_index(begin = var_32280_begin_0, end = var_32280_end_0, end_mask = var_32280_end_mask_0, x = var_32027_cast_fp16)[name = tensor("op_32280_cast_fp16")]; tensor var_32287_begin_0 = const()[name = tensor("op_32287_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32287_end_0 = const()[name = tensor("op_32287_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32287_end_mask_0 = const()[name = tensor("op_32287_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32287_cast_fp16 = slice_by_index(begin = var_32287_begin_0, end = var_32287_end_0, end_mask = var_32287_end_mask_0, x = var_32027_cast_fp16)[name = tensor("op_32287_cast_fp16")]; tensor var_32294_begin_0 = const()[name = tensor("op_32294_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32294_end_0 = const()[name = tensor("op_32294_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32294_end_mask_0 = const()[name = tensor("op_32294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32294_cast_fp16 = slice_by_index(begin = var_32294_begin_0, end = var_32294_end_0, end_mask = var_32294_end_mask_0, x = var_32027_cast_fp16)[name = tensor("op_32294_cast_fp16")]; tensor var_32301_begin_0 = const()[name = tensor("op_32301_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32301_end_0 = const()[name = tensor("op_32301_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32301_end_mask_0 = const()[name = tensor("op_32301_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32301_cast_fp16 = slice_by_index(begin = var_32301_begin_0, end = var_32301_end_0, end_mask = var_32301_end_mask_0, x = var_32027_cast_fp16)[name = tensor("op_32301_cast_fp16")]; tensor var_32308_begin_0 = const()[name = tensor("op_32308_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32308_end_0 = const()[name = tensor("op_32308_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32308_end_mask_0 = const()[name = tensor("op_32308_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32308_cast_fp16 = slice_by_index(begin = var_32308_begin_0, end = var_32308_end_0, end_mask = var_32308_end_mask_0, x = var_32031_cast_fp16)[name = tensor("op_32308_cast_fp16")]; tensor var_32315_begin_0 = const()[name = tensor("op_32315_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32315_end_0 = const()[name = tensor("op_32315_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32315_end_mask_0 = const()[name = tensor("op_32315_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32315_cast_fp16 = slice_by_index(begin = var_32315_begin_0, end = var_32315_end_0, end_mask = var_32315_end_mask_0, x = var_32031_cast_fp16)[name = tensor("op_32315_cast_fp16")]; tensor var_32322_begin_0 = const()[name = tensor("op_32322_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32322_end_0 = const()[name = tensor("op_32322_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32322_end_mask_0 = const()[name = tensor("op_32322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32322_cast_fp16 = slice_by_index(begin = var_32322_begin_0, end = var_32322_end_0, end_mask = var_32322_end_mask_0, x = var_32031_cast_fp16)[name = tensor("op_32322_cast_fp16")]; tensor var_32329_begin_0 = const()[name = tensor("op_32329_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32329_end_0 = const()[name = tensor("op_32329_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32329_end_mask_0 = const()[name = tensor("op_32329_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32329_cast_fp16 = slice_by_index(begin = var_32329_begin_0, end = var_32329_end_0, end_mask = var_32329_end_mask_0, x = var_32031_cast_fp16)[name = tensor("op_32329_cast_fp16")]; tensor var_32336_begin_0 = const()[name = tensor("op_32336_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32336_end_0 = const()[name = tensor("op_32336_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32336_end_mask_0 = const()[name = tensor("op_32336_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32336_cast_fp16 = slice_by_index(begin = var_32336_begin_0, end = var_32336_end_0, end_mask = var_32336_end_mask_0, x = var_32035_cast_fp16)[name = tensor("op_32336_cast_fp16")]; tensor var_32343_begin_0 = const()[name = tensor("op_32343_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32343_end_0 = const()[name = tensor("op_32343_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32343_end_mask_0 = const()[name = tensor("op_32343_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32343_cast_fp16 = slice_by_index(begin = var_32343_begin_0, end = var_32343_end_0, end_mask = var_32343_end_mask_0, x = var_32035_cast_fp16)[name = tensor("op_32343_cast_fp16")]; tensor var_32350_begin_0 = const()[name = tensor("op_32350_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32350_end_0 = const()[name = tensor("op_32350_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32350_end_mask_0 = const()[name = tensor("op_32350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32350_cast_fp16 = slice_by_index(begin = var_32350_begin_0, end = var_32350_end_0, end_mask = var_32350_end_mask_0, x = var_32035_cast_fp16)[name = tensor("op_32350_cast_fp16")]; tensor var_32357_begin_0 = const()[name = tensor("op_32357_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32357_end_0 = const()[name = tensor("op_32357_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32357_end_mask_0 = const()[name = tensor("op_32357_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32357_cast_fp16 = slice_by_index(begin = var_32357_begin_0, end = var_32357_end_0, end_mask = var_32357_end_mask_0, x = var_32035_cast_fp16)[name = tensor("op_32357_cast_fp16")]; tensor var_32364_begin_0 = const()[name = tensor("op_32364_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32364_end_0 = const()[name = tensor("op_32364_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32364_end_mask_0 = const()[name = tensor("op_32364_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32364_cast_fp16 = slice_by_index(begin = var_32364_begin_0, end = var_32364_end_0, end_mask = var_32364_end_mask_0, x = var_32039_cast_fp16)[name = tensor("op_32364_cast_fp16")]; tensor var_32371_begin_0 = const()[name = tensor("op_32371_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32371_end_0 = const()[name = tensor("op_32371_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32371_end_mask_0 = const()[name = tensor("op_32371_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32371_cast_fp16 = slice_by_index(begin = var_32371_begin_0, end = var_32371_end_0, end_mask = var_32371_end_mask_0, x = var_32039_cast_fp16)[name = tensor("op_32371_cast_fp16")]; tensor var_32378_begin_0 = const()[name = tensor("op_32378_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32378_end_0 = const()[name = tensor("op_32378_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32378_end_mask_0 = const()[name = tensor("op_32378_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32378_cast_fp16 = slice_by_index(begin = var_32378_begin_0, end = var_32378_end_0, end_mask = var_32378_end_mask_0, x = var_32039_cast_fp16)[name = tensor("op_32378_cast_fp16")]; tensor var_32385_begin_0 = const()[name = tensor("op_32385_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32385_end_0 = const()[name = tensor("op_32385_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32385_end_mask_0 = const()[name = tensor("op_32385_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32385_cast_fp16 = slice_by_index(begin = var_32385_begin_0, end = var_32385_end_0, end_mask = var_32385_end_mask_0, x = var_32039_cast_fp16)[name = tensor("op_32385_cast_fp16")]; tensor var_32392_begin_0 = const()[name = tensor("op_32392_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32392_end_0 = const()[name = tensor("op_32392_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32392_end_mask_0 = const()[name = tensor("op_32392_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32392_cast_fp16 = slice_by_index(begin = var_32392_begin_0, end = var_32392_end_0, end_mask = var_32392_end_mask_0, x = var_32043_cast_fp16)[name = tensor("op_32392_cast_fp16")]; tensor var_32399_begin_0 = const()[name = tensor("op_32399_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32399_end_0 = const()[name = tensor("op_32399_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32399_end_mask_0 = const()[name = tensor("op_32399_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32399_cast_fp16 = slice_by_index(begin = var_32399_begin_0, end = var_32399_end_0, end_mask = var_32399_end_mask_0, x = var_32043_cast_fp16)[name = tensor("op_32399_cast_fp16")]; tensor var_32406_begin_0 = const()[name = tensor("op_32406_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32406_end_0 = const()[name = tensor("op_32406_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32406_end_mask_0 = const()[name = tensor("op_32406_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32406_cast_fp16 = slice_by_index(begin = var_32406_begin_0, end = var_32406_end_0, end_mask = var_32406_end_mask_0, x = var_32043_cast_fp16)[name = tensor("op_32406_cast_fp16")]; tensor var_32413_begin_0 = const()[name = tensor("op_32413_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32413_end_0 = const()[name = tensor("op_32413_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32413_end_mask_0 = const()[name = tensor("op_32413_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32413_cast_fp16 = slice_by_index(begin = var_32413_begin_0, end = var_32413_end_0, end_mask = var_32413_end_mask_0, x = var_32043_cast_fp16)[name = tensor("op_32413_cast_fp16")]; tensor var_32420_begin_0 = const()[name = tensor("op_32420_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32420_end_0 = const()[name = tensor("op_32420_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32420_end_mask_0 = const()[name = tensor("op_32420_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32420_cast_fp16 = slice_by_index(begin = var_32420_begin_0, end = var_32420_end_0, end_mask = var_32420_end_mask_0, x = var_32047_cast_fp16)[name = tensor("op_32420_cast_fp16")]; tensor var_32427_begin_0 = const()[name = tensor("op_32427_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32427_end_0 = const()[name = tensor("op_32427_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32427_end_mask_0 = const()[name = tensor("op_32427_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32427_cast_fp16 = slice_by_index(begin = var_32427_begin_0, end = var_32427_end_0, end_mask = var_32427_end_mask_0, x = var_32047_cast_fp16)[name = tensor("op_32427_cast_fp16")]; tensor var_32434_begin_0 = const()[name = tensor("op_32434_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32434_end_0 = const()[name = tensor("op_32434_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32434_end_mask_0 = const()[name = tensor("op_32434_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32434_cast_fp16 = slice_by_index(begin = var_32434_begin_0, end = var_32434_end_0, end_mask = var_32434_end_mask_0, x = var_32047_cast_fp16)[name = tensor("op_32434_cast_fp16")]; tensor var_32441_begin_0 = const()[name = tensor("op_32441_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32441_end_0 = const()[name = tensor("op_32441_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32441_end_mask_0 = const()[name = tensor("op_32441_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32441_cast_fp16 = slice_by_index(begin = var_32441_begin_0, end = var_32441_end_0, end_mask = var_32441_end_mask_0, x = var_32047_cast_fp16)[name = tensor("op_32441_cast_fp16")]; tensor var_32448_begin_0 = const()[name = tensor("op_32448_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32448_end_0 = const()[name = tensor("op_32448_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32448_end_mask_0 = const()[name = tensor("op_32448_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32448_cast_fp16 = slice_by_index(begin = var_32448_begin_0, end = var_32448_end_0, end_mask = var_32448_end_mask_0, x = var_32051_cast_fp16)[name = tensor("op_32448_cast_fp16")]; tensor var_32455_begin_0 = const()[name = tensor("op_32455_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32455_end_0 = const()[name = tensor("op_32455_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32455_end_mask_0 = const()[name = tensor("op_32455_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32455_cast_fp16 = slice_by_index(begin = var_32455_begin_0, end = var_32455_end_0, end_mask = var_32455_end_mask_0, x = var_32051_cast_fp16)[name = tensor("op_32455_cast_fp16")]; tensor var_32462_begin_0 = const()[name = tensor("op_32462_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32462_end_0 = const()[name = tensor("op_32462_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32462_end_mask_0 = const()[name = tensor("op_32462_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32462_cast_fp16 = slice_by_index(begin = var_32462_begin_0, end = var_32462_end_0, end_mask = var_32462_end_mask_0, x = var_32051_cast_fp16)[name = tensor("op_32462_cast_fp16")]; tensor var_32469_begin_0 = const()[name = tensor("op_32469_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32469_end_0 = const()[name = tensor("op_32469_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32469_end_mask_0 = const()[name = tensor("op_32469_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32469_cast_fp16 = slice_by_index(begin = var_32469_begin_0, end = var_32469_end_0, end_mask = var_32469_end_mask_0, x = var_32051_cast_fp16)[name = tensor("op_32469_cast_fp16")]; tensor var_32476_begin_0 = const()[name = tensor("op_32476_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32476_end_0 = const()[name = tensor("op_32476_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32476_end_mask_0 = const()[name = tensor("op_32476_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32476_cast_fp16 = slice_by_index(begin = var_32476_begin_0, end = var_32476_end_0, end_mask = var_32476_end_mask_0, x = var_32055_cast_fp16)[name = tensor("op_32476_cast_fp16")]; tensor var_32483_begin_0 = const()[name = tensor("op_32483_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32483_end_0 = const()[name = tensor("op_32483_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32483_end_mask_0 = const()[name = tensor("op_32483_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32483_cast_fp16 = slice_by_index(begin = var_32483_begin_0, end = var_32483_end_0, end_mask = var_32483_end_mask_0, x = var_32055_cast_fp16)[name = tensor("op_32483_cast_fp16")]; tensor var_32490_begin_0 = const()[name = tensor("op_32490_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32490_end_0 = const()[name = tensor("op_32490_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32490_end_mask_0 = const()[name = tensor("op_32490_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32490_cast_fp16 = slice_by_index(begin = var_32490_begin_0, end = var_32490_end_0, end_mask = var_32490_end_mask_0, x = var_32055_cast_fp16)[name = tensor("op_32490_cast_fp16")]; tensor var_32497_begin_0 = const()[name = tensor("op_32497_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32497_end_0 = const()[name = tensor("op_32497_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32497_end_mask_0 = const()[name = tensor("op_32497_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32497_cast_fp16 = slice_by_index(begin = var_32497_begin_0, end = var_32497_end_0, end_mask = var_32497_end_mask_0, x = var_32055_cast_fp16)[name = tensor("op_32497_cast_fp16")]; tensor var_32504_begin_0 = const()[name = tensor("op_32504_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32504_end_0 = const()[name = tensor("op_32504_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32504_end_mask_0 = const()[name = tensor("op_32504_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32504_cast_fp16 = slice_by_index(begin = var_32504_begin_0, end = var_32504_end_0, end_mask = var_32504_end_mask_0, x = var_32059_cast_fp16)[name = tensor("op_32504_cast_fp16")]; tensor var_32511_begin_0 = const()[name = tensor("op_32511_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32511_end_0 = const()[name = tensor("op_32511_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32511_end_mask_0 = const()[name = tensor("op_32511_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32511_cast_fp16 = slice_by_index(begin = var_32511_begin_0, end = var_32511_end_0, end_mask = var_32511_end_mask_0, x = var_32059_cast_fp16)[name = tensor("op_32511_cast_fp16")]; tensor var_32518_begin_0 = const()[name = tensor("op_32518_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32518_end_0 = const()[name = tensor("op_32518_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32518_end_mask_0 = const()[name = tensor("op_32518_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32518_cast_fp16 = slice_by_index(begin = var_32518_begin_0, end = var_32518_end_0, end_mask = var_32518_end_mask_0, x = var_32059_cast_fp16)[name = tensor("op_32518_cast_fp16")]; tensor var_32525_begin_0 = const()[name = tensor("op_32525_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32525_end_0 = const()[name = tensor("op_32525_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32525_end_mask_0 = const()[name = tensor("op_32525_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32525_cast_fp16 = slice_by_index(begin = var_32525_begin_0, end = var_32525_end_0, end_mask = var_32525_end_mask_0, x = var_32059_cast_fp16)[name = tensor("op_32525_cast_fp16")]; tensor var_32532_begin_0 = const()[name = tensor("op_32532_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32532_end_0 = const()[name = tensor("op_32532_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32532_end_mask_0 = const()[name = tensor("op_32532_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32532_cast_fp16 = slice_by_index(begin = var_32532_begin_0, end = var_32532_end_0, end_mask = var_32532_end_mask_0, x = var_32063_cast_fp16)[name = tensor("op_32532_cast_fp16")]; tensor var_32539_begin_0 = const()[name = tensor("op_32539_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32539_end_0 = const()[name = tensor("op_32539_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32539_end_mask_0 = const()[name = tensor("op_32539_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32539_cast_fp16 = slice_by_index(begin = var_32539_begin_0, end = var_32539_end_0, end_mask = var_32539_end_mask_0, x = var_32063_cast_fp16)[name = tensor("op_32539_cast_fp16")]; tensor var_32546_begin_0 = const()[name = tensor("op_32546_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32546_end_0 = const()[name = tensor("op_32546_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32546_end_mask_0 = const()[name = tensor("op_32546_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32546_cast_fp16 = slice_by_index(begin = var_32546_begin_0, end = var_32546_end_0, end_mask = var_32546_end_mask_0, x = var_32063_cast_fp16)[name = tensor("op_32546_cast_fp16")]; tensor var_32553_begin_0 = const()[name = tensor("op_32553_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32553_end_0 = const()[name = tensor("op_32553_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32553_end_mask_0 = const()[name = tensor("op_32553_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32553_cast_fp16 = slice_by_index(begin = var_32553_begin_0, end = var_32553_end_0, end_mask = var_32553_end_mask_0, x = var_32063_cast_fp16)[name = tensor("op_32553_cast_fp16")]; tensor var_32560_begin_0 = const()[name = tensor("op_32560_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32560_end_0 = const()[name = tensor("op_32560_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32560_end_mask_0 = const()[name = tensor("op_32560_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32560_cast_fp16 = slice_by_index(begin = var_32560_begin_0, end = var_32560_end_0, end_mask = var_32560_end_mask_0, x = var_32067_cast_fp16)[name = tensor("op_32560_cast_fp16")]; tensor var_32567_begin_0 = const()[name = tensor("op_32567_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32567_end_0 = const()[name = tensor("op_32567_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32567_end_mask_0 = const()[name = tensor("op_32567_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32567_cast_fp16 = slice_by_index(begin = var_32567_begin_0, end = var_32567_end_0, end_mask = var_32567_end_mask_0, x = var_32067_cast_fp16)[name = tensor("op_32567_cast_fp16")]; tensor var_32574_begin_0 = const()[name = tensor("op_32574_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32574_end_0 = const()[name = tensor("op_32574_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32574_end_mask_0 = const()[name = tensor("op_32574_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32574_cast_fp16 = slice_by_index(begin = var_32574_begin_0, end = var_32574_end_0, end_mask = var_32574_end_mask_0, x = var_32067_cast_fp16)[name = tensor("op_32574_cast_fp16")]; tensor var_32581_begin_0 = const()[name = tensor("op_32581_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32581_end_0 = const()[name = tensor("op_32581_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32581_end_mask_0 = const()[name = tensor("op_32581_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32581_cast_fp16 = slice_by_index(begin = var_32581_begin_0, end = var_32581_end_0, end_mask = var_32581_end_mask_0, x = var_32067_cast_fp16)[name = tensor("op_32581_cast_fp16")]; tensor var_32588_begin_0 = const()[name = tensor("op_32588_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32588_end_0 = const()[name = tensor("op_32588_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32588_end_mask_0 = const()[name = tensor("op_32588_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32588_cast_fp16 = slice_by_index(begin = var_32588_begin_0, end = var_32588_end_0, end_mask = var_32588_end_mask_0, x = var_32071_cast_fp16)[name = tensor("op_32588_cast_fp16")]; tensor var_32595_begin_0 = const()[name = tensor("op_32595_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32595_end_0 = const()[name = tensor("op_32595_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32595_end_mask_0 = const()[name = tensor("op_32595_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32595_cast_fp16 = slice_by_index(begin = var_32595_begin_0, end = var_32595_end_0, end_mask = var_32595_end_mask_0, x = var_32071_cast_fp16)[name = tensor("op_32595_cast_fp16")]; tensor var_32602_begin_0 = const()[name = tensor("op_32602_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32602_end_0 = const()[name = tensor("op_32602_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32602_end_mask_0 = const()[name = tensor("op_32602_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32602_cast_fp16 = slice_by_index(begin = var_32602_begin_0, end = var_32602_end_0, end_mask = var_32602_end_mask_0, x = var_32071_cast_fp16)[name = tensor("op_32602_cast_fp16")]; tensor var_32609_begin_0 = const()[name = tensor("op_32609_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32609_end_0 = const()[name = tensor("op_32609_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32609_end_mask_0 = const()[name = tensor("op_32609_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32609_cast_fp16 = slice_by_index(begin = var_32609_begin_0, end = var_32609_end_0, end_mask = var_32609_end_mask_0, x = var_32071_cast_fp16)[name = tensor("op_32609_cast_fp16")]; tensor var_32616_begin_0 = const()[name = tensor("op_32616_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32616_end_0 = const()[name = tensor("op_32616_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_32616_end_mask_0 = const()[name = tensor("op_32616_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32616_cast_fp16 = slice_by_index(begin = var_32616_begin_0, end = var_32616_end_0, end_mask = var_32616_end_mask_0, x = var_32075_cast_fp16)[name = tensor("op_32616_cast_fp16")]; tensor var_32623_begin_0 = const()[name = tensor("op_32623_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_32623_end_0 = const()[name = tensor("op_32623_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_32623_end_mask_0 = const()[name = tensor("op_32623_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32623_cast_fp16 = slice_by_index(begin = var_32623_begin_0, end = var_32623_end_0, end_mask = var_32623_end_mask_0, x = var_32075_cast_fp16)[name = tensor("op_32623_cast_fp16")]; tensor var_32630_begin_0 = const()[name = tensor("op_32630_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_32630_end_0 = const()[name = tensor("op_32630_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_32630_end_mask_0 = const()[name = tensor("op_32630_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32630_cast_fp16 = slice_by_index(begin = var_32630_begin_0, end = var_32630_end_0, end_mask = var_32630_end_mask_0, x = var_32075_cast_fp16)[name = tensor("op_32630_cast_fp16")]; tensor var_32637_begin_0 = const()[name = tensor("op_32637_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_32637_end_0 = const()[name = tensor("op_32637_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32637_end_mask_0 = const()[name = tensor("op_32637_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32637_cast_fp16 = slice_by_index(begin = var_32637_begin_0, end = var_32637_end_0, end_mask = var_32637_end_mask_0, x = var_32075_cast_fp16)[name = tensor("op_32637_cast_fp16")]; tensor k_41_perm_0 = const()[name = tensor("k_41_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_32642_begin_0 = const()[name = tensor("op_32642_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32642_end_0 = const()[name = tensor("op_32642_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_32642_end_mask_0 = const()[name = tensor("op_32642_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_41_cast_fp16 = transpose(perm = k_41_perm_0, x = key_41_cast_fp16)[name = tensor("transpose_11")]; tensor var_32642_cast_fp16 = slice_by_index(begin = var_32642_begin_0, end = var_32642_end_0, end_mask = var_32642_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32642_cast_fp16")]; tensor var_32646_begin_0 = const()[name = tensor("op_32646_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_32646_end_0 = const()[name = tensor("op_32646_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_32646_end_mask_0 = const()[name = tensor("op_32646_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32646_cast_fp16 = slice_by_index(begin = var_32646_begin_0, end = var_32646_end_0, end_mask = var_32646_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32646_cast_fp16")]; tensor var_32650_begin_0 = const()[name = tensor("op_32650_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_32650_end_0 = const()[name = tensor("op_32650_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_32650_end_mask_0 = const()[name = tensor("op_32650_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32650_cast_fp16 = slice_by_index(begin = var_32650_begin_0, end = var_32650_end_0, end_mask = var_32650_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32650_cast_fp16")]; tensor var_32654_begin_0 = const()[name = tensor("op_32654_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_32654_end_0 = const()[name = tensor("op_32654_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_32654_end_mask_0 = const()[name = tensor("op_32654_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32654_cast_fp16 = slice_by_index(begin = var_32654_begin_0, end = var_32654_end_0, end_mask = var_32654_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32654_cast_fp16")]; tensor var_32658_begin_0 = const()[name = tensor("op_32658_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_32658_end_0 = const()[name = tensor("op_32658_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_32658_end_mask_0 = const()[name = tensor("op_32658_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32658_cast_fp16 = slice_by_index(begin = var_32658_begin_0, end = var_32658_end_0, end_mask = var_32658_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32658_cast_fp16")]; tensor var_32662_begin_0 = const()[name = tensor("op_32662_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_32662_end_0 = const()[name = tensor("op_32662_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_32662_end_mask_0 = const()[name = tensor("op_32662_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32662_cast_fp16 = slice_by_index(begin = var_32662_begin_0, end = var_32662_end_0, end_mask = var_32662_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32662_cast_fp16")]; tensor var_32666_begin_0 = const()[name = tensor("op_32666_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_32666_end_0 = const()[name = tensor("op_32666_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_32666_end_mask_0 = const()[name = tensor("op_32666_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32666_cast_fp16 = slice_by_index(begin = var_32666_begin_0, end = var_32666_end_0, end_mask = var_32666_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32666_cast_fp16")]; tensor var_32670_begin_0 = const()[name = tensor("op_32670_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_32670_end_0 = const()[name = tensor("op_32670_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_32670_end_mask_0 = const()[name = tensor("op_32670_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32670_cast_fp16 = slice_by_index(begin = var_32670_begin_0, end = var_32670_end_0, end_mask = var_32670_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32670_cast_fp16")]; tensor var_32674_begin_0 = const()[name = tensor("op_32674_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_32674_end_0 = const()[name = tensor("op_32674_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_32674_end_mask_0 = const()[name = tensor("op_32674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32674_cast_fp16 = slice_by_index(begin = var_32674_begin_0, end = var_32674_end_0, end_mask = var_32674_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32674_cast_fp16")]; tensor var_32678_begin_0 = const()[name = tensor("op_32678_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_32678_end_0 = const()[name = tensor("op_32678_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_32678_end_mask_0 = const()[name = tensor("op_32678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32678_cast_fp16 = slice_by_index(begin = var_32678_begin_0, end = var_32678_end_0, end_mask = var_32678_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32678_cast_fp16")]; tensor var_32682_begin_0 = const()[name = tensor("op_32682_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_32682_end_0 = const()[name = tensor("op_32682_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_32682_end_mask_0 = const()[name = tensor("op_32682_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32682_cast_fp16 = slice_by_index(begin = var_32682_begin_0, end = var_32682_end_0, end_mask = var_32682_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32682_cast_fp16")]; tensor var_32686_begin_0 = const()[name = tensor("op_32686_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_32686_end_0 = const()[name = tensor("op_32686_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_32686_end_mask_0 = const()[name = tensor("op_32686_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32686_cast_fp16 = slice_by_index(begin = var_32686_begin_0, end = var_32686_end_0, end_mask = var_32686_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32686_cast_fp16")]; tensor var_32690_begin_0 = const()[name = tensor("op_32690_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_32690_end_0 = const()[name = tensor("op_32690_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_32690_end_mask_0 = const()[name = tensor("op_32690_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32690_cast_fp16 = slice_by_index(begin = var_32690_begin_0, end = var_32690_end_0, end_mask = var_32690_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32690_cast_fp16")]; tensor var_32694_begin_0 = const()[name = tensor("op_32694_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_32694_end_0 = const()[name = tensor("op_32694_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_32694_end_mask_0 = const()[name = tensor("op_32694_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32694_cast_fp16 = slice_by_index(begin = var_32694_begin_0, end = var_32694_end_0, end_mask = var_32694_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32694_cast_fp16")]; tensor var_32698_begin_0 = const()[name = tensor("op_32698_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_32698_end_0 = const()[name = tensor("op_32698_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_32698_end_mask_0 = const()[name = tensor("op_32698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32698_cast_fp16 = slice_by_index(begin = var_32698_begin_0, end = var_32698_end_0, end_mask = var_32698_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32698_cast_fp16")]; tensor var_32702_begin_0 = const()[name = tensor("op_32702_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_32702_end_0 = const()[name = tensor("op_32702_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_32702_end_mask_0 = const()[name = tensor("op_32702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32702_cast_fp16 = slice_by_index(begin = var_32702_begin_0, end = var_32702_end_0, end_mask = var_32702_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32702_cast_fp16")]; tensor var_32706_begin_0 = const()[name = tensor("op_32706_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_32706_end_0 = const()[name = tensor("op_32706_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_32706_end_mask_0 = const()[name = tensor("op_32706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32706_cast_fp16 = slice_by_index(begin = var_32706_begin_0, end = var_32706_end_0, end_mask = var_32706_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32706_cast_fp16")]; tensor var_32710_begin_0 = const()[name = tensor("op_32710_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_32710_end_0 = const()[name = tensor("op_32710_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_32710_end_mask_0 = const()[name = tensor("op_32710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32710_cast_fp16 = slice_by_index(begin = var_32710_begin_0, end = var_32710_end_0, end_mask = var_32710_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32710_cast_fp16")]; tensor var_32714_begin_0 = const()[name = tensor("op_32714_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_32714_end_0 = const()[name = tensor("op_32714_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_32714_end_mask_0 = const()[name = tensor("op_32714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32714_cast_fp16 = slice_by_index(begin = var_32714_begin_0, end = var_32714_end_0, end_mask = var_32714_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32714_cast_fp16")]; tensor var_32718_begin_0 = const()[name = tensor("op_32718_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_32718_end_0 = const()[name = tensor("op_32718_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_32718_end_mask_0 = const()[name = tensor("op_32718_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_32718_cast_fp16 = slice_by_index(begin = var_32718_begin_0, end = var_32718_end_0, end_mask = var_32718_end_mask_0, x = k_41_cast_fp16)[name = tensor("op_32718_cast_fp16")]; tensor var_32720_begin_0 = const()[name = tensor("op_32720_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_32720_end_0 = const()[name = tensor("op_32720_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_32720_end_mask_0 = const()[name = tensor("op_32720_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32720_cast_fp16 = slice_by_index(begin = var_32720_begin_0, end = var_32720_end_0, end_mask = var_32720_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32720_cast_fp16")]; tensor var_32724_begin_0 = const()[name = tensor("op_32724_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_32724_end_0 = const()[name = tensor("op_32724_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_32724_end_mask_0 = const()[name = tensor("op_32724_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32724_cast_fp16 = slice_by_index(begin = var_32724_begin_0, end = var_32724_end_0, end_mask = var_32724_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32724_cast_fp16")]; tensor var_32728_begin_0 = const()[name = tensor("op_32728_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_32728_end_0 = const()[name = tensor("op_32728_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_32728_end_mask_0 = const()[name = tensor("op_32728_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32728_cast_fp16 = slice_by_index(begin = var_32728_begin_0, end = var_32728_end_0, end_mask = var_32728_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32728_cast_fp16")]; tensor var_32732_begin_0 = const()[name = tensor("op_32732_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_32732_end_0 = const()[name = tensor("op_32732_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_32732_end_mask_0 = const()[name = tensor("op_32732_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32732_cast_fp16 = slice_by_index(begin = var_32732_begin_0, end = var_32732_end_0, end_mask = var_32732_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32732_cast_fp16")]; tensor var_32736_begin_0 = const()[name = tensor("op_32736_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_32736_end_0 = const()[name = tensor("op_32736_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_32736_end_mask_0 = const()[name = tensor("op_32736_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32736_cast_fp16 = slice_by_index(begin = var_32736_begin_0, end = var_32736_end_0, end_mask = var_32736_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32736_cast_fp16")]; tensor var_32740_begin_0 = const()[name = tensor("op_32740_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_32740_end_0 = const()[name = tensor("op_32740_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_32740_end_mask_0 = const()[name = tensor("op_32740_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32740_cast_fp16 = slice_by_index(begin = var_32740_begin_0, end = var_32740_end_0, end_mask = var_32740_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32740_cast_fp16")]; tensor var_32744_begin_0 = const()[name = tensor("op_32744_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_32744_end_0 = const()[name = tensor("op_32744_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_32744_end_mask_0 = const()[name = tensor("op_32744_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32744_cast_fp16 = slice_by_index(begin = var_32744_begin_0, end = var_32744_end_0, end_mask = var_32744_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32744_cast_fp16")]; tensor var_32748_begin_0 = const()[name = tensor("op_32748_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_32748_end_0 = const()[name = tensor("op_32748_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_32748_end_mask_0 = const()[name = tensor("op_32748_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32748_cast_fp16 = slice_by_index(begin = var_32748_begin_0, end = var_32748_end_0, end_mask = var_32748_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32748_cast_fp16")]; tensor var_32752_begin_0 = const()[name = tensor("op_32752_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_32752_end_0 = const()[name = tensor("op_32752_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_32752_end_mask_0 = const()[name = tensor("op_32752_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32752_cast_fp16 = slice_by_index(begin = var_32752_begin_0, end = var_32752_end_0, end_mask = var_32752_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32752_cast_fp16")]; tensor var_32756_begin_0 = const()[name = tensor("op_32756_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_32756_end_0 = const()[name = tensor("op_32756_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_32756_end_mask_0 = const()[name = tensor("op_32756_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32756_cast_fp16 = slice_by_index(begin = var_32756_begin_0, end = var_32756_end_0, end_mask = var_32756_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32756_cast_fp16")]; tensor var_32760_begin_0 = const()[name = tensor("op_32760_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_32760_end_0 = const()[name = tensor("op_32760_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_32760_end_mask_0 = const()[name = tensor("op_32760_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32760_cast_fp16 = slice_by_index(begin = var_32760_begin_0, end = var_32760_end_0, end_mask = var_32760_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32760_cast_fp16")]; tensor var_32764_begin_0 = const()[name = tensor("op_32764_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_32764_end_0 = const()[name = tensor("op_32764_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_32764_end_mask_0 = const()[name = tensor("op_32764_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32764_cast_fp16 = slice_by_index(begin = var_32764_begin_0, end = var_32764_end_0, end_mask = var_32764_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32764_cast_fp16")]; tensor var_32768_begin_0 = const()[name = tensor("op_32768_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_32768_end_0 = const()[name = tensor("op_32768_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_32768_end_mask_0 = const()[name = tensor("op_32768_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32768_cast_fp16 = slice_by_index(begin = var_32768_begin_0, end = var_32768_end_0, end_mask = var_32768_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32768_cast_fp16")]; tensor var_32772_begin_0 = const()[name = tensor("op_32772_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_32772_end_0 = const()[name = tensor("op_32772_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_32772_end_mask_0 = const()[name = tensor("op_32772_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32772_cast_fp16 = slice_by_index(begin = var_32772_begin_0, end = var_32772_end_0, end_mask = var_32772_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32772_cast_fp16")]; tensor var_32776_begin_0 = const()[name = tensor("op_32776_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_32776_end_0 = const()[name = tensor("op_32776_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_32776_end_mask_0 = const()[name = tensor("op_32776_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32776_cast_fp16 = slice_by_index(begin = var_32776_begin_0, end = var_32776_end_0, end_mask = var_32776_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32776_cast_fp16")]; tensor var_32780_begin_0 = const()[name = tensor("op_32780_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_32780_end_0 = const()[name = tensor("op_32780_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_32780_end_mask_0 = const()[name = tensor("op_32780_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32780_cast_fp16 = slice_by_index(begin = var_32780_begin_0, end = var_32780_end_0, end_mask = var_32780_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32780_cast_fp16")]; tensor var_32784_begin_0 = const()[name = tensor("op_32784_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_32784_end_0 = const()[name = tensor("op_32784_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_32784_end_mask_0 = const()[name = tensor("op_32784_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32784_cast_fp16 = slice_by_index(begin = var_32784_begin_0, end = var_32784_end_0, end_mask = var_32784_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32784_cast_fp16")]; tensor var_32788_begin_0 = const()[name = tensor("op_32788_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_32788_end_0 = const()[name = tensor("op_32788_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_32788_end_mask_0 = const()[name = tensor("op_32788_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32788_cast_fp16 = slice_by_index(begin = var_32788_begin_0, end = var_32788_end_0, end_mask = var_32788_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32788_cast_fp16")]; tensor var_32792_begin_0 = const()[name = tensor("op_32792_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_32792_end_0 = const()[name = tensor("op_32792_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_32792_end_mask_0 = const()[name = tensor("op_32792_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32792_cast_fp16 = slice_by_index(begin = var_32792_begin_0, end = var_32792_end_0, end_mask = var_32792_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32792_cast_fp16")]; tensor var_32796_begin_0 = const()[name = tensor("op_32796_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_32796_end_0 = const()[name = tensor("op_32796_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_32796_end_mask_0 = const()[name = tensor("op_32796_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_32796_cast_fp16 = slice_by_index(begin = var_32796_begin_0, end = var_32796_end_0, end_mask = var_32796_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_32796_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3201_equation_0, values = (var_32642_cast_fp16, var_32084_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3203_equation_0, values = (var_32642_cast_fp16, var_32091_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3205_equation_0, values = (var_32642_cast_fp16, var_32098_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3207_equation_0, values = (var_32642_cast_fp16, var_32105_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3209_equation_0, values = (var_32646_cast_fp16, var_32112_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3211_equation_0, values = (var_32646_cast_fp16, var_32119_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3213_equation_0, values = (var_32646_cast_fp16, var_32126_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3215_equation_0, values = (var_32646_cast_fp16, var_32133_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3217_equation_0, values = (var_32650_cast_fp16, var_32140_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3219_equation_0, values = (var_32650_cast_fp16, var_32147_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3221_equation_0, values = (var_32650_cast_fp16, var_32154_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3223_equation_0, values = (var_32650_cast_fp16, var_32161_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3225_equation_0, values = (var_32654_cast_fp16, var_32168_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3227_equation_0, values = (var_32654_cast_fp16, var_32175_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3229_equation_0, values = (var_32654_cast_fp16, var_32182_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3231_equation_0, values = (var_32654_cast_fp16, var_32189_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3233_equation_0, values = (var_32658_cast_fp16, var_32196_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3235_equation_0, values = (var_32658_cast_fp16, var_32203_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3237_equation_0, values = (var_32658_cast_fp16, var_32210_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3239_equation_0, values = (var_32658_cast_fp16, var_32217_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3241_equation_0, values = (var_32662_cast_fp16, var_32224_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3243_equation_0, values = (var_32662_cast_fp16, var_32231_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3245_equation_0, values = (var_32662_cast_fp16, var_32238_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3247_equation_0, values = (var_32662_cast_fp16, var_32245_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3249_equation_0, values = (var_32666_cast_fp16, var_32252_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3251_equation_0, values = (var_32666_cast_fp16, var_32259_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3253_equation_0, values = (var_32666_cast_fp16, var_32266_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3255_equation_0, values = (var_32666_cast_fp16, var_32273_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3257_equation_0, values = (var_32670_cast_fp16, var_32280_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3259_equation_0, values = (var_32670_cast_fp16, var_32287_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3261_equation_0, values = (var_32670_cast_fp16, var_32294_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3263_equation_0, values = (var_32670_cast_fp16, var_32301_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3265_equation_0, values = (var_32674_cast_fp16, var_32308_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3267_equation_0, values = (var_32674_cast_fp16, var_32315_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3269_equation_0, values = (var_32674_cast_fp16, var_32322_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3271_equation_0, values = (var_32674_cast_fp16, var_32329_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3273_equation_0, values = (var_32678_cast_fp16, var_32336_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3275_equation_0, values = (var_32678_cast_fp16, var_32343_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3277_equation_0, values = (var_32678_cast_fp16, var_32350_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3279_equation_0, values = (var_32678_cast_fp16, var_32357_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3281_equation_0, values = (var_32682_cast_fp16, var_32364_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3283_equation_0, values = (var_32682_cast_fp16, var_32371_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3285_equation_0, values = (var_32682_cast_fp16, var_32378_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3287_equation_0, values = (var_32682_cast_fp16, var_32385_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3287_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3289_equation_0, values = (var_32686_cast_fp16, var_32392_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3291_equation_0, values = (var_32686_cast_fp16, var_32399_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3293_equation_0, values = (var_32686_cast_fp16, var_32406_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3295_equation_0, values = (var_32686_cast_fp16, var_32413_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3295_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3297_equation_0, values = (var_32690_cast_fp16, var_32420_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3299_equation_0, values = (var_32690_cast_fp16, var_32427_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3301_equation_0, values = (var_32690_cast_fp16, var_32434_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3303_equation_0, values = (var_32690_cast_fp16, var_32441_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3305_equation_0, values = (var_32694_cast_fp16, var_32448_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3307_equation_0, values = (var_32694_cast_fp16, var_32455_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3309_equation_0, values = (var_32694_cast_fp16, var_32462_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3311_equation_0, values = (var_32694_cast_fp16, var_32469_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3313_equation_0, values = (var_32698_cast_fp16, var_32476_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3315_equation_0, values = (var_32698_cast_fp16, var_32483_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3317_equation_0, values = (var_32698_cast_fp16, var_32490_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3319_equation_0, values = (var_32698_cast_fp16, var_32497_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3319_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3321_equation_0, values = (var_32702_cast_fp16, var_32504_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3323_equation_0, values = (var_32702_cast_fp16, var_32511_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3325_equation_0, values = (var_32702_cast_fp16, var_32518_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3327_equation_0, values = (var_32702_cast_fp16, var_32525_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3329_equation_0, values = (var_32706_cast_fp16, var_32532_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3331_equation_0, values = (var_32706_cast_fp16, var_32539_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3333_equation_0, values = (var_32706_cast_fp16, var_32546_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3335_equation_0, values = (var_32706_cast_fp16, var_32553_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3337_equation_0, values = (var_32710_cast_fp16, var_32560_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3339_equation_0, values = (var_32710_cast_fp16, var_32567_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3341_equation_0, values = (var_32710_cast_fp16, var_32574_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3343_equation_0, values = (var_32710_cast_fp16, var_32581_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3345_equation_0, values = (var_32714_cast_fp16, var_32588_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3347_equation_0, values = (var_32714_cast_fp16, var_32595_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3349_equation_0, values = (var_32714_cast_fp16, var_32602_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3351_equation_0, values = (var_32714_cast_fp16, var_32609_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3353_equation_0, values = (var_32718_cast_fp16, var_32616_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3355_equation_0, values = (var_32718_cast_fp16, var_32623_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3357_equation_0, values = (var_32718_cast_fp16, var_32630_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3359_equation_0, values = (var_32718_cast_fp16, var_32637_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3359_cast_fp16")]; tensor var_32959_to_fp16 = const()[name = tensor("op_32959_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3201_cast_fp16, y = var_32959_to_fp16)[name = tensor("aw_chunk_3201_cast_fp16")]; tensor var_32961_to_fp16 = const()[name = tensor("op_32961_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3203_cast_fp16, y = var_32961_to_fp16)[name = tensor("aw_chunk_3203_cast_fp16")]; tensor var_32963_to_fp16 = const()[name = tensor("op_32963_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3205_cast_fp16, y = var_32963_to_fp16)[name = tensor("aw_chunk_3205_cast_fp16")]; tensor var_32965_to_fp16 = const()[name = tensor("op_32965_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3207_cast_fp16, y = var_32965_to_fp16)[name = tensor("aw_chunk_3207_cast_fp16")]; tensor var_32967_to_fp16 = const()[name = tensor("op_32967_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3209_cast_fp16, y = var_32967_to_fp16)[name = tensor("aw_chunk_3209_cast_fp16")]; tensor var_32969_to_fp16 = const()[name = tensor("op_32969_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3211_cast_fp16, y = var_32969_to_fp16)[name = tensor("aw_chunk_3211_cast_fp16")]; tensor var_32971_to_fp16 = const()[name = tensor("op_32971_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3213_cast_fp16, y = var_32971_to_fp16)[name = tensor("aw_chunk_3213_cast_fp16")]; tensor var_32973_to_fp16 = const()[name = tensor("op_32973_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3215_cast_fp16, y = var_32973_to_fp16)[name = tensor("aw_chunk_3215_cast_fp16")]; tensor var_32975_to_fp16 = const()[name = tensor("op_32975_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3217_cast_fp16, y = var_32975_to_fp16)[name = tensor("aw_chunk_3217_cast_fp16")]; tensor var_32977_to_fp16 = const()[name = tensor("op_32977_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3219_cast_fp16, y = var_32977_to_fp16)[name = tensor("aw_chunk_3219_cast_fp16")]; tensor var_32979_to_fp16 = const()[name = tensor("op_32979_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3221_cast_fp16, y = var_32979_to_fp16)[name = tensor("aw_chunk_3221_cast_fp16")]; tensor var_32981_to_fp16 = const()[name = tensor("op_32981_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3223_cast_fp16, y = var_32981_to_fp16)[name = tensor("aw_chunk_3223_cast_fp16")]; tensor var_32983_to_fp16 = const()[name = tensor("op_32983_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3225_cast_fp16, y = var_32983_to_fp16)[name = tensor("aw_chunk_3225_cast_fp16")]; tensor var_32985_to_fp16 = const()[name = tensor("op_32985_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3227_cast_fp16, y = var_32985_to_fp16)[name = tensor("aw_chunk_3227_cast_fp16")]; tensor var_32987_to_fp16 = const()[name = tensor("op_32987_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3229_cast_fp16, y = var_32987_to_fp16)[name = tensor("aw_chunk_3229_cast_fp16")]; tensor var_32989_to_fp16 = const()[name = tensor("op_32989_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3231_cast_fp16, y = var_32989_to_fp16)[name = tensor("aw_chunk_3231_cast_fp16")]; tensor var_32991_to_fp16 = const()[name = tensor("op_32991_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3233_cast_fp16, y = var_32991_to_fp16)[name = tensor("aw_chunk_3233_cast_fp16")]; tensor var_32993_to_fp16 = const()[name = tensor("op_32993_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3235_cast_fp16, y = var_32993_to_fp16)[name = tensor("aw_chunk_3235_cast_fp16")]; tensor var_32995_to_fp16 = const()[name = tensor("op_32995_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3237_cast_fp16, y = var_32995_to_fp16)[name = tensor("aw_chunk_3237_cast_fp16")]; tensor var_32997_to_fp16 = const()[name = tensor("op_32997_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3239_cast_fp16, y = var_32997_to_fp16)[name = tensor("aw_chunk_3239_cast_fp16")]; tensor var_32999_to_fp16 = const()[name = tensor("op_32999_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3241_cast_fp16, y = var_32999_to_fp16)[name = tensor("aw_chunk_3241_cast_fp16")]; tensor var_33001_to_fp16 = const()[name = tensor("op_33001_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3243_cast_fp16, y = var_33001_to_fp16)[name = tensor("aw_chunk_3243_cast_fp16")]; tensor var_33003_to_fp16 = const()[name = tensor("op_33003_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3245_cast_fp16, y = var_33003_to_fp16)[name = tensor("aw_chunk_3245_cast_fp16")]; tensor var_33005_to_fp16 = const()[name = tensor("op_33005_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3247_cast_fp16, y = var_33005_to_fp16)[name = tensor("aw_chunk_3247_cast_fp16")]; tensor var_33007_to_fp16 = const()[name = tensor("op_33007_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3249_cast_fp16, y = var_33007_to_fp16)[name = tensor("aw_chunk_3249_cast_fp16")]; tensor var_33009_to_fp16 = const()[name = tensor("op_33009_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3251_cast_fp16, y = var_33009_to_fp16)[name = tensor("aw_chunk_3251_cast_fp16")]; tensor var_33011_to_fp16 = const()[name = tensor("op_33011_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3253_cast_fp16, y = var_33011_to_fp16)[name = tensor("aw_chunk_3253_cast_fp16")]; tensor var_33013_to_fp16 = const()[name = tensor("op_33013_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3255_cast_fp16, y = var_33013_to_fp16)[name = tensor("aw_chunk_3255_cast_fp16")]; tensor var_33015_to_fp16 = const()[name = tensor("op_33015_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3257_cast_fp16, y = var_33015_to_fp16)[name = tensor("aw_chunk_3257_cast_fp16")]; tensor var_33017_to_fp16 = const()[name = tensor("op_33017_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3259_cast_fp16, y = var_33017_to_fp16)[name = tensor("aw_chunk_3259_cast_fp16")]; tensor var_33019_to_fp16 = const()[name = tensor("op_33019_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3261_cast_fp16, y = var_33019_to_fp16)[name = tensor("aw_chunk_3261_cast_fp16")]; tensor var_33021_to_fp16 = const()[name = tensor("op_33021_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3263_cast_fp16, y = var_33021_to_fp16)[name = tensor("aw_chunk_3263_cast_fp16")]; tensor var_33023_to_fp16 = const()[name = tensor("op_33023_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3265_cast_fp16, y = var_33023_to_fp16)[name = tensor("aw_chunk_3265_cast_fp16")]; tensor var_33025_to_fp16 = const()[name = tensor("op_33025_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3267_cast_fp16, y = var_33025_to_fp16)[name = tensor("aw_chunk_3267_cast_fp16")]; tensor var_33027_to_fp16 = const()[name = tensor("op_33027_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3269_cast_fp16, y = var_33027_to_fp16)[name = tensor("aw_chunk_3269_cast_fp16")]; tensor var_33029_to_fp16 = const()[name = tensor("op_33029_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3271_cast_fp16, y = var_33029_to_fp16)[name = tensor("aw_chunk_3271_cast_fp16")]; tensor var_33031_to_fp16 = const()[name = tensor("op_33031_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3273_cast_fp16, y = var_33031_to_fp16)[name = tensor("aw_chunk_3273_cast_fp16")]; tensor var_33033_to_fp16 = const()[name = tensor("op_33033_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3275_cast_fp16, y = var_33033_to_fp16)[name = tensor("aw_chunk_3275_cast_fp16")]; tensor var_33035_to_fp16 = const()[name = tensor("op_33035_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3277_cast_fp16, y = var_33035_to_fp16)[name = tensor("aw_chunk_3277_cast_fp16")]; tensor var_33037_to_fp16 = const()[name = tensor("op_33037_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3279_cast_fp16, y = var_33037_to_fp16)[name = tensor("aw_chunk_3279_cast_fp16")]; tensor var_33039_to_fp16 = const()[name = tensor("op_33039_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3281_cast_fp16, y = var_33039_to_fp16)[name = tensor("aw_chunk_3281_cast_fp16")]; tensor var_33041_to_fp16 = const()[name = tensor("op_33041_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3283_cast_fp16, y = var_33041_to_fp16)[name = tensor("aw_chunk_3283_cast_fp16")]; tensor var_33043_to_fp16 = const()[name = tensor("op_33043_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3285_cast_fp16, y = var_33043_to_fp16)[name = tensor("aw_chunk_3285_cast_fp16")]; tensor var_33045_to_fp16 = const()[name = tensor("op_33045_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3287_cast_fp16, y = var_33045_to_fp16)[name = tensor("aw_chunk_3287_cast_fp16")]; tensor var_33047_to_fp16 = const()[name = tensor("op_33047_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3289_cast_fp16, y = var_33047_to_fp16)[name = tensor("aw_chunk_3289_cast_fp16")]; tensor var_33049_to_fp16 = const()[name = tensor("op_33049_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3291_cast_fp16, y = var_33049_to_fp16)[name = tensor("aw_chunk_3291_cast_fp16")]; tensor var_33051_to_fp16 = const()[name = tensor("op_33051_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3293_cast_fp16, y = var_33051_to_fp16)[name = tensor("aw_chunk_3293_cast_fp16")]; tensor var_33053_to_fp16 = const()[name = tensor("op_33053_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3295_cast_fp16, y = var_33053_to_fp16)[name = tensor("aw_chunk_3295_cast_fp16")]; tensor var_33055_to_fp16 = const()[name = tensor("op_33055_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3297_cast_fp16, y = var_33055_to_fp16)[name = tensor("aw_chunk_3297_cast_fp16")]; tensor var_33057_to_fp16 = const()[name = tensor("op_33057_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3299_cast_fp16, y = var_33057_to_fp16)[name = tensor("aw_chunk_3299_cast_fp16")]; tensor var_33059_to_fp16 = const()[name = tensor("op_33059_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3301_cast_fp16, y = var_33059_to_fp16)[name = tensor("aw_chunk_3301_cast_fp16")]; tensor var_33061_to_fp16 = const()[name = tensor("op_33061_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3303_cast_fp16, y = var_33061_to_fp16)[name = tensor("aw_chunk_3303_cast_fp16")]; tensor var_33063_to_fp16 = const()[name = tensor("op_33063_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3305_cast_fp16, y = var_33063_to_fp16)[name = tensor("aw_chunk_3305_cast_fp16")]; tensor var_33065_to_fp16 = const()[name = tensor("op_33065_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3307_cast_fp16, y = var_33065_to_fp16)[name = tensor("aw_chunk_3307_cast_fp16")]; tensor var_33067_to_fp16 = const()[name = tensor("op_33067_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3309_cast_fp16, y = var_33067_to_fp16)[name = tensor("aw_chunk_3309_cast_fp16")]; tensor var_33069_to_fp16 = const()[name = tensor("op_33069_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3311_cast_fp16, y = var_33069_to_fp16)[name = tensor("aw_chunk_3311_cast_fp16")]; tensor var_33071_to_fp16 = const()[name = tensor("op_33071_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3313_cast_fp16, y = var_33071_to_fp16)[name = tensor("aw_chunk_3313_cast_fp16")]; tensor var_33073_to_fp16 = const()[name = tensor("op_33073_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3315_cast_fp16, y = var_33073_to_fp16)[name = tensor("aw_chunk_3315_cast_fp16")]; tensor var_33075_to_fp16 = const()[name = tensor("op_33075_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3317_cast_fp16, y = var_33075_to_fp16)[name = tensor("aw_chunk_3317_cast_fp16")]; tensor var_33077_to_fp16 = const()[name = tensor("op_33077_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3319_cast_fp16, y = var_33077_to_fp16)[name = tensor("aw_chunk_3319_cast_fp16")]; tensor var_33079_to_fp16 = const()[name = tensor("op_33079_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3321_cast_fp16, y = var_33079_to_fp16)[name = tensor("aw_chunk_3321_cast_fp16")]; tensor var_33081_to_fp16 = const()[name = tensor("op_33081_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3323_cast_fp16, y = var_33081_to_fp16)[name = tensor("aw_chunk_3323_cast_fp16")]; tensor var_33083_to_fp16 = const()[name = tensor("op_33083_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3325_cast_fp16, y = var_33083_to_fp16)[name = tensor("aw_chunk_3325_cast_fp16")]; tensor var_33085_to_fp16 = const()[name = tensor("op_33085_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3327_cast_fp16, y = var_33085_to_fp16)[name = tensor("aw_chunk_3327_cast_fp16")]; tensor var_33087_to_fp16 = const()[name = tensor("op_33087_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3329_cast_fp16, y = var_33087_to_fp16)[name = tensor("aw_chunk_3329_cast_fp16")]; tensor var_33089_to_fp16 = const()[name = tensor("op_33089_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3331_cast_fp16, y = var_33089_to_fp16)[name = tensor("aw_chunk_3331_cast_fp16")]; tensor var_33091_to_fp16 = const()[name = tensor("op_33091_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3333_cast_fp16, y = var_33091_to_fp16)[name = tensor("aw_chunk_3333_cast_fp16")]; tensor var_33093_to_fp16 = const()[name = tensor("op_33093_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3335_cast_fp16, y = var_33093_to_fp16)[name = tensor("aw_chunk_3335_cast_fp16")]; tensor var_33095_to_fp16 = const()[name = tensor("op_33095_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3337_cast_fp16, y = var_33095_to_fp16)[name = tensor("aw_chunk_3337_cast_fp16")]; tensor var_33097_to_fp16 = const()[name = tensor("op_33097_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3339_cast_fp16, y = var_33097_to_fp16)[name = tensor("aw_chunk_3339_cast_fp16")]; tensor var_33099_to_fp16 = const()[name = tensor("op_33099_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3341_cast_fp16, y = var_33099_to_fp16)[name = tensor("aw_chunk_3341_cast_fp16")]; tensor var_33101_to_fp16 = const()[name = tensor("op_33101_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3343_cast_fp16, y = var_33101_to_fp16)[name = tensor("aw_chunk_3343_cast_fp16")]; tensor var_33103_to_fp16 = const()[name = tensor("op_33103_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3345_cast_fp16, y = var_33103_to_fp16)[name = tensor("aw_chunk_3345_cast_fp16")]; tensor var_33105_to_fp16 = const()[name = tensor("op_33105_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3347_cast_fp16, y = var_33105_to_fp16)[name = tensor("aw_chunk_3347_cast_fp16")]; tensor var_33107_to_fp16 = const()[name = tensor("op_33107_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3349_cast_fp16, y = var_33107_to_fp16)[name = tensor("aw_chunk_3349_cast_fp16")]; tensor var_33109_to_fp16 = const()[name = tensor("op_33109_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3351_cast_fp16, y = var_33109_to_fp16)[name = tensor("aw_chunk_3351_cast_fp16")]; tensor var_33111_to_fp16 = const()[name = tensor("op_33111_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3353_cast_fp16, y = var_33111_to_fp16)[name = tensor("aw_chunk_3353_cast_fp16")]; tensor var_33113_to_fp16 = const()[name = tensor("op_33113_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3355_cast_fp16, y = var_33113_to_fp16)[name = tensor("aw_chunk_3355_cast_fp16")]; tensor var_33115_to_fp16 = const()[name = tensor("op_33115_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3357_cast_fp16, y = var_33115_to_fp16)[name = tensor("aw_chunk_3357_cast_fp16")]; tensor var_33117_to_fp16 = const()[name = tensor("op_33117_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3359_cast_fp16, y = var_33117_to_fp16)[name = tensor("aw_chunk_3359_cast_fp16")]; tensor var_33119_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3201_cast_fp16)[name = tensor("op_33119_cast_fp16")]; tensor var_33120_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3203_cast_fp16)[name = tensor("op_33120_cast_fp16")]; tensor var_33121_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3205_cast_fp16)[name = tensor("op_33121_cast_fp16")]; tensor var_33122_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3207_cast_fp16)[name = tensor("op_33122_cast_fp16")]; tensor var_33123_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3209_cast_fp16)[name = tensor("op_33123_cast_fp16")]; tensor var_33124_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3211_cast_fp16)[name = tensor("op_33124_cast_fp16")]; tensor var_33125_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3213_cast_fp16)[name = tensor("op_33125_cast_fp16")]; tensor var_33126_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3215_cast_fp16)[name = tensor("op_33126_cast_fp16")]; tensor var_33127_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3217_cast_fp16)[name = tensor("op_33127_cast_fp16")]; tensor var_33128_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3219_cast_fp16)[name = tensor("op_33128_cast_fp16")]; tensor var_33129_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3221_cast_fp16)[name = tensor("op_33129_cast_fp16")]; tensor var_33130_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3223_cast_fp16)[name = tensor("op_33130_cast_fp16")]; tensor var_33131_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3225_cast_fp16)[name = tensor("op_33131_cast_fp16")]; tensor var_33132_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3227_cast_fp16)[name = tensor("op_33132_cast_fp16")]; tensor var_33133_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3229_cast_fp16)[name = tensor("op_33133_cast_fp16")]; tensor var_33134_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3231_cast_fp16)[name = tensor("op_33134_cast_fp16")]; tensor var_33135_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3233_cast_fp16)[name = tensor("op_33135_cast_fp16")]; tensor var_33136_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3235_cast_fp16)[name = tensor("op_33136_cast_fp16")]; tensor var_33137_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3237_cast_fp16)[name = tensor("op_33137_cast_fp16")]; tensor var_33138_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3239_cast_fp16)[name = tensor("op_33138_cast_fp16")]; tensor var_33139_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3241_cast_fp16)[name = tensor("op_33139_cast_fp16")]; tensor var_33140_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3243_cast_fp16)[name = tensor("op_33140_cast_fp16")]; tensor var_33141_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3245_cast_fp16)[name = tensor("op_33141_cast_fp16")]; tensor var_33142_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3247_cast_fp16)[name = tensor("op_33142_cast_fp16")]; tensor var_33143_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3249_cast_fp16)[name = tensor("op_33143_cast_fp16")]; tensor var_33144_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3251_cast_fp16)[name = tensor("op_33144_cast_fp16")]; tensor var_33145_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3253_cast_fp16)[name = tensor("op_33145_cast_fp16")]; tensor var_33146_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3255_cast_fp16)[name = tensor("op_33146_cast_fp16")]; tensor var_33147_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3257_cast_fp16)[name = tensor("op_33147_cast_fp16")]; tensor var_33148_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3259_cast_fp16)[name = tensor("op_33148_cast_fp16")]; tensor var_33149_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3261_cast_fp16)[name = tensor("op_33149_cast_fp16")]; tensor var_33150_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3263_cast_fp16)[name = tensor("op_33150_cast_fp16")]; tensor var_33151_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3265_cast_fp16)[name = tensor("op_33151_cast_fp16")]; tensor var_33152_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3267_cast_fp16)[name = tensor("op_33152_cast_fp16")]; tensor var_33153_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3269_cast_fp16)[name = tensor("op_33153_cast_fp16")]; tensor var_33154_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3271_cast_fp16)[name = tensor("op_33154_cast_fp16")]; tensor var_33155_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3273_cast_fp16)[name = tensor("op_33155_cast_fp16")]; tensor var_33156_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3275_cast_fp16)[name = tensor("op_33156_cast_fp16")]; tensor var_33157_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3277_cast_fp16)[name = tensor("op_33157_cast_fp16")]; tensor var_33158_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3279_cast_fp16)[name = tensor("op_33158_cast_fp16")]; tensor var_33159_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3281_cast_fp16)[name = tensor("op_33159_cast_fp16")]; tensor var_33160_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3283_cast_fp16)[name = tensor("op_33160_cast_fp16")]; tensor var_33161_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3285_cast_fp16)[name = tensor("op_33161_cast_fp16")]; tensor var_33162_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3287_cast_fp16)[name = tensor("op_33162_cast_fp16")]; tensor var_33163_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3289_cast_fp16)[name = tensor("op_33163_cast_fp16")]; tensor var_33164_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3291_cast_fp16)[name = tensor("op_33164_cast_fp16")]; tensor var_33165_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3293_cast_fp16)[name = tensor("op_33165_cast_fp16")]; tensor var_33166_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3295_cast_fp16)[name = tensor("op_33166_cast_fp16")]; tensor var_33167_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3297_cast_fp16)[name = tensor("op_33167_cast_fp16")]; tensor var_33168_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3299_cast_fp16)[name = tensor("op_33168_cast_fp16")]; tensor var_33169_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3301_cast_fp16)[name = tensor("op_33169_cast_fp16")]; tensor var_33170_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3303_cast_fp16)[name = tensor("op_33170_cast_fp16")]; tensor var_33171_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3305_cast_fp16)[name = tensor("op_33171_cast_fp16")]; tensor var_33172_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3307_cast_fp16)[name = tensor("op_33172_cast_fp16")]; tensor var_33173_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3309_cast_fp16)[name = tensor("op_33173_cast_fp16")]; tensor var_33174_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3311_cast_fp16)[name = tensor("op_33174_cast_fp16")]; tensor var_33175_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3313_cast_fp16)[name = tensor("op_33175_cast_fp16")]; tensor var_33176_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3315_cast_fp16)[name = tensor("op_33176_cast_fp16")]; tensor var_33177_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3317_cast_fp16)[name = tensor("op_33177_cast_fp16")]; tensor var_33178_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3319_cast_fp16)[name = tensor("op_33178_cast_fp16")]; tensor var_33179_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3321_cast_fp16)[name = tensor("op_33179_cast_fp16")]; tensor var_33180_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3323_cast_fp16)[name = tensor("op_33180_cast_fp16")]; tensor var_33181_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3325_cast_fp16)[name = tensor("op_33181_cast_fp16")]; tensor var_33182_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3327_cast_fp16)[name = tensor("op_33182_cast_fp16")]; tensor var_33183_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3329_cast_fp16)[name = tensor("op_33183_cast_fp16")]; tensor var_33184_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3331_cast_fp16)[name = tensor("op_33184_cast_fp16")]; tensor var_33185_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3333_cast_fp16)[name = tensor("op_33185_cast_fp16")]; tensor var_33186_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3335_cast_fp16)[name = tensor("op_33186_cast_fp16")]; tensor var_33187_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3337_cast_fp16)[name = tensor("op_33187_cast_fp16")]; tensor var_33188_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3339_cast_fp16)[name = tensor("op_33188_cast_fp16")]; tensor var_33189_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3341_cast_fp16)[name = tensor("op_33189_cast_fp16")]; tensor var_33190_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3343_cast_fp16)[name = tensor("op_33190_cast_fp16")]; tensor var_33191_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3345_cast_fp16)[name = tensor("op_33191_cast_fp16")]; tensor var_33192_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3347_cast_fp16)[name = tensor("op_33192_cast_fp16")]; tensor var_33193_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3349_cast_fp16)[name = tensor("op_33193_cast_fp16")]; tensor var_33194_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3351_cast_fp16)[name = tensor("op_33194_cast_fp16")]; tensor var_33195_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3353_cast_fp16)[name = tensor("op_33195_cast_fp16")]; tensor var_33196_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3355_cast_fp16)[name = tensor("op_33196_cast_fp16")]; tensor var_33197_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3357_cast_fp16)[name = tensor("op_33197_cast_fp16")]; tensor var_33198_cast_fp16 = softmax(axis = var_31917, x = aw_chunk_3359_cast_fp16)[name = tensor("op_33198_cast_fp16")]; tensor var_33200_equation_0 = const()[name = tensor("op_33200_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33200_cast_fp16 = einsum(equation = var_33200_equation_0, values = (var_32720_cast_fp16, var_33119_cast_fp16))[name = tensor("op_33200_cast_fp16")]; tensor var_33202_equation_0 = const()[name = tensor("op_33202_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33202_cast_fp16 = einsum(equation = var_33202_equation_0, values = (var_32720_cast_fp16, var_33120_cast_fp16))[name = tensor("op_33202_cast_fp16")]; tensor var_33204_equation_0 = const()[name = tensor("op_33204_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33204_cast_fp16 = einsum(equation = var_33204_equation_0, values = (var_32720_cast_fp16, var_33121_cast_fp16))[name = tensor("op_33204_cast_fp16")]; tensor var_33206_equation_0 = const()[name = tensor("op_33206_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33206_cast_fp16 = einsum(equation = var_33206_equation_0, values = (var_32720_cast_fp16, var_33122_cast_fp16))[name = tensor("op_33206_cast_fp16")]; tensor var_33208_equation_0 = const()[name = tensor("op_33208_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33208_cast_fp16 = einsum(equation = var_33208_equation_0, values = (var_32724_cast_fp16, var_33123_cast_fp16))[name = tensor("op_33208_cast_fp16")]; tensor var_33210_equation_0 = const()[name = tensor("op_33210_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33210_cast_fp16 = einsum(equation = var_33210_equation_0, values = (var_32724_cast_fp16, var_33124_cast_fp16))[name = tensor("op_33210_cast_fp16")]; tensor var_33212_equation_0 = const()[name = tensor("op_33212_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33212_cast_fp16 = einsum(equation = var_33212_equation_0, values = (var_32724_cast_fp16, var_33125_cast_fp16))[name = tensor("op_33212_cast_fp16")]; tensor var_33214_equation_0 = const()[name = tensor("op_33214_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33214_cast_fp16 = einsum(equation = var_33214_equation_0, values = (var_32724_cast_fp16, var_33126_cast_fp16))[name = tensor("op_33214_cast_fp16")]; tensor var_33216_equation_0 = const()[name = tensor("op_33216_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33216_cast_fp16 = einsum(equation = var_33216_equation_0, values = (var_32728_cast_fp16, var_33127_cast_fp16))[name = tensor("op_33216_cast_fp16")]; tensor var_33218_equation_0 = const()[name = tensor("op_33218_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33218_cast_fp16 = einsum(equation = var_33218_equation_0, values = (var_32728_cast_fp16, var_33128_cast_fp16))[name = tensor("op_33218_cast_fp16")]; tensor var_33220_equation_0 = const()[name = tensor("op_33220_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33220_cast_fp16 = einsum(equation = var_33220_equation_0, values = (var_32728_cast_fp16, var_33129_cast_fp16))[name = tensor("op_33220_cast_fp16")]; tensor var_33222_equation_0 = const()[name = tensor("op_33222_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33222_cast_fp16 = einsum(equation = var_33222_equation_0, values = (var_32728_cast_fp16, var_33130_cast_fp16))[name = tensor("op_33222_cast_fp16")]; tensor var_33224_equation_0 = const()[name = tensor("op_33224_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33224_cast_fp16 = einsum(equation = var_33224_equation_0, values = (var_32732_cast_fp16, var_33131_cast_fp16))[name = tensor("op_33224_cast_fp16")]; tensor var_33226_equation_0 = const()[name = tensor("op_33226_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33226_cast_fp16 = einsum(equation = var_33226_equation_0, values = (var_32732_cast_fp16, var_33132_cast_fp16))[name = tensor("op_33226_cast_fp16")]; tensor var_33228_equation_0 = const()[name = tensor("op_33228_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33228_cast_fp16 = einsum(equation = var_33228_equation_0, values = (var_32732_cast_fp16, var_33133_cast_fp16))[name = tensor("op_33228_cast_fp16")]; tensor var_33230_equation_0 = const()[name = tensor("op_33230_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33230_cast_fp16 = einsum(equation = var_33230_equation_0, values = (var_32732_cast_fp16, var_33134_cast_fp16))[name = tensor("op_33230_cast_fp16")]; tensor var_33232_equation_0 = const()[name = tensor("op_33232_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33232_cast_fp16 = einsum(equation = var_33232_equation_0, values = (var_32736_cast_fp16, var_33135_cast_fp16))[name = tensor("op_33232_cast_fp16")]; tensor var_33234_equation_0 = const()[name = tensor("op_33234_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33234_cast_fp16 = einsum(equation = var_33234_equation_0, values = (var_32736_cast_fp16, var_33136_cast_fp16))[name = tensor("op_33234_cast_fp16")]; tensor var_33236_equation_0 = const()[name = tensor("op_33236_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33236_cast_fp16 = einsum(equation = var_33236_equation_0, values = (var_32736_cast_fp16, var_33137_cast_fp16))[name = tensor("op_33236_cast_fp16")]; tensor var_33238_equation_0 = const()[name = tensor("op_33238_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33238_cast_fp16 = einsum(equation = var_33238_equation_0, values = (var_32736_cast_fp16, var_33138_cast_fp16))[name = tensor("op_33238_cast_fp16")]; tensor var_33240_equation_0 = const()[name = tensor("op_33240_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33240_cast_fp16 = einsum(equation = var_33240_equation_0, values = (var_32740_cast_fp16, var_33139_cast_fp16))[name = tensor("op_33240_cast_fp16")]; tensor var_33242_equation_0 = const()[name = tensor("op_33242_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33242_cast_fp16 = einsum(equation = var_33242_equation_0, values = (var_32740_cast_fp16, var_33140_cast_fp16))[name = tensor("op_33242_cast_fp16")]; tensor var_33244_equation_0 = const()[name = tensor("op_33244_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33244_cast_fp16 = einsum(equation = var_33244_equation_0, values = (var_32740_cast_fp16, var_33141_cast_fp16))[name = tensor("op_33244_cast_fp16")]; tensor var_33246_equation_0 = const()[name = tensor("op_33246_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33246_cast_fp16 = einsum(equation = var_33246_equation_0, values = (var_32740_cast_fp16, var_33142_cast_fp16))[name = tensor("op_33246_cast_fp16")]; tensor var_33248_equation_0 = const()[name = tensor("op_33248_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33248_cast_fp16 = einsum(equation = var_33248_equation_0, values = (var_32744_cast_fp16, var_33143_cast_fp16))[name = tensor("op_33248_cast_fp16")]; tensor var_33250_equation_0 = const()[name = tensor("op_33250_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33250_cast_fp16 = einsum(equation = var_33250_equation_0, values = (var_32744_cast_fp16, var_33144_cast_fp16))[name = tensor("op_33250_cast_fp16")]; tensor var_33252_equation_0 = const()[name = tensor("op_33252_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33252_cast_fp16 = einsum(equation = var_33252_equation_0, values = (var_32744_cast_fp16, var_33145_cast_fp16))[name = tensor("op_33252_cast_fp16")]; tensor var_33254_equation_0 = const()[name = tensor("op_33254_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33254_cast_fp16 = einsum(equation = var_33254_equation_0, values = (var_32744_cast_fp16, var_33146_cast_fp16))[name = tensor("op_33254_cast_fp16")]; tensor var_33256_equation_0 = const()[name = tensor("op_33256_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33256_cast_fp16 = einsum(equation = var_33256_equation_0, values = (var_32748_cast_fp16, var_33147_cast_fp16))[name = tensor("op_33256_cast_fp16")]; tensor var_33258_equation_0 = const()[name = tensor("op_33258_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33258_cast_fp16 = einsum(equation = var_33258_equation_0, values = (var_32748_cast_fp16, var_33148_cast_fp16))[name = tensor("op_33258_cast_fp16")]; tensor var_33260_equation_0 = const()[name = tensor("op_33260_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33260_cast_fp16 = einsum(equation = var_33260_equation_0, values = (var_32748_cast_fp16, var_33149_cast_fp16))[name = tensor("op_33260_cast_fp16")]; tensor var_33262_equation_0 = const()[name = tensor("op_33262_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33262_cast_fp16 = einsum(equation = var_33262_equation_0, values = (var_32748_cast_fp16, var_33150_cast_fp16))[name = tensor("op_33262_cast_fp16")]; tensor var_33264_equation_0 = const()[name = tensor("op_33264_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33264_cast_fp16 = einsum(equation = var_33264_equation_0, values = (var_32752_cast_fp16, var_33151_cast_fp16))[name = tensor("op_33264_cast_fp16")]; tensor var_33266_equation_0 = const()[name = tensor("op_33266_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33266_cast_fp16 = einsum(equation = var_33266_equation_0, values = (var_32752_cast_fp16, var_33152_cast_fp16))[name = tensor("op_33266_cast_fp16")]; tensor var_33268_equation_0 = const()[name = tensor("op_33268_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33268_cast_fp16 = einsum(equation = var_33268_equation_0, values = (var_32752_cast_fp16, var_33153_cast_fp16))[name = tensor("op_33268_cast_fp16")]; tensor var_33270_equation_0 = const()[name = tensor("op_33270_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33270_cast_fp16 = einsum(equation = var_33270_equation_0, values = (var_32752_cast_fp16, var_33154_cast_fp16))[name = tensor("op_33270_cast_fp16")]; tensor var_33272_equation_0 = const()[name = tensor("op_33272_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33272_cast_fp16 = einsum(equation = var_33272_equation_0, values = (var_32756_cast_fp16, var_33155_cast_fp16))[name = tensor("op_33272_cast_fp16")]; tensor var_33274_equation_0 = const()[name = tensor("op_33274_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33274_cast_fp16 = einsum(equation = var_33274_equation_0, values = (var_32756_cast_fp16, var_33156_cast_fp16))[name = tensor("op_33274_cast_fp16")]; tensor var_33276_equation_0 = const()[name = tensor("op_33276_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33276_cast_fp16 = einsum(equation = var_33276_equation_0, values = (var_32756_cast_fp16, var_33157_cast_fp16))[name = tensor("op_33276_cast_fp16")]; tensor var_33278_equation_0 = const()[name = tensor("op_33278_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33278_cast_fp16 = einsum(equation = var_33278_equation_0, values = (var_32756_cast_fp16, var_33158_cast_fp16))[name = tensor("op_33278_cast_fp16")]; tensor var_33280_equation_0 = const()[name = tensor("op_33280_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33280_cast_fp16 = einsum(equation = var_33280_equation_0, values = (var_32760_cast_fp16, var_33159_cast_fp16))[name = tensor("op_33280_cast_fp16")]; tensor var_33282_equation_0 = const()[name = tensor("op_33282_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33282_cast_fp16 = einsum(equation = var_33282_equation_0, values = (var_32760_cast_fp16, var_33160_cast_fp16))[name = tensor("op_33282_cast_fp16")]; tensor var_33284_equation_0 = const()[name = tensor("op_33284_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33284_cast_fp16 = einsum(equation = var_33284_equation_0, values = (var_32760_cast_fp16, var_33161_cast_fp16))[name = tensor("op_33284_cast_fp16")]; tensor var_33286_equation_0 = const()[name = tensor("op_33286_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33286_cast_fp16 = einsum(equation = var_33286_equation_0, values = (var_32760_cast_fp16, var_33162_cast_fp16))[name = tensor("op_33286_cast_fp16")]; tensor var_33288_equation_0 = const()[name = tensor("op_33288_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33288_cast_fp16 = einsum(equation = var_33288_equation_0, values = (var_32764_cast_fp16, var_33163_cast_fp16))[name = tensor("op_33288_cast_fp16")]; tensor var_33290_equation_0 = const()[name = tensor("op_33290_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33290_cast_fp16 = einsum(equation = var_33290_equation_0, values = (var_32764_cast_fp16, var_33164_cast_fp16))[name = tensor("op_33290_cast_fp16")]; tensor var_33292_equation_0 = const()[name = tensor("op_33292_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33292_cast_fp16 = einsum(equation = var_33292_equation_0, values = (var_32764_cast_fp16, var_33165_cast_fp16))[name = tensor("op_33292_cast_fp16")]; tensor var_33294_equation_0 = const()[name = tensor("op_33294_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33294_cast_fp16 = einsum(equation = var_33294_equation_0, values = (var_32764_cast_fp16, var_33166_cast_fp16))[name = tensor("op_33294_cast_fp16")]; tensor var_33296_equation_0 = const()[name = tensor("op_33296_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33296_cast_fp16 = einsum(equation = var_33296_equation_0, values = (var_32768_cast_fp16, var_33167_cast_fp16))[name = tensor("op_33296_cast_fp16")]; tensor var_33298_equation_0 = const()[name = tensor("op_33298_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33298_cast_fp16 = einsum(equation = var_33298_equation_0, values = (var_32768_cast_fp16, var_33168_cast_fp16))[name = tensor("op_33298_cast_fp16")]; tensor var_33300_equation_0 = const()[name = tensor("op_33300_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33300_cast_fp16 = einsum(equation = var_33300_equation_0, values = (var_32768_cast_fp16, var_33169_cast_fp16))[name = tensor("op_33300_cast_fp16")]; tensor var_33302_equation_0 = const()[name = tensor("op_33302_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33302_cast_fp16 = einsum(equation = var_33302_equation_0, values = (var_32768_cast_fp16, var_33170_cast_fp16))[name = tensor("op_33302_cast_fp16")]; tensor var_33304_equation_0 = const()[name = tensor("op_33304_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33304_cast_fp16 = einsum(equation = var_33304_equation_0, values = (var_32772_cast_fp16, var_33171_cast_fp16))[name = tensor("op_33304_cast_fp16")]; tensor var_33306_equation_0 = const()[name = tensor("op_33306_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33306_cast_fp16 = einsum(equation = var_33306_equation_0, values = (var_32772_cast_fp16, var_33172_cast_fp16))[name = tensor("op_33306_cast_fp16")]; tensor var_33308_equation_0 = const()[name = tensor("op_33308_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33308_cast_fp16 = einsum(equation = var_33308_equation_0, values = (var_32772_cast_fp16, var_33173_cast_fp16))[name = tensor("op_33308_cast_fp16")]; tensor var_33310_equation_0 = const()[name = tensor("op_33310_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33310_cast_fp16 = einsum(equation = var_33310_equation_0, values = (var_32772_cast_fp16, var_33174_cast_fp16))[name = tensor("op_33310_cast_fp16")]; tensor var_33312_equation_0 = const()[name = tensor("op_33312_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33312_cast_fp16 = einsum(equation = var_33312_equation_0, values = (var_32776_cast_fp16, var_33175_cast_fp16))[name = tensor("op_33312_cast_fp16")]; tensor var_33314_equation_0 = const()[name = tensor("op_33314_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33314_cast_fp16 = einsum(equation = var_33314_equation_0, values = (var_32776_cast_fp16, var_33176_cast_fp16))[name = tensor("op_33314_cast_fp16")]; tensor var_33316_equation_0 = const()[name = tensor("op_33316_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33316_cast_fp16 = einsum(equation = var_33316_equation_0, values = (var_32776_cast_fp16, var_33177_cast_fp16))[name = tensor("op_33316_cast_fp16")]; tensor var_33318_equation_0 = const()[name = tensor("op_33318_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33318_cast_fp16 = einsum(equation = var_33318_equation_0, values = (var_32776_cast_fp16, var_33178_cast_fp16))[name = tensor("op_33318_cast_fp16")]; tensor var_33320_equation_0 = const()[name = tensor("op_33320_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33320_cast_fp16 = einsum(equation = var_33320_equation_0, values = (var_32780_cast_fp16, var_33179_cast_fp16))[name = tensor("op_33320_cast_fp16")]; tensor var_33322_equation_0 = const()[name = tensor("op_33322_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33322_cast_fp16 = einsum(equation = var_33322_equation_0, values = (var_32780_cast_fp16, var_33180_cast_fp16))[name = tensor("op_33322_cast_fp16")]; tensor var_33324_equation_0 = const()[name = tensor("op_33324_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33324_cast_fp16 = einsum(equation = var_33324_equation_0, values = (var_32780_cast_fp16, var_33181_cast_fp16))[name = tensor("op_33324_cast_fp16")]; tensor var_33326_equation_0 = const()[name = tensor("op_33326_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33326_cast_fp16 = einsum(equation = var_33326_equation_0, values = (var_32780_cast_fp16, var_33182_cast_fp16))[name = tensor("op_33326_cast_fp16")]; tensor var_33328_equation_0 = const()[name = tensor("op_33328_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33328_cast_fp16 = einsum(equation = var_33328_equation_0, values = (var_32784_cast_fp16, var_33183_cast_fp16))[name = tensor("op_33328_cast_fp16")]; tensor var_33330_equation_0 = const()[name = tensor("op_33330_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33330_cast_fp16 = einsum(equation = var_33330_equation_0, values = (var_32784_cast_fp16, var_33184_cast_fp16))[name = tensor("op_33330_cast_fp16")]; tensor var_33332_equation_0 = const()[name = tensor("op_33332_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33332_cast_fp16 = einsum(equation = var_33332_equation_0, values = (var_32784_cast_fp16, var_33185_cast_fp16))[name = tensor("op_33332_cast_fp16")]; tensor var_33334_equation_0 = const()[name = tensor("op_33334_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33334_cast_fp16 = einsum(equation = var_33334_equation_0, values = (var_32784_cast_fp16, var_33186_cast_fp16))[name = tensor("op_33334_cast_fp16")]; tensor var_33336_equation_0 = const()[name = tensor("op_33336_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33336_cast_fp16 = einsum(equation = var_33336_equation_0, values = (var_32788_cast_fp16, var_33187_cast_fp16))[name = tensor("op_33336_cast_fp16")]; tensor var_33338_equation_0 = const()[name = tensor("op_33338_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33338_cast_fp16 = einsum(equation = var_33338_equation_0, values = (var_32788_cast_fp16, var_33188_cast_fp16))[name = tensor("op_33338_cast_fp16")]; tensor var_33340_equation_0 = const()[name = tensor("op_33340_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33340_cast_fp16 = einsum(equation = var_33340_equation_0, values = (var_32788_cast_fp16, var_33189_cast_fp16))[name = tensor("op_33340_cast_fp16")]; tensor var_33342_equation_0 = const()[name = tensor("op_33342_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33342_cast_fp16 = einsum(equation = var_33342_equation_0, values = (var_32788_cast_fp16, var_33190_cast_fp16))[name = tensor("op_33342_cast_fp16")]; tensor var_33344_equation_0 = const()[name = tensor("op_33344_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33344_cast_fp16 = einsum(equation = var_33344_equation_0, values = (var_32792_cast_fp16, var_33191_cast_fp16))[name = tensor("op_33344_cast_fp16")]; tensor var_33346_equation_0 = const()[name = tensor("op_33346_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33346_cast_fp16 = einsum(equation = var_33346_equation_0, values = (var_32792_cast_fp16, var_33192_cast_fp16))[name = tensor("op_33346_cast_fp16")]; tensor var_33348_equation_0 = const()[name = tensor("op_33348_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33348_cast_fp16 = einsum(equation = var_33348_equation_0, values = (var_32792_cast_fp16, var_33193_cast_fp16))[name = tensor("op_33348_cast_fp16")]; tensor var_33350_equation_0 = const()[name = tensor("op_33350_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33350_cast_fp16 = einsum(equation = var_33350_equation_0, values = (var_32792_cast_fp16, var_33194_cast_fp16))[name = tensor("op_33350_cast_fp16")]; tensor var_33352_equation_0 = const()[name = tensor("op_33352_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33352_cast_fp16 = einsum(equation = var_33352_equation_0, values = (var_32796_cast_fp16, var_33195_cast_fp16))[name = tensor("op_33352_cast_fp16")]; tensor var_33354_equation_0 = const()[name = tensor("op_33354_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33354_cast_fp16 = einsum(equation = var_33354_equation_0, values = (var_32796_cast_fp16, var_33196_cast_fp16))[name = tensor("op_33354_cast_fp16")]; tensor var_33356_equation_0 = const()[name = tensor("op_33356_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33356_cast_fp16 = einsum(equation = var_33356_equation_0, values = (var_32796_cast_fp16, var_33197_cast_fp16))[name = tensor("op_33356_cast_fp16")]; tensor var_33358_equation_0 = const()[name = tensor("op_33358_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_33358_cast_fp16 = einsum(equation = var_33358_equation_0, values = (var_32796_cast_fp16, var_33198_cast_fp16))[name = tensor("op_33358_cast_fp16")]; tensor var_33360_interleave_0 = const()[name = tensor("op_33360_interleave_0"), val = tensor(false)]; tensor var_33360_cast_fp16 = concat(axis = var_31892, interleave = var_33360_interleave_0, values = (var_33200_cast_fp16, var_33202_cast_fp16, var_33204_cast_fp16, var_33206_cast_fp16))[name = tensor("op_33360_cast_fp16")]; tensor var_33362_interleave_0 = const()[name = tensor("op_33362_interleave_0"), val = tensor(false)]; tensor var_33362_cast_fp16 = concat(axis = var_31892, interleave = var_33362_interleave_0, values = (var_33208_cast_fp16, var_33210_cast_fp16, var_33212_cast_fp16, var_33214_cast_fp16))[name = tensor("op_33362_cast_fp16")]; tensor var_33364_interleave_0 = const()[name = tensor("op_33364_interleave_0"), val = tensor(false)]; tensor var_33364_cast_fp16 = concat(axis = var_31892, interleave = var_33364_interleave_0, values = (var_33216_cast_fp16, var_33218_cast_fp16, var_33220_cast_fp16, var_33222_cast_fp16))[name = tensor("op_33364_cast_fp16")]; tensor var_33366_interleave_0 = const()[name = tensor("op_33366_interleave_0"), val = tensor(false)]; tensor var_33366_cast_fp16 = concat(axis = var_31892, interleave = var_33366_interleave_0, values = (var_33224_cast_fp16, var_33226_cast_fp16, var_33228_cast_fp16, var_33230_cast_fp16))[name = tensor("op_33366_cast_fp16")]; tensor var_33368_interleave_0 = const()[name = tensor("op_33368_interleave_0"), val = tensor(false)]; tensor var_33368_cast_fp16 = concat(axis = var_31892, interleave = var_33368_interleave_0, values = (var_33232_cast_fp16, var_33234_cast_fp16, var_33236_cast_fp16, var_33238_cast_fp16))[name = tensor("op_33368_cast_fp16")]; tensor var_33370_interleave_0 = const()[name = tensor("op_33370_interleave_0"), val = tensor(false)]; tensor var_33370_cast_fp16 = concat(axis = var_31892, interleave = var_33370_interleave_0, values = (var_33240_cast_fp16, var_33242_cast_fp16, var_33244_cast_fp16, var_33246_cast_fp16))[name = tensor("op_33370_cast_fp16")]; tensor var_33372_interleave_0 = const()[name = tensor("op_33372_interleave_0"), val = tensor(false)]; tensor var_33372_cast_fp16 = concat(axis = var_31892, interleave = var_33372_interleave_0, values = (var_33248_cast_fp16, var_33250_cast_fp16, var_33252_cast_fp16, var_33254_cast_fp16))[name = tensor("op_33372_cast_fp16")]; tensor var_33374_interleave_0 = const()[name = tensor("op_33374_interleave_0"), val = tensor(false)]; tensor var_33374_cast_fp16 = concat(axis = var_31892, interleave = var_33374_interleave_0, values = (var_33256_cast_fp16, var_33258_cast_fp16, var_33260_cast_fp16, var_33262_cast_fp16))[name = tensor("op_33374_cast_fp16")]; tensor var_33376_interleave_0 = const()[name = tensor("op_33376_interleave_0"), val = tensor(false)]; tensor var_33376_cast_fp16 = concat(axis = var_31892, interleave = var_33376_interleave_0, values = (var_33264_cast_fp16, var_33266_cast_fp16, var_33268_cast_fp16, var_33270_cast_fp16))[name = tensor("op_33376_cast_fp16")]; tensor var_33378_interleave_0 = const()[name = tensor("op_33378_interleave_0"), val = tensor(false)]; tensor var_33378_cast_fp16 = concat(axis = var_31892, interleave = var_33378_interleave_0, values = (var_33272_cast_fp16, var_33274_cast_fp16, var_33276_cast_fp16, var_33278_cast_fp16))[name = tensor("op_33378_cast_fp16")]; tensor var_33380_interleave_0 = const()[name = tensor("op_33380_interleave_0"), val = tensor(false)]; tensor var_33380_cast_fp16 = concat(axis = var_31892, interleave = var_33380_interleave_0, values = (var_33280_cast_fp16, var_33282_cast_fp16, var_33284_cast_fp16, var_33286_cast_fp16))[name = tensor("op_33380_cast_fp16")]; tensor var_33382_interleave_0 = const()[name = tensor("op_33382_interleave_0"), val = tensor(false)]; tensor var_33382_cast_fp16 = concat(axis = var_31892, interleave = var_33382_interleave_0, values = (var_33288_cast_fp16, var_33290_cast_fp16, var_33292_cast_fp16, var_33294_cast_fp16))[name = tensor("op_33382_cast_fp16")]; tensor var_33384_interleave_0 = const()[name = tensor("op_33384_interleave_0"), val = tensor(false)]; tensor var_33384_cast_fp16 = concat(axis = var_31892, interleave = var_33384_interleave_0, values = (var_33296_cast_fp16, var_33298_cast_fp16, var_33300_cast_fp16, var_33302_cast_fp16))[name = tensor("op_33384_cast_fp16")]; tensor var_33386_interleave_0 = const()[name = tensor("op_33386_interleave_0"), val = tensor(false)]; tensor var_33386_cast_fp16 = concat(axis = var_31892, interleave = var_33386_interleave_0, values = (var_33304_cast_fp16, var_33306_cast_fp16, var_33308_cast_fp16, var_33310_cast_fp16))[name = tensor("op_33386_cast_fp16")]; tensor var_33388_interleave_0 = const()[name = tensor("op_33388_interleave_0"), val = tensor(false)]; tensor var_33388_cast_fp16 = concat(axis = var_31892, interleave = var_33388_interleave_0, values = (var_33312_cast_fp16, var_33314_cast_fp16, var_33316_cast_fp16, var_33318_cast_fp16))[name = tensor("op_33388_cast_fp16")]; tensor var_33390_interleave_0 = const()[name = tensor("op_33390_interleave_0"), val = tensor(false)]; tensor var_33390_cast_fp16 = concat(axis = var_31892, interleave = var_33390_interleave_0, values = (var_33320_cast_fp16, var_33322_cast_fp16, var_33324_cast_fp16, var_33326_cast_fp16))[name = tensor("op_33390_cast_fp16")]; tensor var_33392_interleave_0 = const()[name = tensor("op_33392_interleave_0"), val = tensor(false)]; tensor var_33392_cast_fp16 = concat(axis = var_31892, interleave = var_33392_interleave_0, values = (var_33328_cast_fp16, var_33330_cast_fp16, var_33332_cast_fp16, var_33334_cast_fp16))[name = tensor("op_33392_cast_fp16")]; tensor var_33394_interleave_0 = const()[name = tensor("op_33394_interleave_0"), val = tensor(false)]; tensor var_33394_cast_fp16 = concat(axis = var_31892, interleave = var_33394_interleave_0, values = (var_33336_cast_fp16, var_33338_cast_fp16, var_33340_cast_fp16, var_33342_cast_fp16))[name = tensor("op_33394_cast_fp16")]; tensor var_33396_interleave_0 = const()[name = tensor("op_33396_interleave_0"), val = tensor(false)]; tensor var_33396_cast_fp16 = concat(axis = var_31892, interleave = var_33396_interleave_0, values = (var_33344_cast_fp16, var_33346_cast_fp16, var_33348_cast_fp16, var_33350_cast_fp16))[name = tensor("op_33396_cast_fp16")]; tensor var_33398_interleave_0 = const()[name = tensor("op_33398_interleave_0"), val = tensor(false)]; tensor var_33398_cast_fp16 = concat(axis = var_31892, interleave = var_33398_interleave_0, values = (var_33352_cast_fp16, var_33354_cast_fp16, var_33356_cast_fp16, var_33358_cast_fp16))[name = tensor("op_33398_cast_fp16")]; tensor input_161_interleave_0 = const()[name = tensor("input_161_interleave_0"), val = tensor(false)]; tensor input_161_cast_fp16 = concat(axis = var_31917, interleave = input_161_interleave_0, values = (var_33360_cast_fp16, var_33362_cast_fp16, var_33364_cast_fp16, var_33366_cast_fp16, var_33368_cast_fp16, var_33370_cast_fp16, var_33372_cast_fp16, var_33374_cast_fp16, var_33376_cast_fp16, var_33378_cast_fp16, var_33380_cast_fp16, var_33382_cast_fp16, var_33384_cast_fp16, var_33386_cast_fp16, var_33388_cast_fp16, var_33390_cast_fp16, var_33392_cast_fp16, var_33394_cast_fp16, var_33396_cast_fp16, var_33398_cast_fp16))[name = tensor("input_161_cast_fp16")]; tensor var_33409_pad_type_0 = const()[name = tensor("op_33409_pad_type_0"), val = tensor("valid")]; tensor var_33409_strides_0 = const()[name = tensor("op_33409_strides_0"), val = tensor([1, 1])]; tensor var_33409_pad_0 = const()[name = tensor("op_33409_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33409_dilations_0 = const()[name = tensor("op_33409_dilations_0"), val = tensor([1, 1])]; tensor var_33409_groups_0 = const()[name = tensor("op_33409_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(274890176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275709440))), name = tensor("layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_20_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275709568)))]; tensor var_33409_cast_fp16 = conv(bias = layers_20_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_33409_dilations_0, groups = var_33409_groups_0, pad = var_33409_pad_0, pad_type = var_33409_pad_type_0, strides = var_33409_strides_0, weight = layers_20_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_161_cast_fp16)[name = tensor("op_33409_cast_fp16")]; tensor var_33415_pad_type_0 = const()[name = tensor("op_33415_pad_type_0"), val = tensor("valid")]; tensor var_33415_strides_0 = const()[name = tensor("op_33415_strides_0"), val = tensor([1, 1])]; tensor var_33415_pad_0 = const()[name = tensor("op_33415_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33415_dilations_0 = const()[name = tensor("op_33415_dilations_0"), val = tensor([1, 1])]; tensor var_33415_groups_0 = const()[name = tensor("op_33415_groups_0"), val = tensor(1)]; tensor layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275726912))), name = tensor("layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275712192))), shape = tensor([1280, 1280, 1, 1])]; tensor var_33415_cast_fp16 = conv(dilations = var_33415_dilations_0, groups = var_33415_groups_0, pad = var_33415_pad_0, pad_type = var_33415_pad_type_0, strides = var_33415_strides_0, weight = layers_20_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_161_cast_fp16)[name = tensor("op_33415_cast_fp16")]; tensor obj_83_cast_fp16 = add(x = var_33409_cast_fp16, y = var_33415_cast_fp16)[name = tensor("obj_83_cast_fp16")]; tensor inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = obj_83_cast_fp16)[name = tensor("inputs_83_cast_fp16")]; tensor out_83_axes_0 = const()[name = tensor("out_83_axes_0"), val = tensor([1])]; tensor var_33426_to_fp16 = const()[name = tensor("op_33426_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_83_cast_fp16 = layer_norm(axes = out_83_axes_0, epsilon = var_33426_to_fp16, x = inputs_83_cast_fp16)[name = tensor("out_83_cast_fp16")]; tensor input_163_gamma_0_to_fp16 = const()[name = tensor("input_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275931776)))]; tensor input_163_beta_0_to_fp16 = const()[name = tensor("input_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275934400)))]; tensor input_163_epsilon_0_to_fp16 = const()[name = tensor("input_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_163_cast_fp16 = batch_norm(beta = input_163_beta_0_to_fp16, epsilon = input_163_epsilon_0_to_fp16, gamma = input_163_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_83_cast_fp16)[name = tensor("input_163_cast_fp16")]; tensor var_33444_pad_type_0 = const()[name = tensor("op_33444_pad_type_0"), val = tensor("valid")]; tensor var_33444_strides_0 = const()[name = tensor("op_33444_strides_0"), val = tensor([1, 1])]; tensor var_33444_pad_0 = const()[name = tensor("op_33444_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33444_dilations_0 = const()[name = tensor("op_33444_dilations_0"), val = tensor([1, 1])]; tensor var_33444_groups_0 = const()[name = tensor("op_33444_groups_0"), val = tensor(1)]; tensor layers_20_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(275937024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279213888))), name = tensor("layers_20_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_20_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279214016)))]; tensor var_33444_cast_fp16 = conv(bias = layers_20_fc1_inlier_module_bias_to_fp16, dilations = var_33444_dilations_0, groups = var_33444_groups_0, pad = var_33444_pad_0, pad_type = var_33444_pad_type_0, strides = var_33444_strides_0, weight = layers_20_fc1_inlier_module_weight_to_fp16_palettized, x = input_163_cast_fp16)[name = tensor("op_33444_cast_fp16")]; tensor var_33450_pad_type_0 = const()[name = tensor("op_33450_pad_type_0"), val = tensor("valid")]; tensor var_33450_strides_0 = const()[name = tensor("op_33450_strides_0"), val = tensor([1, 1])]; tensor var_33450_pad_0 = const()[name = tensor("op_33450_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33450_dilations_0 = const()[name = tensor("op_33450_dilations_0"), val = tensor([1, 1])]; tensor var_33450_groups_0 = const()[name = tensor("op_33450_groups_0"), val = tensor(1)]; tensor layers_20_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279311872))), name = tensor("layers_20_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(279224320))), shape = tensor([5120, 1280, 1, 1])]; tensor var_33450_cast_fp16 = conv(dilations = var_33450_dilations_0, groups = var_33450_groups_0, pad = var_33450_pad_0, pad_type = var_33450_pad_type_0, strides = var_33450_strides_0, weight = layers_20_fc1_outlier_module_weight_to_fp16_sparsified, x = input_163_cast_fp16)[name = tensor("op_33450_cast_fp16")]; tensor input_165_cast_fp16 = add(x = var_33444_cast_fp16, y = var_33450_cast_fp16)[name = tensor("input_165_cast_fp16")]; tensor input_167_mode_0 = const()[name = tensor("input_167_mode_0"), val = tensor("EXACT")]; tensor input_167_cast_fp16 = gelu(mode = input_167_mode_0, x = input_165_cast_fp16)[name = tensor("input_167_cast_fp16")]; tensor var_33461_pad_type_0 = const()[name = tensor("op_33461_pad_type_0"), val = tensor("valid")]; tensor var_33461_strides_0 = const()[name = tensor("op_33461_strides_0"), val = tensor([1, 1])]; tensor var_33461_pad_0 = const()[name = tensor("op_33461_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33461_dilations_0 = const()[name = tensor("op_33461_dilations_0"), val = tensor([1, 1])]; tensor var_33461_groups_0 = const()[name = tensor("op_33461_groups_0"), val = tensor(1)]; tensor layers_20_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(280131136))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283408000))), name = tensor("layers_20_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_20_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_20_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283408128)))]; tensor var_33461_cast_fp16 = conv(bias = layers_20_fc2_inlier_module_bias_to_fp16, dilations = var_33461_dilations_0, groups = var_33461_groups_0, pad = var_33461_pad_0, pad_type = var_33461_pad_type_0, strides = var_33461_strides_0, weight = layers_20_fc2_inlier_module_weight_to_fp16_palettized, x = input_167_cast_fp16)[name = tensor("op_33461_cast_fp16")]; tensor var_33467_pad_type_0 = const()[name = tensor("op_33467_pad_type_0"), val = tensor("valid")]; tensor var_33467_strides_0 = const()[name = tensor("op_33467_strides_0"), val = tensor([1, 1])]; tensor var_33467_pad_0 = const()[name = tensor("op_33467_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33467_dilations_0 = const()[name = tensor("op_33467_dilations_0"), val = tensor([1, 1])]; tensor var_33467_groups_0 = const()[name = tensor("op_33467_groups_0"), val = tensor(1)]; tensor layers_20_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283466304))), name = tensor("layers_20_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(283410752))), shape = tensor([1280, 5120, 1, 1])]; tensor var_33467_cast_fp16 = conv(dilations = var_33467_dilations_0, groups = var_33467_groups_0, pad = var_33467_pad_0, pad_type = var_33467_pad_type_0, strides = var_33467_strides_0, weight = layers_20_fc2_outlier_module_weight_to_fp16_sparsified, x = input_167_cast_fp16)[name = tensor("op_33467_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = var_33461_cast_fp16, y = var_33467_cast_fp16)[name = tensor("hidden_states_45_cast_fp16")]; tensor inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = hidden_states_45_cast_fp16)[name = tensor("inputs_85_cast_fp16")]; tensor var_33473 = const()[name = tensor("op_33473"), val = tensor(3)]; tensor var_33498 = const()[name = tensor("op_33498"), val = tensor(1)]; tensor out_85_axes_0 = const()[name = tensor("out_85_axes_0"), val = tensor([1])]; tensor var_33515_to_fp16 = const()[name = tensor("op_33515_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_85_cast_fp16 = layer_norm(axes = out_85_axes_0, epsilon = var_33515_to_fp16, x = inputs_85_cast_fp16)[name = tensor("out_85_cast_fp16")]; tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284285568)))]; tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284288192)))]; tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_85_cast_fp16)[name = tensor("obj_85_cast_fp16")]; tensor var_33537_pad_type_0 = const()[name = tensor("op_33537_pad_type_0"), val = tensor("valid")]; tensor var_33537_strides_0 = const()[name = tensor("op_33537_strides_0"), val = tensor([1, 1])]; tensor var_33537_pad_0 = const()[name = tensor("op_33537_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33537_dilations_0 = const()[name = tensor("op_33537_dilations_0"), val = tensor([1, 1])]; tensor var_33537_groups_0 = const()[name = tensor("op_33537_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(284290816))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285110080))), name = tensor("layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_21_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285110208)))]; tensor var_33537_cast_fp16 = conv(bias = layers_21_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_33537_dilations_0, groups = var_33537_groups_0, pad = var_33537_pad_0, pad_type = var_33537_pad_type_0, strides = var_33537_strides_0, weight = layers_21_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_33537_cast_fp16")]; tensor var_33543_pad_type_0 = const()[name = tensor("op_33543_pad_type_0"), val = tensor("valid")]; tensor var_33543_strides_0 = const()[name = tensor("op_33543_strides_0"), val = tensor([1, 1])]; tensor var_33543_pad_0 = const()[name = tensor("op_33543_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33543_dilations_0 = const()[name = tensor("op_33543_dilations_0"), val = tensor([1, 1])]; tensor var_33543_groups_0 = const()[name = tensor("op_33543_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285140544))), name = tensor("layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285112832))), shape = tensor([1280, 1280, 1, 1])]; tensor var_33543_cast_fp16 = conv(dilations = var_33543_dilations_0, groups = var_33543_groups_0, pad = var_33543_pad_0, pad_type = var_33543_pad_type_0, strides = var_33543_strides_0, weight = layers_21_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_33543_cast_fp16")]; tensor query_43_cast_fp16 = add(x = var_33537_cast_fp16, y = var_33543_cast_fp16)[name = tensor("query_43_cast_fp16")]; tensor var_33552_pad_type_0 = const()[name = tensor("op_33552_pad_type_0"), val = tensor("valid")]; tensor var_33552_strides_0 = const()[name = tensor("op_33552_strides_0"), val = tensor([1, 1])]; tensor var_33552_pad_0 = const()[name = tensor("op_33552_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33552_dilations_0 = const()[name = tensor("op_33552_dilations_0"), val = tensor([1, 1])]; tensor var_33552_groups_0 = const()[name = tensor("op_33552_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(285345408))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286164672))), name = tensor("layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_33552_cast_fp16 = conv(dilations = var_33552_dilations_0, groups = var_33552_groups_0, pad = var_33552_pad_0, pad_type = var_33552_pad_type_0, strides = var_33552_strides_0, weight = layers_21_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_33552_cast_fp16")]; tensor var_33558_pad_type_0 = const()[name = tensor("op_33558_pad_type_0"), val = tensor("valid")]; tensor var_33558_strides_0 = const()[name = tensor("op_33558_strides_0"), val = tensor([1, 1])]; tensor var_33558_pad_0 = const()[name = tensor("op_33558_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33558_dilations_0 = const()[name = tensor("op_33558_dilations_0"), val = tensor([1, 1])]; tensor var_33558_groups_0 = const()[name = tensor("op_33558_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286193792))), name = tensor("layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286164800))), shape = tensor([1280, 1280, 1, 1])]; tensor var_33558_cast_fp16 = conv(dilations = var_33558_dilations_0, groups = var_33558_groups_0, pad = var_33558_pad_0, pad_type = var_33558_pad_type_0, strides = var_33558_strides_0, weight = layers_21_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_33558_cast_fp16")]; tensor key_43_cast_fp16 = add(x = var_33552_cast_fp16, y = var_33558_cast_fp16)[name = tensor("key_43_cast_fp16")]; tensor var_33568_pad_type_0 = const()[name = tensor("op_33568_pad_type_0"), val = tensor("valid")]; tensor var_33568_strides_0 = const()[name = tensor("op_33568_strides_0"), val = tensor([1, 1])]; tensor var_33568_pad_0 = const()[name = tensor("op_33568_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33568_dilations_0 = const()[name = tensor("op_33568_dilations_0"), val = tensor([1, 1])]; tensor var_33568_groups_0 = const()[name = tensor("op_33568_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(286398656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287217920))), name = tensor("layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_21_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287218048)))]; tensor var_33568_cast_fp16 = conv(bias = layers_21_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_33568_dilations_0, groups = var_33568_groups_0, pad = var_33568_pad_0, pad_type = var_33568_pad_type_0, strides = var_33568_strides_0, weight = layers_21_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_85_cast_fp16)[name = tensor("op_33568_cast_fp16")]; tensor var_33574_pad_type_0 = const()[name = tensor("op_33574_pad_type_0"), val = tensor("valid")]; tensor var_33574_strides_0 = const()[name = tensor("op_33574_strides_0"), val = tensor([1, 1])]; tensor var_33574_pad_0 = const()[name = tensor("op_33574_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_33574_dilations_0 = const()[name = tensor("op_33574_dilations_0"), val = tensor([1, 1])]; tensor var_33574_groups_0 = const()[name = tensor("op_33574_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287233472))), name = tensor("layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287220672))), shape = tensor([1280, 1280, 1, 1])]; tensor var_33574_cast_fp16 = conv(dilations = var_33574_dilations_0, groups = var_33574_groups_0, pad = var_33574_pad_0, pad_type = var_33574_pad_type_0, strides = var_33574_strides_0, weight = layers_21_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_85_cast_fp16)[name = tensor("op_33574_cast_fp16")]; tensor value_43_cast_fp16 = add(x = var_33568_cast_fp16, y = var_33574_cast_fp16)[name = tensor("value_43_cast_fp16")]; tensor var_33580_begin_0 = const()[name = tensor("op_33580_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33580_end_0 = const()[name = tensor("op_33580_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33580_end_mask_0 = const()[name = tensor("op_33580_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33580_cast_fp16 = slice_by_index(begin = var_33580_begin_0, end = var_33580_end_0, end_mask = var_33580_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33580_cast_fp16")]; tensor var_33584_begin_0 = const()[name = tensor("op_33584_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_33584_end_0 = const()[name = tensor("op_33584_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_33584_end_mask_0 = const()[name = tensor("op_33584_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33584_cast_fp16 = slice_by_index(begin = var_33584_begin_0, end = var_33584_end_0, end_mask = var_33584_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33584_cast_fp16")]; tensor var_33588_begin_0 = const()[name = tensor("op_33588_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_33588_end_0 = const()[name = tensor("op_33588_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_33588_end_mask_0 = const()[name = tensor("op_33588_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33588_cast_fp16 = slice_by_index(begin = var_33588_begin_0, end = var_33588_end_0, end_mask = var_33588_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33588_cast_fp16")]; tensor var_33592_begin_0 = const()[name = tensor("op_33592_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_33592_end_0 = const()[name = tensor("op_33592_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_33592_end_mask_0 = const()[name = tensor("op_33592_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33592_cast_fp16 = slice_by_index(begin = var_33592_begin_0, end = var_33592_end_0, end_mask = var_33592_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33592_cast_fp16")]; tensor var_33596_begin_0 = const()[name = tensor("op_33596_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_33596_end_0 = const()[name = tensor("op_33596_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_33596_end_mask_0 = const()[name = tensor("op_33596_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33596_cast_fp16 = slice_by_index(begin = var_33596_begin_0, end = var_33596_end_0, end_mask = var_33596_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33596_cast_fp16")]; tensor var_33600_begin_0 = const()[name = tensor("op_33600_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_33600_end_0 = const()[name = tensor("op_33600_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_33600_end_mask_0 = const()[name = tensor("op_33600_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33600_cast_fp16 = slice_by_index(begin = var_33600_begin_0, end = var_33600_end_0, end_mask = var_33600_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33600_cast_fp16")]; tensor var_33604_begin_0 = const()[name = tensor("op_33604_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_33604_end_0 = const()[name = tensor("op_33604_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_33604_end_mask_0 = const()[name = tensor("op_33604_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33604_cast_fp16 = slice_by_index(begin = var_33604_begin_0, end = var_33604_end_0, end_mask = var_33604_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33604_cast_fp16")]; tensor var_33608_begin_0 = const()[name = tensor("op_33608_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_33608_end_0 = const()[name = tensor("op_33608_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_33608_end_mask_0 = const()[name = tensor("op_33608_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33608_cast_fp16 = slice_by_index(begin = var_33608_begin_0, end = var_33608_end_0, end_mask = var_33608_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33608_cast_fp16")]; tensor var_33612_begin_0 = const()[name = tensor("op_33612_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_33612_end_0 = const()[name = tensor("op_33612_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_33612_end_mask_0 = const()[name = tensor("op_33612_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33612_cast_fp16 = slice_by_index(begin = var_33612_begin_0, end = var_33612_end_0, end_mask = var_33612_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33612_cast_fp16")]; tensor var_33616_begin_0 = const()[name = tensor("op_33616_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_33616_end_0 = const()[name = tensor("op_33616_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_33616_end_mask_0 = const()[name = tensor("op_33616_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33616_cast_fp16 = slice_by_index(begin = var_33616_begin_0, end = var_33616_end_0, end_mask = var_33616_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33616_cast_fp16")]; tensor var_33620_begin_0 = const()[name = tensor("op_33620_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_33620_end_0 = const()[name = tensor("op_33620_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_33620_end_mask_0 = const()[name = tensor("op_33620_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33620_cast_fp16 = slice_by_index(begin = var_33620_begin_0, end = var_33620_end_0, end_mask = var_33620_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33620_cast_fp16")]; tensor var_33624_begin_0 = const()[name = tensor("op_33624_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_33624_end_0 = const()[name = tensor("op_33624_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_33624_end_mask_0 = const()[name = tensor("op_33624_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33624_cast_fp16 = slice_by_index(begin = var_33624_begin_0, end = var_33624_end_0, end_mask = var_33624_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33624_cast_fp16")]; tensor var_33628_begin_0 = const()[name = tensor("op_33628_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_33628_end_0 = const()[name = tensor("op_33628_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_33628_end_mask_0 = const()[name = tensor("op_33628_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33628_cast_fp16 = slice_by_index(begin = var_33628_begin_0, end = var_33628_end_0, end_mask = var_33628_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33628_cast_fp16")]; tensor var_33632_begin_0 = const()[name = tensor("op_33632_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_33632_end_0 = const()[name = tensor("op_33632_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_33632_end_mask_0 = const()[name = tensor("op_33632_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33632_cast_fp16 = slice_by_index(begin = var_33632_begin_0, end = var_33632_end_0, end_mask = var_33632_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33632_cast_fp16")]; tensor var_33636_begin_0 = const()[name = tensor("op_33636_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_33636_end_0 = const()[name = tensor("op_33636_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_33636_end_mask_0 = const()[name = tensor("op_33636_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33636_cast_fp16 = slice_by_index(begin = var_33636_begin_0, end = var_33636_end_0, end_mask = var_33636_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33636_cast_fp16")]; tensor var_33640_begin_0 = const()[name = tensor("op_33640_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_33640_end_0 = const()[name = tensor("op_33640_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_33640_end_mask_0 = const()[name = tensor("op_33640_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33640_cast_fp16 = slice_by_index(begin = var_33640_begin_0, end = var_33640_end_0, end_mask = var_33640_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33640_cast_fp16")]; tensor var_33644_begin_0 = const()[name = tensor("op_33644_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_33644_end_0 = const()[name = tensor("op_33644_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_33644_end_mask_0 = const()[name = tensor("op_33644_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33644_cast_fp16 = slice_by_index(begin = var_33644_begin_0, end = var_33644_end_0, end_mask = var_33644_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33644_cast_fp16")]; tensor var_33648_begin_0 = const()[name = tensor("op_33648_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_33648_end_0 = const()[name = tensor("op_33648_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_33648_end_mask_0 = const()[name = tensor("op_33648_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33648_cast_fp16 = slice_by_index(begin = var_33648_begin_0, end = var_33648_end_0, end_mask = var_33648_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33648_cast_fp16")]; tensor var_33652_begin_0 = const()[name = tensor("op_33652_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_33652_end_0 = const()[name = tensor("op_33652_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_33652_end_mask_0 = const()[name = tensor("op_33652_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33652_cast_fp16 = slice_by_index(begin = var_33652_begin_0, end = var_33652_end_0, end_mask = var_33652_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33652_cast_fp16")]; tensor var_33656_begin_0 = const()[name = tensor("op_33656_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_33656_end_0 = const()[name = tensor("op_33656_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_33656_end_mask_0 = const()[name = tensor("op_33656_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_33656_cast_fp16 = slice_by_index(begin = var_33656_begin_0, end = var_33656_end_0, end_mask = var_33656_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_33656_cast_fp16")]; tensor var_33665_begin_0 = const()[name = tensor("op_33665_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33665_end_0 = const()[name = tensor("op_33665_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33665_end_mask_0 = const()[name = tensor("op_33665_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33665_cast_fp16 = slice_by_index(begin = var_33665_begin_0, end = var_33665_end_0, end_mask = var_33665_end_mask_0, x = var_33580_cast_fp16)[name = tensor("op_33665_cast_fp16")]; tensor var_33672_begin_0 = const()[name = tensor("op_33672_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33672_end_0 = const()[name = tensor("op_33672_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33672_end_mask_0 = const()[name = tensor("op_33672_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33672_cast_fp16 = slice_by_index(begin = var_33672_begin_0, end = var_33672_end_0, end_mask = var_33672_end_mask_0, x = var_33580_cast_fp16)[name = tensor("op_33672_cast_fp16")]; tensor var_33679_begin_0 = const()[name = tensor("op_33679_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33679_end_0 = const()[name = tensor("op_33679_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33679_end_mask_0 = const()[name = tensor("op_33679_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33679_cast_fp16 = slice_by_index(begin = var_33679_begin_0, end = var_33679_end_0, end_mask = var_33679_end_mask_0, x = var_33580_cast_fp16)[name = tensor("op_33679_cast_fp16")]; tensor var_33686_begin_0 = const()[name = tensor("op_33686_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33686_end_0 = const()[name = tensor("op_33686_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33686_end_mask_0 = const()[name = tensor("op_33686_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33686_cast_fp16 = slice_by_index(begin = var_33686_begin_0, end = var_33686_end_0, end_mask = var_33686_end_mask_0, x = var_33580_cast_fp16)[name = tensor("op_33686_cast_fp16")]; tensor var_33693_begin_0 = const()[name = tensor("op_33693_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33693_end_0 = const()[name = tensor("op_33693_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33693_end_mask_0 = const()[name = tensor("op_33693_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33693_cast_fp16 = slice_by_index(begin = var_33693_begin_0, end = var_33693_end_0, end_mask = var_33693_end_mask_0, x = var_33584_cast_fp16)[name = tensor("op_33693_cast_fp16")]; tensor var_33700_begin_0 = const()[name = tensor("op_33700_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33700_end_0 = const()[name = tensor("op_33700_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33700_end_mask_0 = const()[name = tensor("op_33700_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33700_cast_fp16 = slice_by_index(begin = var_33700_begin_0, end = var_33700_end_0, end_mask = var_33700_end_mask_0, x = var_33584_cast_fp16)[name = tensor("op_33700_cast_fp16")]; tensor var_33707_begin_0 = const()[name = tensor("op_33707_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33707_end_0 = const()[name = tensor("op_33707_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33707_end_mask_0 = const()[name = tensor("op_33707_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33707_cast_fp16 = slice_by_index(begin = var_33707_begin_0, end = var_33707_end_0, end_mask = var_33707_end_mask_0, x = var_33584_cast_fp16)[name = tensor("op_33707_cast_fp16")]; tensor var_33714_begin_0 = const()[name = tensor("op_33714_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33714_end_0 = const()[name = tensor("op_33714_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33714_end_mask_0 = const()[name = tensor("op_33714_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33714_cast_fp16 = slice_by_index(begin = var_33714_begin_0, end = var_33714_end_0, end_mask = var_33714_end_mask_0, x = var_33584_cast_fp16)[name = tensor("op_33714_cast_fp16")]; tensor var_33721_begin_0 = const()[name = tensor("op_33721_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33721_end_0 = const()[name = tensor("op_33721_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33721_end_mask_0 = const()[name = tensor("op_33721_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33721_cast_fp16 = slice_by_index(begin = var_33721_begin_0, end = var_33721_end_0, end_mask = var_33721_end_mask_0, x = var_33588_cast_fp16)[name = tensor("op_33721_cast_fp16")]; tensor var_33728_begin_0 = const()[name = tensor("op_33728_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33728_end_0 = const()[name = tensor("op_33728_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33728_end_mask_0 = const()[name = tensor("op_33728_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33728_cast_fp16 = slice_by_index(begin = var_33728_begin_0, end = var_33728_end_0, end_mask = var_33728_end_mask_0, x = var_33588_cast_fp16)[name = tensor("op_33728_cast_fp16")]; tensor var_33735_begin_0 = const()[name = tensor("op_33735_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33735_end_0 = const()[name = tensor("op_33735_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33735_end_mask_0 = const()[name = tensor("op_33735_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33735_cast_fp16 = slice_by_index(begin = var_33735_begin_0, end = var_33735_end_0, end_mask = var_33735_end_mask_0, x = var_33588_cast_fp16)[name = tensor("op_33735_cast_fp16")]; tensor var_33742_begin_0 = const()[name = tensor("op_33742_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33742_end_0 = const()[name = tensor("op_33742_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33742_end_mask_0 = const()[name = tensor("op_33742_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33742_cast_fp16 = slice_by_index(begin = var_33742_begin_0, end = var_33742_end_0, end_mask = var_33742_end_mask_0, x = var_33588_cast_fp16)[name = tensor("op_33742_cast_fp16")]; tensor var_33749_begin_0 = const()[name = tensor("op_33749_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33749_end_0 = const()[name = tensor("op_33749_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33749_end_mask_0 = const()[name = tensor("op_33749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33749_cast_fp16 = slice_by_index(begin = var_33749_begin_0, end = var_33749_end_0, end_mask = var_33749_end_mask_0, x = var_33592_cast_fp16)[name = tensor("op_33749_cast_fp16")]; tensor var_33756_begin_0 = const()[name = tensor("op_33756_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33756_end_0 = const()[name = tensor("op_33756_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33756_end_mask_0 = const()[name = tensor("op_33756_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33756_cast_fp16 = slice_by_index(begin = var_33756_begin_0, end = var_33756_end_0, end_mask = var_33756_end_mask_0, x = var_33592_cast_fp16)[name = tensor("op_33756_cast_fp16")]; tensor var_33763_begin_0 = const()[name = tensor("op_33763_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33763_end_0 = const()[name = tensor("op_33763_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33763_end_mask_0 = const()[name = tensor("op_33763_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33763_cast_fp16 = slice_by_index(begin = var_33763_begin_0, end = var_33763_end_0, end_mask = var_33763_end_mask_0, x = var_33592_cast_fp16)[name = tensor("op_33763_cast_fp16")]; tensor var_33770_begin_0 = const()[name = tensor("op_33770_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33770_end_0 = const()[name = tensor("op_33770_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33770_end_mask_0 = const()[name = tensor("op_33770_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33770_cast_fp16 = slice_by_index(begin = var_33770_begin_0, end = var_33770_end_0, end_mask = var_33770_end_mask_0, x = var_33592_cast_fp16)[name = tensor("op_33770_cast_fp16")]; tensor var_33777_begin_0 = const()[name = tensor("op_33777_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33777_end_0 = const()[name = tensor("op_33777_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33777_end_mask_0 = const()[name = tensor("op_33777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33777_cast_fp16 = slice_by_index(begin = var_33777_begin_0, end = var_33777_end_0, end_mask = var_33777_end_mask_0, x = var_33596_cast_fp16)[name = tensor("op_33777_cast_fp16")]; tensor var_33784_begin_0 = const()[name = tensor("op_33784_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33784_end_0 = const()[name = tensor("op_33784_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33784_end_mask_0 = const()[name = tensor("op_33784_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33784_cast_fp16 = slice_by_index(begin = var_33784_begin_0, end = var_33784_end_0, end_mask = var_33784_end_mask_0, x = var_33596_cast_fp16)[name = tensor("op_33784_cast_fp16")]; tensor var_33791_begin_0 = const()[name = tensor("op_33791_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33791_end_0 = const()[name = tensor("op_33791_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33791_end_mask_0 = const()[name = tensor("op_33791_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33791_cast_fp16 = slice_by_index(begin = var_33791_begin_0, end = var_33791_end_0, end_mask = var_33791_end_mask_0, x = var_33596_cast_fp16)[name = tensor("op_33791_cast_fp16")]; tensor var_33798_begin_0 = const()[name = tensor("op_33798_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33798_end_0 = const()[name = tensor("op_33798_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33798_end_mask_0 = const()[name = tensor("op_33798_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33798_cast_fp16 = slice_by_index(begin = var_33798_begin_0, end = var_33798_end_0, end_mask = var_33798_end_mask_0, x = var_33596_cast_fp16)[name = tensor("op_33798_cast_fp16")]; tensor var_33805_begin_0 = const()[name = tensor("op_33805_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33805_end_0 = const()[name = tensor("op_33805_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33805_end_mask_0 = const()[name = tensor("op_33805_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33805_cast_fp16 = slice_by_index(begin = var_33805_begin_0, end = var_33805_end_0, end_mask = var_33805_end_mask_0, x = var_33600_cast_fp16)[name = tensor("op_33805_cast_fp16")]; tensor var_33812_begin_0 = const()[name = tensor("op_33812_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33812_end_0 = const()[name = tensor("op_33812_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33812_end_mask_0 = const()[name = tensor("op_33812_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33812_cast_fp16 = slice_by_index(begin = var_33812_begin_0, end = var_33812_end_0, end_mask = var_33812_end_mask_0, x = var_33600_cast_fp16)[name = tensor("op_33812_cast_fp16")]; tensor var_33819_begin_0 = const()[name = tensor("op_33819_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33819_end_0 = const()[name = tensor("op_33819_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33819_end_mask_0 = const()[name = tensor("op_33819_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33819_cast_fp16 = slice_by_index(begin = var_33819_begin_0, end = var_33819_end_0, end_mask = var_33819_end_mask_0, x = var_33600_cast_fp16)[name = tensor("op_33819_cast_fp16")]; tensor var_33826_begin_0 = const()[name = tensor("op_33826_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33826_end_0 = const()[name = tensor("op_33826_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33826_end_mask_0 = const()[name = tensor("op_33826_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33826_cast_fp16 = slice_by_index(begin = var_33826_begin_0, end = var_33826_end_0, end_mask = var_33826_end_mask_0, x = var_33600_cast_fp16)[name = tensor("op_33826_cast_fp16")]; tensor var_33833_begin_0 = const()[name = tensor("op_33833_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33833_end_0 = const()[name = tensor("op_33833_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33833_end_mask_0 = const()[name = tensor("op_33833_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33833_cast_fp16 = slice_by_index(begin = var_33833_begin_0, end = var_33833_end_0, end_mask = var_33833_end_mask_0, x = var_33604_cast_fp16)[name = tensor("op_33833_cast_fp16")]; tensor var_33840_begin_0 = const()[name = tensor("op_33840_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33840_end_0 = const()[name = tensor("op_33840_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33840_end_mask_0 = const()[name = tensor("op_33840_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33840_cast_fp16 = slice_by_index(begin = var_33840_begin_0, end = var_33840_end_0, end_mask = var_33840_end_mask_0, x = var_33604_cast_fp16)[name = tensor("op_33840_cast_fp16")]; tensor var_33847_begin_0 = const()[name = tensor("op_33847_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33847_end_0 = const()[name = tensor("op_33847_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33847_end_mask_0 = const()[name = tensor("op_33847_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33847_cast_fp16 = slice_by_index(begin = var_33847_begin_0, end = var_33847_end_0, end_mask = var_33847_end_mask_0, x = var_33604_cast_fp16)[name = tensor("op_33847_cast_fp16")]; tensor var_33854_begin_0 = const()[name = tensor("op_33854_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33854_end_0 = const()[name = tensor("op_33854_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33854_end_mask_0 = const()[name = tensor("op_33854_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33854_cast_fp16 = slice_by_index(begin = var_33854_begin_0, end = var_33854_end_0, end_mask = var_33854_end_mask_0, x = var_33604_cast_fp16)[name = tensor("op_33854_cast_fp16")]; tensor var_33861_begin_0 = const()[name = tensor("op_33861_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33861_end_0 = const()[name = tensor("op_33861_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33861_end_mask_0 = const()[name = tensor("op_33861_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33861_cast_fp16 = slice_by_index(begin = var_33861_begin_0, end = var_33861_end_0, end_mask = var_33861_end_mask_0, x = var_33608_cast_fp16)[name = tensor("op_33861_cast_fp16")]; tensor var_33868_begin_0 = const()[name = tensor("op_33868_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33868_end_0 = const()[name = tensor("op_33868_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33868_end_mask_0 = const()[name = tensor("op_33868_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33868_cast_fp16 = slice_by_index(begin = var_33868_begin_0, end = var_33868_end_0, end_mask = var_33868_end_mask_0, x = var_33608_cast_fp16)[name = tensor("op_33868_cast_fp16")]; tensor var_33875_begin_0 = const()[name = tensor("op_33875_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33875_end_0 = const()[name = tensor("op_33875_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33875_end_mask_0 = const()[name = tensor("op_33875_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33875_cast_fp16 = slice_by_index(begin = var_33875_begin_0, end = var_33875_end_0, end_mask = var_33875_end_mask_0, x = var_33608_cast_fp16)[name = tensor("op_33875_cast_fp16")]; tensor var_33882_begin_0 = const()[name = tensor("op_33882_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33882_end_0 = const()[name = tensor("op_33882_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33882_end_mask_0 = const()[name = tensor("op_33882_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33882_cast_fp16 = slice_by_index(begin = var_33882_begin_0, end = var_33882_end_0, end_mask = var_33882_end_mask_0, x = var_33608_cast_fp16)[name = tensor("op_33882_cast_fp16")]; tensor var_33889_begin_0 = const()[name = tensor("op_33889_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33889_end_0 = const()[name = tensor("op_33889_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33889_end_mask_0 = const()[name = tensor("op_33889_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33889_cast_fp16 = slice_by_index(begin = var_33889_begin_0, end = var_33889_end_0, end_mask = var_33889_end_mask_0, x = var_33612_cast_fp16)[name = tensor("op_33889_cast_fp16")]; tensor var_33896_begin_0 = const()[name = tensor("op_33896_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33896_end_0 = const()[name = tensor("op_33896_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33896_end_mask_0 = const()[name = tensor("op_33896_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33896_cast_fp16 = slice_by_index(begin = var_33896_begin_0, end = var_33896_end_0, end_mask = var_33896_end_mask_0, x = var_33612_cast_fp16)[name = tensor("op_33896_cast_fp16")]; tensor var_33903_begin_0 = const()[name = tensor("op_33903_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33903_end_0 = const()[name = tensor("op_33903_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33903_end_mask_0 = const()[name = tensor("op_33903_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33903_cast_fp16 = slice_by_index(begin = var_33903_begin_0, end = var_33903_end_0, end_mask = var_33903_end_mask_0, x = var_33612_cast_fp16)[name = tensor("op_33903_cast_fp16")]; tensor var_33910_begin_0 = const()[name = tensor("op_33910_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33910_end_0 = const()[name = tensor("op_33910_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33910_end_mask_0 = const()[name = tensor("op_33910_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33910_cast_fp16 = slice_by_index(begin = var_33910_begin_0, end = var_33910_end_0, end_mask = var_33910_end_mask_0, x = var_33612_cast_fp16)[name = tensor("op_33910_cast_fp16")]; tensor var_33917_begin_0 = const()[name = tensor("op_33917_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33917_end_0 = const()[name = tensor("op_33917_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33917_end_mask_0 = const()[name = tensor("op_33917_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33917_cast_fp16 = slice_by_index(begin = var_33917_begin_0, end = var_33917_end_0, end_mask = var_33917_end_mask_0, x = var_33616_cast_fp16)[name = tensor("op_33917_cast_fp16")]; tensor var_33924_begin_0 = const()[name = tensor("op_33924_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33924_end_0 = const()[name = tensor("op_33924_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33924_end_mask_0 = const()[name = tensor("op_33924_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33924_cast_fp16 = slice_by_index(begin = var_33924_begin_0, end = var_33924_end_0, end_mask = var_33924_end_mask_0, x = var_33616_cast_fp16)[name = tensor("op_33924_cast_fp16")]; tensor var_33931_begin_0 = const()[name = tensor("op_33931_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33931_end_0 = const()[name = tensor("op_33931_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33931_end_mask_0 = const()[name = tensor("op_33931_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33931_cast_fp16 = slice_by_index(begin = var_33931_begin_0, end = var_33931_end_0, end_mask = var_33931_end_mask_0, x = var_33616_cast_fp16)[name = tensor("op_33931_cast_fp16")]; tensor var_33938_begin_0 = const()[name = tensor("op_33938_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33938_end_0 = const()[name = tensor("op_33938_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33938_end_mask_0 = const()[name = tensor("op_33938_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33938_cast_fp16 = slice_by_index(begin = var_33938_begin_0, end = var_33938_end_0, end_mask = var_33938_end_mask_0, x = var_33616_cast_fp16)[name = tensor("op_33938_cast_fp16")]; tensor var_33945_begin_0 = const()[name = tensor("op_33945_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33945_end_0 = const()[name = tensor("op_33945_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33945_end_mask_0 = const()[name = tensor("op_33945_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33945_cast_fp16 = slice_by_index(begin = var_33945_begin_0, end = var_33945_end_0, end_mask = var_33945_end_mask_0, x = var_33620_cast_fp16)[name = tensor("op_33945_cast_fp16")]; tensor var_33952_begin_0 = const()[name = tensor("op_33952_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33952_end_0 = const()[name = tensor("op_33952_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33952_end_mask_0 = const()[name = tensor("op_33952_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33952_cast_fp16 = slice_by_index(begin = var_33952_begin_0, end = var_33952_end_0, end_mask = var_33952_end_mask_0, x = var_33620_cast_fp16)[name = tensor("op_33952_cast_fp16")]; tensor var_33959_begin_0 = const()[name = tensor("op_33959_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33959_end_0 = const()[name = tensor("op_33959_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33959_end_mask_0 = const()[name = tensor("op_33959_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33959_cast_fp16 = slice_by_index(begin = var_33959_begin_0, end = var_33959_end_0, end_mask = var_33959_end_mask_0, x = var_33620_cast_fp16)[name = tensor("op_33959_cast_fp16")]; tensor var_33966_begin_0 = const()[name = tensor("op_33966_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33966_end_0 = const()[name = tensor("op_33966_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33966_end_mask_0 = const()[name = tensor("op_33966_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33966_cast_fp16 = slice_by_index(begin = var_33966_begin_0, end = var_33966_end_0, end_mask = var_33966_end_mask_0, x = var_33620_cast_fp16)[name = tensor("op_33966_cast_fp16")]; tensor var_33973_begin_0 = const()[name = tensor("op_33973_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_33973_end_0 = const()[name = tensor("op_33973_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_33973_end_mask_0 = const()[name = tensor("op_33973_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33973_cast_fp16 = slice_by_index(begin = var_33973_begin_0, end = var_33973_end_0, end_mask = var_33973_end_mask_0, x = var_33624_cast_fp16)[name = tensor("op_33973_cast_fp16")]; tensor var_33980_begin_0 = const()[name = tensor("op_33980_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_33980_end_0 = const()[name = tensor("op_33980_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_33980_end_mask_0 = const()[name = tensor("op_33980_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33980_cast_fp16 = slice_by_index(begin = var_33980_begin_0, end = var_33980_end_0, end_mask = var_33980_end_mask_0, x = var_33624_cast_fp16)[name = tensor("op_33980_cast_fp16")]; tensor var_33987_begin_0 = const()[name = tensor("op_33987_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_33987_end_0 = const()[name = tensor("op_33987_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_33987_end_mask_0 = const()[name = tensor("op_33987_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33987_cast_fp16 = slice_by_index(begin = var_33987_begin_0, end = var_33987_end_0, end_mask = var_33987_end_mask_0, x = var_33624_cast_fp16)[name = tensor("op_33987_cast_fp16")]; tensor var_33994_begin_0 = const()[name = tensor("op_33994_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_33994_end_0 = const()[name = tensor("op_33994_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_33994_end_mask_0 = const()[name = tensor("op_33994_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_33994_cast_fp16 = slice_by_index(begin = var_33994_begin_0, end = var_33994_end_0, end_mask = var_33994_end_mask_0, x = var_33624_cast_fp16)[name = tensor("op_33994_cast_fp16")]; tensor var_34001_begin_0 = const()[name = tensor("op_34001_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34001_end_0 = const()[name = tensor("op_34001_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34001_end_mask_0 = const()[name = tensor("op_34001_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34001_cast_fp16 = slice_by_index(begin = var_34001_begin_0, end = var_34001_end_0, end_mask = var_34001_end_mask_0, x = var_33628_cast_fp16)[name = tensor("op_34001_cast_fp16")]; tensor var_34008_begin_0 = const()[name = tensor("op_34008_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34008_end_0 = const()[name = tensor("op_34008_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34008_end_mask_0 = const()[name = tensor("op_34008_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34008_cast_fp16 = slice_by_index(begin = var_34008_begin_0, end = var_34008_end_0, end_mask = var_34008_end_mask_0, x = var_33628_cast_fp16)[name = tensor("op_34008_cast_fp16")]; tensor var_34015_begin_0 = const()[name = tensor("op_34015_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34015_end_0 = const()[name = tensor("op_34015_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34015_end_mask_0 = const()[name = tensor("op_34015_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34015_cast_fp16 = slice_by_index(begin = var_34015_begin_0, end = var_34015_end_0, end_mask = var_34015_end_mask_0, x = var_33628_cast_fp16)[name = tensor("op_34015_cast_fp16")]; tensor var_34022_begin_0 = const()[name = tensor("op_34022_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34022_end_0 = const()[name = tensor("op_34022_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34022_end_mask_0 = const()[name = tensor("op_34022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34022_cast_fp16 = slice_by_index(begin = var_34022_begin_0, end = var_34022_end_0, end_mask = var_34022_end_mask_0, x = var_33628_cast_fp16)[name = tensor("op_34022_cast_fp16")]; tensor var_34029_begin_0 = const()[name = tensor("op_34029_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34029_end_0 = const()[name = tensor("op_34029_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34029_end_mask_0 = const()[name = tensor("op_34029_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34029_cast_fp16 = slice_by_index(begin = var_34029_begin_0, end = var_34029_end_0, end_mask = var_34029_end_mask_0, x = var_33632_cast_fp16)[name = tensor("op_34029_cast_fp16")]; tensor var_34036_begin_0 = const()[name = tensor("op_34036_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34036_end_0 = const()[name = tensor("op_34036_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34036_end_mask_0 = const()[name = tensor("op_34036_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34036_cast_fp16 = slice_by_index(begin = var_34036_begin_0, end = var_34036_end_0, end_mask = var_34036_end_mask_0, x = var_33632_cast_fp16)[name = tensor("op_34036_cast_fp16")]; tensor var_34043_begin_0 = const()[name = tensor("op_34043_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34043_end_0 = const()[name = tensor("op_34043_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34043_end_mask_0 = const()[name = tensor("op_34043_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34043_cast_fp16 = slice_by_index(begin = var_34043_begin_0, end = var_34043_end_0, end_mask = var_34043_end_mask_0, x = var_33632_cast_fp16)[name = tensor("op_34043_cast_fp16")]; tensor var_34050_begin_0 = const()[name = tensor("op_34050_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34050_end_0 = const()[name = tensor("op_34050_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34050_end_mask_0 = const()[name = tensor("op_34050_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34050_cast_fp16 = slice_by_index(begin = var_34050_begin_0, end = var_34050_end_0, end_mask = var_34050_end_mask_0, x = var_33632_cast_fp16)[name = tensor("op_34050_cast_fp16")]; tensor var_34057_begin_0 = const()[name = tensor("op_34057_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34057_end_0 = const()[name = tensor("op_34057_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34057_end_mask_0 = const()[name = tensor("op_34057_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34057_cast_fp16 = slice_by_index(begin = var_34057_begin_0, end = var_34057_end_0, end_mask = var_34057_end_mask_0, x = var_33636_cast_fp16)[name = tensor("op_34057_cast_fp16")]; tensor var_34064_begin_0 = const()[name = tensor("op_34064_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34064_end_0 = const()[name = tensor("op_34064_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34064_end_mask_0 = const()[name = tensor("op_34064_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34064_cast_fp16 = slice_by_index(begin = var_34064_begin_0, end = var_34064_end_0, end_mask = var_34064_end_mask_0, x = var_33636_cast_fp16)[name = tensor("op_34064_cast_fp16")]; tensor var_34071_begin_0 = const()[name = tensor("op_34071_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34071_end_0 = const()[name = tensor("op_34071_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34071_end_mask_0 = const()[name = tensor("op_34071_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34071_cast_fp16 = slice_by_index(begin = var_34071_begin_0, end = var_34071_end_0, end_mask = var_34071_end_mask_0, x = var_33636_cast_fp16)[name = tensor("op_34071_cast_fp16")]; tensor var_34078_begin_0 = const()[name = tensor("op_34078_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34078_end_0 = const()[name = tensor("op_34078_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34078_end_mask_0 = const()[name = tensor("op_34078_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34078_cast_fp16 = slice_by_index(begin = var_34078_begin_0, end = var_34078_end_0, end_mask = var_34078_end_mask_0, x = var_33636_cast_fp16)[name = tensor("op_34078_cast_fp16")]; tensor var_34085_begin_0 = const()[name = tensor("op_34085_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34085_end_0 = const()[name = tensor("op_34085_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34085_end_mask_0 = const()[name = tensor("op_34085_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34085_cast_fp16 = slice_by_index(begin = var_34085_begin_0, end = var_34085_end_0, end_mask = var_34085_end_mask_0, x = var_33640_cast_fp16)[name = tensor("op_34085_cast_fp16")]; tensor var_34092_begin_0 = const()[name = tensor("op_34092_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34092_end_0 = const()[name = tensor("op_34092_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34092_end_mask_0 = const()[name = tensor("op_34092_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34092_cast_fp16 = slice_by_index(begin = var_34092_begin_0, end = var_34092_end_0, end_mask = var_34092_end_mask_0, x = var_33640_cast_fp16)[name = tensor("op_34092_cast_fp16")]; tensor var_34099_begin_0 = const()[name = tensor("op_34099_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34099_end_0 = const()[name = tensor("op_34099_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34099_end_mask_0 = const()[name = tensor("op_34099_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34099_cast_fp16 = slice_by_index(begin = var_34099_begin_0, end = var_34099_end_0, end_mask = var_34099_end_mask_0, x = var_33640_cast_fp16)[name = tensor("op_34099_cast_fp16")]; tensor var_34106_begin_0 = const()[name = tensor("op_34106_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34106_end_0 = const()[name = tensor("op_34106_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34106_end_mask_0 = const()[name = tensor("op_34106_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34106_cast_fp16 = slice_by_index(begin = var_34106_begin_0, end = var_34106_end_0, end_mask = var_34106_end_mask_0, x = var_33640_cast_fp16)[name = tensor("op_34106_cast_fp16")]; tensor var_34113_begin_0 = const()[name = tensor("op_34113_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34113_end_0 = const()[name = tensor("op_34113_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34113_end_mask_0 = const()[name = tensor("op_34113_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34113_cast_fp16 = slice_by_index(begin = var_34113_begin_0, end = var_34113_end_0, end_mask = var_34113_end_mask_0, x = var_33644_cast_fp16)[name = tensor("op_34113_cast_fp16")]; tensor var_34120_begin_0 = const()[name = tensor("op_34120_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34120_end_0 = const()[name = tensor("op_34120_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34120_end_mask_0 = const()[name = tensor("op_34120_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34120_cast_fp16 = slice_by_index(begin = var_34120_begin_0, end = var_34120_end_0, end_mask = var_34120_end_mask_0, x = var_33644_cast_fp16)[name = tensor("op_34120_cast_fp16")]; tensor var_34127_begin_0 = const()[name = tensor("op_34127_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34127_end_0 = const()[name = tensor("op_34127_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34127_end_mask_0 = const()[name = tensor("op_34127_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34127_cast_fp16 = slice_by_index(begin = var_34127_begin_0, end = var_34127_end_0, end_mask = var_34127_end_mask_0, x = var_33644_cast_fp16)[name = tensor("op_34127_cast_fp16")]; tensor var_34134_begin_0 = const()[name = tensor("op_34134_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34134_end_0 = const()[name = tensor("op_34134_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34134_end_mask_0 = const()[name = tensor("op_34134_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34134_cast_fp16 = slice_by_index(begin = var_34134_begin_0, end = var_34134_end_0, end_mask = var_34134_end_mask_0, x = var_33644_cast_fp16)[name = tensor("op_34134_cast_fp16")]; tensor var_34141_begin_0 = const()[name = tensor("op_34141_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34141_end_0 = const()[name = tensor("op_34141_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34141_end_mask_0 = const()[name = tensor("op_34141_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34141_cast_fp16 = slice_by_index(begin = var_34141_begin_0, end = var_34141_end_0, end_mask = var_34141_end_mask_0, x = var_33648_cast_fp16)[name = tensor("op_34141_cast_fp16")]; tensor var_34148_begin_0 = const()[name = tensor("op_34148_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34148_end_0 = const()[name = tensor("op_34148_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34148_end_mask_0 = const()[name = tensor("op_34148_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34148_cast_fp16 = slice_by_index(begin = var_34148_begin_0, end = var_34148_end_0, end_mask = var_34148_end_mask_0, x = var_33648_cast_fp16)[name = tensor("op_34148_cast_fp16")]; tensor var_34155_begin_0 = const()[name = tensor("op_34155_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34155_end_0 = const()[name = tensor("op_34155_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34155_end_mask_0 = const()[name = tensor("op_34155_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34155_cast_fp16 = slice_by_index(begin = var_34155_begin_0, end = var_34155_end_0, end_mask = var_34155_end_mask_0, x = var_33648_cast_fp16)[name = tensor("op_34155_cast_fp16")]; tensor var_34162_begin_0 = const()[name = tensor("op_34162_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34162_end_0 = const()[name = tensor("op_34162_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34162_end_mask_0 = const()[name = tensor("op_34162_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34162_cast_fp16 = slice_by_index(begin = var_34162_begin_0, end = var_34162_end_0, end_mask = var_34162_end_mask_0, x = var_33648_cast_fp16)[name = tensor("op_34162_cast_fp16")]; tensor var_34169_begin_0 = const()[name = tensor("op_34169_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34169_end_0 = const()[name = tensor("op_34169_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34169_end_mask_0 = const()[name = tensor("op_34169_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34169_cast_fp16 = slice_by_index(begin = var_34169_begin_0, end = var_34169_end_0, end_mask = var_34169_end_mask_0, x = var_33652_cast_fp16)[name = tensor("op_34169_cast_fp16")]; tensor var_34176_begin_0 = const()[name = tensor("op_34176_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34176_end_0 = const()[name = tensor("op_34176_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34176_end_mask_0 = const()[name = tensor("op_34176_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34176_cast_fp16 = slice_by_index(begin = var_34176_begin_0, end = var_34176_end_0, end_mask = var_34176_end_mask_0, x = var_33652_cast_fp16)[name = tensor("op_34176_cast_fp16")]; tensor var_34183_begin_0 = const()[name = tensor("op_34183_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34183_end_0 = const()[name = tensor("op_34183_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34183_end_mask_0 = const()[name = tensor("op_34183_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34183_cast_fp16 = slice_by_index(begin = var_34183_begin_0, end = var_34183_end_0, end_mask = var_34183_end_mask_0, x = var_33652_cast_fp16)[name = tensor("op_34183_cast_fp16")]; tensor var_34190_begin_0 = const()[name = tensor("op_34190_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34190_end_0 = const()[name = tensor("op_34190_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34190_end_mask_0 = const()[name = tensor("op_34190_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34190_cast_fp16 = slice_by_index(begin = var_34190_begin_0, end = var_34190_end_0, end_mask = var_34190_end_mask_0, x = var_33652_cast_fp16)[name = tensor("op_34190_cast_fp16")]; tensor var_34197_begin_0 = const()[name = tensor("op_34197_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34197_end_0 = const()[name = tensor("op_34197_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_34197_end_mask_0 = const()[name = tensor("op_34197_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34197_cast_fp16 = slice_by_index(begin = var_34197_begin_0, end = var_34197_end_0, end_mask = var_34197_end_mask_0, x = var_33656_cast_fp16)[name = tensor("op_34197_cast_fp16")]; tensor var_34204_begin_0 = const()[name = tensor("op_34204_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_34204_end_0 = const()[name = tensor("op_34204_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_34204_end_mask_0 = const()[name = tensor("op_34204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34204_cast_fp16 = slice_by_index(begin = var_34204_begin_0, end = var_34204_end_0, end_mask = var_34204_end_mask_0, x = var_33656_cast_fp16)[name = tensor("op_34204_cast_fp16")]; tensor var_34211_begin_0 = const()[name = tensor("op_34211_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_34211_end_0 = const()[name = tensor("op_34211_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_34211_end_mask_0 = const()[name = tensor("op_34211_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34211_cast_fp16 = slice_by_index(begin = var_34211_begin_0, end = var_34211_end_0, end_mask = var_34211_end_mask_0, x = var_33656_cast_fp16)[name = tensor("op_34211_cast_fp16")]; tensor var_34218_begin_0 = const()[name = tensor("op_34218_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_34218_end_0 = const()[name = tensor("op_34218_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34218_end_mask_0 = const()[name = tensor("op_34218_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34218_cast_fp16 = slice_by_index(begin = var_34218_begin_0, end = var_34218_end_0, end_mask = var_34218_end_mask_0, x = var_33656_cast_fp16)[name = tensor("op_34218_cast_fp16")]; tensor k_43_perm_0 = const()[name = tensor("k_43_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_34223_begin_0 = const()[name = tensor("op_34223_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34223_end_0 = const()[name = tensor("op_34223_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_34223_end_mask_0 = const()[name = tensor("op_34223_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_43_cast_fp16 = transpose(perm = k_43_perm_0, x = key_43_cast_fp16)[name = tensor("transpose_10")]; tensor var_34223_cast_fp16 = slice_by_index(begin = var_34223_begin_0, end = var_34223_end_0, end_mask = var_34223_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34223_cast_fp16")]; tensor var_34227_begin_0 = const()[name = tensor("op_34227_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_34227_end_0 = const()[name = tensor("op_34227_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_34227_end_mask_0 = const()[name = tensor("op_34227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34227_cast_fp16 = slice_by_index(begin = var_34227_begin_0, end = var_34227_end_0, end_mask = var_34227_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34227_cast_fp16")]; tensor var_34231_begin_0 = const()[name = tensor("op_34231_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_34231_end_0 = const()[name = tensor("op_34231_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_34231_end_mask_0 = const()[name = tensor("op_34231_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34231_cast_fp16 = slice_by_index(begin = var_34231_begin_0, end = var_34231_end_0, end_mask = var_34231_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34231_cast_fp16")]; tensor var_34235_begin_0 = const()[name = tensor("op_34235_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_34235_end_0 = const()[name = tensor("op_34235_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_34235_end_mask_0 = const()[name = tensor("op_34235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34235_cast_fp16 = slice_by_index(begin = var_34235_begin_0, end = var_34235_end_0, end_mask = var_34235_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34235_cast_fp16")]; tensor var_34239_begin_0 = const()[name = tensor("op_34239_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_34239_end_0 = const()[name = tensor("op_34239_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_34239_end_mask_0 = const()[name = tensor("op_34239_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34239_cast_fp16 = slice_by_index(begin = var_34239_begin_0, end = var_34239_end_0, end_mask = var_34239_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34239_cast_fp16")]; tensor var_34243_begin_0 = const()[name = tensor("op_34243_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_34243_end_0 = const()[name = tensor("op_34243_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_34243_end_mask_0 = const()[name = tensor("op_34243_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34243_cast_fp16 = slice_by_index(begin = var_34243_begin_0, end = var_34243_end_0, end_mask = var_34243_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34243_cast_fp16")]; tensor var_34247_begin_0 = const()[name = tensor("op_34247_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_34247_end_0 = const()[name = tensor("op_34247_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_34247_end_mask_0 = const()[name = tensor("op_34247_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34247_cast_fp16 = slice_by_index(begin = var_34247_begin_0, end = var_34247_end_0, end_mask = var_34247_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34247_cast_fp16")]; tensor var_34251_begin_0 = const()[name = tensor("op_34251_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_34251_end_0 = const()[name = tensor("op_34251_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_34251_end_mask_0 = const()[name = tensor("op_34251_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34251_cast_fp16 = slice_by_index(begin = var_34251_begin_0, end = var_34251_end_0, end_mask = var_34251_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34251_cast_fp16")]; tensor var_34255_begin_0 = const()[name = tensor("op_34255_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_34255_end_0 = const()[name = tensor("op_34255_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_34255_end_mask_0 = const()[name = tensor("op_34255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34255_cast_fp16 = slice_by_index(begin = var_34255_begin_0, end = var_34255_end_0, end_mask = var_34255_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34255_cast_fp16")]; tensor var_34259_begin_0 = const()[name = tensor("op_34259_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_34259_end_0 = const()[name = tensor("op_34259_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_34259_end_mask_0 = const()[name = tensor("op_34259_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34259_cast_fp16 = slice_by_index(begin = var_34259_begin_0, end = var_34259_end_0, end_mask = var_34259_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34259_cast_fp16")]; tensor var_34263_begin_0 = const()[name = tensor("op_34263_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_34263_end_0 = const()[name = tensor("op_34263_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_34263_end_mask_0 = const()[name = tensor("op_34263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34263_cast_fp16 = slice_by_index(begin = var_34263_begin_0, end = var_34263_end_0, end_mask = var_34263_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34263_cast_fp16")]; tensor var_34267_begin_0 = const()[name = tensor("op_34267_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_34267_end_0 = const()[name = tensor("op_34267_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_34267_end_mask_0 = const()[name = tensor("op_34267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34267_cast_fp16 = slice_by_index(begin = var_34267_begin_0, end = var_34267_end_0, end_mask = var_34267_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34267_cast_fp16")]; tensor var_34271_begin_0 = const()[name = tensor("op_34271_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_34271_end_0 = const()[name = tensor("op_34271_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_34271_end_mask_0 = const()[name = tensor("op_34271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34271_cast_fp16 = slice_by_index(begin = var_34271_begin_0, end = var_34271_end_0, end_mask = var_34271_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34271_cast_fp16")]; tensor var_34275_begin_0 = const()[name = tensor("op_34275_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_34275_end_0 = const()[name = tensor("op_34275_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_34275_end_mask_0 = const()[name = tensor("op_34275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34275_cast_fp16 = slice_by_index(begin = var_34275_begin_0, end = var_34275_end_0, end_mask = var_34275_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34275_cast_fp16")]; tensor var_34279_begin_0 = const()[name = tensor("op_34279_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_34279_end_0 = const()[name = tensor("op_34279_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_34279_end_mask_0 = const()[name = tensor("op_34279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34279_cast_fp16 = slice_by_index(begin = var_34279_begin_0, end = var_34279_end_0, end_mask = var_34279_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34279_cast_fp16")]; tensor var_34283_begin_0 = const()[name = tensor("op_34283_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_34283_end_0 = const()[name = tensor("op_34283_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_34283_end_mask_0 = const()[name = tensor("op_34283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34283_cast_fp16 = slice_by_index(begin = var_34283_begin_0, end = var_34283_end_0, end_mask = var_34283_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34283_cast_fp16")]; tensor var_34287_begin_0 = const()[name = tensor("op_34287_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_34287_end_0 = const()[name = tensor("op_34287_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_34287_end_mask_0 = const()[name = tensor("op_34287_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34287_cast_fp16 = slice_by_index(begin = var_34287_begin_0, end = var_34287_end_0, end_mask = var_34287_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34287_cast_fp16")]; tensor var_34291_begin_0 = const()[name = tensor("op_34291_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_34291_end_0 = const()[name = tensor("op_34291_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_34291_end_mask_0 = const()[name = tensor("op_34291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34291_cast_fp16 = slice_by_index(begin = var_34291_begin_0, end = var_34291_end_0, end_mask = var_34291_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34291_cast_fp16")]; tensor var_34295_begin_0 = const()[name = tensor("op_34295_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_34295_end_0 = const()[name = tensor("op_34295_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_34295_end_mask_0 = const()[name = tensor("op_34295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34295_cast_fp16 = slice_by_index(begin = var_34295_begin_0, end = var_34295_end_0, end_mask = var_34295_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34295_cast_fp16")]; tensor var_34299_begin_0 = const()[name = tensor("op_34299_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_34299_end_0 = const()[name = tensor("op_34299_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_34299_end_mask_0 = const()[name = tensor("op_34299_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_34299_cast_fp16 = slice_by_index(begin = var_34299_begin_0, end = var_34299_end_0, end_mask = var_34299_end_mask_0, x = k_43_cast_fp16)[name = tensor("op_34299_cast_fp16")]; tensor var_34301_begin_0 = const()[name = tensor("op_34301_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_34301_end_0 = const()[name = tensor("op_34301_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_34301_end_mask_0 = const()[name = tensor("op_34301_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34301_cast_fp16 = slice_by_index(begin = var_34301_begin_0, end = var_34301_end_0, end_mask = var_34301_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34301_cast_fp16")]; tensor var_34305_begin_0 = const()[name = tensor("op_34305_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_34305_end_0 = const()[name = tensor("op_34305_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_34305_end_mask_0 = const()[name = tensor("op_34305_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34305_cast_fp16 = slice_by_index(begin = var_34305_begin_0, end = var_34305_end_0, end_mask = var_34305_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34305_cast_fp16")]; tensor var_34309_begin_0 = const()[name = tensor("op_34309_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_34309_end_0 = const()[name = tensor("op_34309_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_34309_end_mask_0 = const()[name = tensor("op_34309_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34309_cast_fp16 = slice_by_index(begin = var_34309_begin_0, end = var_34309_end_0, end_mask = var_34309_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34309_cast_fp16")]; tensor var_34313_begin_0 = const()[name = tensor("op_34313_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_34313_end_0 = const()[name = tensor("op_34313_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_34313_end_mask_0 = const()[name = tensor("op_34313_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34313_cast_fp16 = slice_by_index(begin = var_34313_begin_0, end = var_34313_end_0, end_mask = var_34313_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34313_cast_fp16")]; tensor var_34317_begin_0 = const()[name = tensor("op_34317_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_34317_end_0 = const()[name = tensor("op_34317_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_34317_end_mask_0 = const()[name = tensor("op_34317_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34317_cast_fp16 = slice_by_index(begin = var_34317_begin_0, end = var_34317_end_0, end_mask = var_34317_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34317_cast_fp16")]; tensor var_34321_begin_0 = const()[name = tensor("op_34321_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_34321_end_0 = const()[name = tensor("op_34321_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_34321_end_mask_0 = const()[name = tensor("op_34321_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34321_cast_fp16 = slice_by_index(begin = var_34321_begin_0, end = var_34321_end_0, end_mask = var_34321_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34321_cast_fp16")]; tensor var_34325_begin_0 = const()[name = tensor("op_34325_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_34325_end_0 = const()[name = tensor("op_34325_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_34325_end_mask_0 = const()[name = tensor("op_34325_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34325_cast_fp16 = slice_by_index(begin = var_34325_begin_0, end = var_34325_end_0, end_mask = var_34325_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34325_cast_fp16")]; tensor var_34329_begin_0 = const()[name = tensor("op_34329_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_34329_end_0 = const()[name = tensor("op_34329_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_34329_end_mask_0 = const()[name = tensor("op_34329_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34329_cast_fp16 = slice_by_index(begin = var_34329_begin_0, end = var_34329_end_0, end_mask = var_34329_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34329_cast_fp16")]; tensor var_34333_begin_0 = const()[name = tensor("op_34333_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_34333_end_0 = const()[name = tensor("op_34333_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_34333_end_mask_0 = const()[name = tensor("op_34333_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34333_cast_fp16 = slice_by_index(begin = var_34333_begin_0, end = var_34333_end_0, end_mask = var_34333_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34333_cast_fp16")]; tensor var_34337_begin_0 = const()[name = tensor("op_34337_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_34337_end_0 = const()[name = tensor("op_34337_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_34337_end_mask_0 = const()[name = tensor("op_34337_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34337_cast_fp16 = slice_by_index(begin = var_34337_begin_0, end = var_34337_end_0, end_mask = var_34337_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34337_cast_fp16")]; tensor var_34341_begin_0 = const()[name = tensor("op_34341_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_34341_end_0 = const()[name = tensor("op_34341_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_34341_end_mask_0 = const()[name = tensor("op_34341_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34341_cast_fp16 = slice_by_index(begin = var_34341_begin_0, end = var_34341_end_0, end_mask = var_34341_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34341_cast_fp16")]; tensor var_34345_begin_0 = const()[name = tensor("op_34345_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_34345_end_0 = const()[name = tensor("op_34345_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_34345_end_mask_0 = const()[name = tensor("op_34345_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34345_cast_fp16 = slice_by_index(begin = var_34345_begin_0, end = var_34345_end_0, end_mask = var_34345_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34345_cast_fp16")]; tensor var_34349_begin_0 = const()[name = tensor("op_34349_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_34349_end_0 = const()[name = tensor("op_34349_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_34349_end_mask_0 = const()[name = tensor("op_34349_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34349_cast_fp16 = slice_by_index(begin = var_34349_begin_0, end = var_34349_end_0, end_mask = var_34349_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34349_cast_fp16")]; tensor var_34353_begin_0 = const()[name = tensor("op_34353_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_34353_end_0 = const()[name = tensor("op_34353_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_34353_end_mask_0 = const()[name = tensor("op_34353_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34353_cast_fp16 = slice_by_index(begin = var_34353_begin_0, end = var_34353_end_0, end_mask = var_34353_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34353_cast_fp16")]; tensor var_34357_begin_0 = const()[name = tensor("op_34357_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_34357_end_0 = const()[name = tensor("op_34357_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_34357_end_mask_0 = const()[name = tensor("op_34357_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34357_cast_fp16 = slice_by_index(begin = var_34357_begin_0, end = var_34357_end_0, end_mask = var_34357_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34357_cast_fp16")]; tensor var_34361_begin_0 = const()[name = tensor("op_34361_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_34361_end_0 = const()[name = tensor("op_34361_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_34361_end_mask_0 = const()[name = tensor("op_34361_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34361_cast_fp16 = slice_by_index(begin = var_34361_begin_0, end = var_34361_end_0, end_mask = var_34361_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34361_cast_fp16")]; tensor var_34365_begin_0 = const()[name = tensor("op_34365_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_34365_end_0 = const()[name = tensor("op_34365_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_34365_end_mask_0 = const()[name = tensor("op_34365_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34365_cast_fp16 = slice_by_index(begin = var_34365_begin_0, end = var_34365_end_0, end_mask = var_34365_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34365_cast_fp16")]; tensor var_34369_begin_0 = const()[name = tensor("op_34369_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_34369_end_0 = const()[name = tensor("op_34369_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_34369_end_mask_0 = const()[name = tensor("op_34369_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34369_cast_fp16 = slice_by_index(begin = var_34369_begin_0, end = var_34369_end_0, end_mask = var_34369_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34369_cast_fp16")]; tensor var_34373_begin_0 = const()[name = tensor("op_34373_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_34373_end_0 = const()[name = tensor("op_34373_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_34373_end_mask_0 = const()[name = tensor("op_34373_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34373_cast_fp16 = slice_by_index(begin = var_34373_begin_0, end = var_34373_end_0, end_mask = var_34373_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34373_cast_fp16")]; tensor var_34377_begin_0 = const()[name = tensor("op_34377_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_34377_end_0 = const()[name = tensor("op_34377_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_34377_end_mask_0 = const()[name = tensor("op_34377_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_34377_cast_fp16 = slice_by_index(begin = var_34377_begin_0, end = var_34377_end_0, end_mask = var_34377_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_34377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3361_equation_0, values = (var_34223_cast_fp16, var_33665_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3363_equation_0, values = (var_34223_cast_fp16, var_33672_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3365_equation_0, values = (var_34223_cast_fp16, var_33679_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3367_equation_0, values = (var_34223_cast_fp16, var_33686_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3369_equation_0, values = (var_34227_cast_fp16, var_33693_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3371_equation_0, values = (var_34227_cast_fp16, var_33700_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3373_equation_0, values = (var_34227_cast_fp16, var_33707_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3375_equation_0, values = (var_34227_cast_fp16, var_33714_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3377_equation_0, values = (var_34231_cast_fp16, var_33721_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3379_equation_0, values = (var_34231_cast_fp16, var_33728_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3381_equation_0, values = (var_34231_cast_fp16, var_33735_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3383_equation_0, values = (var_34231_cast_fp16, var_33742_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3383_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3385_equation_0, values = (var_34235_cast_fp16, var_33749_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3387_equation_0, values = (var_34235_cast_fp16, var_33756_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3389_equation_0, values = (var_34235_cast_fp16, var_33763_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3391_equation_0, values = (var_34235_cast_fp16, var_33770_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3393_equation_0, values = (var_34239_cast_fp16, var_33777_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3395_equation_0, values = (var_34239_cast_fp16, var_33784_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3397_equation_0, values = (var_34239_cast_fp16, var_33791_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3399_equation_0, values = (var_34239_cast_fp16, var_33798_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3399_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3401_equation_0, values = (var_34243_cast_fp16, var_33805_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3403_equation_0, values = (var_34243_cast_fp16, var_33812_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3405_equation_0, values = (var_34243_cast_fp16, var_33819_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3407_equation_0, values = (var_34243_cast_fp16, var_33826_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3409_equation_0, values = (var_34247_cast_fp16, var_33833_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3411_equation_0, values = (var_34247_cast_fp16, var_33840_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3413_equation_0, values = (var_34247_cast_fp16, var_33847_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3415_equation_0, values = (var_34247_cast_fp16, var_33854_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3417_equation_0, values = (var_34251_cast_fp16, var_33861_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3419_equation_0, values = (var_34251_cast_fp16, var_33868_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3421_equation_0, values = (var_34251_cast_fp16, var_33875_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3423_equation_0, values = (var_34251_cast_fp16, var_33882_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3425_equation_0, values = (var_34255_cast_fp16, var_33889_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3427_equation_0, values = (var_34255_cast_fp16, var_33896_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3429_equation_0, values = (var_34255_cast_fp16, var_33903_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3431_equation_0, values = (var_34255_cast_fp16, var_33910_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3431_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3433_equation_0, values = (var_34259_cast_fp16, var_33917_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3435_equation_0, values = (var_34259_cast_fp16, var_33924_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3437_equation_0, values = (var_34259_cast_fp16, var_33931_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3439_equation_0, values = (var_34259_cast_fp16, var_33938_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3439_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3441_equation_0, values = (var_34263_cast_fp16, var_33945_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3443_equation_0, values = (var_34263_cast_fp16, var_33952_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3445_equation_0, values = (var_34263_cast_fp16, var_33959_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3447_equation_0, values = (var_34263_cast_fp16, var_33966_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3449_equation_0, values = (var_34267_cast_fp16, var_33973_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3451_equation_0, values = (var_34267_cast_fp16, var_33980_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3453_equation_0, values = (var_34267_cast_fp16, var_33987_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3455_equation_0, values = (var_34267_cast_fp16, var_33994_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3457_equation_0, values = (var_34271_cast_fp16, var_34001_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3459_equation_0, values = (var_34271_cast_fp16, var_34008_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3461_equation_0, values = (var_34271_cast_fp16, var_34015_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3463_equation_0, values = (var_34271_cast_fp16, var_34022_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3465_equation_0, values = (var_34275_cast_fp16, var_34029_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3467_equation_0, values = (var_34275_cast_fp16, var_34036_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3469_equation_0, values = (var_34275_cast_fp16, var_34043_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3471_equation_0, values = (var_34275_cast_fp16, var_34050_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3473_equation_0, values = (var_34279_cast_fp16, var_34057_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3475_equation_0, values = (var_34279_cast_fp16, var_34064_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3477_equation_0, values = (var_34279_cast_fp16, var_34071_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3479_equation_0, values = (var_34279_cast_fp16, var_34078_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3479_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3481_equation_0, values = (var_34283_cast_fp16, var_34085_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3483_equation_0, values = (var_34283_cast_fp16, var_34092_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3485_equation_0, values = (var_34283_cast_fp16, var_34099_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3487_equation_0, values = (var_34283_cast_fp16, var_34106_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3489_equation_0, values = (var_34287_cast_fp16, var_34113_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3491_equation_0, values = (var_34287_cast_fp16, var_34120_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3493_equation_0, values = (var_34287_cast_fp16, var_34127_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3495_equation_0, values = (var_34287_cast_fp16, var_34134_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3497_equation_0, values = (var_34291_cast_fp16, var_34141_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3499_equation_0, values = (var_34291_cast_fp16, var_34148_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3501_equation_0, values = (var_34291_cast_fp16, var_34155_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3503_equation_0, values = (var_34291_cast_fp16, var_34162_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3505_equation_0, values = (var_34295_cast_fp16, var_34169_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3507_equation_0, values = (var_34295_cast_fp16, var_34176_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3509_equation_0, values = (var_34295_cast_fp16, var_34183_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3511_equation_0, values = (var_34295_cast_fp16, var_34190_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3513_equation_0, values = (var_34299_cast_fp16, var_34197_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3515_equation_0, values = (var_34299_cast_fp16, var_34204_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3517_equation_0, values = (var_34299_cast_fp16, var_34211_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3519_equation_0, values = (var_34299_cast_fp16, var_34218_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3519_cast_fp16")]; tensor var_34540_to_fp16 = const()[name = tensor("op_34540_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3361_cast_fp16, y = var_34540_to_fp16)[name = tensor("aw_chunk_3361_cast_fp16")]; tensor var_34542_to_fp16 = const()[name = tensor("op_34542_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3363_cast_fp16, y = var_34542_to_fp16)[name = tensor("aw_chunk_3363_cast_fp16")]; tensor var_34544_to_fp16 = const()[name = tensor("op_34544_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3365_cast_fp16, y = var_34544_to_fp16)[name = tensor("aw_chunk_3365_cast_fp16")]; tensor var_34546_to_fp16 = const()[name = tensor("op_34546_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3367_cast_fp16, y = var_34546_to_fp16)[name = tensor("aw_chunk_3367_cast_fp16")]; tensor var_34548_to_fp16 = const()[name = tensor("op_34548_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3369_cast_fp16, y = var_34548_to_fp16)[name = tensor("aw_chunk_3369_cast_fp16")]; tensor var_34550_to_fp16 = const()[name = tensor("op_34550_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3371_cast_fp16, y = var_34550_to_fp16)[name = tensor("aw_chunk_3371_cast_fp16")]; tensor var_34552_to_fp16 = const()[name = tensor("op_34552_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3373_cast_fp16, y = var_34552_to_fp16)[name = tensor("aw_chunk_3373_cast_fp16")]; tensor var_34554_to_fp16 = const()[name = tensor("op_34554_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3375_cast_fp16, y = var_34554_to_fp16)[name = tensor("aw_chunk_3375_cast_fp16")]; tensor var_34556_to_fp16 = const()[name = tensor("op_34556_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3377_cast_fp16, y = var_34556_to_fp16)[name = tensor("aw_chunk_3377_cast_fp16")]; tensor var_34558_to_fp16 = const()[name = tensor("op_34558_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3379_cast_fp16, y = var_34558_to_fp16)[name = tensor("aw_chunk_3379_cast_fp16")]; tensor var_34560_to_fp16 = const()[name = tensor("op_34560_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3381_cast_fp16, y = var_34560_to_fp16)[name = tensor("aw_chunk_3381_cast_fp16")]; tensor var_34562_to_fp16 = const()[name = tensor("op_34562_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3383_cast_fp16, y = var_34562_to_fp16)[name = tensor("aw_chunk_3383_cast_fp16")]; tensor var_34564_to_fp16 = const()[name = tensor("op_34564_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3385_cast_fp16, y = var_34564_to_fp16)[name = tensor("aw_chunk_3385_cast_fp16")]; tensor var_34566_to_fp16 = const()[name = tensor("op_34566_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3387_cast_fp16, y = var_34566_to_fp16)[name = tensor("aw_chunk_3387_cast_fp16")]; tensor var_34568_to_fp16 = const()[name = tensor("op_34568_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3389_cast_fp16, y = var_34568_to_fp16)[name = tensor("aw_chunk_3389_cast_fp16")]; tensor var_34570_to_fp16 = const()[name = tensor("op_34570_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3391_cast_fp16, y = var_34570_to_fp16)[name = tensor("aw_chunk_3391_cast_fp16")]; tensor var_34572_to_fp16 = const()[name = tensor("op_34572_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3393_cast_fp16, y = var_34572_to_fp16)[name = tensor("aw_chunk_3393_cast_fp16")]; tensor var_34574_to_fp16 = const()[name = tensor("op_34574_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3395_cast_fp16, y = var_34574_to_fp16)[name = tensor("aw_chunk_3395_cast_fp16")]; tensor var_34576_to_fp16 = const()[name = tensor("op_34576_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3397_cast_fp16, y = var_34576_to_fp16)[name = tensor("aw_chunk_3397_cast_fp16")]; tensor var_34578_to_fp16 = const()[name = tensor("op_34578_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3399_cast_fp16, y = var_34578_to_fp16)[name = tensor("aw_chunk_3399_cast_fp16")]; tensor var_34580_to_fp16 = const()[name = tensor("op_34580_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3401_cast_fp16, y = var_34580_to_fp16)[name = tensor("aw_chunk_3401_cast_fp16")]; tensor var_34582_to_fp16 = const()[name = tensor("op_34582_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3403_cast_fp16, y = var_34582_to_fp16)[name = tensor("aw_chunk_3403_cast_fp16")]; tensor var_34584_to_fp16 = const()[name = tensor("op_34584_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3405_cast_fp16, y = var_34584_to_fp16)[name = tensor("aw_chunk_3405_cast_fp16")]; tensor var_34586_to_fp16 = const()[name = tensor("op_34586_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3407_cast_fp16, y = var_34586_to_fp16)[name = tensor("aw_chunk_3407_cast_fp16")]; tensor var_34588_to_fp16 = const()[name = tensor("op_34588_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3409_cast_fp16, y = var_34588_to_fp16)[name = tensor("aw_chunk_3409_cast_fp16")]; tensor var_34590_to_fp16 = const()[name = tensor("op_34590_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3411_cast_fp16, y = var_34590_to_fp16)[name = tensor("aw_chunk_3411_cast_fp16")]; tensor var_34592_to_fp16 = const()[name = tensor("op_34592_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3413_cast_fp16, y = var_34592_to_fp16)[name = tensor("aw_chunk_3413_cast_fp16")]; tensor var_34594_to_fp16 = const()[name = tensor("op_34594_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3415_cast_fp16, y = var_34594_to_fp16)[name = tensor("aw_chunk_3415_cast_fp16")]; tensor var_34596_to_fp16 = const()[name = tensor("op_34596_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3417_cast_fp16, y = var_34596_to_fp16)[name = tensor("aw_chunk_3417_cast_fp16")]; tensor var_34598_to_fp16 = const()[name = tensor("op_34598_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3419_cast_fp16, y = var_34598_to_fp16)[name = tensor("aw_chunk_3419_cast_fp16")]; tensor var_34600_to_fp16 = const()[name = tensor("op_34600_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3421_cast_fp16, y = var_34600_to_fp16)[name = tensor("aw_chunk_3421_cast_fp16")]; tensor var_34602_to_fp16 = const()[name = tensor("op_34602_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3423_cast_fp16, y = var_34602_to_fp16)[name = tensor("aw_chunk_3423_cast_fp16")]; tensor var_34604_to_fp16 = const()[name = tensor("op_34604_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3425_cast_fp16, y = var_34604_to_fp16)[name = tensor("aw_chunk_3425_cast_fp16")]; tensor var_34606_to_fp16 = const()[name = tensor("op_34606_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3427_cast_fp16, y = var_34606_to_fp16)[name = tensor("aw_chunk_3427_cast_fp16")]; tensor var_34608_to_fp16 = const()[name = tensor("op_34608_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3429_cast_fp16, y = var_34608_to_fp16)[name = tensor("aw_chunk_3429_cast_fp16")]; tensor var_34610_to_fp16 = const()[name = tensor("op_34610_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3431_cast_fp16, y = var_34610_to_fp16)[name = tensor("aw_chunk_3431_cast_fp16")]; tensor var_34612_to_fp16 = const()[name = tensor("op_34612_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3433_cast_fp16, y = var_34612_to_fp16)[name = tensor("aw_chunk_3433_cast_fp16")]; tensor var_34614_to_fp16 = const()[name = tensor("op_34614_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3435_cast_fp16, y = var_34614_to_fp16)[name = tensor("aw_chunk_3435_cast_fp16")]; tensor var_34616_to_fp16 = const()[name = tensor("op_34616_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3437_cast_fp16, y = var_34616_to_fp16)[name = tensor("aw_chunk_3437_cast_fp16")]; tensor var_34618_to_fp16 = const()[name = tensor("op_34618_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3439_cast_fp16, y = var_34618_to_fp16)[name = tensor("aw_chunk_3439_cast_fp16")]; tensor var_34620_to_fp16 = const()[name = tensor("op_34620_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3441_cast_fp16, y = var_34620_to_fp16)[name = tensor("aw_chunk_3441_cast_fp16")]; tensor var_34622_to_fp16 = const()[name = tensor("op_34622_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3443_cast_fp16, y = var_34622_to_fp16)[name = tensor("aw_chunk_3443_cast_fp16")]; tensor var_34624_to_fp16 = const()[name = tensor("op_34624_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3445_cast_fp16, y = var_34624_to_fp16)[name = tensor("aw_chunk_3445_cast_fp16")]; tensor var_34626_to_fp16 = const()[name = tensor("op_34626_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3447_cast_fp16, y = var_34626_to_fp16)[name = tensor("aw_chunk_3447_cast_fp16")]; tensor var_34628_to_fp16 = const()[name = tensor("op_34628_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3449_cast_fp16, y = var_34628_to_fp16)[name = tensor("aw_chunk_3449_cast_fp16")]; tensor var_34630_to_fp16 = const()[name = tensor("op_34630_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3451_cast_fp16, y = var_34630_to_fp16)[name = tensor("aw_chunk_3451_cast_fp16")]; tensor var_34632_to_fp16 = const()[name = tensor("op_34632_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3453_cast_fp16, y = var_34632_to_fp16)[name = tensor("aw_chunk_3453_cast_fp16")]; tensor var_34634_to_fp16 = const()[name = tensor("op_34634_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3455_cast_fp16, y = var_34634_to_fp16)[name = tensor("aw_chunk_3455_cast_fp16")]; tensor var_34636_to_fp16 = const()[name = tensor("op_34636_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3457_cast_fp16, y = var_34636_to_fp16)[name = tensor("aw_chunk_3457_cast_fp16")]; tensor var_34638_to_fp16 = const()[name = tensor("op_34638_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3459_cast_fp16, y = var_34638_to_fp16)[name = tensor("aw_chunk_3459_cast_fp16")]; tensor var_34640_to_fp16 = const()[name = tensor("op_34640_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3461_cast_fp16, y = var_34640_to_fp16)[name = tensor("aw_chunk_3461_cast_fp16")]; tensor var_34642_to_fp16 = const()[name = tensor("op_34642_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3463_cast_fp16, y = var_34642_to_fp16)[name = tensor("aw_chunk_3463_cast_fp16")]; tensor var_34644_to_fp16 = const()[name = tensor("op_34644_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3465_cast_fp16, y = var_34644_to_fp16)[name = tensor("aw_chunk_3465_cast_fp16")]; tensor var_34646_to_fp16 = const()[name = tensor("op_34646_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3467_cast_fp16, y = var_34646_to_fp16)[name = tensor("aw_chunk_3467_cast_fp16")]; tensor var_34648_to_fp16 = const()[name = tensor("op_34648_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3469_cast_fp16, y = var_34648_to_fp16)[name = tensor("aw_chunk_3469_cast_fp16")]; tensor var_34650_to_fp16 = const()[name = tensor("op_34650_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3471_cast_fp16, y = var_34650_to_fp16)[name = tensor("aw_chunk_3471_cast_fp16")]; tensor var_34652_to_fp16 = const()[name = tensor("op_34652_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3473_cast_fp16, y = var_34652_to_fp16)[name = tensor("aw_chunk_3473_cast_fp16")]; tensor var_34654_to_fp16 = const()[name = tensor("op_34654_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3475_cast_fp16, y = var_34654_to_fp16)[name = tensor("aw_chunk_3475_cast_fp16")]; tensor var_34656_to_fp16 = const()[name = tensor("op_34656_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3477_cast_fp16, y = var_34656_to_fp16)[name = tensor("aw_chunk_3477_cast_fp16")]; tensor var_34658_to_fp16 = const()[name = tensor("op_34658_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3479_cast_fp16, y = var_34658_to_fp16)[name = tensor("aw_chunk_3479_cast_fp16")]; tensor var_34660_to_fp16 = const()[name = tensor("op_34660_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3481_cast_fp16, y = var_34660_to_fp16)[name = tensor("aw_chunk_3481_cast_fp16")]; tensor var_34662_to_fp16 = const()[name = tensor("op_34662_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3483_cast_fp16, y = var_34662_to_fp16)[name = tensor("aw_chunk_3483_cast_fp16")]; tensor var_34664_to_fp16 = const()[name = tensor("op_34664_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3485_cast_fp16, y = var_34664_to_fp16)[name = tensor("aw_chunk_3485_cast_fp16")]; tensor var_34666_to_fp16 = const()[name = tensor("op_34666_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3487_cast_fp16, y = var_34666_to_fp16)[name = tensor("aw_chunk_3487_cast_fp16")]; tensor var_34668_to_fp16 = const()[name = tensor("op_34668_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3489_cast_fp16, y = var_34668_to_fp16)[name = tensor("aw_chunk_3489_cast_fp16")]; tensor var_34670_to_fp16 = const()[name = tensor("op_34670_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3491_cast_fp16, y = var_34670_to_fp16)[name = tensor("aw_chunk_3491_cast_fp16")]; tensor var_34672_to_fp16 = const()[name = tensor("op_34672_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3493_cast_fp16, y = var_34672_to_fp16)[name = tensor("aw_chunk_3493_cast_fp16")]; tensor var_34674_to_fp16 = const()[name = tensor("op_34674_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3495_cast_fp16, y = var_34674_to_fp16)[name = tensor("aw_chunk_3495_cast_fp16")]; tensor var_34676_to_fp16 = const()[name = tensor("op_34676_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3497_cast_fp16, y = var_34676_to_fp16)[name = tensor("aw_chunk_3497_cast_fp16")]; tensor var_34678_to_fp16 = const()[name = tensor("op_34678_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3499_cast_fp16, y = var_34678_to_fp16)[name = tensor("aw_chunk_3499_cast_fp16")]; tensor var_34680_to_fp16 = const()[name = tensor("op_34680_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3501_cast_fp16, y = var_34680_to_fp16)[name = tensor("aw_chunk_3501_cast_fp16")]; tensor var_34682_to_fp16 = const()[name = tensor("op_34682_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3503_cast_fp16, y = var_34682_to_fp16)[name = tensor("aw_chunk_3503_cast_fp16")]; tensor var_34684_to_fp16 = const()[name = tensor("op_34684_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3505_cast_fp16, y = var_34684_to_fp16)[name = tensor("aw_chunk_3505_cast_fp16")]; tensor var_34686_to_fp16 = const()[name = tensor("op_34686_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3507_cast_fp16, y = var_34686_to_fp16)[name = tensor("aw_chunk_3507_cast_fp16")]; tensor var_34688_to_fp16 = const()[name = tensor("op_34688_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3509_cast_fp16, y = var_34688_to_fp16)[name = tensor("aw_chunk_3509_cast_fp16")]; tensor var_34690_to_fp16 = const()[name = tensor("op_34690_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3511_cast_fp16, y = var_34690_to_fp16)[name = tensor("aw_chunk_3511_cast_fp16")]; tensor var_34692_to_fp16 = const()[name = tensor("op_34692_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3513_cast_fp16, y = var_34692_to_fp16)[name = tensor("aw_chunk_3513_cast_fp16")]; tensor var_34694_to_fp16 = const()[name = tensor("op_34694_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3515_cast_fp16, y = var_34694_to_fp16)[name = tensor("aw_chunk_3515_cast_fp16")]; tensor var_34696_to_fp16 = const()[name = tensor("op_34696_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3517_cast_fp16, y = var_34696_to_fp16)[name = tensor("aw_chunk_3517_cast_fp16")]; tensor var_34698_to_fp16 = const()[name = tensor("op_34698_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3519_cast_fp16, y = var_34698_to_fp16)[name = tensor("aw_chunk_3519_cast_fp16")]; tensor var_34700_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3361_cast_fp16)[name = tensor("op_34700_cast_fp16")]; tensor var_34701_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3363_cast_fp16)[name = tensor("op_34701_cast_fp16")]; tensor var_34702_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3365_cast_fp16)[name = tensor("op_34702_cast_fp16")]; tensor var_34703_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3367_cast_fp16)[name = tensor("op_34703_cast_fp16")]; tensor var_34704_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3369_cast_fp16)[name = tensor("op_34704_cast_fp16")]; tensor var_34705_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3371_cast_fp16)[name = tensor("op_34705_cast_fp16")]; tensor var_34706_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3373_cast_fp16)[name = tensor("op_34706_cast_fp16")]; tensor var_34707_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3375_cast_fp16)[name = tensor("op_34707_cast_fp16")]; tensor var_34708_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3377_cast_fp16)[name = tensor("op_34708_cast_fp16")]; tensor var_34709_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3379_cast_fp16)[name = tensor("op_34709_cast_fp16")]; tensor var_34710_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3381_cast_fp16)[name = tensor("op_34710_cast_fp16")]; tensor var_34711_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3383_cast_fp16)[name = tensor("op_34711_cast_fp16")]; tensor var_34712_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3385_cast_fp16)[name = tensor("op_34712_cast_fp16")]; tensor var_34713_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3387_cast_fp16)[name = tensor("op_34713_cast_fp16")]; tensor var_34714_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3389_cast_fp16)[name = tensor("op_34714_cast_fp16")]; tensor var_34715_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3391_cast_fp16)[name = tensor("op_34715_cast_fp16")]; tensor var_34716_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3393_cast_fp16)[name = tensor("op_34716_cast_fp16")]; tensor var_34717_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3395_cast_fp16)[name = tensor("op_34717_cast_fp16")]; tensor var_34718_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3397_cast_fp16)[name = tensor("op_34718_cast_fp16")]; tensor var_34719_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3399_cast_fp16)[name = tensor("op_34719_cast_fp16")]; tensor var_34720_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3401_cast_fp16)[name = tensor("op_34720_cast_fp16")]; tensor var_34721_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3403_cast_fp16)[name = tensor("op_34721_cast_fp16")]; tensor var_34722_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3405_cast_fp16)[name = tensor("op_34722_cast_fp16")]; tensor var_34723_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3407_cast_fp16)[name = tensor("op_34723_cast_fp16")]; tensor var_34724_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3409_cast_fp16)[name = tensor("op_34724_cast_fp16")]; tensor var_34725_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3411_cast_fp16)[name = tensor("op_34725_cast_fp16")]; tensor var_34726_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3413_cast_fp16)[name = tensor("op_34726_cast_fp16")]; tensor var_34727_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3415_cast_fp16)[name = tensor("op_34727_cast_fp16")]; tensor var_34728_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3417_cast_fp16)[name = tensor("op_34728_cast_fp16")]; tensor var_34729_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3419_cast_fp16)[name = tensor("op_34729_cast_fp16")]; tensor var_34730_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3421_cast_fp16)[name = tensor("op_34730_cast_fp16")]; tensor var_34731_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3423_cast_fp16)[name = tensor("op_34731_cast_fp16")]; tensor var_34732_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3425_cast_fp16)[name = tensor("op_34732_cast_fp16")]; tensor var_34733_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3427_cast_fp16)[name = tensor("op_34733_cast_fp16")]; tensor var_34734_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3429_cast_fp16)[name = tensor("op_34734_cast_fp16")]; tensor var_34735_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3431_cast_fp16)[name = tensor("op_34735_cast_fp16")]; tensor var_34736_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3433_cast_fp16)[name = tensor("op_34736_cast_fp16")]; tensor var_34737_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3435_cast_fp16)[name = tensor("op_34737_cast_fp16")]; tensor var_34738_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3437_cast_fp16)[name = tensor("op_34738_cast_fp16")]; tensor var_34739_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3439_cast_fp16)[name = tensor("op_34739_cast_fp16")]; tensor var_34740_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3441_cast_fp16)[name = tensor("op_34740_cast_fp16")]; tensor var_34741_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3443_cast_fp16)[name = tensor("op_34741_cast_fp16")]; tensor var_34742_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3445_cast_fp16)[name = tensor("op_34742_cast_fp16")]; tensor var_34743_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3447_cast_fp16)[name = tensor("op_34743_cast_fp16")]; tensor var_34744_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3449_cast_fp16)[name = tensor("op_34744_cast_fp16")]; tensor var_34745_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3451_cast_fp16)[name = tensor("op_34745_cast_fp16")]; tensor var_34746_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3453_cast_fp16)[name = tensor("op_34746_cast_fp16")]; tensor var_34747_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3455_cast_fp16)[name = tensor("op_34747_cast_fp16")]; tensor var_34748_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3457_cast_fp16)[name = tensor("op_34748_cast_fp16")]; tensor var_34749_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3459_cast_fp16)[name = tensor("op_34749_cast_fp16")]; tensor var_34750_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3461_cast_fp16)[name = tensor("op_34750_cast_fp16")]; tensor var_34751_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3463_cast_fp16)[name = tensor("op_34751_cast_fp16")]; tensor var_34752_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3465_cast_fp16)[name = tensor("op_34752_cast_fp16")]; tensor var_34753_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3467_cast_fp16)[name = tensor("op_34753_cast_fp16")]; tensor var_34754_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3469_cast_fp16)[name = tensor("op_34754_cast_fp16")]; tensor var_34755_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3471_cast_fp16)[name = tensor("op_34755_cast_fp16")]; tensor var_34756_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3473_cast_fp16)[name = tensor("op_34756_cast_fp16")]; tensor var_34757_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3475_cast_fp16)[name = tensor("op_34757_cast_fp16")]; tensor var_34758_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3477_cast_fp16)[name = tensor("op_34758_cast_fp16")]; tensor var_34759_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3479_cast_fp16)[name = tensor("op_34759_cast_fp16")]; tensor var_34760_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3481_cast_fp16)[name = tensor("op_34760_cast_fp16")]; tensor var_34761_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3483_cast_fp16)[name = tensor("op_34761_cast_fp16")]; tensor var_34762_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3485_cast_fp16)[name = tensor("op_34762_cast_fp16")]; tensor var_34763_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3487_cast_fp16)[name = tensor("op_34763_cast_fp16")]; tensor var_34764_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3489_cast_fp16)[name = tensor("op_34764_cast_fp16")]; tensor var_34765_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3491_cast_fp16)[name = tensor("op_34765_cast_fp16")]; tensor var_34766_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3493_cast_fp16)[name = tensor("op_34766_cast_fp16")]; tensor var_34767_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3495_cast_fp16)[name = tensor("op_34767_cast_fp16")]; tensor var_34768_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3497_cast_fp16)[name = tensor("op_34768_cast_fp16")]; tensor var_34769_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3499_cast_fp16)[name = tensor("op_34769_cast_fp16")]; tensor var_34770_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3501_cast_fp16)[name = tensor("op_34770_cast_fp16")]; tensor var_34771_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3503_cast_fp16)[name = tensor("op_34771_cast_fp16")]; tensor var_34772_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3505_cast_fp16)[name = tensor("op_34772_cast_fp16")]; tensor var_34773_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3507_cast_fp16)[name = tensor("op_34773_cast_fp16")]; tensor var_34774_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3509_cast_fp16)[name = tensor("op_34774_cast_fp16")]; tensor var_34775_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3511_cast_fp16)[name = tensor("op_34775_cast_fp16")]; tensor var_34776_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3513_cast_fp16)[name = tensor("op_34776_cast_fp16")]; tensor var_34777_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3515_cast_fp16)[name = tensor("op_34777_cast_fp16")]; tensor var_34778_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3517_cast_fp16)[name = tensor("op_34778_cast_fp16")]; tensor var_34779_cast_fp16 = softmax(axis = var_33498, x = aw_chunk_3519_cast_fp16)[name = tensor("op_34779_cast_fp16")]; tensor var_34781_equation_0 = const()[name = tensor("op_34781_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34781_cast_fp16 = einsum(equation = var_34781_equation_0, values = (var_34301_cast_fp16, var_34700_cast_fp16))[name = tensor("op_34781_cast_fp16")]; tensor var_34783_equation_0 = const()[name = tensor("op_34783_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34783_cast_fp16 = einsum(equation = var_34783_equation_0, values = (var_34301_cast_fp16, var_34701_cast_fp16))[name = tensor("op_34783_cast_fp16")]; tensor var_34785_equation_0 = const()[name = tensor("op_34785_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34785_cast_fp16 = einsum(equation = var_34785_equation_0, values = (var_34301_cast_fp16, var_34702_cast_fp16))[name = tensor("op_34785_cast_fp16")]; tensor var_34787_equation_0 = const()[name = tensor("op_34787_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34787_cast_fp16 = einsum(equation = var_34787_equation_0, values = (var_34301_cast_fp16, var_34703_cast_fp16))[name = tensor("op_34787_cast_fp16")]; tensor var_34789_equation_0 = const()[name = tensor("op_34789_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34789_cast_fp16 = einsum(equation = var_34789_equation_0, values = (var_34305_cast_fp16, var_34704_cast_fp16))[name = tensor("op_34789_cast_fp16")]; tensor var_34791_equation_0 = const()[name = tensor("op_34791_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34791_cast_fp16 = einsum(equation = var_34791_equation_0, values = (var_34305_cast_fp16, var_34705_cast_fp16))[name = tensor("op_34791_cast_fp16")]; tensor var_34793_equation_0 = const()[name = tensor("op_34793_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34793_cast_fp16 = einsum(equation = var_34793_equation_0, values = (var_34305_cast_fp16, var_34706_cast_fp16))[name = tensor("op_34793_cast_fp16")]; tensor var_34795_equation_0 = const()[name = tensor("op_34795_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34795_cast_fp16 = einsum(equation = var_34795_equation_0, values = (var_34305_cast_fp16, var_34707_cast_fp16))[name = tensor("op_34795_cast_fp16")]; tensor var_34797_equation_0 = const()[name = tensor("op_34797_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34797_cast_fp16 = einsum(equation = var_34797_equation_0, values = (var_34309_cast_fp16, var_34708_cast_fp16))[name = tensor("op_34797_cast_fp16")]; tensor var_34799_equation_0 = const()[name = tensor("op_34799_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34799_cast_fp16 = einsum(equation = var_34799_equation_0, values = (var_34309_cast_fp16, var_34709_cast_fp16))[name = tensor("op_34799_cast_fp16")]; tensor var_34801_equation_0 = const()[name = tensor("op_34801_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34801_cast_fp16 = einsum(equation = var_34801_equation_0, values = (var_34309_cast_fp16, var_34710_cast_fp16))[name = tensor("op_34801_cast_fp16")]; tensor var_34803_equation_0 = const()[name = tensor("op_34803_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34803_cast_fp16 = einsum(equation = var_34803_equation_0, values = (var_34309_cast_fp16, var_34711_cast_fp16))[name = tensor("op_34803_cast_fp16")]; tensor var_34805_equation_0 = const()[name = tensor("op_34805_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34805_cast_fp16 = einsum(equation = var_34805_equation_0, values = (var_34313_cast_fp16, var_34712_cast_fp16))[name = tensor("op_34805_cast_fp16")]; tensor var_34807_equation_0 = const()[name = tensor("op_34807_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34807_cast_fp16 = einsum(equation = var_34807_equation_0, values = (var_34313_cast_fp16, var_34713_cast_fp16))[name = tensor("op_34807_cast_fp16")]; tensor var_34809_equation_0 = const()[name = tensor("op_34809_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34809_cast_fp16 = einsum(equation = var_34809_equation_0, values = (var_34313_cast_fp16, var_34714_cast_fp16))[name = tensor("op_34809_cast_fp16")]; tensor var_34811_equation_0 = const()[name = tensor("op_34811_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34811_cast_fp16 = einsum(equation = var_34811_equation_0, values = (var_34313_cast_fp16, var_34715_cast_fp16))[name = tensor("op_34811_cast_fp16")]; tensor var_34813_equation_0 = const()[name = tensor("op_34813_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34813_cast_fp16 = einsum(equation = var_34813_equation_0, values = (var_34317_cast_fp16, var_34716_cast_fp16))[name = tensor("op_34813_cast_fp16")]; tensor var_34815_equation_0 = const()[name = tensor("op_34815_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34815_cast_fp16 = einsum(equation = var_34815_equation_0, values = (var_34317_cast_fp16, var_34717_cast_fp16))[name = tensor("op_34815_cast_fp16")]; tensor var_34817_equation_0 = const()[name = tensor("op_34817_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34817_cast_fp16 = einsum(equation = var_34817_equation_0, values = (var_34317_cast_fp16, var_34718_cast_fp16))[name = tensor("op_34817_cast_fp16")]; tensor var_34819_equation_0 = const()[name = tensor("op_34819_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34819_cast_fp16 = einsum(equation = var_34819_equation_0, values = (var_34317_cast_fp16, var_34719_cast_fp16))[name = tensor("op_34819_cast_fp16")]; tensor var_34821_equation_0 = const()[name = tensor("op_34821_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34821_cast_fp16 = einsum(equation = var_34821_equation_0, values = (var_34321_cast_fp16, var_34720_cast_fp16))[name = tensor("op_34821_cast_fp16")]; tensor var_34823_equation_0 = const()[name = tensor("op_34823_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34823_cast_fp16 = einsum(equation = var_34823_equation_0, values = (var_34321_cast_fp16, var_34721_cast_fp16))[name = tensor("op_34823_cast_fp16")]; tensor var_34825_equation_0 = const()[name = tensor("op_34825_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34825_cast_fp16 = einsum(equation = var_34825_equation_0, values = (var_34321_cast_fp16, var_34722_cast_fp16))[name = tensor("op_34825_cast_fp16")]; tensor var_34827_equation_0 = const()[name = tensor("op_34827_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34827_cast_fp16 = einsum(equation = var_34827_equation_0, values = (var_34321_cast_fp16, var_34723_cast_fp16))[name = tensor("op_34827_cast_fp16")]; tensor var_34829_equation_0 = const()[name = tensor("op_34829_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34829_cast_fp16 = einsum(equation = var_34829_equation_0, values = (var_34325_cast_fp16, var_34724_cast_fp16))[name = tensor("op_34829_cast_fp16")]; tensor var_34831_equation_0 = const()[name = tensor("op_34831_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34831_cast_fp16 = einsum(equation = var_34831_equation_0, values = (var_34325_cast_fp16, var_34725_cast_fp16))[name = tensor("op_34831_cast_fp16")]; tensor var_34833_equation_0 = const()[name = tensor("op_34833_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34833_cast_fp16 = einsum(equation = var_34833_equation_0, values = (var_34325_cast_fp16, var_34726_cast_fp16))[name = tensor("op_34833_cast_fp16")]; tensor var_34835_equation_0 = const()[name = tensor("op_34835_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34835_cast_fp16 = einsum(equation = var_34835_equation_0, values = (var_34325_cast_fp16, var_34727_cast_fp16))[name = tensor("op_34835_cast_fp16")]; tensor var_34837_equation_0 = const()[name = tensor("op_34837_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34837_cast_fp16 = einsum(equation = var_34837_equation_0, values = (var_34329_cast_fp16, var_34728_cast_fp16))[name = tensor("op_34837_cast_fp16")]; tensor var_34839_equation_0 = const()[name = tensor("op_34839_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34839_cast_fp16 = einsum(equation = var_34839_equation_0, values = (var_34329_cast_fp16, var_34729_cast_fp16))[name = tensor("op_34839_cast_fp16")]; tensor var_34841_equation_0 = const()[name = tensor("op_34841_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34841_cast_fp16 = einsum(equation = var_34841_equation_0, values = (var_34329_cast_fp16, var_34730_cast_fp16))[name = tensor("op_34841_cast_fp16")]; tensor var_34843_equation_0 = const()[name = tensor("op_34843_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34843_cast_fp16 = einsum(equation = var_34843_equation_0, values = (var_34329_cast_fp16, var_34731_cast_fp16))[name = tensor("op_34843_cast_fp16")]; tensor var_34845_equation_0 = const()[name = tensor("op_34845_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34845_cast_fp16 = einsum(equation = var_34845_equation_0, values = (var_34333_cast_fp16, var_34732_cast_fp16))[name = tensor("op_34845_cast_fp16")]; tensor var_34847_equation_0 = const()[name = tensor("op_34847_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34847_cast_fp16 = einsum(equation = var_34847_equation_0, values = (var_34333_cast_fp16, var_34733_cast_fp16))[name = tensor("op_34847_cast_fp16")]; tensor var_34849_equation_0 = const()[name = tensor("op_34849_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34849_cast_fp16 = einsum(equation = var_34849_equation_0, values = (var_34333_cast_fp16, var_34734_cast_fp16))[name = tensor("op_34849_cast_fp16")]; tensor var_34851_equation_0 = const()[name = tensor("op_34851_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34851_cast_fp16 = einsum(equation = var_34851_equation_0, values = (var_34333_cast_fp16, var_34735_cast_fp16))[name = tensor("op_34851_cast_fp16")]; tensor var_34853_equation_0 = const()[name = tensor("op_34853_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34853_cast_fp16 = einsum(equation = var_34853_equation_0, values = (var_34337_cast_fp16, var_34736_cast_fp16))[name = tensor("op_34853_cast_fp16")]; tensor var_34855_equation_0 = const()[name = tensor("op_34855_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34855_cast_fp16 = einsum(equation = var_34855_equation_0, values = (var_34337_cast_fp16, var_34737_cast_fp16))[name = tensor("op_34855_cast_fp16")]; tensor var_34857_equation_0 = const()[name = tensor("op_34857_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34857_cast_fp16 = einsum(equation = var_34857_equation_0, values = (var_34337_cast_fp16, var_34738_cast_fp16))[name = tensor("op_34857_cast_fp16")]; tensor var_34859_equation_0 = const()[name = tensor("op_34859_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34859_cast_fp16 = einsum(equation = var_34859_equation_0, values = (var_34337_cast_fp16, var_34739_cast_fp16))[name = tensor("op_34859_cast_fp16")]; tensor var_34861_equation_0 = const()[name = tensor("op_34861_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34861_cast_fp16 = einsum(equation = var_34861_equation_0, values = (var_34341_cast_fp16, var_34740_cast_fp16))[name = tensor("op_34861_cast_fp16")]; tensor var_34863_equation_0 = const()[name = tensor("op_34863_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34863_cast_fp16 = einsum(equation = var_34863_equation_0, values = (var_34341_cast_fp16, var_34741_cast_fp16))[name = tensor("op_34863_cast_fp16")]; tensor var_34865_equation_0 = const()[name = tensor("op_34865_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34865_cast_fp16 = einsum(equation = var_34865_equation_0, values = (var_34341_cast_fp16, var_34742_cast_fp16))[name = tensor("op_34865_cast_fp16")]; tensor var_34867_equation_0 = const()[name = tensor("op_34867_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34867_cast_fp16 = einsum(equation = var_34867_equation_0, values = (var_34341_cast_fp16, var_34743_cast_fp16))[name = tensor("op_34867_cast_fp16")]; tensor var_34869_equation_0 = const()[name = tensor("op_34869_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34869_cast_fp16 = einsum(equation = var_34869_equation_0, values = (var_34345_cast_fp16, var_34744_cast_fp16))[name = tensor("op_34869_cast_fp16")]; tensor var_34871_equation_0 = const()[name = tensor("op_34871_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34871_cast_fp16 = einsum(equation = var_34871_equation_0, values = (var_34345_cast_fp16, var_34745_cast_fp16))[name = tensor("op_34871_cast_fp16")]; tensor var_34873_equation_0 = const()[name = tensor("op_34873_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34873_cast_fp16 = einsum(equation = var_34873_equation_0, values = (var_34345_cast_fp16, var_34746_cast_fp16))[name = tensor("op_34873_cast_fp16")]; tensor var_34875_equation_0 = const()[name = tensor("op_34875_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34875_cast_fp16 = einsum(equation = var_34875_equation_0, values = (var_34345_cast_fp16, var_34747_cast_fp16))[name = tensor("op_34875_cast_fp16")]; tensor var_34877_equation_0 = const()[name = tensor("op_34877_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34877_cast_fp16 = einsum(equation = var_34877_equation_0, values = (var_34349_cast_fp16, var_34748_cast_fp16))[name = tensor("op_34877_cast_fp16")]; tensor var_34879_equation_0 = const()[name = tensor("op_34879_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34879_cast_fp16 = einsum(equation = var_34879_equation_0, values = (var_34349_cast_fp16, var_34749_cast_fp16))[name = tensor("op_34879_cast_fp16")]; tensor var_34881_equation_0 = const()[name = tensor("op_34881_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34881_cast_fp16 = einsum(equation = var_34881_equation_0, values = (var_34349_cast_fp16, var_34750_cast_fp16))[name = tensor("op_34881_cast_fp16")]; tensor var_34883_equation_0 = const()[name = tensor("op_34883_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34883_cast_fp16 = einsum(equation = var_34883_equation_0, values = (var_34349_cast_fp16, var_34751_cast_fp16))[name = tensor("op_34883_cast_fp16")]; tensor var_34885_equation_0 = const()[name = tensor("op_34885_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34885_cast_fp16 = einsum(equation = var_34885_equation_0, values = (var_34353_cast_fp16, var_34752_cast_fp16))[name = tensor("op_34885_cast_fp16")]; tensor var_34887_equation_0 = const()[name = tensor("op_34887_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34887_cast_fp16 = einsum(equation = var_34887_equation_0, values = (var_34353_cast_fp16, var_34753_cast_fp16))[name = tensor("op_34887_cast_fp16")]; tensor var_34889_equation_0 = const()[name = tensor("op_34889_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34889_cast_fp16 = einsum(equation = var_34889_equation_0, values = (var_34353_cast_fp16, var_34754_cast_fp16))[name = tensor("op_34889_cast_fp16")]; tensor var_34891_equation_0 = const()[name = tensor("op_34891_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34891_cast_fp16 = einsum(equation = var_34891_equation_0, values = (var_34353_cast_fp16, var_34755_cast_fp16))[name = tensor("op_34891_cast_fp16")]; tensor var_34893_equation_0 = const()[name = tensor("op_34893_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34893_cast_fp16 = einsum(equation = var_34893_equation_0, values = (var_34357_cast_fp16, var_34756_cast_fp16))[name = tensor("op_34893_cast_fp16")]; tensor var_34895_equation_0 = const()[name = tensor("op_34895_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34895_cast_fp16 = einsum(equation = var_34895_equation_0, values = (var_34357_cast_fp16, var_34757_cast_fp16))[name = tensor("op_34895_cast_fp16")]; tensor var_34897_equation_0 = const()[name = tensor("op_34897_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34897_cast_fp16 = einsum(equation = var_34897_equation_0, values = (var_34357_cast_fp16, var_34758_cast_fp16))[name = tensor("op_34897_cast_fp16")]; tensor var_34899_equation_0 = const()[name = tensor("op_34899_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34899_cast_fp16 = einsum(equation = var_34899_equation_0, values = (var_34357_cast_fp16, var_34759_cast_fp16))[name = tensor("op_34899_cast_fp16")]; tensor var_34901_equation_0 = const()[name = tensor("op_34901_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34901_cast_fp16 = einsum(equation = var_34901_equation_0, values = (var_34361_cast_fp16, var_34760_cast_fp16))[name = tensor("op_34901_cast_fp16")]; tensor var_34903_equation_0 = const()[name = tensor("op_34903_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34903_cast_fp16 = einsum(equation = var_34903_equation_0, values = (var_34361_cast_fp16, var_34761_cast_fp16))[name = tensor("op_34903_cast_fp16")]; tensor var_34905_equation_0 = const()[name = tensor("op_34905_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34905_cast_fp16 = einsum(equation = var_34905_equation_0, values = (var_34361_cast_fp16, var_34762_cast_fp16))[name = tensor("op_34905_cast_fp16")]; tensor var_34907_equation_0 = const()[name = tensor("op_34907_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34907_cast_fp16 = einsum(equation = var_34907_equation_0, values = (var_34361_cast_fp16, var_34763_cast_fp16))[name = tensor("op_34907_cast_fp16")]; tensor var_34909_equation_0 = const()[name = tensor("op_34909_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34909_cast_fp16 = einsum(equation = var_34909_equation_0, values = (var_34365_cast_fp16, var_34764_cast_fp16))[name = tensor("op_34909_cast_fp16")]; tensor var_34911_equation_0 = const()[name = tensor("op_34911_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34911_cast_fp16 = einsum(equation = var_34911_equation_0, values = (var_34365_cast_fp16, var_34765_cast_fp16))[name = tensor("op_34911_cast_fp16")]; tensor var_34913_equation_0 = const()[name = tensor("op_34913_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34913_cast_fp16 = einsum(equation = var_34913_equation_0, values = (var_34365_cast_fp16, var_34766_cast_fp16))[name = tensor("op_34913_cast_fp16")]; tensor var_34915_equation_0 = const()[name = tensor("op_34915_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34915_cast_fp16 = einsum(equation = var_34915_equation_0, values = (var_34365_cast_fp16, var_34767_cast_fp16))[name = tensor("op_34915_cast_fp16")]; tensor var_34917_equation_0 = const()[name = tensor("op_34917_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34917_cast_fp16 = einsum(equation = var_34917_equation_0, values = (var_34369_cast_fp16, var_34768_cast_fp16))[name = tensor("op_34917_cast_fp16")]; tensor var_34919_equation_0 = const()[name = tensor("op_34919_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34919_cast_fp16 = einsum(equation = var_34919_equation_0, values = (var_34369_cast_fp16, var_34769_cast_fp16))[name = tensor("op_34919_cast_fp16")]; tensor var_34921_equation_0 = const()[name = tensor("op_34921_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34921_cast_fp16 = einsum(equation = var_34921_equation_0, values = (var_34369_cast_fp16, var_34770_cast_fp16))[name = tensor("op_34921_cast_fp16")]; tensor var_34923_equation_0 = const()[name = tensor("op_34923_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34923_cast_fp16 = einsum(equation = var_34923_equation_0, values = (var_34369_cast_fp16, var_34771_cast_fp16))[name = tensor("op_34923_cast_fp16")]; tensor var_34925_equation_0 = const()[name = tensor("op_34925_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34925_cast_fp16 = einsum(equation = var_34925_equation_0, values = (var_34373_cast_fp16, var_34772_cast_fp16))[name = tensor("op_34925_cast_fp16")]; tensor var_34927_equation_0 = const()[name = tensor("op_34927_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34927_cast_fp16 = einsum(equation = var_34927_equation_0, values = (var_34373_cast_fp16, var_34773_cast_fp16))[name = tensor("op_34927_cast_fp16")]; tensor var_34929_equation_0 = const()[name = tensor("op_34929_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34929_cast_fp16 = einsum(equation = var_34929_equation_0, values = (var_34373_cast_fp16, var_34774_cast_fp16))[name = tensor("op_34929_cast_fp16")]; tensor var_34931_equation_0 = const()[name = tensor("op_34931_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34931_cast_fp16 = einsum(equation = var_34931_equation_0, values = (var_34373_cast_fp16, var_34775_cast_fp16))[name = tensor("op_34931_cast_fp16")]; tensor var_34933_equation_0 = const()[name = tensor("op_34933_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34933_cast_fp16 = einsum(equation = var_34933_equation_0, values = (var_34377_cast_fp16, var_34776_cast_fp16))[name = tensor("op_34933_cast_fp16")]; tensor var_34935_equation_0 = const()[name = tensor("op_34935_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34935_cast_fp16 = einsum(equation = var_34935_equation_0, values = (var_34377_cast_fp16, var_34777_cast_fp16))[name = tensor("op_34935_cast_fp16")]; tensor var_34937_equation_0 = const()[name = tensor("op_34937_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34937_cast_fp16 = einsum(equation = var_34937_equation_0, values = (var_34377_cast_fp16, var_34778_cast_fp16))[name = tensor("op_34937_cast_fp16")]; tensor var_34939_equation_0 = const()[name = tensor("op_34939_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_34939_cast_fp16 = einsum(equation = var_34939_equation_0, values = (var_34377_cast_fp16, var_34779_cast_fp16))[name = tensor("op_34939_cast_fp16")]; tensor var_34941_interleave_0 = const()[name = tensor("op_34941_interleave_0"), val = tensor(false)]; tensor var_34941_cast_fp16 = concat(axis = var_33473, interleave = var_34941_interleave_0, values = (var_34781_cast_fp16, var_34783_cast_fp16, var_34785_cast_fp16, var_34787_cast_fp16))[name = tensor("op_34941_cast_fp16")]; tensor var_34943_interleave_0 = const()[name = tensor("op_34943_interleave_0"), val = tensor(false)]; tensor var_34943_cast_fp16 = concat(axis = var_33473, interleave = var_34943_interleave_0, values = (var_34789_cast_fp16, var_34791_cast_fp16, var_34793_cast_fp16, var_34795_cast_fp16))[name = tensor("op_34943_cast_fp16")]; tensor var_34945_interleave_0 = const()[name = tensor("op_34945_interleave_0"), val = tensor(false)]; tensor var_34945_cast_fp16 = concat(axis = var_33473, interleave = var_34945_interleave_0, values = (var_34797_cast_fp16, var_34799_cast_fp16, var_34801_cast_fp16, var_34803_cast_fp16))[name = tensor("op_34945_cast_fp16")]; tensor var_34947_interleave_0 = const()[name = tensor("op_34947_interleave_0"), val = tensor(false)]; tensor var_34947_cast_fp16 = concat(axis = var_33473, interleave = var_34947_interleave_0, values = (var_34805_cast_fp16, var_34807_cast_fp16, var_34809_cast_fp16, var_34811_cast_fp16))[name = tensor("op_34947_cast_fp16")]; tensor var_34949_interleave_0 = const()[name = tensor("op_34949_interleave_0"), val = tensor(false)]; tensor var_34949_cast_fp16 = concat(axis = var_33473, interleave = var_34949_interleave_0, values = (var_34813_cast_fp16, var_34815_cast_fp16, var_34817_cast_fp16, var_34819_cast_fp16))[name = tensor("op_34949_cast_fp16")]; tensor var_34951_interleave_0 = const()[name = tensor("op_34951_interleave_0"), val = tensor(false)]; tensor var_34951_cast_fp16 = concat(axis = var_33473, interleave = var_34951_interleave_0, values = (var_34821_cast_fp16, var_34823_cast_fp16, var_34825_cast_fp16, var_34827_cast_fp16))[name = tensor("op_34951_cast_fp16")]; tensor var_34953_interleave_0 = const()[name = tensor("op_34953_interleave_0"), val = tensor(false)]; tensor var_34953_cast_fp16 = concat(axis = var_33473, interleave = var_34953_interleave_0, values = (var_34829_cast_fp16, var_34831_cast_fp16, var_34833_cast_fp16, var_34835_cast_fp16))[name = tensor("op_34953_cast_fp16")]; tensor var_34955_interleave_0 = const()[name = tensor("op_34955_interleave_0"), val = tensor(false)]; tensor var_34955_cast_fp16 = concat(axis = var_33473, interleave = var_34955_interleave_0, values = (var_34837_cast_fp16, var_34839_cast_fp16, var_34841_cast_fp16, var_34843_cast_fp16))[name = tensor("op_34955_cast_fp16")]; tensor var_34957_interleave_0 = const()[name = tensor("op_34957_interleave_0"), val = tensor(false)]; tensor var_34957_cast_fp16 = concat(axis = var_33473, interleave = var_34957_interleave_0, values = (var_34845_cast_fp16, var_34847_cast_fp16, var_34849_cast_fp16, var_34851_cast_fp16))[name = tensor("op_34957_cast_fp16")]; tensor var_34959_interleave_0 = const()[name = tensor("op_34959_interleave_0"), val = tensor(false)]; tensor var_34959_cast_fp16 = concat(axis = var_33473, interleave = var_34959_interleave_0, values = (var_34853_cast_fp16, var_34855_cast_fp16, var_34857_cast_fp16, var_34859_cast_fp16))[name = tensor("op_34959_cast_fp16")]; tensor var_34961_interleave_0 = const()[name = tensor("op_34961_interleave_0"), val = tensor(false)]; tensor var_34961_cast_fp16 = concat(axis = var_33473, interleave = var_34961_interleave_0, values = (var_34861_cast_fp16, var_34863_cast_fp16, var_34865_cast_fp16, var_34867_cast_fp16))[name = tensor("op_34961_cast_fp16")]; tensor var_34963_interleave_0 = const()[name = tensor("op_34963_interleave_0"), val = tensor(false)]; tensor var_34963_cast_fp16 = concat(axis = var_33473, interleave = var_34963_interleave_0, values = (var_34869_cast_fp16, var_34871_cast_fp16, var_34873_cast_fp16, var_34875_cast_fp16))[name = tensor("op_34963_cast_fp16")]; tensor var_34965_interleave_0 = const()[name = tensor("op_34965_interleave_0"), val = tensor(false)]; tensor var_34965_cast_fp16 = concat(axis = var_33473, interleave = var_34965_interleave_0, values = (var_34877_cast_fp16, var_34879_cast_fp16, var_34881_cast_fp16, var_34883_cast_fp16))[name = tensor("op_34965_cast_fp16")]; tensor var_34967_interleave_0 = const()[name = tensor("op_34967_interleave_0"), val = tensor(false)]; tensor var_34967_cast_fp16 = concat(axis = var_33473, interleave = var_34967_interleave_0, values = (var_34885_cast_fp16, var_34887_cast_fp16, var_34889_cast_fp16, var_34891_cast_fp16))[name = tensor("op_34967_cast_fp16")]; tensor var_34969_interleave_0 = const()[name = tensor("op_34969_interleave_0"), val = tensor(false)]; tensor var_34969_cast_fp16 = concat(axis = var_33473, interleave = var_34969_interleave_0, values = (var_34893_cast_fp16, var_34895_cast_fp16, var_34897_cast_fp16, var_34899_cast_fp16))[name = tensor("op_34969_cast_fp16")]; tensor var_34971_interleave_0 = const()[name = tensor("op_34971_interleave_0"), val = tensor(false)]; tensor var_34971_cast_fp16 = concat(axis = var_33473, interleave = var_34971_interleave_0, values = (var_34901_cast_fp16, var_34903_cast_fp16, var_34905_cast_fp16, var_34907_cast_fp16))[name = tensor("op_34971_cast_fp16")]; tensor var_34973_interleave_0 = const()[name = tensor("op_34973_interleave_0"), val = tensor(false)]; tensor var_34973_cast_fp16 = concat(axis = var_33473, interleave = var_34973_interleave_0, values = (var_34909_cast_fp16, var_34911_cast_fp16, var_34913_cast_fp16, var_34915_cast_fp16))[name = tensor("op_34973_cast_fp16")]; tensor var_34975_interleave_0 = const()[name = tensor("op_34975_interleave_0"), val = tensor(false)]; tensor var_34975_cast_fp16 = concat(axis = var_33473, interleave = var_34975_interleave_0, values = (var_34917_cast_fp16, var_34919_cast_fp16, var_34921_cast_fp16, var_34923_cast_fp16))[name = tensor("op_34975_cast_fp16")]; tensor var_34977_interleave_0 = const()[name = tensor("op_34977_interleave_0"), val = tensor(false)]; tensor var_34977_cast_fp16 = concat(axis = var_33473, interleave = var_34977_interleave_0, values = (var_34925_cast_fp16, var_34927_cast_fp16, var_34929_cast_fp16, var_34931_cast_fp16))[name = tensor("op_34977_cast_fp16")]; tensor var_34979_interleave_0 = const()[name = tensor("op_34979_interleave_0"), val = tensor(false)]; tensor var_34979_cast_fp16 = concat(axis = var_33473, interleave = var_34979_interleave_0, values = (var_34933_cast_fp16, var_34935_cast_fp16, var_34937_cast_fp16, var_34939_cast_fp16))[name = tensor("op_34979_cast_fp16")]; tensor input_169_interleave_0 = const()[name = tensor("input_169_interleave_0"), val = tensor(false)]; tensor input_169_cast_fp16 = concat(axis = var_33498, interleave = input_169_interleave_0, values = (var_34941_cast_fp16, var_34943_cast_fp16, var_34945_cast_fp16, var_34947_cast_fp16, var_34949_cast_fp16, var_34951_cast_fp16, var_34953_cast_fp16, var_34955_cast_fp16, var_34957_cast_fp16, var_34959_cast_fp16, var_34961_cast_fp16, var_34963_cast_fp16, var_34965_cast_fp16, var_34967_cast_fp16, var_34969_cast_fp16, var_34971_cast_fp16, var_34973_cast_fp16, var_34975_cast_fp16, var_34977_cast_fp16, var_34979_cast_fp16))[name = tensor("input_169_cast_fp16")]; tensor var_34990_pad_type_0 = const()[name = tensor("op_34990_pad_type_0"), val = tensor("valid")]; tensor var_34990_strides_0 = const()[name = tensor("op_34990_strides_0"), val = tensor([1, 1])]; tensor var_34990_pad_0 = const()[name = tensor("op_34990_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_34990_dilations_0 = const()[name = tensor("op_34990_dilations_0"), val = tensor([1, 1])]; tensor var_34990_groups_0 = const()[name = tensor("op_34990_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(287438336))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288257600))), name = tensor("layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_21_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288257728)))]; tensor var_34990_cast_fp16 = conv(bias = layers_21_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_34990_dilations_0, groups = var_34990_groups_0, pad = var_34990_pad_0, pad_type = var_34990_pad_type_0, strides = var_34990_strides_0, weight = layers_21_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_169_cast_fp16)[name = tensor("op_34990_cast_fp16")]; tensor var_34996_pad_type_0 = const()[name = tensor("op_34996_pad_type_0"), val = tensor("valid")]; tensor var_34996_strides_0 = const()[name = tensor("op_34996_strides_0"), val = tensor([1, 1])]; tensor var_34996_pad_0 = const()[name = tensor("op_34996_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_34996_dilations_0 = const()[name = tensor("op_34996_dilations_0"), val = tensor([1, 1])]; tensor var_34996_groups_0 = const()[name = tensor("op_34996_groups_0"), val = tensor(1)]; tensor layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288272320))), name = tensor("layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288260352))), shape = tensor([1280, 1280, 1, 1])]; tensor var_34996_cast_fp16 = conv(dilations = var_34996_dilations_0, groups = var_34996_groups_0, pad = var_34996_pad_0, pad_type = var_34996_pad_type_0, strides = var_34996_strides_0, weight = layers_21_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_169_cast_fp16)[name = tensor("op_34996_cast_fp16")]; tensor obj_87_cast_fp16 = add(x = var_34990_cast_fp16, y = var_34996_cast_fp16)[name = tensor("obj_87_cast_fp16")]; tensor inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = obj_87_cast_fp16)[name = tensor("inputs_87_cast_fp16")]; tensor out_87_axes_0 = const()[name = tensor("out_87_axes_0"), val = tensor([1])]; tensor var_35007_to_fp16 = const()[name = tensor("op_35007_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_87_cast_fp16 = layer_norm(axes = out_87_axes_0, epsilon = var_35007_to_fp16, x = inputs_87_cast_fp16)[name = tensor("out_87_cast_fp16")]; tensor input_171_gamma_0_to_fp16 = const()[name = tensor("input_171_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288477184)))]; tensor input_171_beta_0_to_fp16 = const()[name = tensor("input_171_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288479808)))]; tensor input_171_epsilon_0_to_fp16 = const()[name = tensor("input_171_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_171_cast_fp16 = batch_norm(beta = input_171_beta_0_to_fp16, epsilon = input_171_epsilon_0_to_fp16, gamma = input_171_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_87_cast_fp16)[name = tensor("input_171_cast_fp16")]; tensor var_35025_pad_type_0 = const()[name = tensor("op_35025_pad_type_0"), val = tensor("valid")]; tensor var_35025_strides_0 = const()[name = tensor("op_35025_strides_0"), val = tensor([1, 1])]; tensor var_35025_pad_0 = const()[name = tensor("op_35025_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35025_dilations_0 = const()[name = tensor("op_35025_dilations_0"), val = tensor([1, 1])]; tensor var_35025_groups_0 = const()[name = tensor("op_35025_groups_0"), val = tensor(1)]; tensor layers_21_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(288482432))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291759296))), name = tensor("layers_21_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_21_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291759424)))]; tensor var_35025_cast_fp16 = conv(bias = layers_21_fc1_inlier_module_bias_to_fp16, dilations = var_35025_dilations_0, groups = var_35025_groups_0, pad = var_35025_pad_0, pad_type = var_35025_pad_type_0, strides = var_35025_strides_0, weight = layers_21_fc1_inlier_module_weight_to_fp16_palettized, x = input_171_cast_fp16)[name = tensor("op_35025_cast_fp16")]; tensor var_35031_pad_type_0 = const()[name = tensor("op_35031_pad_type_0"), val = tensor("valid")]; tensor var_35031_strides_0 = const()[name = tensor("op_35031_strides_0"), val = tensor([1, 1])]; tensor var_35031_pad_0 = const()[name = tensor("op_35031_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35031_dilations_0 = const()[name = tensor("op_35031_dilations_0"), val = tensor([1, 1])]; tensor var_35031_groups_0 = const()[name = tensor("op_35031_groups_0"), val = tensor(1)]; tensor layers_21_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291830912))), name = tensor("layers_21_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(291769728))), shape = tensor([5120, 1280, 1, 1])]; tensor var_35031_cast_fp16 = conv(dilations = var_35031_dilations_0, groups = var_35031_groups_0, pad = var_35031_pad_0, pad_type = var_35031_pad_type_0, strides = var_35031_strides_0, weight = layers_21_fc1_outlier_module_weight_to_fp16_sparsified, x = input_171_cast_fp16)[name = tensor("op_35031_cast_fp16")]; tensor input_173_cast_fp16 = add(x = var_35025_cast_fp16, y = var_35031_cast_fp16)[name = tensor("input_173_cast_fp16")]; tensor input_175_mode_0 = const()[name = tensor("input_175_mode_0"), val = tensor("EXACT")]; tensor input_175_cast_fp16 = gelu(mode = input_175_mode_0, x = input_173_cast_fp16)[name = tensor("input_175_cast_fp16")]; tensor var_35042_pad_type_0 = const()[name = tensor("op_35042_pad_type_0"), val = tensor("valid")]; tensor var_35042_strides_0 = const()[name = tensor("op_35042_strides_0"), val = tensor([1, 1])]; tensor var_35042_pad_0 = const()[name = tensor("op_35042_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35042_dilations_0 = const()[name = tensor("op_35042_dilations_0"), val = tensor([1, 1])]; tensor var_35042_groups_0 = const()[name = tensor("op_35042_groups_0"), val = tensor(1)]; tensor layers_21_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(292650176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295927040))), name = tensor("layers_21_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_21_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_21_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295927168)))]; tensor var_35042_cast_fp16 = conv(bias = layers_21_fc2_inlier_module_bias_to_fp16, dilations = var_35042_dilations_0, groups = var_35042_groups_0, pad = var_35042_pad_0, pad_type = var_35042_pad_type_0, strides = var_35042_strides_0, weight = layers_21_fc2_inlier_module_weight_to_fp16_palettized, x = input_175_cast_fp16)[name = tensor("op_35042_cast_fp16")]; tensor var_35048_pad_type_0 = const()[name = tensor("op_35048_pad_type_0"), val = tensor("valid")]; tensor var_35048_strides_0 = const()[name = tensor("op_35048_strides_0"), val = tensor([1, 1])]; tensor var_35048_pad_0 = const()[name = tensor("op_35048_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35048_dilations_0 = const()[name = tensor("op_35048_dilations_0"), val = tensor([1, 1])]; tensor var_35048_groups_0 = const()[name = tensor("op_35048_groups_0"), val = tensor(1)]; tensor layers_21_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295992512))), name = tensor("layers_21_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(295929792))), shape = tensor([1280, 5120, 1, 1])]; tensor var_35048_cast_fp16 = conv(dilations = var_35048_dilations_0, groups = var_35048_groups_0, pad = var_35048_pad_0, pad_type = var_35048_pad_type_0, strides = var_35048_strides_0, weight = layers_21_fc2_outlier_module_weight_to_fp16_sparsified, x = input_175_cast_fp16)[name = tensor("op_35048_cast_fp16")]; tensor hidden_states_47_cast_fp16 = add(x = var_35042_cast_fp16, y = var_35048_cast_fp16)[name = tensor("hidden_states_47_cast_fp16")]; tensor inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = hidden_states_47_cast_fp16)[name = tensor("inputs_89_cast_fp16")]; tensor var_35054 = const()[name = tensor("op_35054"), val = tensor(3)]; tensor var_35079 = const()[name = tensor("op_35079"), val = tensor(1)]; tensor out_89_axes_0 = const()[name = tensor("out_89_axes_0"), val = tensor([1])]; tensor var_35096_to_fp16 = const()[name = tensor("op_35096_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_89_cast_fp16 = layer_norm(axes = out_89_axes_0, epsilon = var_35096_to_fp16, x = inputs_89_cast_fp16)[name = tensor("out_89_cast_fp16")]; tensor obj_89_gamma_0_to_fp16 = const()[name = tensor("obj_89_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296811776)))]; tensor obj_89_beta_0_to_fp16 = const()[name = tensor("obj_89_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296814400)))]; tensor obj_89_epsilon_0_to_fp16 = const()[name = tensor("obj_89_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_89_cast_fp16 = batch_norm(beta = obj_89_beta_0_to_fp16, epsilon = obj_89_epsilon_0_to_fp16, gamma = obj_89_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_89_cast_fp16)[name = tensor("obj_89_cast_fp16")]; tensor var_35118_pad_type_0 = const()[name = tensor("op_35118_pad_type_0"), val = tensor("valid")]; tensor var_35118_strides_0 = const()[name = tensor("op_35118_strides_0"), val = tensor([1, 1])]; tensor var_35118_pad_0 = const()[name = tensor("op_35118_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35118_dilations_0 = const()[name = tensor("op_35118_dilations_0"), val = tensor([1, 1])]; tensor var_35118_groups_0 = const()[name = tensor("op_35118_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296817024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297636288))), name = tensor("layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_22_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297636416)))]; tensor var_35118_cast_fp16 = conv(bias = layers_22_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_35118_dilations_0, groups = var_35118_groups_0, pad = var_35118_pad_0, pad_type = var_35118_pad_type_0, strides = var_35118_strides_0, weight = layers_22_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = tensor("op_35118_cast_fp16")]; tensor var_35124_pad_type_0 = const()[name = tensor("op_35124_pad_type_0"), val = tensor("valid")]; tensor var_35124_strides_0 = const()[name = tensor("op_35124_strides_0"), val = tensor([1, 1])]; tensor var_35124_pad_0 = const()[name = tensor("op_35124_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35124_dilations_0 = const()[name = tensor("op_35124_dilations_0"), val = tensor([1, 1])]; tensor var_35124_groups_0 = const()[name = tensor("op_35124_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297674560))), name = tensor("layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297639040))), shape = tensor([1280, 1280, 1, 1])]; tensor var_35124_cast_fp16 = conv(dilations = var_35124_dilations_0, groups = var_35124_groups_0, pad = var_35124_pad_0, pad_type = var_35124_pad_type_0, strides = var_35124_strides_0, weight = layers_22_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = tensor("op_35124_cast_fp16")]; tensor query_45_cast_fp16 = add(x = var_35118_cast_fp16, y = var_35124_cast_fp16)[name = tensor("query_45_cast_fp16")]; tensor var_35133_pad_type_0 = const()[name = tensor("op_35133_pad_type_0"), val = tensor("valid")]; tensor var_35133_strides_0 = const()[name = tensor("op_35133_strides_0"), val = tensor([1, 1])]; tensor var_35133_pad_0 = const()[name = tensor("op_35133_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35133_dilations_0 = const()[name = tensor("op_35133_dilations_0"), val = tensor([1, 1])]; tensor var_35133_groups_0 = const()[name = tensor("op_35133_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(297879424))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298698688))), name = tensor("layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_35133_cast_fp16 = conv(dilations = var_35133_dilations_0, groups = var_35133_groups_0, pad = var_35133_pad_0, pad_type = var_35133_pad_type_0, strides = var_35133_strides_0, weight = layers_22_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = tensor("op_35133_cast_fp16")]; tensor var_35139_pad_type_0 = const()[name = tensor("op_35139_pad_type_0"), val = tensor("valid")]; tensor var_35139_strides_0 = const()[name = tensor("op_35139_strides_0"), val = tensor([1, 1])]; tensor var_35139_pad_0 = const()[name = tensor("op_35139_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35139_dilations_0 = const()[name = tensor("op_35139_dilations_0"), val = tensor([1, 1])]; tensor var_35139_groups_0 = const()[name = tensor("op_35139_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298728768))), name = tensor("layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298698816))), shape = tensor([1280, 1280, 1, 1])]; tensor var_35139_cast_fp16 = conv(dilations = var_35139_dilations_0, groups = var_35139_groups_0, pad = var_35139_pad_0, pad_type = var_35139_pad_type_0, strides = var_35139_strides_0, weight = layers_22_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = tensor("op_35139_cast_fp16")]; tensor key_45_cast_fp16 = add(x = var_35133_cast_fp16, y = var_35139_cast_fp16)[name = tensor("key_45_cast_fp16")]; tensor var_35149_pad_type_0 = const()[name = tensor("op_35149_pad_type_0"), val = tensor("valid")]; tensor var_35149_strides_0 = const()[name = tensor("op_35149_strides_0"), val = tensor([1, 1])]; tensor var_35149_pad_0 = const()[name = tensor("op_35149_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35149_dilations_0 = const()[name = tensor("op_35149_dilations_0"), val = tensor([1, 1])]; tensor var_35149_groups_0 = const()[name = tensor("op_35149_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(298933632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299752896))), name = tensor("layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_22_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299753024)))]; tensor var_35149_cast_fp16 = conv(bias = layers_22_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_35149_dilations_0, groups = var_35149_groups_0, pad = var_35149_pad_0, pad_type = var_35149_pad_type_0, strides = var_35149_strides_0, weight = layers_22_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_89_cast_fp16)[name = tensor("op_35149_cast_fp16")]; tensor var_35155_pad_type_0 = const()[name = tensor("op_35155_pad_type_0"), val = tensor("valid")]; tensor var_35155_strides_0 = const()[name = tensor("op_35155_strides_0"), val = tensor([1, 1])]; tensor var_35155_pad_0 = const()[name = tensor("op_35155_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_35155_dilations_0 = const()[name = tensor("op_35155_dilations_0"), val = tensor([1, 1])]; tensor var_35155_groups_0 = const()[name = tensor("op_35155_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299768064))), name = tensor("layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299755648))), shape = tensor([1280, 1280, 1, 1])]; tensor var_35155_cast_fp16 = conv(dilations = var_35155_dilations_0, groups = var_35155_groups_0, pad = var_35155_pad_0, pad_type = var_35155_pad_type_0, strides = var_35155_strides_0, weight = layers_22_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_89_cast_fp16)[name = tensor("op_35155_cast_fp16")]; tensor value_45_cast_fp16 = add(x = var_35149_cast_fp16, y = var_35155_cast_fp16)[name = tensor("value_45_cast_fp16")]; tensor var_35161_begin_0 = const()[name = tensor("op_35161_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35161_end_0 = const()[name = tensor("op_35161_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35161_end_mask_0 = const()[name = tensor("op_35161_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35161_cast_fp16 = slice_by_index(begin = var_35161_begin_0, end = var_35161_end_0, end_mask = var_35161_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35161_cast_fp16")]; tensor var_35165_begin_0 = const()[name = tensor("op_35165_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_35165_end_0 = const()[name = tensor("op_35165_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_35165_end_mask_0 = const()[name = tensor("op_35165_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35165_cast_fp16 = slice_by_index(begin = var_35165_begin_0, end = var_35165_end_0, end_mask = var_35165_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35165_cast_fp16")]; tensor var_35169_begin_0 = const()[name = tensor("op_35169_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_35169_end_0 = const()[name = tensor("op_35169_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_35169_end_mask_0 = const()[name = tensor("op_35169_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35169_cast_fp16 = slice_by_index(begin = var_35169_begin_0, end = var_35169_end_0, end_mask = var_35169_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35169_cast_fp16")]; tensor var_35173_begin_0 = const()[name = tensor("op_35173_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_35173_end_0 = const()[name = tensor("op_35173_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_35173_end_mask_0 = const()[name = tensor("op_35173_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35173_cast_fp16 = slice_by_index(begin = var_35173_begin_0, end = var_35173_end_0, end_mask = var_35173_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35173_cast_fp16")]; tensor var_35177_begin_0 = const()[name = tensor("op_35177_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_35177_end_0 = const()[name = tensor("op_35177_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_35177_end_mask_0 = const()[name = tensor("op_35177_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35177_cast_fp16 = slice_by_index(begin = var_35177_begin_0, end = var_35177_end_0, end_mask = var_35177_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35177_cast_fp16")]; tensor var_35181_begin_0 = const()[name = tensor("op_35181_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_35181_end_0 = const()[name = tensor("op_35181_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_35181_end_mask_0 = const()[name = tensor("op_35181_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35181_cast_fp16 = slice_by_index(begin = var_35181_begin_0, end = var_35181_end_0, end_mask = var_35181_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35181_cast_fp16")]; tensor var_35185_begin_0 = const()[name = tensor("op_35185_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_35185_end_0 = const()[name = tensor("op_35185_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_35185_end_mask_0 = const()[name = tensor("op_35185_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35185_cast_fp16 = slice_by_index(begin = var_35185_begin_0, end = var_35185_end_0, end_mask = var_35185_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35185_cast_fp16")]; tensor var_35189_begin_0 = const()[name = tensor("op_35189_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_35189_end_0 = const()[name = tensor("op_35189_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_35189_end_mask_0 = const()[name = tensor("op_35189_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35189_cast_fp16 = slice_by_index(begin = var_35189_begin_0, end = var_35189_end_0, end_mask = var_35189_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35189_cast_fp16")]; tensor var_35193_begin_0 = const()[name = tensor("op_35193_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_35193_end_0 = const()[name = tensor("op_35193_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_35193_end_mask_0 = const()[name = tensor("op_35193_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35193_cast_fp16 = slice_by_index(begin = var_35193_begin_0, end = var_35193_end_0, end_mask = var_35193_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35193_cast_fp16")]; tensor var_35197_begin_0 = const()[name = tensor("op_35197_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_35197_end_0 = const()[name = tensor("op_35197_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_35197_end_mask_0 = const()[name = tensor("op_35197_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35197_cast_fp16 = slice_by_index(begin = var_35197_begin_0, end = var_35197_end_0, end_mask = var_35197_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35197_cast_fp16")]; tensor var_35201_begin_0 = const()[name = tensor("op_35201_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_35201_end_0 = const()[name = tensor("op_35201_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_35201_end_mask_0 = const()[name = tensor("op_35201_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35201_cast_fp16 = slice_by_index(begin = var_35201_begin_0, end = var_35201_end_0, end_mask = var_35201_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35201_cast_fp16")]; tensor var_35205_begin_0 = const()[name = tensor("op_35205_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_35205_end_0 = const()[name = tensor("op_35205_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_35205_end_mask_0 = const()[name = tensor("op_35205_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35205_cast_fp16 = slice_by_index(begin = var_35205_begin_0, end = var_35205_end_0, end_mask = var_35205_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35205_cast_fp16")]; tensor var_35209_begin_0 = const()[name = tensor("op_35209_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_35209_end_0 = const()[name = tensor("op_35209_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_35209_end_mask_0 = const()[name = tensor("op_35209_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35209_cast_fp16 = slice_by_index(begin = var_35209_begin_0, end = var_35209_end_0, end_mask = var_35209_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35209_cast_fp16")]; tensor var_35213_begin_0 = const()[name = tensor("op_35213_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_35213_end_0 = const()[name = tensor("op_35213_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_35213_end_mask_0 = const()[name = tensor("op_35213_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35213_cast_fp16 = slice_by_index(begin = var_35213_begin_0, end = var_35213_end_0, end_mask = var_35213_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35213_cast_fp16")]; tensor var_35217_begin_0 = const()[name = tensor("op_35217_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_35217_end_0 = const()[name = tensor("op_35217_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_35217_end_mask_0 = const()[name = tensor("op_35217_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35217_cast_fp16 = slice_by_index(begin = var_35217_begin_0, end = var_35217_end_0, end_mask = var_35217_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35217_cast_fp16")]; tensor var_35221_begin_0 = const()[name = tensor("op_35221_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_35221_end_0 = const()[name = tensor("op_35221_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_35221_end_mask_0 = const()[name = tensor("op_35221_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35221_cast_fp16 = slice_by_index(begin = var_35221_begin_0, end = var_35221_end_0, end_mask = var_35221_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35221_cast_fp16")]; tensor var_35225_begin_0 = const()[name = tensor("op_35225_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_35225_end_0 = const()[name = tensor("op_35225_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_35225_end_mask_0 = const()[name = tensor("op_35225_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35225_cast_fp16 = slice_by_index(begin = var_35225_begin_0, end = var_35225_end_0, end_mask = var_35225_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35225_cast_fp16")]; tensor var_35229_begin_0 = const()[name = tensor("op_35229_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_35229_end_0 = const()[name = tensor("op_35229_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_35229_end_mask_0 = const()[name = tensor("op_35229_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35229_cast_fp16 = slice_by_index(begin = var_35229_begin_0, end = var_35229_end_0, end_mask = var_35229_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35229_cast_fp16")]; tensor var_35233_begin_0 = const()[name = tensor("op_35233_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_35233_end_0 = const()[name = tensor("op_35233_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_35233_end_mask_0 = const()[name = tensor("op_35233_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35233_cast_fp16 = slice_by_index(begin = var_35233_begin_0, end = var_35233_end_0, end_mask = var_35233_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35233_cast_fp16")]; tensor var_35237_begin_0 = const()[name = tensor("op_35237_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_35237_end_0 = const()[name = tensor("op_35237_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_35237_end_mask_0 = const()[name = tensor("op_35237_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35237_cast_fp16 = slice_by_index(begin = var_35237_begin_0, end = var_35237_end_0, end_mask = var_35237_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_35237_cast_fp16")]; tensor var_35246_begin_0 = const()[name = tensor("op_35246_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35246_end_0 = const()[name = tensor("op_35246_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35246_end_mask_0 = const()[name = tensor("op_35246_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35246_cast_fp16 = slice_by_index(begin = var_35246_begin_0, end = var_35246_end_0, end_mask = var_35246_end_mask_0, x = var_35161_cast_fp16)[name = tensor("op_35246_cast_fp16")]; tensor var_35253_begin_0 = const()[name = tensor("op_35253_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35253_end_0 = const()[name = tensor("op_35253_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35253_end_mask_0 = const()[name = tensor("op_35253_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35253_cast_fp16 = slice_by_index(begin = var_35253_begin_0, end = var_35253_end_0, end_mask = var_35253_end_mask_0, x = var_35161_cast_fp16)[name = tensor("op_35253_cast_fp16")]; tensor var_35260_begin_0 = const()[name = tensor("op_35260_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35260_end_0 = const()[name = tensor("op_35260_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35260_end_mask_0 = const()[name = tensor("op_35260_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35260_cast_fp16 = slice_by_index(begin = var_35260_begin_0, end = var_35260_end_0, end_mask = var_35260_end_mask_0, x = var_35161_cast_fp16)[name = tensor("op_35260_cast_fp16")]; tensor var_35267_begin_0 = const()[name = tensor("op_35267_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35267_end_0 = const()[name = tensor("op_35267_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35267_end_mask_0 = const()[name = tensor("op_35267_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35267_cast_fp16 = slice_by_index(begin = var_35267_begin_0, end = var_35267_end_0, end_mask = var_35267_end_mask_0, x = var_35161_cast_fp16)[name = tensor("op_35267_cast_fp16")]; tensor var_35274_begin_0 = const()[name = tensor("op_35274_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35274_end_0 = const()[name = tensor("op_35274_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35274_end_mask_0 = const()[name = tensor("op_35274_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35274_cast_fp16 = slice_by_index(begin = var_35274_begin_0, end = var_35274_end_0, end_mask = var_35274_end_mask_0, x = var_35165_cast_fp16)[name = tensor("op_35274_cast_fp16")]; tensor var_35281_begin_0 = const()[name = tensor("op_35281_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35281_end_0 = const()[name = tensor("op_35281_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35281_end_mask_0 = const()[name = tensor("op_35281_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35281_cast_fp16 = slice_by_index(begin = var_35281_begin_0, end = var_35281_end_0, end_mask = var_35281_end_mask_0, x = var_35165_cast_fp16)[name = tensor("op_35281_cast_fp16")]; tensor var_35288_begin_0 = const()[name = tensor("op_35288_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35288_end_0 = const()[name = tensor("op_35288_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35288_end_mask_0 = const()[name = tensor("op_35288_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35288_cast_fp16 = slice_by_index(begin = var_35288_begin_0, end = var_35288_end_0, end_mask = var_35288_end_mask_0, x = var_35165_cast_fp16)[name = tensor("op_35288_cast_fp16")]; tensor var_35295_begin_0 = const()[name = tensor("op_35295_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35295_end_0 = const()[name = tensor("op_35295_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35295_end_mask_0 = const()[name = tensor("op_35295_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35295_cast_fp16 = slice_by_index(begin = var_35295_begin_0, end = var_35295_end_0, end_mask = var_35295_end_mask_0, x = var_35165_cast_fp16)[name = tensor("op_35295_cast_fp16")]; tensor var_35302_begin_0 = const()[name = tensor("op_35302_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35302_end_0 = const()[name = tensor("op_35302_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35302_end_mask_0 = const()[name = tensor("op_35302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35302_cast_fp16 = slice_by_index(begin = var_35302_begin_0, end = var_35302_end_0, end_mask = var_35302_end_mask_0, x = var_35169_cast_fp16)[name = tensor("op_35302_cast_fp16")]; tensor var_35309_begin_0 = const()[name = tensor("op_35309_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35309_end_0 = const()[name = tensor("op_35309_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35309_end_mask_0 = const()[name = tensor("op_35309_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35309_cast_fp16 = slice_by_index(begin = var_35309_begin_0, end = var_35309_end_0, end_mask = var_35309_end_mask_0, x = var_35169_cast_fp16)[name = tensor("op_35309_cast_fp16")]; tensor var_35316_begin_0 = const()[name = tensor("op_35316_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35316_end_0 = const()[name = tensor("op_35316_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35316_end_mask_0 = const()[name = tensor("op_35316_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35316_cast_fp16 = slice_by_index(begin = var_35316_begin_0, end = var_35316_end_0, end_mask = var_35316_end_mask_0, x = var_35169_cast_fp16)[name = tensor("op_35316_cast_fp16")]; tensor var_35323_begin_0 = const()[name = tensor("op_35323_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35323_end_0 = const()[name = tensor("op_35323_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35323_end_mask_0 = const()[name = tensor("op_35323_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35323_cast_fp16 = slice_by_index(begin = var_35323_begin_0, end = var_35323_end_0, end_mask = var_35323_end_mask_0, x = var_35169_cast_fp16)[name = tensor("op_35323_cast_fp16")]; tensor var_35330_begin_0 = const()[name = tensor("op_35330_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35330_end_0 = const()[name = tensor("op_35330_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35330_end_mask_0 = const()[name = tensor("op_35330_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35330_cast_fp16 = slice_by_index(begin = var_35330_begin_0, end = var_35330_end_0, end_mask = var_35330_end_mask_0, x = var_35173_cast_fp16)[name = tensor("op_35330_cast_fp16")]; tensor var_35337_begin_0 = const()[name = tensor("op_35337_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35337_end_0 = const()[name = tensor("op_35337_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35337_end_mask_0 = const()[name = tensor("op_35337_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35337_cast_fp16 = slice_by_index(begin = var_35337_begin_0, end = var_35337_end_0, end_mask = var_35337_end_mask_0, x = var_35173_cast_fp16)[name = tensor("op_35337_cast_fp16")]; tensor var_35344_begin_0 = const()[name = tensor("op_35344_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35344_end_0 = const()[name = tensor("op_35344_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35344_end_mask_0 = const()[name = tensor("op_35344_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35344_cast_fp16 = slice_by_index(begin = var_35344_begin_0, end = var_35344_end_0, end_mask = var_35344_end_mask_0, x = var_35173_cast_fp16)[name = tensor("op_35344_cast_fp16")]; tensor var_35351_begin_0 = const()[name = tensor("op_35351_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35351_end_0 = const()[name = tensor("op_35351_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35351_end_mask_0 = const()[name = tensor("op_35351_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35351_cast_fp16 = slice_by_index(begin = var_35351_begin_0, end = var_35351_end_0, end_mask = var_35351_end_mask_0, x = var_35173_cast_fp16)[name = tensor("op_35351_cast_fp16")]; tensor var_35358_begin_0 = const()[name = tensor("op_35358_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35358_end_0 = const()[name = tensor("op_35358_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35358_end_mask_0 = const()[name = tensor("op_35358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35358_cast_fp16 = slice_by_index(begin = var_35358_begin_0, end = var_35358_end_0, end_mask = var_35358_end_mask_0, x = var_35177_cast_fp16)[name = tensor("op_35358_cast_fp16")]; tensor var_35365_begin_0 = const()[name = tensor("op_35365_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35365_end_0 = const()[name = tensor("op_35365_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35365_end_mask_0 = const()[name = tensor("op_35365_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35365_cast_fp16 = slice_by_index(begin = var_35365_begin_0, end = var_35365_end_0, end_mask = var_35365_end_mask_0, x = var_35177_cast_fp16)[name = tensor("op_35365_cast_fp16")]; tensor var_35372_begin_0 = const()[name = tensor("op_35372_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35372_end_0 = const()[name = tensor("op_35372_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35372_end_mask_0 = const()[name = tensor("op_35372_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35372_cast_fp16 = slice_by_index(begin = var_35372_begin_0, end = var_35372_end_0, end_mask = var_35372_end_mask_0, x = var_35177_cast_fp16)[name = tensor("op_35372_cast_fp16")]; tensor var_35379_begin_0 = const()[name = tensor("op_35379_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35379_end_0 = const()[name = tensor("op_35379_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35379_end_mask_0 = const()[name = tensor("op_35379_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35379_cast_fp16 = slice_by_index(begin = var_35379_begin_0, end = var_35379_end_0, end_mask = var_35379_end_mask_0, x = var_35177_cast_fp16)[name = tensor("op_35379_cast_fp16")]; tensor var_35386_begin_0 = const()[name = tensor("op_35386_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35386_end_0 = const()[name = tensor("op_35386_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35386_end_mask_0 = const()[name = tensor("op_35386_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35386_cast_fp16 = slice_by_index(begin = var_35386_begin_0, end = var_35386_end_0, end_mask = var_35386_end_mask_0, x = var_35181_cast_fp16)[name = tensor("op_35386_cast_fp16")]; tensor var_35393_begin_0 = const()[name = tensor("op_35393_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35393_end_0 = const()[name = tensor("op_35393_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35393_end_mask_0 = const()[name = tensor("op_35393_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35393_cast_fp16 = slice_by_index(begin = var_35393_begin_0, end = var_35393_end_0, end_mask = var_35393_end_mask_0, x = var_35181_cast_fp16)[name = tensor("op_35393_cast_fp16")]; tensor var_35400_begin_0 = const()[name = tensor("op_35400_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35400_end_0 = const()[name = tensor("op_35400_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35400_end_mask_0 = const()[name = tensor("op_35400_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35400_cast_fp16 = slice_by_index(begin = var_35400_begin_0, end = var_35400_end_0, end_mask = var_35400_end_mask_0, x = var_35181_cast_fp16)[name = tensor("op_35400_cast_fp16")]; tensor var_35407_begin_0 = const()[name = tensor("op_35407_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35407_end_0 = const()[name = tensor("op_35407_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35407_end_mask_0 = const()[name = tensor("op_35407_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35407_cast_fp16 = slice_by_index(begin = var_35407_begin_0, end = var_35407_end_0, end_mask = var_35407_end_mask_0, x = var_35181_cast_fp16)[name = tensor("op_35407_cast_fp16")]; tensor var_35414_begin_0 = const()[name = tensor("op_35414_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35414_end_0 = const()[name = tensor("op_35414_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35414_end_mask_0 = const()[name = tensor("op_35414_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35414_cast_fp16 = slice_by_index(begin = var_35414_begin_0, end = var_35414_end_0, end_mask = var_35414_end_mask_0, x = var_35185_cast_fp16)[name = tensor("op_35414_cast_fp16")]; tensor var_35421_begin_0 = const()[name = tensor("op_35421_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35421_end_0 = const()[name = tensor("op_35421_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35421_end_mask_0 = const()[name = tensor("op_35421_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35421_cast_fp16 = slice_by_index(begin = var_35421_begin_0, end = var_35421_end_0, end_mask = var_35421_end_mask_0, x = var_35185_cast_fp16)[name = tensor("op_35421_cast_fp16")]; tensor var_35428_begin_0 = const()[name = tensor("op_35428_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35428_end_0 = const()[name = tensor("op_35428_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35428_end_mask_0 = const()[name = tensor("op_35428_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35428_cast_fp16 = slice_by_index(begin = var_35428_begin_0, end = var_35428_end_0, end_mask = var_35428_end_mask_0, x = var_35185_cast_fp16)[name = tensor("op_35428_cast_fp16")]; tensor var_35435_begin_0 = const()[name = tensor("op_35435_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35435_end_0 = const()[name = tensor("op_35435_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35435_end_mask_0 = const()[name = tensor("op_35435_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35435_cast_fp16 = slice_by_index(begin = var_35435_begin_0, end = var_35435_end_0, end_mask = var_35435_end_mask_0, x = var_35185_cast_fp16)[name = tensor("op_35435_cast_fp16")]; tensor var_35442_begin_0 = const()[name = tensor("op_35442_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35442_end_0 = const()[name = tensor("op_35442_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35442_end_mask_0 = const()[name = tensor("op_35442_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35442_cast_fp16 = slice_by_index(begin = var_35442_begin_0, end = var_35442_end_0, end_mask = var_35442_end_mask_0, x = var_35189_cast_fp16)[name = tensor("op_35442_cast_fp16")]; tensor var_35449_begin_0 = const()[name = tensor("op_35449_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35449_end_0 = const()[name = tensor("op_35449_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35449_end_mask_0 = const()[name = tensor("op_35449_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35449_cast_fp16 = slice_by_index(begin = var_35449_begin_0, end = var_35449_end_0, end_mask = var_35449_end_mask_0, x = var_35189_cast_fp16)[name = tensor("op_35449_cast_fp16")]; tensor var_35456_begin_0 = const()[name = tensor("op_35456_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35456_end_0 = const()[name = tensor("op_35456_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35456_end_mask_0 = const()[name = tensor("op_35456_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35456_cast_fp16 = slice_by_index(begin = var_35456_begin_0, end = var_35456_end_0, end_mask = var_35456_end_mask_0, x = var_35189_cast_fp16)[name = tensor("op_35456_cast_fp16")]; tensor var_35463_begin_0 = const()[name = tensor("op_35463_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35463_end_0 = const()[name = tensor("op_35463_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35463_end_mask_0 = const()[name = tensor("op_35463_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35463_cast_fp16 = slice_by_index(begin = var_35463_begin_0, end = var_35463_end_0, end_mask = var_35463_end_mask_0, x = var_35189_cast_fp16)[name = tensor("op_35463_cast_fp16")]; tensor var_35470_begin_0 = const()[name = tensor("op_35470_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35470_end_0 = const()[name = tensor("op_35470_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35470_end_mask_0 = const()[name = tensor("op_35470_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35470_cast_fp16 = slice_by_index(begin = var_35470_begin_0, end = var_35470_end_0, end_mask = var_35470_end_mask_0, x = var_35193_cast_fp16)[name = tensor("op_35470_cast_fp16")]; tensor var_35477_begin_0 = const()[name = tensor("op_35477_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35477_end_0 = const()[name = tensor("op_35477_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35477_end_mask_0 = const()[name = tensor("op_35477_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35477_cast_fp16 = slice_by_index(begin = var_35477_begin_0, end = var_35477_end_0, end_mask = var_35477_end_mask_0, x = var_35193_cast_fp16)[name = tensor("op_35477_cast_fp16")]; tensor var_35484_begin_0 = const()[name = tensor("op_35484_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35484_end_0 = const()[name = tensor("op_35484_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35484_end_mask_0 = const()[name = tensor("op_35484_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35484_cast_fp16 = slice_by_index(begin = var_35484_begin_0, end = var_35484_end_0, end_mask = var_35484_end_mask_0, x = var_35193_cast_fp16)[name = tensor("op_35484_cast_fp16")]; tensor var_35491_begin_0 = const()[name = tensor("op_35491_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35491_end_0 = const()[name = tensor("op_35491_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35491_end_mask_0 = const()[name = tensor("op_35491_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35491_cast_fp16 = slice_by_index(begin = var_35491_begin_0, end = var_35491_end_0, end_mask = var_35491_end_mask_0, x = var_35193_cast_fp16)[name = tensor("op_35491_cast_fp16")]; tensor var_35498_begin_0 = const()[name = tensor("op_35498_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35498_end_0 = const()[name = tensor("op_35498_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35498_end_mask_0 = const()[name = tensor("op_35498_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35498_cast_fp16 = slice_by_index(begin = var_35498_begin_0, end = var_35498_end_0, end_mask = var_35498_end_mask_0, x = var_35197_cast_fp16)[name = tensor("op_35498_cast_fp16")]; tensor var_35505_begin_0 = const()[name = tensor("op_35505_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35505_end_0 = const()[name = tensor("op_35505_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35505_end_mask_0 = const()[name = tensor("op_35505_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35505_cast_fp16 = slice_by_index(begin = var_35505_begin_0, end = var_35505_end_0, end_mask = var_35505_end_mask_0, x = var_35197_cast_fp16)[name = tensor("op_35505_cast_fp16")]; tensor var_35512_begin_0 = const()[name = tensor("op_35512_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35512_end_0 = const()[name = tensor("op_35512_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35512_end_mask_0 = const()[name = tensor("op_35512_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35512_cast_fp16 = slice_by_index(begin = var_35512_begin_0, end = var_35512_end_0, end_mask = var_35512_end_mask_0, x = var_35197_cast_fp16)[name = tensor("op_35512_cast_fp16")]; tensor var_35519_begin_0 = const()[name = tensor("op_35519_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35519_end_0 = const()[name = tensor("op_35519_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35519_end_mask_0 = const()[name = tensor("op_35519_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35519_cast_fp16 = slice_by_index(begin = var_35519_begin_0, end = var_35519_end_0, end_mask = var_35519_end_mask_0, x = var_35197_cast_fp16)[name = tensor("op_35519_cast_fp16")]; tensor var_35526_begin_0 = const()[name = tensor("op_35526_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35526_end_0 = const()[name = tensor("op_35526_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35526_end_mask_0 = const()[name = tensor("op_35526_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35526_cast_fp16 = slice_by_index(begin = var_35526_begin_0, end = var_35526_end_0, end_mask = var_35526_end_mask_0, x = var_35201_cast_fp16)[name = tensor("op_35526_cast_fp16")]; tensor var_35533_begin_0 = const()[name = tensor("op_35533_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35533_end_0 = const()[name = tensor("op_35533_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35533_end_mask_0 = const()[name = tensor("op_35533_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35533_cast_fp16 = slice_by_index(begin = var_35533_begin_0, end = var_35533_end_0, end_mask = var_35533_end_mask_0, x = var_35201_cast_fp16)[name = tensor("op_35533_cast_fp16")]; tensor var_35540_begin_0 = const()[name = tensor("op_35540_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35540_end_0 = const()[name = tensor("op_35540_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35540_end_mask_0 = const()[name = tensor("op_35540_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35540_cast_fp16 = slice_by_index(begin = var_35540_begin_0, end = var_35540_end_0, end_mask = var_35540_end_mask_0, x = var_35201_cast_fp16)[name = tensor("op_35540_cast_fp16")]; tensor var_35547_begin_0 = const()[name = tensor("op_35547_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35547_end_0 = const()[name = tensor("op_35547_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35547_end_mask_0 = const()[name = tensor("op_35547_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35547_cast_fp16 = slice_by_index(begin = var_35547_begin_0, end = var_35547_end_0, end_mask = var_35547_end_mask_0, x = var_35201_cast_fp16)[name = tensor("op_35547_cast_fp16")]; tensor var_35554_begin_0 = const()[name = tensor("op_35554_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35554_end_0 = const()[name = tensor("op_35554_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35554_end_mask_0 = const()[name = tensor("op_35554_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35554_cast_fp16 = slice_by_index(begin = var_35554_begin_0, end = var_35554_end_0, end_mask = var_35554_end_mask_0, x = var_35205_cast_fp16)[name = tensor("op_35554_cast_fp16")]; tensor var_35561_begin_0 = const()[name = tensor("op_35561_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35561_end_0 = const()[name = tensor("op_35561_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35561_end_mask_0 = const()[name = tensor("op_35561_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35561_cast_fp16 = slice_by_index(begin = var_35561_begin_0, end = var_35561_end_0, end_mask = var_35561_end_mask_0, x = var_35205_cast_fp16)[name = tensor("op_35561_cast_fp16")]; tensor var_35568_begin_0 = const()[name = tensor("op_35568_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35568_end_0 = const()[name = tensor("op_35568_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35568_end_mask_0 = const()[name = tensor("op_35568_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35568_cast_fp16 = slice_by_index(begin = var_35568_begin_0, end = var_35568_end_0, end_mask = var_35568_end_mask_0, x = var_35205_cast_fp16)[name = tensor("op_35568_cast_fp16")]; tensor var_35575_begin_0 = const()[name = tensor("op_35575_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35575_end_0 = const()[name = tensor("op_35575_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35575_end_mask_0 = const()[name = tensor("op_35575_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35575_cast_fp16 = slice_by_index(begin = var_35575_begin_0, end = var_35575_end_0, end_mask = var_35575_end_mask_0, x = var_35205_cast_fp16)[name = tensor("op_35575_cast_fp16")]; tensor var_35582_begin_0 = const()[name = tensor("op_35582_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35582_end_0 = const()[name = tensor("op_35582_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35582_end_mask_0 = const()[name = tensor("op_35582_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35582_cast_fp16 = slice_by_index(begin = var_35582_begin_0, end = var_35582_end_0, end_mask = var_35582_end_mask_0, x = var_35209_cast_fp16)[name = tensor("op_35582_cast_fp16")]; tensor var_35589_begin_0 = const()[name = tensor("op_35589_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35589_end_0 = const()[name = tensor("op_35589_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35589_end_mask_0 = const()[name = tensor("op_35589_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35589_cast_fp16 = slice_by_index(begin = var_35589_begin_0, end = var_35589_end_0, end_mask = var_35589_end_mask_0, x = var_35209_cast_fp16)[name = tensor("op_35589_cast_fp16")]; tensor var_35596_begin_0 = const()[name = tensor("op_35596_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35596_end_0 = const()[name = tensor("op_35596_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35596_end_mask_0 = const()[name = tensor("op_35596_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35596_cast_fp16 = slice_by_index(begin = var_35596_begin_0, end = var_35596_end_0, end_mask = var_35596_end_mask_0, x = var_35209_cast_fp16)[name = tensor("op_35596_cast_fp16")]; tensor var_35603_begin_0 = const()[name = tensor("op_35603_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35603_end_0 = const()[name = tensor("op_35603_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35603_end_mask_0 = const()[name = tensor("op_35603_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35603_cast_fp16 = slice_by_index(begin = var_35603_begin_0, end = var_35603_end_0, end_mask = var_35603_end_mask_0, x = var_35209_cast_fp16)[name = tensor("op_35603_cast_fp16")]; tensor var_35610_begin_0 = const()[name = tensor("op_35610_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35610_end_0 = const()[name = tensor("op_35610_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35610_end_mask_0 = const()[name = tensor("op_35610_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35610_cast_fp16 = slice_by_index(begin = var_35610_begin_0, end = var_35610_end_0, end_mask = var_35610_end_mask_0, x = var_35213_cast_fp16)[name = tensor("op_35610_cast_fp16")]; tensor var_35617_begin_0 = const()[name = tensor("op_35617_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35617_end_0 = const()[name = tensor("op_35617_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35617_end_mask_0 = const()[name = tensor("op_35617_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35617_cast_fp16 = slice_by_index(begin = var_35617_begin_0, end = var_35617_end_0, end_mask = var_35617_end_mask_0, x = var_35213_cast_fp16)[name = tensor("op_35617_cast_fp16")]; tensor var_35624_begin_0 = const()[name = tensor("op_35624_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35624_end_0 = const()[name = tensor("op_35624_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35624_end_mask_0 = const()[name = tensor("op_35624_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35624_cast_fp16 = slice_by_index(begin = var_35624_begin_0, end = var_35624_end_0, end_mask = var_35624_end_mask_0, x = var_35213_cast_fp16)[name = tensor("op_35624_cast_fp16")]; tensor var_35631_begin_0 = const()[name = tensor("op_35631_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35631_end_0 = const()[name = tensor("op_35631_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35631_end_mask_0 = const()[name = tensor("op_35631_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35631_cast_fp16 = slice_by_index(begin = var_35631_begin_0, end = var_35631_end_0, end_mask = var_35631_end_mask_0, x = var_35213_cast_fp16)[name = tensor("op_35631_cast_fp16")]; tensor var_35638_begin_0 = const()[name = tensor("op_35638_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35638_end_0 = const()[name = tensor("op_35638_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35638_end_mask_0 = const()[name = tensor("op_35638_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35638_cast_fp16 = slice_by_index(begin = var_35638_begin_0, end = var_35638_end_0, end_mask = var_35638_end_mask_0, x = var_35217_cast_fp16)[name = tensor("op_35638_cast_fp16")]; tensor var_35645_begin_0 = const()[name = tensor("op_35645_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35645_end_0 = const()[name = tensor("op_35645_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35645_end_mask_0 = const()[name = tensor("op_35645_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35645_cast_fp16 = slice_by_index(begin = var_35645_begin_0, end = var_35645_end_0, end_mask = var_35645_end_mask_0, x = var_35217_cast_fp16)[name = tensor("op_35645_cast_fp16")]; tensor var_35652_begin_0 = const()[name = tensor("op_35652_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35652_end_0 = const()[name = tensor("op_35652_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35652_end_mask_0 = const()[name = tensor("op_35652_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35652_cast_fp16 = slice_by_index(begin = var_35652_begin_0, end = var_35652_end_0, end_mask = var_35652_end_mask_0, x = var_35217_cast_fp16)[name = tensor("op_35652_cast_fp16")]; tensor var_35659_begin_0 = const()[name = tensor("op_35659_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35659_end_0 = const()[name = tensor("op_35659_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35659_end_mask_0 = const()[name = tensor("op_35659_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35659_cast_fp16 = slice_by_index(begin = var_35659_begin_0, end = var_35659_end_0, end_mask = var_35659_end_mask_0, x = var_35217_cast_fp16)[name = tensor("op_35659_cast_fp16")]; tensor var_35666_begin_0 = const()[name = tensor("op_35666_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35666_end_0 = const()[name = tensor("op_35666_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35666_end_mask_0 = const()[name = tensor("op_35666_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35666_cast_fp16 = slice_by_index(begin = var_35666_begin_0, end = var_35666_end_0, end_mask = var_35666_end_mask_0, x = var_35221_cast_fp16)[name = tensor("op_35666_cast_fp16")]; tensor var_35673_begin_0 = const()[name = tensor("op_35673_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35673_end_0 = const()[name = tensor("op_35673_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35673_end_mask_0 = const()[name = tensor("op_35673_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35673_cast_fp16 = slice_by_index(begin = var_35673_begin_0, end = var_35673_end_0, end_mask = var_35673_end_mask_0, x = var_35221_cast_fp16)[name = tensor("op_35673_cast_fp16")]; tensor var_35680_begin_0 = const()[name = tensor("op_35680_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35680_end_0 = const()[name = tensor("op_35680_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35680_end_mask_0 = const()[name = tensor("op_35680_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35680_cast_fp16 = slice_by_index(begin = var_35680_begin_0, end = var_35680_end_0, end_mask = var_35680_end_mask_0, x = var_35221_cast_fp16)[name = tensor("op_35680_cast_fp16")]; tensor var_35687_begin_0 = const()[name = tensor("op_35687_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35687_end_0 = const()[name = tensor("op_35687_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35687_end_mask_0 = const()[name = tensor("op_35687_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35687_cast_fp16 = slice_by_index(begin = var_35687_begin_0, end = var_35687_end_0, end_mask = var_35687_end_mask_0, x = var_35221_cast_fp16)[name = tensor("op_35687_cast_fp16")]; tensor var_35694_begin_0 = const()[name = tensor("op_35694_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35694_end_0 = const()[name = tensor("op_35694_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35694_end_mask_0 = const()[name = tensor("op_35694_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35694_cast_fp16 = slice_by_index(begin = var_35694_begin_0, end = var_35694_end_0, end_mask = var_35694_end_mask_0, x = var_35225_cast_fp16)[name = tensor("op_35694_cast_fp16")]; tensor var_35701_begin_0 = const()[name = tensor("op_35701_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35701_end_0 = const()[name = tensor("op_35701_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35701_end_mask_0 = const()[name = tensor("op_35701_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35701_cast_fp16 = slice_by_index(begin = var_35701_begin_0, end = var_35701_end_0, end_mask = var_35701_end_mask_0, x = var_35225_cast_fp16)[name = tensor("op_35701_cast_fp16")]; tensor var_35708_begin_0 = const()[name = tensor("op_35708_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35708_end_0 = const()[name = tensor("op_35708_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35708_end_mask_0 = const()[name = tensor("op_35708_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35708_cast_fp16 = slice_by_index(begin = var_35708_begin_0, end = var_35708_end_0, end_mask = var_35708_end_mask_0, x = var_35225_cast_fp16)[name = tensor("op_35708_cast_fp16")]; tensor var_35715_begin_0 = const()[name = tensor("op_35715_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35715_end_0 = const()[name = tensor("op_35715_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35715_end_mask_0 = const()[name = tensor("op_35715_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35715_cast_fp16 = slice_by_index(begin = var_35715_begin_0, end = var_35715_end_0, end_mask = var_35715_end_mask_0, x = var_35225_cast_fp16)[name = tensor("op_35715_cast_fp16")]; tensor var_35722_begin_0 = const()[name = tensor("op_35722_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35722_end_0 = const()[name = tensor("op_35722_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35722_end_mask_0 = const()[name = tensor("op_35722_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35722_cast_fp16 = slice_by_index(begin = var_35722_begin_0, end = var_35722_end_0, end_mask = var_35722_end_mask_0, x = var_35229_cast_fp16)[name = tensor("op_35722_cast_fp16")]; tensor var_35729_begin_0 = const()[name = tensor("op_35729_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35729_end_0 = const()[name = tensor("op_35729_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35729_end_mask_0 = const()[name = tensor("op_35729_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35729_cast_fp16 = slice_by_index(begin = var_35729_begin_0, end = var_35729_end_0, end_mask = var_35729_end_mask_0, x = var_35229_cast_fp16)[name = tensor("op_35729_cast_fp16")]; tensor var_35736_begin_0 = const()[name = tensor("op_35736_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35736_end_0 = const()[name = tensor("op_35736_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35736_end_mask_0 = const()[name = tensor("op_35736_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35736_cast_fp16 = slice_by_index(begin = var_35736_begin_0, end = var_35736_end_0, end_mask = var_35736_end_mask_0, x = var_35229_cast_fp16)[name = tensor("op_35736_cast_fp16")]; tensor var_35743_begin_0 = const()[name = tensor("op_35743_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35743_end_0 = const()[name = tensor("op_35743_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35743_end_mask_0 = const()[name = tensor("op_35743_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35743_cast_fp16 = slice_by_index(begin = var_35743_begin_0, end = var_35743_end_0, end_mask = var_35743_end_mask_0, x = var_35229_cast_fp16)[name = tensor("op_35743_cast_fp16")]; tensor var_35750_begin_0 = const()[name = tensor("op_35750_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35750_end_0 = const()[name = tensor("op_35750_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35750_end_mask_0 = const()[name = tensor("op_35750_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35750_cast_fp16 = slice_by_index(begin = var_35750_begin_0, end = var_35750_end_0, end_mask = var_35750_end_mask_0, x = var_35233_cast_fp16)[name = tensor("op_35750_cast_fp16")]; tensor var_35757_begin_0 = const()[name = tensor("op_35757_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35757_end_0 = const()[name = tensor("op_35757_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35757_end_mask_0 = const()[name = tensor("op_35757_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35757_cast_fp16 = slice_by_index(begin = var_35757_begin_0, end = var_35757_end_0, end_mask = var_35757_end_mask_0, x = var_35233_cast_fp16)[name = tensor("op_35757_cast_fp16")]; tensor var_35764_begin_0 = const()[name = tensor("op_35764_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35764_end_0 = const()[name = tensor("op_35764_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35764_end_mask_0 = const()[name = tensor("op_35764_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35764_cast_fp16 = slice_by_index(begin = var_35764_begin_0, end = var_35764_end_0, end_mask = var_35764_end_mask_0, x = var_35233_cast_fp16)[name = tensor("op_35764_cast_fp16")]; tensor var_35771_begin_0 = const()[name = tensor("op_35771_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35771_end_0 = const()[name = tensor("op_35771_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35771_end_mask_0 = const()[name = tensor("op_35771_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35771_cast_fp16 = slice_by_index(begin = var_35771_begin_0, end = var_35771_end_0, end_mask = var_35771_end_mask_0, x = var_35233_cast_fp16)[name = tensor("op_35771_cast_fp16")]; tensor var_35778_begin_0 = const()[name = tensor("op_35778_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35778_end_0 = const()[name = tensor("op_35778_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_35778_end_mask_0 = const()[name = tensor("op_35778_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35778_cast_fp16 = slice_by_index(begin = var_35778_begin_0, end = var_35778_end_0, end_mask = var_35778_end_mask_0, x = var_35237_cast_fp16)[name = tensor("op_35778_cast_fp16")]; tensor var_35785_begin_0 = const()[name = tensor("op_35785_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_35785_end_0 = const()[name = tensor("op_35785_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_35785_end_mask_0 = const()[name = tensor("op_35785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35785_cast_fp16 = slice_by_index(begin = var_35785_begin_0, end = var_35785_end_0, end_mask = var_35785_end_mask_0, x = var_35237_cast_fp16)[name = tensor("op_35785_cast_fp16")]; tensor var_35792_begin_0 = const()[name = tensor("op_35792_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_35792_end_0 = const()[name = tensor("op_35792_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_35792_end_mask_0 = const()[name = tensor("op_35792_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35792_cast_fp16 = slice_by_index(begin = var_35792_begin_0, end = var_35792_end_0, end_mask = var_35792_end_mask_0, x = var_35237_cast_fp16)[name = tensor("op_35792_cast_fp16")]; tensor var_35799_begin_0 = const()[name = tensor("op_35799_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_35799_end_0 = const()[name = tensor("op_35799_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35799_end_mask_0 = const()[name = tensor("op_35799_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35799_cast_fp16 = slice_by_index(begin = var_35799_begin_0, end = var_35799_end_0, end_mask = var_35799_end_mask_0, x = var_35237_cast_fp16)[name = tensor("op_35799_cast_fp16")]; tensor k_45_perm_0 = const()[name = tensor("k_45_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_35804_begin_0 = const()[name = tensor("op_35804_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35804_end_0 = const()[name = tensor("op_35804_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_35804_end_mask_0 = const()[name = tensor("op_35804_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = key_45_cast_fp16)[name = tensor("transpose_9")]; tensor var_35804_cast_fp16 = slice_by_index(begin = var_35804_begin_0, end = var_35804_end_0, end_mask = var_35804_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35804_cast_fp16")]; tensor var_35808_begin_0 = const()[name = tensor("op_35808_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_35808_end_0 = const()[name = tensor("op_35808_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_35808_end_mask_0 = const()[name = tensor("op_35808_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35808_cast_fp16 = slice_by_index(begin = var_35808_begin_0, end = var_35808_end_0, end_mask = var_35808_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35808_cast_fp16")]; tensor var_35812_begin_0 = const()[name = tensor("op_35812_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_35812_end_0 = const()[name = tensor("op_35812_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_35812_end_mask_0 = const()[name = tensor("op_35812_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35812_cast_fp16 = slice_by_index(begin = var_35812_begin_0, end = var_35812_end_0, end_mask = var_35812_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35812_cast_fp16")]; tensor var_35816_begin_0 = const()[name = tensor("op_35816_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_35816_end_0 = const()[name = tensor("op_35816_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_35816_end_mask_0 = const()[name = tensor("op_35816_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35816_cast_fp16 = slice_by_index(begin = var_35816_begin_0, end = var_35816_end_0, end_mask = var_35816_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35816_cast_fp16")]; tensor var_35820_begin_0 = const()[name = tensor("op_35820_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_35820_end_0 = const()[name = tensor("op_35820_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_35820_end_mask_0 = const()[name = tensor("op_35820_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35820_cast_fp16 = slice_by_index(begin = var_35820_begin_0, end = var_35820_end_0, end_mask = var_35820_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35820_cast_fp16")]; tensor var_35824_begin_0 = const()[name = tensor("op_35824_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_35824_end_0 = const()[name = tensor("op_35824_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_35824_end_mask_0 = const()[name = tensor("op_35824_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35824_cast_fp16 = slice_by_index(begin = var_35824_begin_0, end = var_35824_end_0, end_mask = var_35824_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35824_cast_fp16")]; tensor var_35828_begin_0 = const()[name = tensor("op_35828_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_35828_end_0 = const()[name = tensor("op_35828_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_35828_end_mask_0 = const()[name = tensor("op_35828_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35828_cast_fp16 = slice_by_index(begin = var_35828_begin_0, end = var_35828_end_0, end_mask = var_35828_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35828_cast_fp16")]; tensor var_35832_begin_0 = const()[name = tensor("op_35832_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_35832_end_0 = const()[name = tensor("op_35832_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_35832_end_mask_0 = const()[name = tensor("op_35832_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35832_cast_fp16 = slice_by_index(begin = var_35832_begin_0, end = var_35832_end_0, end_mask = var_35832_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35832_cast_fp16")]; tensor var_35836_begin_0 = const()[name = tensor("op_35836_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_35836_end_0 = const()[name = tensor("op_35836_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_35836_end_mask_0 = const()[name = tensor("op_35836_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35836_cast_fp16 = slice_by_index(begin = var_35836_begin_0, end = var_35836_end_0, end_mask = var_35836_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35836_cast_fp16")]; tensor var_35840_begin_0 = const()[name = tensor("op_35840_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_35840_end_0 = const()[name = tensor("op_35840_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_35840_end_mask_0 = const()[name = tensor("op_35840_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35840_cast_fp16 = slice_by_index(begin = var_35840_begin_0, end = var_35840_end_0, end_mask = var_35840_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35840_cast_fp16")]; tensor var_35844_begin_0 = const()[name = tensor("op_35844_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_35844_end_0 = const()[name = tensor("op_35844_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_35844_end_mask_0 = const()[name = tensor("op_35844_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35844_cast_fp16 = slice_by_index(begin = var_35844_begin_0, end = var_35844_end_0, end_mask = var_35844_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35844_cast_fp16")]; tensor var_35848_begin_0 = const()[name = tensor("op_35848_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_35848_end_0 = const()[name = tensor("op_35848_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_35848_end_mask_0 = const()[name = tensor("op_35848_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35848_cast_fp16 = slice_by_index(begin = var_35848_begin_0, end = var_35848_end_0, end_mask = var_35848_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35848_cast_fp16")]; tensor var_35852_begin_0 = const()[name = tensor("op_35852_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_35852_end_0 = const()[name = tensor("op_35852_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_35852_end_mask_0 = const()[name = tensor("op_35852_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35852_cast_fp16 = slice_by_index(begin = var_35852_begin_0, end = var_35852_end_0, end_mask = var_35852_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35852_cast_fp16")]; tensor var_35856_begin_0 = const()[name = tensor("op_35856_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_35856_end_0 = const()[name = tensor("op_35856_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_35856_end_mask_0 = const()[name = tensor("op_35856_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35856_cast_fp16 = slice_by_index(begin = var_35856_begin_0, end = var_35856_end_0, end_mask = var_35856_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35856_cast_fp16")]; tensor var_35860_begin_0 = const()[name = tensor("op_35860_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_35860_end_0 = const()[name = tensor("op_35860_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_35860_end_mask_0 = const()[name = tensor("op_35860_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35860_cast_fp16 = slice_by_index(begin = var_35860_begin_0, end = var_35860_end_0, end_mask = var_35860_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35860_cast_fp16")]; tensor var_35864_begin_0 = const()[name = tensor("op_35864_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_35864_end_0 = const()[name = tensor("op_35864_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_35864_end_mask_0 = const()[name = tensor("op_35864_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35864_cast_fp16 = slice_by_index(begin = var_35864_begin_0, end = var_35864_end_0, end_mask = var_35864_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35864_cast_fp16")]; tensor var_35868_begin_0 = const()[name = tensor("op_35868_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_35868_end_0 = const()[name = tensor("op_35868_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_35868_end_mask_0 = const()[name = tensor("op_35868_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35868_cast_fp16 = slice_by_index(begin = var_35868_begin_0, end = var_35868_end_0, end_mask = var_35868_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35868_cast_fp16")]; tensor var_35872_begin_0 = const()[name = tensor("op_35872_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_35872_end_0 = const()[name = tensor("op_35872_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_35872_end_mask_0 = const()[name = tensor("op_35872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35872_cast_fp16 = slice_by_index(begin = var_35872_begin_0, end = var_35872_end_0, end_mask = var_35872_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35872_cast_fp16")]; tensor var_35876_begin_0 = const()[name = tensor("op_35876_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_35876_end_0 = const()[name = tensor("op_35876_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_35876_end_mask_0 = const()[name = tensor("op_35876_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35876_cast_fp16 = slice_by_index(begin = var_35876_begin_0, end = var_35876_end_0, end_mask = var_35876_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35876_cast_fp16")]; tensor var_35880_begin_0 = const()[name = tensor("op_35880_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_35880_end_0 = const()[name = tensor("op_35880_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_35880_end_mask_0 = const()[name = tensor("op_35880_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_35880_cast_fp16 = slice_by_index(begin = var_35880_begin_0, end = var_35880_end_0, end_mask = var_35880_end_mask_0, x = k_45_cast_fp16)[name = tensor("op_35880_cast_fp16")]; tensor var_35882_begin_0 = const()[name = tensor("op_35882_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_35882_end_0 = const()[name = tensor("op_35882_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_35882_end_mask_0 = const()[name = tensor("op_35882_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35882_cast_fp16 = slice_by_index(begin = var_35882_begin_0, end = var_35882_end_0, end_mask = var_35882_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35882_cast_fp16")]; tensor var_35886_begin_0 = const()[name = tensor("op_35886_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_35886_end_0 = const()[name = tensor("op_35886_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_35886_end_mask_0 = const()[name = tensor("op_35886_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35886_cast_fp16 = slice_by_index(begin = var_35886_begin_0, end = var_35886_end_0, end_mask = var_35886_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35886_cast_fp16")]; tensor var_35890_begin_0 = const()[name = tensor("op_35890_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_35890_end_0 = const()[name = tensor("op_35890_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_35890_end_mask_0 = const()[name = tensor("op_35890_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35890_cast_fp16 = slice_by_index(begin = var_35890_begin_0, end = var_35890_end_0, end_mask = var_35890_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35890_cast_fp16")]; tensor var_35894_begin_0 = const()[name = tensor("op_35894_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_35894_end_0 = const()[name = tensor("op_35894_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_35894_end_mask_0 = const()[name = tensor("op_35894_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35894_cast_fp16 = slice_by_index(begin = var_35894_begin_0, end = var_35894_end_0, end_mask = var_35894_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35894_cast_fp16")]; tensor var_35898_begin_0 = const()[name = tensor("op_35898_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_35898_end_0 = const()[name = tensor("op_35898_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_35898_end_mask_0 = const()[name = tensor("op_35898_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35898_cast_fp16 = slice_by_index(begin = var_35898_begin_0, end = var_35898_end_0, end_mask = var_35898_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35898_cast_fp16")]; tensor var_35902_begin_0 = const()[name = tensor("op_35902_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_35902_end_0 = const()[name = tensor("op_35902_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_35902_end_mask_0 = const()[name = tensor("op_35902_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35902_cast_fp16 = slice_by_index(begin = var_35902_begin_0, end = var_35902_end_0, end_mask = var_35902_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35902_cast_fp16")]; tensor var_35906_begin_0 = const()[name = tensor("op_35906_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_35906_end_0 = const()[name = tensor("op_35906_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_35906_end_mask_0 = const()[name = tensor("op_35906_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35906_cast_fp16 = slice_by_index(begin = var_35906_begin_0, end = var_35906_end_0, end_mask = var_35906_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35906_cast_fp16")]; tensor var_35910_begin_0 = const()[name = tensor("op_35910_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_35910_end_0 = const()[name = tensor("op_35910_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_35910_end_mask_0 = const()[name = tensor("op_35910_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35910_cast_fp16 = slice_by_index(begin = var_35910_begin_0, end = var_35910_end_0, end_mask = var_35910_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35910_cast_fp16")]; tensor var_35914_begin_0 = const()[name = tensor("op_35914_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_35914_end_0 = const()[name = tensor("op_35914_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_35914_end_mask_0 = const()[name = tensor("op_35914_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35914_cast_fp16 = slice_by_index(begin = var_35914_begin_0, end = var_35914_end_0, end_mask = var_35914_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35914_cast_fp16")]; tensor var_35918_begin_0 = const()[name = tensor("op_35918_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_35918_end_0 = const()[name = tensor("op_35918_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_35918_end_mask_0 = const()[name = tensor("op_35918_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35918_cast_fp16 = slice_by_index(begin = var_35918_begin_0, end = var_35918_end_0, end_mask = var_35918_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35918_cast_fp16")]; tensor var_35922_begin_0 = const()[name = tensor("op_35922_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_35922_end_0 = const()[name = tensor("op_35922_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_35922_end_mask_0 = const()[name = tensor("op_35922_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35922_cast_fp16 = slice_by_index(begin = var_35922_begin_0, end = var_35922_end_0, end_mask = var_35922_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35922_cast_fp16")]; tensor var_35926_begin_0 = const()[name = tensor("op_35926_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_35926_end_0 = const()[name = tensor("op_35926_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_35926_end_mask_0 = const()[name = tensor("op_35926_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35926_cast_fp16 = slice_by_index(begin = var_35926_begin_0, end = var_35926_end_0, end_mask = var_35926_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35926_cast_fp16")]; tensor var_35930_begin_0 = const()[name = tensor("op_35930_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_35930_end_0 = const()[name = tensor("op_35930_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_35930_end_mask_0 = const()[name = tensor("op_35930_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35930_cast_fp16 = slice_by_index(begin = var_35930_begin_0, end = var_35930_end_0, end_mask = var_35930_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35930_cast_fp16")]; tensor var_35934_begin_0 = const()[name = tensor("op_35934_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_35934_end_0 = const()[name = tensor("op_35934_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_35934_end_mask_0 = const()[name = tensor("op_35934_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35934_cast_fp16 = slice_by_index(begin = var_35934_begin_0, end = var_35934_end_0, end_mask = var_35934_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35934_cast_fp16")]; tensor var_35938_begin_0 = const()[name = tensor("op_35938_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_35938_end_0 = const()[name = tensor("op_35938_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_35938_end_mask_0 = const()[name = tensor("op_35938_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35938_cast_fp16 = slice_by_index(begin = var_35938_begin_0, end = var_35938_end_0, end_mask = var_35938_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35938_cast_fp16")]; tensor var_35942_begin_0 = const()[name = tensor("op_35942_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_35942_end_0 = const()[name = tensor("op_35942_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_35942_end_mask_0 = const()[name = tensor("op_35942_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35942_cast_fp16 = slice_by_index(begin = var_35942_begin_0, end = var_35942_end_0, end_mask = var_35942_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35942_cast_fp16")]; tensor var_35946_begin_0 = const()[name = tensor("op_35946_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_35946_end_0 = const()[name = tensor("op_35946_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_35946_end_mask_0 = const()[name = tensor("op_35946_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35946_cast_fp16 = slice_by_index(begin = var_35946_begin_0, end = var_35946_end_0, end_mask = var_35946_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35946_cast_fp16")]; tensor var_35950_begin_0 = const()[name = tensor("op_35950_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_35950_end_0 = const()[name = tensor("op_35950_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_35950_end_mask_0 = const()[name = tensor("op_35950_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35950_cast_fp16 = slice_by_index(begin = var_35950_begin_0, end = var_35950_end_0, end_mask = var_35950_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35950_cast_fp16")]; tensor var_35954_begin_0 = const()[name = tensor("op_35954_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_35954_end_0 = const()[name = tensor("op_35954_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_35954_end_mask_0 = const()[name = tensor("op_35954_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35954_cast_fp16 = slice_by_index(begin = var_35954_begin_0, end = var_35954_end_0, end_mask = var_35954_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35954_cast_fp16")]; tensor var_35958_begin_0 = const()[name = tensor("op_35958_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_35958_end_0 = const()[name = tensor("op_35958_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_35958_end_mask_0 = const()[name = tensor("op_35958_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_35958_cast_fp16 = slice_by_index(begin = var_35958_begin_0, end = var_35958_end_0, end_mask = var_35958_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_35958_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3521_equation_0, values = (var_35804_cast_fp16, var_35246_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3523_equation_0, values = (var_35804_cast_fp16, var_35253_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3525_equation_0, values = (var_35804_cast_fp16, var_35260_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3527_equation_0, values = (var_35804_cast_fp16, var_35267_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3529_equation_0, values = (var_35808_cast_fp16, var_35274_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3531_equation_0, values = (var_35808_cast_fp16, var_35281_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3533_equation_0, values = (var_35808_cast_fp16, var_35288_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3535_equation_0, values = (var_35808_cast_fp16, var_35295_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3537_equation_0, values = (var_35812_cast_fp16, var_35302_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3539_equation_0, values = (var_35812_cast_fp16, var_35309_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3541_equation_0, values = (var_35812_cast_fp16, var_35316_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3543_equation_0, values = (var_35812_cast_fp16, var_35323_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3545_equation_0, values = (var_35816_cast_fp16, var_35330_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3547_equation_0, values = (var_35816_cast_fp16, var_35337_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3549_equation_0, values = (var_35816_cast_fp16, var_35344_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3551_equation_0, values = (var_35816_cast_fp16, var_35351_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3553_equation_0, values = (var_35820_cast_fp16, var_35358_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3555_equation_0, values = (var_35820_cast_fp16, var_35365_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3557_equation_0, values = (var_35820_cast_fp16, var_35372_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3559_equation_0, values = (var_35820_cast_fp16, var_35379_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3559_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3561_equation_0, values = (var_35824_cast_fp16, var_35386_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3563_equation_0, values = (var_35824_cast_fp16, var_35393_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3565_equation_0, values = (var_35824_cast_fp16, var_35400_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3567_equation_0, values = (var_35824_cast_fp16, var_35407_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3569_equation_0, values = (var_35828_cast_fp16, var_35414_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3571_equation_0, values = (var_35828_cast_fp16, var_35421_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3573_equation_0, values = (var_35828_cast_fp16, var_35428_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3575_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3575_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3575_equation_0, values = (var_35828_cast_fp16, var_35435_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3575_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3577_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3577_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3577_equation_0, values = (var_35832_cast_fp16, var_35442_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3577_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3579_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3579_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3579_equation_0, values = (var_35832_cast_fp16, var_35449_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3579_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3581_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3581_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3581_equation_0, values = (var_35832_cast_fp16, var_35456_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3581_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3583_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3583_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3583_equation_0, values = (var_35832_cast_fp16, var_35463_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3583_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3585_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3585_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3585_equation_0, values = (var_35836_cast_fp16, var_35470_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3585_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3587_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3587_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3587_equation_0, values = (var_35836_cast_fp16, var_35477_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3587_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3589_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3589_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3589_equation_0, values = (var_35836_cast_fp16, var_35484_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3589_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3591_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3591_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3591_equation_0, values = (var_35836_cast_fp16, var_35491_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3591_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3593_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3593_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3593_equation_0, values = (var_35840_cast_fp16, var_35498_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3593_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3595_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3595_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3595_equation_0, values = (var_35840_cast_fp16, var_35505_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3595_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3597_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3597_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3597_equation_0, values = (var_35840_cast_fp16, var_35512_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3597_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3599_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3599_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3599_equation_0, values = (var_35840_cast_fp16, var_35519_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3599_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3601_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3601_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3601_equation_0, values = (var_35844_cast_fp16, var_35526_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3601_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3603_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3603_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3603_equation_0, values = (var_35844_cast_fp16, var_35533_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3603_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3605_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3605_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3605_equation_0, values = (var_35844_cast_fp16, var_35540_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3605_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3607_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3607_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3607_equation_0, values = (var_35844_cast_fp16, var_35547_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3607_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3609_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3609_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3609_equation_0, values = (var_35848_cast_fp16, var_35554_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3609_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3611_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3611_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3611_equation_0, values = (var_35848_cast_fp16, var_35561_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3611_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3613_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3613_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3613_equation_0, values = (var_35848_cast_fp16, var_35568_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3613_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3615_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3615_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3615_equation_0, values = (var_35848_cast_fp16, var_35575_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3615_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3617_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3617_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3617_equation_0, values = (var_35852_cast_fp16, var_35582_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3617_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3619_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3619_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3619_equation_0, values = (var_35852_cast_fp16, var_35589_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3619_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3621_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3621_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3621_equation_0, values = (var_35852_cast_fp16, var_35596_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3621_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3623_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3623_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3623_equation_0, values = (var_35852_cast_fp16, var_35603_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3623_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3625_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3625_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3625_equation_0, values = (var_35856_cast_fp16, var_35610_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3625_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3627_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3627_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3627_equation_0, values = (var_35856_cast_fp16, var_35617_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3627_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3629_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3629_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3629_equation_0, values = (var_35856_cast_fp16, var_35624_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3629_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3631_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3631_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3631_equation_0, values = (var_35856_cast_fp16, var_35631_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3631_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3633_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3633_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3633_equation_0, values = (var_35860_cast_fp16, var_35638_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3633_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3635_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3635_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3635_equation_0, values = (var_35860_cast_fp16, var_35645_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3635_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3637_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3637_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3637_equation_0, values = (var_35860_cast_fp16, var_35652_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3637_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3639_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3639_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3639_equation_0, values = (var_35860_cast_fp16, var_35659_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3639_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3641_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3641_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3641_equation_0, values = (var_35864_cast_fp16, var_35666_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3641_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3643_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3643_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3643_equation_0, values = (var_35864_cast_fp16, var_35673_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3643_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3645_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3645_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3645_equation_0, values = (var_35864_cast_fp16, var_35680_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3645_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3647_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3647_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3647_equation_0, values = (var_35864_cast_fp16, var_35687_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3647_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3649_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3649_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3649_equation_0, values = (var_35868_cast_fp16, var_35694_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3649_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3651_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3651_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3651_equation_0, values = (var_35868_cast_fp16, var_35701_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3651_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3653_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3653_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3653_equation_0, values = (var_35868_cast_fp16, var_35708_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3653_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3655_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3655_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3655_equation_0, values = (var_35868_cast_fp16, var_35715_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3655_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3657_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3657_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3657_equation_0, values = (var_35872_cast_fp16, var_35722_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3657_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3659_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3659_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3659_equation_0, values = (var_35872_cast_fp16, var_35729_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3659_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3661_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3661_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3661_equation_0, values = (var_35872_cast_fp16, var_35736_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3661_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3663_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3663_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3663_equation_0, values = (var_35872_cast_fp16, var_35743_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3663_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3665_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3665_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3665_equation_0, values = (var_35876_cast_fp16, var_35750_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3665_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3667_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3667_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3667_equation_0, values = (var_35876_cast_fp16, var_35757_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3667_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3669_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3669_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3669_equation_0, values = (var_35876_cast_fp16, var_35764_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3669_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3671_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3671_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3671_equation_0, values = (var_35876_cast_fp16, var_35771_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3671_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3673_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3673_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3673_equation_0, values = (var_35880_cast_fp16, var_35778_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3673_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3675_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3675_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3675_equation_0, values = (var_35880_cast_fp16, var_35785_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3675_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3677_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3677_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3677_equation_0, values = (var_35880_cast_fp16, var_35792_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3677_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3679_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3679_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3679_equation_0, values = (var_35880_cast_fp16, var_35799_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3679_cast_fp16")]; tensor var_36121_to_fp16 = const()[name = tensor("op_36121_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3521_cast_fp16, y = var_36121_to_fp16)[name = tensor("aw_chunk_3521_cast_fp16")]; tensor var_36123_to_fp16 = const()[name = tensor("op_36123_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3523_cast_fp16, y = var_36123_to_fp16)[name = tensor("aw_chunk_3523_cast_fp16")]; tensor var_36125_to_fp16 = const()[name = tensor("op_36125_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3525_cast_fp16, y = var_36125_to_fp16)[name = tensor("aw_chunk_3525_cast_fp16")]; tensor var_36127_to_fp16 = const()[name = tensor("op_36127_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3527_cast_fp16, y = var_36127_to_fp16)[name = tensor("aw_chunk_3527_cast_fp16")]; tensor var_36129_to_fp16 = const()[name = tensor("op_36129_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3529_cast_fp16, y = var_36129_to_fp16)[name = tensor("aw_chunk_3529_cast_fp16")]; tensor var_36131_to_fp16 = const()[name = tensor("op_36131_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3531_cast_fp16, y = var_36131_to_fp16)[name = tensor("aw_chunk_3531_cast_fp16")]; tensor var_36133_to_fp16 = const()[name = tensor("op_36133_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3533_cast_fp16, y = var_36133_to_fp16)[name = tensor("aw_chunk_3533_cast_fp16")]; tensor var_36135_to_fp16 = const()[name = tensor("op_36135_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3535_cast_fp16, y = var_36135_to_fp16)[name = tensor("aw_chunk_3535_cast_fp16")]; tensor var_36137_to_fp16 = const()[name = tensor("op_36137_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3537_cast_fp16, y = var_36137_to_fp16)[name = tensor("aw_chunk_3537_cast_fp16")]; tensor var_36139_to_fp16 = const()[name = tensor("op_36139_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3539_cast_fp16, y = var_36139_to_fp16)[name = tensor("aw_chunk_3539_cast_fp16")]; tensor var_36141_to_fp16 = const()[name = tensor("op_36141_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3541_cast_fp16, y = var_36141_to_fp16)[name = tensor("aw_chunk_3541_cast_fp16")]; tensor var_36143_to_fp16 = const()[name = tensor("op_36143_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3543_cast_fp16, y = var_36143_to_fp16)[name = tensor("aw_chunk_3543_cast_fp16")]; tensor var_36145_to_fp16 = const()[name = tensor("op_36145_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3545_cast_fp16, y = var_36145_to_fp16)[name = tensor("aw_chunk_3545_cast_fp16")]; tensor var_36147_to_fp16 = const()[name = tensor("op_36147_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3547_cast_fp16, y = var_36147_to_fp16)[name = tensor("aw_chunk_3547_cast_fp16")]; tensor var_36149_to_fp16 = const()[name = tensor("op_36149_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3549_cast_fp16, y = var_36149_to_fp16)[name = tensor("aw_chunk_3549_cast_fp16")]; tensor var_36151_to_fp16 = const()[name = tensor("op_36151_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3551_cast_fp16, y = var_36151_to_fp16)[name = tensor("aw_chunk_3551_cast_fp16")]; tensor var_36153_to_fp16 = const()[name = tensor("op_36153_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3553_cast_fp16, y = var_36153_to_fp16)[name = tensor("aw_chunk_3553_cast_fp16")]; tensor var_36155_to_fp16 = const()[name = tensor("op_36155_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3555_cast_fp16, y = var_36155_to_fp16)[name = tensor("aw_chunk_3555_cast_fp16")]; tensor var_36157_to_fp16 = const()[name = tensor("op_36157_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3557_cast_fp16, y = var_36157_to_fp16)[name = tensor("aw_chunk_3557_cast_fp16")]; tensor var_36159_to_fp16 = const()[name = tensor("op_36159_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3559_cast_fp16, y = var_36159_to_fp16)[name = tensor("aw_chunk_3559_cast_fp16")]; tensor var_36161_to_fp16 = const()[name = tensor("op_36161_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3561_cast_fp16, y = var_36161_to_fp16)[name = tensor("aw_chunk_3561_cast_fp16")]; tensor var_36163_to_fp16 = const()[name = tensor("op_36163_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3563_cast_fp16, y = var_36163_to_fp16)[name = tensor("aw_chunk_3563_cast_fp16")]; tensor var_36165_to_fp16 = const()[name = tensor("op_36165_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3565_cast_fp16, y = var_36165_to_fp16)[name = tensor("aw_chunk_3565_cast_fp16")]; tensor var_36167_to_fp16 = const()[name = tensor("op_36167_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3567_cast_fp16, y = var_36167_to_fp16)[name = tensor("aw_chunk_3567_cast_fp16")]; tensor var_36169_to_fp16 = const()[name = tensor("op_36169_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3569_cast_fp16, y = var_36169_to_fp16)[name = tensor("aw_chunk_3569_cast_fp16")]; tensor var_36171_to_fp16 = const()[name = tensor("op_36171_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3571_cast_fp16, y = var_36171_to_fp16)[name = tensor("aw_chunk_3571_cast_fp16")]; tensor var_36173_to_fp16 = const()[name = tensor("op_36173_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3573_cast_fp16, y = var_36173_to_fp16)[name = tensor("aw_chunk_3573_cast_fp16")]; tensor var_36175_to_fp16 = const()[name = tensor("op_36175_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3575_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3575_cast_fp16, y = var_36175_to_fp16)[name = tensor("aw_chunk_3575_cast_fp16")]; tensor var_36177_to_fp16 = const()[name = tensor("op_36177_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3577_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3577_cast_fp16, y = var_36177_to_fp16)[name = tensor("aw_chunk_3577_cast_fp16")]; tensor var_36179_to_fp16 = const()[name = tensor("op_36179_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3579_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3579_cast_fp16, y = var_36179_to_fp16)[name = tensor("aw_chunk_3579_cast_fp16")]; tensor var_36181_to_fp16 = const()[name = tensor("op_36181_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3581_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3581_cast_fp16, y = var_36181_to_fp16)[name = tensor("aw_chunk_3581_cast_fp16")]; tensor var_36183_to_fp16 = const()[name = tensor("op_36183_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3583_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3583_cast_fp16, y = var_36183_to_fp16)[name = tensor("aw_chunk_3583_cast_fp16")]; tensor var_36185_to_fp16 = const()[name = tensor("op_36185_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3585_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3585_cast_fp16, y = var_36185_to_fp16)[name = tensor("aw_chunk_3585_cast_fp16")]; tensor var_36187_to_fp16 = const()[name = tensor("op_36187_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3587_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3587_cast_fp16, y = var_36187_to_fp16)[name = tensor("aw_chunk_3587_cast_fp16")]; tensor var_36189_to_fp16 = const()[name = tensor("op_36189_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3589_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3589_cast_fp16, y = var_36189_to_fp16)[name = tensor("aw_chunk_3589_cast_fp16")]; tensor var_36191_to_fp16 = const()[name = tensor("op_36191_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3591_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3591_cast_fp16, y = var_36191_to_fp16)[name = tensor("aw_chunk_3591_cast_fp16")]; tensor var_36193_to_fp16 = const()[name = tensor("op_36193_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3593_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3593_cast_fp16, y = var_36193_to_fp16)[name = tensor("aw_chunk_3593_cast_fp16")]; tensor var_36195_to_fp16 = const()[name = tensor("op_36195_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3595_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3595_cast_fp16, y = var_36195_to_fp16)[name = tensor("aw_chunk_3595_cast_fp16")]; tensor var_36197_to_fp16 = const()[name = tensor("op_36197_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3597_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3597_cast_fp16, y = var_36197_to_fp16)[name = tensor("aw_chunk_3597_cast_fp16")]; tensor var_36199_to_fp16 = const()[name = tensor("op_36199_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3599_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3599_cast_fp16, y = var_36199_to_fp16)[name = tensor("aw_chunk_3599_cast_fp16")]; tensor var_36201_to_fp16 = const()[name = tensor("op_36201_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3601_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3601_cast_fp16, y = var_36201_to_fp16)[name = tensor("aw_chunk_3601_cast_fp16")]; tensor var_36203_to_fp16 = const()[name = tensor("op_36203_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3603_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3603_cast_fp16, y = var_36203_to_fp16)[name = tensor("aw_chunk_3603_cast_fp16")]; tensor var_36205_to_fp16 = const()[name = tensor("op_36205_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3605_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3605_cast_fp16, y = var_36205_to_fp16)[name = tensor("aw_chunk_3605_cast_fp16")]; tensor var_36207_to_fp16 = const()[name = tensor("op_36207_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3607_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3607_cast_fp16, y = var_36207_to_fp16)[name = tensor("aw_chunk_3607_cast_fp16")]; tensor var_36209_to_fp16 = const()[name = tensor("op_36209_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3609_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3609_cast_fp16, y = var_36209_to_fp16)[name = tensor("aw_chunk_3609_cast_fp16")]; tensor var_36211_to_fp16 = const()[name = tensor("op_36211_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3611_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3611_cast_fp16, y = var_36211_to_fp16)[name = tensor("aw_chunk_3611_cast_fp16")]; tensor var_36213_to_fp16 = const()[name = tensor("op_36213_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3613_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3613_cast_fp16, y = var_36213_to_fp16)[name = tensor("aw_chunk_3613_cast_fp16")]; tensor var_36215_to_fp16 = const()[name = tensor("op_36215_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3615_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3615_cast_fp16, y = var_36215_to_fp16)[name = tensor("aw_chunk_3615_cast_fp16")]; tensor var_36217_to_fp16 = const()[name = tensor("op_36217_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3617_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3617_cast_fp16, y = var_36217_to_fp16)[name = tensor("aw_chunk_3617_cast_fp16")]; tensor var_36219_to_fp16 = const()[name = tensor("op_36219_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3619_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3619_cast_fp16, y = var_36219_to_fp16)[name = tensor("aw_chunk_3619_cast_fp16")]; tensor var_36221_to_fp16 = const()[name = tensor("op_36221_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3621_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3621_cast_fp16, y = var_36221_to_fp16)[name = tensor("aw_chunk_3621_cast_fp16")]; tensor var_36223_to_fp16 = const()[name = tensor("op_36223_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3623_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3623_cast_fp16, y = var_36223_to_fp16)[name = tensor("aw_chunk_3623_cast_fp16")]; tensor var_36225_to_fp16 = const()[name = tensor("op_36225_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3625_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3625_cast_fp16, y = var_36225_to_fp16)[name = tensor("aw_chunk_3625_cast_fp16")]; tensor var_36227_to_fp16 = const()[name = tensor("op_36227_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3627_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3627_cast_fp16, y = var_36227_to_fp16)[name = tensor("aw_chunk_3627_cast_fp16")]; tensor var_36229_to_fp16 = const()[name = tensor("op_36229_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3629_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3629_cast_fp16, y = var_36229_to_fp16)[name = tensor("aw_chunk_3629_cast_fp16")]; tensor var_36231_to_fp16 = const()[name = tensor("op_36231_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3631_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3631_cast_fp16, y = var_36231_to_fp16)[name = tensor("aw_chunk_3631_cast_fp16")]; tensor var_36233_to_fp16 = const()[name = tensor("op_36233_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3633_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3633_cast_fp16, y = var_36233_to_fp16)[name = tensor("aw_chunk_3633_cast_fp16")]; tensor var_36235_to_fp16 = const()[name = tensor("op_36235_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3635_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3635_cast_fp16, y = var_36235_to_fp16)[name = tensor("aw_chunk_3635_cast_fp16")]; tensor var_36237_to_fp16 = const()[name = tensor("op_36237_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3637_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3637_cast_fp16, y = var_36237_to_fp16)[name = tensor("aw_chunk_3637_cast_fp16")]; tensor var_36239_to_fp16 = const()[name = tensor("op_36239_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3639_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3639_cast_fp16, y = var_36239_to_fp16)[name = tensor("aw_chunk_3639_cast_fp16")]; tensor var_36241_to_fp16 = const()[name = tensor("op_36241_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3641_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3641_cast_fp16, y = var_36241_to_fp16)[name = tensor("aw_chunk_3641_cast_fp16")]; tensor var_36243_to_fp16 = const()[name = tensor("op_36243_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3643_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3643_cast_fp16, y = var_36243_to_fp16)[name = tensor("aw_chunk_3643_cast_fp16")]; tensor var_36245_to_fp16 = const()[name = tensor("op_36245_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3645_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3645_cast_fp16, y = var_36245_to_fp16)[name = tensor("aw_chunk_3645_cast_fp16")]; tensor var_36247_to_fp16 = const()[name = tensor("op_36247_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3647_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3647_cast_fp16, y = var_36247_to_fp16)[name = tensor("aw_chunk_3647_cast_fp16")]; tensor var_36249_to_fp16 = const()[name = tensor("op_36249_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3649_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3649_cast_fp16, y = var_36249_to_fp16)[name = tensor("aw_chunk_3649_cast_fp16")]; tensor var_36251_to_fp16 = const()[name = tensor("op_36251_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3651_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3651_cast_fp16, y = var_36251_to_fp16)[name = tensor("aw_chunk_3651_cast_fp16")]; tensor var_36253_to_fp16 = const()[name = tensor("op_36253_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3653_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3653_cast_fp16, y = var_36253_to_fp16)[name = tensor("aw_chunk_3653_cast_fp16")]; tensor var_36255_to_fp16 = const()[name = tensor("op_36255_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3655_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3655_cast_fp16, y = var_36255_to_fp16)[name = tensor("aw_chunk_3655_cast_fp16")]; tensor var_36257_to_fp16 = const()[name = tensor("op_36257_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3657_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3657_cast_fp16, y = var_36257_to_fp16)[name = tensor("aw_chunk_3657_cast_fp16")]; tensor var_36259_to_fp16 = const()[name = tensor("op_36259_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3659_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3659_cast_fp16, y = var_36259_to_fp16)[name = tensor("aw_chunk_3659_cast_fp16")]; tensor var_36261_to_fp16 = const()[name = tensor("op_36261_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3661_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3661_cast_fp16, y = var_36261_to_fp16)[name = tensor("aw_chunk_3661_cast_fp16")]; tensor var_36263_to_fp16 = const()[name = tensor("op_36263_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3663_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3663_cast_fp16, y = var_36263_to_fp16)[name = tensor("aw_chunk_3663_cast_fp16")]; tensor var_36265_to_fp16 = const()[name = tensor("op_36265_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3665_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3665_cast_fp16, y = var_36265_to_fp16)[name = tensor("aw_chunk_3665_cast_fp16")]; tensor var_36267_to_fp16 = const()[name = tensor("op_36267_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3667_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3667_cast_fp16, y = var_36267_to_fp16)[name = tensor("aw_chunk_3667_cast_fp16")]; tensor var_36269_to_fp16 = const()[name = tensor("op_36269_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3669_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3669_cast_fp16, y = var_36269_to_fp16)[name = tensor("aw_chunk_3669_cast_fp16")]; tensor var_36271_to_fp16 = const()[name = tensor("op_36271_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3671_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3671_cast_fp16, y = var_36271_to_fp16)[name = tensor("aw_chunk_3671_cast_fp16")]; tensor var_36273_to_fp16 = const()[name = tensor("op_36273_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3673_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3673_cast_fp16, y = var_36273_to_fp16)[name = tensor("aw_chunk_3673_cast_fp16")]; tensor var_36275_to_fp16 = const()[name = tensor("op_36275_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3675_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3675_cast_fp16, y = var_36275_to_fp16)[name = tensor("aw_chunk_3675_cast_fp16")]; tensor var_36277_to_fp16 = const()[name = tensor("op_36277_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3677_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3677_cast_fp16, y = var_36277_to_fp16)[name = tensor("aw_chunk_3677_cast_fp16")]; tensor var_36279_to_fp16 = const()[name = tensor("op_36279_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3679_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3679_cast_fp16, y = var_36279_to_fp16)[name = tensor("aw_chunk_3679_cast_fp16")]; tensor var_36281_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3521_cast_fp16)[name = tensor("op_36281_cast_fp16")]; tensor var_36282_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3523_cast_fp16)[name = tensor("op_36282_cast_fp16")]; tensor var_36283_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3525_cast_fp16)[name = tensor("op_36283_cast_fp16")]; tensor var_36284_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3527_cast_fp16)[name = tensor("op_36284_cast_fp16")]; tensor var_36285_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3529_cast_fp16)[name = tensor("op_36285_cast_fp16")]; tensor var_36286_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3531_cast_fp16)[name = tensor("op_36286_cast_fp16")]; tensor var_36287_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3533_cast_fp16)[name = tensor("op_36287_cast_fp16")]; tensor var_36288_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3535_cast_fp16)[name = tensor("op_36288_cast_fp16")]; tensor var_36289_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3537_cast_fp16)[name = tensor("op_36289_cast_fp16")]; tensor var_36290_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3539_cast_fp16)[name = tensor("op_36290_cast_fp16")]; tensor var_36291_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3541_cast_fp16)[name = tensor("op_36291_cast_fp16")]; tensor var_36292_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3543_cast_fp16)[name = tensor("op_36292_cast_fp16")]; tensor var_36293_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3545_cast_fp16)[name = tensor("op_36293_cast_fp16")]; tensor var_36294_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3547_cast_fp16)[name = tensor("op_36294_cast_fp16")]; tensor var_36295_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3549_cast_fp16)[name = tensor("op_36295_cast_fp16")]; tensor var_36296_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3551_cast_fp16)[name = tensor("op_36296_cast_fp16")]; tensor var_36297_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3553_cast_fp16)[name = tensor("op_36297_cast_fp16")]; tensor var_36298_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3555_cast_fp16)[name = tensor("op_36298_cast_fp16")]; tensor var_36299_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3557_cast_fp16)[name = tensor("op_36299_cast_fp16")]; tensor var_36300_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3559_cast_fp16)[name = tensor("op_36300_cast_fp16")]; tensor var_36301_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3561_cast_fp16)[name = tensor("op_36301_cast_fp16")]; tensor var_36302_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3563_cast_fp16)[name = tensor("op_36302_cast_fp16")]; tensor var_36303_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3565_cast_fp16)[name = tensor("op_36303_cast_fp16")]; tensor var_36304_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3567_cast_fp16)[name = tensor("op_36304_cast_fp16")]; tensor var_36305_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3569_cast_fp16)[name = tensor("op_36305_cast_fp16")]; tensor var_36306_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3571_cast_fp16)[name = tensor("op_36306_cast_fp16")]; tensor var_36307_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3573_cast_fp16)[name = tensor("op_36307_cast_fp16")]; tensor var_36308_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3575_cast_fp16)[name = tensor("op_36308_cast_fp16")]; tensor var_36309_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3577_cast_fp16)[name = tensor("op_36309_cast_fp16")]; tensor var_36310_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3579_cast_fp16)[name = tensor("op_36310_cast_fp16")]; tensor var_36311_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3581_cast_fp16)[name = tensor("op_36311_cast_fp16")]; tensor var_36312_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3583_cast_fp16)[name = tensor("op_36312_cast_fp16")]; tensor var_36313_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3585_cast_fp16)[name = tensor("op_36313_cast_fp16")]; tensor var_36314_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3587_cast_fp16)[name = tensor("op_36314_cast_fp16")]; tensor var_36315_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3589_cast_fp16)[name = tensor("op_36315_cast_fp16")]; tensor var_36316_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3591_cast_fp16)[name = tensor("op_36316_cast_fp16")]; tensor var_36317_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3593_cast_fp16)[name = tensor("op_36317_cast_fp16")]; tensor var_36318_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3595_cast_fp16)[name = tensor("op_36318_cast_fp16")]; tensor var_36319_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3597_cast_fp16)[name = tensor("op_36319_cast_fp16")]; tensor var_36320_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3599_cast_fp16)[name = tensor("op_36320_cast_fp16")]; tensor var_36321_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3601_cast_fp16)[name = tensor("op_36321_cast_fp16")]; tensor var_36322_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3603_cast_fp16)[name = tensor("op_36322_cast_fp16")]; tensor var_36323_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3605_cast_fp16)[name = tensor("op_36323_cast_fp16")]; tensor var_36324_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3607_cast_fp16)[name = tensor("op_36324_cast_fp16")]; tensor var_36325_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3609_cast_fp16)[name = tensor("op_36325_cast_fp16")]; tensor var_36326_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3611_cast_fp16)[name = tensor("op_36326_cast_fp16")]; tensor var_36327_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3613_cast_fp16)[name = tensor("op_36327_cast_fp16")]; tensor var_36328_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3615_cast_fp16)[name = tensor("op_36328_cast_fp16")]; tensor var_36329_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3617_cast_fp16)[name = tensor("op_36329_cast_fp16")]; tensor var_36330_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3619_cast_fp16)[name = tensor("op_36330_cast_fp16")]; tensor var_36331_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3621_cast_fp16)[name = tensor("op_36331_cast_fp16")]; tensor var_36332_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3623_cast_fp16)[name = tensor("op_36332_cast_fp16")]; tensor var_36333_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3625_cast_fp16)[name = tensor("op_36333_cast_fp16")]; tensor var_36334_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3627_cast_fp16)[name = tensor("op_36334_cast_fp16")]; tensor var_36335_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3629_cast_fp16)[name = tensor("op_36335_cast_fp16")]; tensor var_36336_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3631_cast_fp16)[name = tensor("op_36336_cast_fp16")]; tensor var_36337_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3633_cast_fp16)[name = tensor("op_36337_cast_fp16")]; tensor var_36338_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3635_cast_fp16)[name = tensor("op_36338_cast_fp16")]; tensor var_36339_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3637_cast_fp16)[name = tensor("op_36339_cast_fp16")]; tensor var_36340_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3639_cast_fp16)[name = tensor("op_36340_cast_fp16")]; tensor var_36341_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3641_cast_fp16)[name = tensor("op_36341_cast_fp16")]; tensor var_36342_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3643_cast_fp16)[name = tensor("op_36342_cast_fp16")]; tensor var_36343_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3645_cast_fp16)[name = tensor("op_36343_cast_fp16")]; tensor var_36344_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3647_cast_fp16)[name = tensor("op_36344_cast_fp16")]; tensor var_36345_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3649_cast_fp16)[name = tensor("op_36345_cast_fp16")]; tensor var_36346_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3651_cast_fp16)[name = tensor("op_36346_cast_fp16")]; tensor var_36347_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3653_cast_fp16)[name = tensor("op_36347_cast_fp16")]; tensor var_36348_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3655_cast_fp16)[name = tensor("op_36348_cast_fp16")]; tensor var_36349_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3657_cast_fp16)[name = tensor("op_36349_cast_fp16")]; tensor var_36350_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3659_cast_fp16)[name = tensor("op_36350_cast_fp16")]; tensor var_36351_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3661_cast_fp16)[name = tensor("op_36351_cast_fp16")]; tensor var_36352_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3663_cast_fp16)[name = tensor("op_36352_cast_fp16")]; tensor var_36353_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3665_cast_fp16)[name = tensor("op_36353_cast_fp16")]; tensor var_36354_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3667_cast_fp16)[name = tensor("op_36354_cast_fp16")]; tensor var_36355_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3669_cast_fp16)[name = tensor("op_36355_cast_fp16")]; tensor var_36356_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3671_cast_fp16)[name = tensor("op_36356_cast_fp16")]; tensor var_36357_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3673_cast_fp16)[name = tensor("op_36357_cast_fp16")]; tensor var_36358_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3675_cast_fp16)[name = tensor("op_36358_cast_fp16")]; tensor var_36359_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3677_cast_fp16)[name = tensor("op_36359_cast_fp16")]; tensor var_36360_cast_fp16 = softmax(axis = var_35079, x = aw_chunk_3679_cast_fp16)[name = tensor("op_36360_cast_fp16")]; tensor var_36362_equation_0 = const()[name = tensor("op_36362_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36362_cast_fp16 = einsum(equation = var_36362_equation_0, values = (var_35882_cast_fp16, var_36281_cast_fp16))[name = tensor("op_36362_cast_fp16")]; tensor var_36364_equation_0 = const()[name = tensor("op_36364_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36364_cast_fp16 = einsum(equation = var_36364_equation_0, values = (var_35882_cast_fp16, var_36282_cast_fp16))[name = tensor("op_36364_cast_fp16")]; tensor var_36366_equation_0 = const()[name = tensor("op_36366_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36366_cast_fp16 = einsum(equation = var_36366_equation_0, values = (var_35882_cast_fp16, var_36283_cast_fp16))[name = tensor("op_36366_cast_fp16")]; tensor var_36368_equation_0 = const()[name = tensor("op_36368_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36368_cast_fp16 = einsum(equation = var_36368_equation_0, values = (var_35882_cast_fp16, var_36284_cast_fp16))[name = tensor("op_36368_cast_fp16")]; tensor var_36370_equation_0 = const()[name = tensor("op_36370_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36370_cast_fp16 = einsum(equation = var_36370_equation_0, values = (var_35886_cast_fp16, var_36285_cast_fp16))[name = tensor("op_36370_cast_fp16")]; tensor var_36372_equation_0 = const()[name = tensor("op_36372_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36372_cast_fp16 = einsum(equation = var_36372_equation_0, values = (var_35886_cast_fp16, var_36286_cast_fp16))[name = tensor("op_36372_cast_fp16")]; tensor var_36374_equation_0 = const()[name = tensor("op_36374_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36374_cast_fp16 = einsum(equation = var_36374_equation_0, values = (var_35886_cast_fp16, var_36287_cast_fp16))[name = tensor("op_36374_cast_fp16")]; tensor var_36376_equation_0 = const()[name = tensor("op_36376_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36376_cast_fp16 = einsum(equation = var_36376_equation_0, values = (var_35886_cast_fp16, var_36288_cast_fp16))[name = tensor("op_36376_cast_fp16")]; tensor var_36378_equation_0 = const()[name = tensor("op_36378_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36378_cast_fp16 = einsum(equation = var_36378_equation_0, values = (var_35890_cast_fp16, var_36289_cast_fp16))[name = tensor("op_36378_cast_fp16")]; tensor var_36380_equation_0 = const()[name = tensor("op_36380_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36380_cast_fp16 = einsum(equation = var_36380_equation_0, values = (var_35890_cast_fp16, var_36290_cast_fp16))[name = tensor("op_36380_cast_fp16")]; tensor var_36382_equation_0 = const()[name = tensor("op_36382_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36382_cast_fp16 = einsum(equation = var_36382_equation_0, values = (var_35890_cast_fp16, var_36291_cast_fp16))[name = tensor("op_36382_cast_fp16")]; tensor var_36384_equation_0 = const()[name = tensor("op_36384_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36384_cast_fp16 = einsum(equation = var_36384_equation_0, values = (var_35890_cast_fp16, var_36292_cast_fp16))[name = tensor("op_36384_cast_fp16")]; tensor var_36386_equation_0 = const()[name = tensor("op_36386_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36386_cast_fp16 = einsum(equation = var_36386_equation_0, values = (var_35894_cast_fp16, var_36293_cast_fp16))[name = tensor("op_36386_cast_fp16")]; tensor var_36388_equation_0 = const()[name = tensor("op_36388_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36388_cast_fp16 = einsum(equation = var_36388_equation_0, values = (var_35894_cast_fp16, var_36294_cast_fp16))[name = tensor("op_36388_cast_fp16")]; tensor var_36390_equation_0 = const()[name = tensor("op_36390_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36390_cast_fp16 = einsum(equation = var_36390_equation_0, values = (var_35894_cast_fp16, var_36295_cast_fp16))[name = tensor("op_36390_cast_fp16")]; tensor var_36392_equation_0 = const()[name = tensor("op_36392_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36392_cast_fp16 = einsum(equation = var_36392_equation_0, values = (var_35894_cast_fp16, var_36296_cast_fp16))[name = tensor("op_36392_cast_fp16")]; tensor var_36394_equation_0 = const()[name = tensor("op_36394_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36394_cast_fp16 = einsum(equation = var_36394_equation_0, values = (var_35898_cast_fp16, var_36297_cast_fp16))[name = tensor("op_36394_cast_fp16")]; tensor var_36396_equation_0 = const()[name = tensor("op_36396_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36396_cast_fp16 = einsum(equation = var_36396_equation_0, values = (var_35898_cast_fp16, var_36298_cast_fp16))[name = tensor("op_36396_cast_fp16")]; tensor var_36398_equation_0 = const()[name = tensor("op_36398_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36398_cast_fp16 = einsum(equation = var_36398_equation_0, values = (var_35898_cast_fp16, var_36299_cast_fp16))[name = tensor("op_36398_cast_fp16")]; tensor var_36400_equation_0 = const()[name = tensor("op_36400_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36400_cast_fp16 = einsum(equation = var_36400_equation_0, values = (var_35898_cast_fp16, var_36300_cast_fp16))[name = tensor("op_36400_cast_fp16")]; tensor var_36402_equation_0 = const()[name = tensor("op_36402_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36402_cast_fp16 = einsum(equation = var_36402_equation_0, values = (var_35902_cast_fp16, var_36301_cast_fp16))[name = tensor("op_36402_cast_fp16")]; tensor var_36404_equation_0 = const()[name = tensor("op_36404_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36404_cast_fp16 = einsum(equation = var_36404_equation_0, values = (var_35902_cast_fp16, var_36302_cast_fp16))[name = tensor("op_36404_cast_fp16")]; tensor var_36406_equation_0 = const()[name = tensor("op_36406_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36406_cast_fp16 = einsum(equation = var_36406_equation_0, values = (var_35902_cast_fp16, var_36303_cast_fp16))[name = tensor("op_36406_cast_fp16")]; tensor var_36408_equation_0 = const()[name = tensor("op_36408_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36408_cast_fp16 = einsum(equation = var_36408_equation_0, values = (var_35902_cast_fp16, var_36304_cast_fp16))[name = tensor("op_36408_cast_fp16")]; tensor var_36410_equation_0 = const()[name = tensor("op_36410_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36410_cast_fp16 = einsum(equation = var_36410_equation_0, values = (var_35906_cast_fp16, var_36305_cast_fp16))[name = tensor("op_36410_cast_fp16")]; tensor var_36412_equation_0 = const()[name = tensor("op_36412_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36412_cast_fp16 = einsum(equation = var_36412_equation_0, values = (var_35906_cast_fp16, var_36306_cast_fp16))[name = tensor("op_36412_cast_fp16")]; tensor var_36414_equation_0 = const()[name = tensor("op_36414_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36414_cast_fp16 = einsum(equation = var_36414_equation_0, values = (var_35906_cast_fp16, var_36307_cast_fp16))[name = tensor("op_36414_cast_fp16")]; tensor var_36416_equation_0 = const()[name = tensor("op_36416_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36416_cast_fp16 = einsum(equation = var_36416_equation_0, values = (var_35906_cast_fp16, var_36308_cast_fp16))[name = tensor("op_36416_cast_fp16")]; tensor var_36418_equation_0 = const()[name = tensor("op_36418_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36418_cast_fp16 = einsum(equation = var_36418_equation_0, values = (var_35910_cast_fp16, var_36309_cast_fp16))[name = tensor("op_36418_cast_fp16")]; tensor var_36420_equation_0 = const()[name = tensor("op_36420_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36420_cast_fp16 = einsum(equation = var_36420_equation_0, values = (var_35910_cast_fp16, var_36310_cast_fp16))[name = tensor("op_36420_cast_fp16")]; tensor var_36422_equation_0 = const()[name = tensor("op_36422_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36422_cast_fp16 = einsum(equation = var_36422_equation_0, values = (var_35910_cast_fp16, var_36311_cast_fp16))[name = tensor("op_36422_cast_fp16")]; tensor var_36424_equation_0 = const()[name = tensor("op_36424_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36424_cast_fp16 = einsum(equation = var_36424_equation_0, values = (var_35910_cast_fp16, var_36312_cast_fp16))[name = tensor("op_36424_cast_fp16")]; tensor var_36426_equation_0 = const()[name = tensor("op_36426_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36426_cast_fp16 = einsum(equation = var_36426_equation_0, values = (var_35914_cast_fp16, var_36313_cast_fp16))[name = tensor("op_36426_cast_fp16")]; tensor var_36428_equation_0 = const()[name = tensor("op_36428_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36428_cast_fp16 = einsum(equation = var_36428_equation_0, values = (var_35914_cast_fp16, var_36314_cast_fp16))[name = tensor("op_36428_cast_fp16")]; tensor var_36430_equation_0 = const()[name = tensor("op_36430_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36430_cast_fp16 = einsum(equation = var_36430_equation_0, values = (var_35914_cast_fp16, var_36315_cast_fp16))[name = tensor("op_36430_cast_fp16")]; tensor var_36432_equation_0 = const()[name = tensor("op_36432_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36432_cast_fp16 = einsum(equation = var_36432_equation_0, values = (var_35914_cast_fp16, var_36316_cast_fp16))[name = tensor("op_36432_cast_fp16")]; tensor var_36434_equation_0 = const()[name = tensor("op_36434_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36434_cast_fp16 = einsum(equation = var_36434_equation_0, values = (var_35918_cast_fp16, var_36317_cast_fp16))[name = tensor("op_36434_cast_fp16")]; tensor var_36436_equation_0 = const()[name = tensor("op_36436_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36436_cast_fp16 = einsum(equation = var_36436_equation_0, values = (var_35918_cast_fp16, var_36318_cast_fp16))[name = tensor("op_36436_cast_fp16")]; tensor var_36438_equation_0 = const()[name = tensor("op_36438_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36438_cast_fp16 = einsum(equation = var_36438_equation_0, values = (var_35918_cast_fp16, var_36319_cast_fp16))[name = tensor("op_36438_cast_fp16")]; tensor var_36440_equation_0 = const()[name = tensor("op_36440_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36440_cast_fp16 = einsum(equation = var_36440_equation_0, values = (var_35918_cast_fp16, var_36320_cast_fp16))[name = tensor("op_36440_cast_fp16")]; tensor var_36442_equation_0 = const()[name = tensor("op_36442_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36442_cast_fp16 = einsum(equation = var_36442_equation_0, values = (var_35922_cast_fp16, var_36321_cast_fp16))[name = tensor("op_36442_cast_fp16")]; tensor var_36444_equation_0 = const()[name = tensor("op_36444_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36444_cast_fp16 = einsum(equation = var_36444_equation_0, values = (var_35922_cast_fp16, var_36322_cast_fp16))[name = tensor("op_36444_cast_fp16")]; tensor var_36446_equation_0 = const()[name = tensor("op_36446_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36446_cast_fp16 = einsum(equation = var_36446_equation_0, values = (var_35922_cast_fp16, var_36323_cast_fp16))[name = tensor("op_36446_cast_fp16")]; tensor var_36448_equation_0 = const()[name = tensor("op_36448_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36448_cast_fp16 = einsum(equation = var_36448_equation_0, values = (var_35922_cast_fp16, var_36324_cast_fp16))[name = tensor("op_36448_cast_fp16")]; tensor var_36450_equation_0 = const()[name = tensor("op_36450_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36450_cast_fp16 = einsum(equation = var_36450_equation_0, values = (var_35926_cast_fp16, var_36325_cast_fp16))[name = tensor("op_36450_cast_fp16")]; tensor var_36452_equation_0 = const()[name = tensor("op_36452_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36452_cast_fp16 = einsum(equation = var_36452_equation_0, values = (var_35926_cast_fp16, var_36326_cast_fp16))[name = tensor("op_36452_cast_fp16")]; tensor var_36454_equation_0 = const()[name = tensor("op_36454_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36454_cast_fp16 = einsum(equation = var_36454_equation_0, values = (var_35926_cast_fp16, var_36327_cast_fp16))[name = tensor("op_36454_cast_fp16")]; tensor var_36456_equation_0 = const()[name = tensor("op_36456_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36456_cast_fp16 = einsum(equation = var_36456_equation_0, values = (var_35926_cast_fp16, var_36328_cast_fp16))[name = tensor("op_36456_cast_fp16")]; tensor var_36458_equation_0 = const()[name = tensor("op_36458_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36458_cast_fp16 = einsum(equation = var_36458_equation_0, values = (var_35930_cast_fp16, var_36329_cast_fp16))[name = tensor("op_36458_cast_fp16")]; tensor var_36460_equation_0 = const()[name = tensor("op_36460_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36460_cast_fp16 = einsum(equation = var_36460_equation_0, values = (var_35930_cast_fp16, var_36330_cast_fp16))[name = tensor("op_36460_cast_fp16")]; tensor var_36462_equation_0 = const()[name = tensor("op_36462_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36462_cast_fp16 = einsum(equation = var_36462_equation_0, values = (var_35930_cast_fp16, var_36331_cast_fp16))[name = tensor("op_36462_cast_fp16")]; tensor var_36464_equation_0 = const()[name = tensor("op_36464_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36464_cast_fp16 = einsum(equation = var_36464_equation_0, values = (var_35930_cast_fp16, var_36332_cast_fp16))[name = tensor("op_36464_cast_fp16")]; tensor var_36466_equation_0 = const()[name = tensor("op_36466_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36466_cast_fp16 = einsum(equation = var_36466_equation_0, values = (var_35934_cast_fp16, var_36333_cast_fp16))[name = tensor("op_36466_cast_fp16")]; tensor var_36468_equation_0 = const()[name = tensor("op_36468_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36468_cast_fp16 = einsum(equation = var_36468_equation_0, values = (var_35934_cast_fp16, var_36334_cast_fp16))[name = tensor("op_36468_cast_fp16")]; tensor var_36470_equation_0 = const()[name = tensor("op_36470_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36470_cast_fp16 = einsum(equation = var_36470_equation_0, values = (var_35934_cast_fp16, var_36335_cast_fp16))[name = tensor("op_36470_cast_fp16")]; tensor var_36472_equation_0 = const()[name = tensor("op_36472_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36472_cast_fp16 = einsum(equation = var_36472_equation_0, values = (var_35934_cast_fp16, var_36336_cast_fp16))[name = tensor("op_36472_cast_fp16")]; tensor var_36474_equation_0 = const()[name = tensor("op_36474_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36474_cast_fp16 = einsum(equation = var_36474_equation_0, values = (var_35938_cast_fp16, var_36337_cast_fp16))[name = tensor("op_36474_cast_fp16")]; tensor var_36476_equation_0 = const()[name = tensor("op_36476_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36476_cast_fp16 = einsum(equation = var_36476_equation_0, values = (var_35938_cast_fp16, var_36338_cast_fp16))[name = tensor("op_36476_cast_fp16")]; tensor var_36478_equation_0 = const()[name = tensor("op_36478_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36478_cast_fp16 = einsum(equation = var_36478_equation_0, values = (var_35938_cast_fp16, var_36339_cast_fp16))[name = tensor("op_36478_cast_fp16")]; tensor var_36480_equation_0 = const()[name = tensor("op_36480_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36480_cast_fp16 = einsum(equation = var_36480_equation_0, values = (var_35938_cast_fp16, var_36340_cast_fp16))[name = tensor("op_36480_cast_fp16")]; tensor var_36482_equation_0 = const()[name = tensor("op_36482_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36482_cast_fp16 = einsum(equation = var_36482_equation_0, values = (var_35942_cast_fp16, var_36341_cast_fp16))[name = tensor("op_36482_cast_fp16")]; tensor var_36484_equation_0 = const()[name = tensor("op_36484_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36484_cast_fp16 = einsum(equation = var_36484_equation_0, values = (var_35942_cast_fp16, var_36342_cast_fp16))[name = tensor("op_36484_cast_fp16")]; tensor var_36486_equation_0 = const()[name = tensor("op_36486_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36486_cast_fp16 = einsum(equation = var_36486_equation_0, values = (var_35942_cast_fp16, var_36343_cast_fp16))[name = tensor("op_36486_cast_fp16")]; tensor var_36488_equation_0 = const()[name = tensor("op_36488_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36488_cast_fp16 = einsum(equation = var_36488_equation_0, values = (var_35942_cast_fp16, var_36344_cast_fp16))[name = tensor("op_36488_cast_fp16")]; tensor var_36490_equation_0 = const()[name = tensor("op_36490_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36490_cast_fp16 = einsum(equation = var_36490_equation_0, values = (var_35946_cast_fp16, var_36345_cast_fp16))[name = tensor("op_36490_cast_fp16")]; tensor var_36492_equation_0 = const()[name = tensor("op_36492_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36492_cast_fp16 = einsum(equation = var_36492_equation_0, values = (var_35946_cast_fp16, var_36346_cast_fp16))[name = tensor("op_36492_cast_fp16")]; tensor var_36494_equation_0 = const()[name = tensor("op_36494_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36494_cast_fp16 = einsum(equation = var_36494_equation_0, values = (var_35946_cast_fp16, var_36347_cast_fp16))[name = tensor("op_36494_cast_fp16")]; tensor var_36496_equation_0 = const()[name = tensor("op_36496_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36496_cast_fp16 = einsum(equation = var_36496_equation_0, values = (var_35946_cast_fp16, var_36348_cast_fp16))[name = tensor("op_36496_cast_fp16")]; tensor var_36498_equation_0 = const()[name = tensor("op_36498_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36498_cast_fp16 = einsum(equation = var_36498_equation_0, values = (var_35950_cast_fp16, var_36349_cast_fp16))[name = tensor("op_36498_cast_fp16")]; tensor var_36500_equation_0 = const()[name = tensor("op_36500_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36500_cast_fp16 = einsum(equation = var_36500_equation_0, values = (var_35950_cast_fp16, var_36350_cast_fp16))[name = tensor("op_36500_cast_fp16")]; tensor var_36502_equation_0 = const()[name = tensor("op_36502_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36502_cast_fp16 = einsum(equation = var_36502_equation_0, values = (var_35950_cast_fp16, var_36351_cast_fp16))[name = tensor("op_36502_cast_fp16")]; tensor var_36504_equation_0 = const()[name = tensor("op_36504_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36504_cast_fp16 = einsum(equation = var_36504_equation_0, values = (var_35950_cast_fp16, var_36352_cast_fp16))[name = tensor("op_36504_cast_fp16")]; tensor var_36506_equation_0 = const()[name = tensor("op_36506_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36506_cast_fp16 = einsum(equation = var_36506_equation_0, values = (var_35954_cast_fp16, var_36353_cast_fp16))[name = tensor("op_36506_cast_fp16")]; tensor var_36508_equation_0 = const()[name = tensor("op_36508_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36508_cast_fp16 = einsum(equation = var_36508_equation_0, values = (var_35954_cast_fp16, var_36354_cast_fp16))[name = tensor("op_36508_cast_fp16")]; tensor var_36510_equation_0 = const()[name = tensor("op_36510_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36510_cast_fp16 = einsum(equation = var_36510_equation_0, values = (var_35954_cast_fp16, var_36355_cast_fp16))[name = tensor("op_36510_cast_fp16")]; tensor var_36512_equation_0 = const()[name = tensor("op_36512_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36512_cast_fp16 = einsum(equation = var_36512_equation_0, values = (var_35954_cast_fp16, var_36356_cast_fp16))[name = tensor("op_36512_cast_fp16")]; tensor var_36514_equation_0 = const()[name = tensor("op_36514_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36514_cast_fp16 = einsum(equation = var_36514_equation_0, values = (var_35958_cast_fp16, var_36357_cast_fp16))[name = tensor("op_36514_cast_fp16")]; tensor var_36516_equation_0 = const()[name = tensor("op_36516_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36516_cast_fp16 = einsum(equation = var_36516_equation_0, values = (var_35958_cast_fp16, var_36358_cast_fp16))[name = tensor("op_36516_cast_fp16")]; tensor var_36518_equation_0 = const()[name = tensor("op_36518_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36518_cast_fp16 = einsum(equation = var_36518_equation_0, values = (var_35958_cast_fp16, var_36359_cast_fp16))[name = tensor("op_36518_cast_fp16")]; tensor var_36520_equation_0 = const()[name = tensor("op_36520_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_36520_cast_fp16 = einsum(equation = var_36520_equation_0, values = (var_35958_cast_fp16, var_36360_cast_fp16))[name = tensor("op_36520_cast_fp16")]; tensor var_36522_interleave_0 = const()[name = tensor("op_36522_interleave_0"), val = tensor(false)]; tensor var_36522_cast_fp16 = concat(axis = var_35054, interleave = var_36522_interleave_0, values = (var_36362_cast_fp16, var_36364_cast_fp16, var_36366_cast_fp16, var_36368_cast_fp16))[name = tensor("op_36522_cast_fp16")]; tensor var_36524_interleave_0 = const()[name = tensor("op_36524_interleave_0"), val = tensor(false)]; tensor var_36524_cast_fp16 = concat(axis = var_35054, interleave = var_36524_interleave_0, values = (var_36370_cast_fp16, var_36372_cast_fp16, var_36374_cast_fp16, var_36376_cast_fp16))[name = tensor("op_36524_cast_fp16")]; tensor var_36526_interleave_0 = const()[name = tensor("op_36526_interleave_0"), val = tensor(false)]; tensor var_36526_cast_fp16 = concat(axis = var_35054, interleave = var_36526_interleave_0, values = (var_36378_cast_fp16, var_36380_cast_fp16, var_36382_cast_fp16, var_36384_cast_fp16))[name = tensor("op_36526_cast_fp16")]; tensor var_36528_interleave_0 = const()[name = tensor("op_36528_interleave_0"), val = tensor(false)]; tensor var_36528_cast_fp16 = concat(axis = var_35054, interleave = var_36528_interleave_0, values = (var_36386_cast_fp16, var_36388_cast_fp16, var_36390_cast_fp16, var_36392_cast_fp16))[name = tensor("op_36528_cast_fp16")]; tensor var_36530_interleave_0 = const()[name = tensor("op_36530_interleave_0"), val = tensor(false)]; tensor var_36530_cast_fp16 = concat(axis = var_35054, interleave = var_36530_interleave_0, values = (var_36394_cast_fp16, var_36396_cast_fp16, var_36398_cast_fp16, var_36400_cast_fp16))[name = tensor("op_36530_cast_fp16")]; tensor var_36532_interleave_0 = const()[name = tensor("op_36532_interleave_0"), val = tensor(false)]; tensor var_36532_cast_fp16 = concat(axis = var_35054, interleave = var_36532_interleave_0, values = (var_36402_cast_fp16, var_36404_cast_fp16, var_36406_cast_fp16, var_36408_cast_fp16))[name = tensor("op_36532_cast_fp16")]; tensor var_36534_interleave_0 = const()[name = tensor("op_36534_interleave_0"), val = tensor(false)]; tensor var_36534_cast_fp16 = concat(axis = var_35054, interleave = var_36534_interleave_0, values = (var_36410_cast_fp16, var_36412_cast_fp16, var_36414_cast_fp16, var_36416_cast_fp16))[name = tensor("op_36534_cast_fp16")]; tensor var_36536_interleave_0 = const()[name = tensor("op_36536_interleave_0"), val = tensor(false)]; tensor var_36536_cast_fp16 = concat(axis = var_35054, interleave = var_36536_interleave_0, values = (var_36418_cast_fp16, var_36420_cast_fp16, var_36422_cast_fp16, var_36424_cast_fp16))[name = tensor("op_36536_cast_fp16")]; tensor var_36538_interleave_0 = const()[name = tensor("op_36538_interleave_0"), val = tensor(false)]; tensor var_36538_cast_fp16 = concat(axis = var_35054, interleave = var_36538_interleave_0, values = (var_36426_cast_fp16, var_36428_cast_fp16, var_36430_cast_fp16, var_36432_cast_fp16))[name = tensor("op_36538_cast_fp16")]; tensor var_36540_interleave_0 = const()[name = tensor("op_36540_interleave_0"), val = tensor(false)]; tensor var_36540_cast_fp16 = concat(axis = var_35054, interleave = var_36540_interleave_0, values = (var_36434_cast_fp16, var_36436_cast_fp16, var_36438_cast_fp16, var_36440_cast_fp16))[name = tensor("op_36540_cast_fp16")]; tensor var_36542_interleave_0 = const()[name = tensor("op_36542_interleave_0"), val = tensor(false)]; tensor var_36542_cast_fp16 = concat(axis = var_35054, interleave = var_36542_interleave_0, values = (var_36442_cast_fp16, var_36444_cast_fp16, var_36446_cast_fp16, var_36448_cast_fp16))[name = tensor("op_36542_cast_fp16")]; tensor var_36544_interleave_0 = const()[name = tensor("op_36544_interleave_0"), val = tensor(false)]; tensor var_36544_cast_fp16 = concat(axis = var_35054, interleave = var_36544_interleave_0, values = (var_36450_cast_fp16, var_36452_cast_fp16, var_36454_cast_fp16, var_36456_cast_fp16))[name = tensor("op_36544_cast_fp16")]; tensor var_36546_interleave_0 = const()[name = tensor("op_36546_interleave_0"), val = tensor(false)]; tensor var_36546_cast_fp16 = concat(axis = var_35054, interleave = var_36546_interleave_0, values = (var_36458_cast_fp16, var_36460_cast_fp16, var_36462_cast_fp16, var_36464_cast_fp16))[name = tensor("op_36546_cast_fp16")]; tensor var_36548_interleave_0 = const()[name = tensor("op_36548_interleave_0"), val = tensor(false)]; tensor var_36548_cast_fp16 = concat(axis = var_35054, interleave = var_36548_interleave_0, values = (var_36466_cast_fp16, var_36468_cast_fp16, var_36470_cast_fp16, var_36472_cast_fp16))[name = tensor("op_36548_cast_fp16")]; tensor var_36550_interleave_0 = const()[name = tensor("op_36550_interleave_0"), val = tensor(false)]; tensor var_36550_cast_fp16 = concat(axis = var_35054, interleave = var_36550_interleave_0, values = (var_36474_cast_fp16, var_36476_cast_fp16, var_36478_cast_fp16, var_36480_cast_fp16))[name = tensor("op_36550_cast_fp16")]; tensor var_36552_interleave_0 = const()[name = tensor("op_36552_interleave_0"), val = tensor(false)]; tensor var_36552_cast_fp16 = concat(axis = var_35054, interleave = var_36552_interleave_0, values = (var_36482_cast_fp16, var_36484_cast_fp16, var_36486_cast_fp16, var_36488_cast_fp16))[name = tensor("op_36552_cast_fp16")]; tensor var_36554_interleave_0 = const()[name = tensor("op_36554_interleave_0"), val = tensor(false)]; tensor var_36554_cast_fp16 = concat(axis = var_35054, interleave = var_36554_interleave_0, values = (var_36490_cast_fp16, var_36492_cast_fp16, var_36494_cast_fp16, var_36496_cast_fp16))[name = tensor("op_36554_cast_fp16")]; tensor var_36556_interleave_0 = const()[name = tensor("op_36556_interleave_0"), val = tensor(false)]; tensor var_36556_cast_fp16 = concat(axis = var_35054, interleave = var_36556_interleave_0, values = (var_36498_cast_fp16, var_36500_cast_fp16, var_36502_cast_fp16, var_36504_cast_fp16))[name = tensor("op_36556_cast_fp16")]; tensor var_36558_interleave_0 = const()[name = tensor("op_36558_interleave_0"), val = tensor(false)]; tensor var_36558_cast_fp16 = concat(axis = var_35054, interleave = var_36558_interleave_0, values = (var_36506_cast_fp16, var_36508_cast_fp16, var_36510_cast_fp16, var_36512_cast_fp16))[name = tensor("op_36558_cast_fp16")]; tensor var_36560_interleave_0 = const()[name = tensor("op_36560_interleave_0"), val = tensor(false)]; tensor var_36560_cast_fp16 = concat(axis = var_35054, interleave = var_36560_interleave_0, values = (var_36514_cast_fp16, var_36516_cast_fp16, var_36518_cast_fp16, var_36520_cast_fp16))[name = tensor("op_36560_cast_fp16")]; tensor input_177_interleave_0 = const()[name = tensor("input_177_interleave_0"), val = tensor(false)]; tensor input_177_cast_fp16 = concat(axis = var_35079, interleave = input_177_interleave_0, values = (var_36522_cast_fp16, var_36524_cast_fp16, var_36526_cast_fp16, var_36528_cast_fp16, var_36530_cast_fp16, var_36532_cast_fp16, var_36534_cast_fp16, var_36536_cast_fp16, var_36538_cast_fp16, var_36540_cast_fp16, var_36542_cast_fp16, var_36544_cast_fp16, var_36546_cast_fp16, var_36548_cast_fp16, var_36550_cast_fp16, var_36552_cast_fp16, var_36554_cast_fp16, var_36556_cast_fp16, var_36558_cast_fp16, var_36560_cast_fp16))[name = tensor("input_177_cast_fp16")]; tensor var_36571_pad_type_0 = const()[name = tensor("op_36571_pad_type_0"), val = tensor("valid")]; tensor var_36571_strides_0 = const()[name = tensor("op_36571_strides_0"), val = tensor([1, 1])]; tensor var_36571_pad_0 = const()[name = tensor("op_36571_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36571_dilations_0 = const()[name = tensor("op_36571_dilations_0"), val = tensor([1, 1])]; tensor var_36571_groups_0 = const()[name = tensor("op_36571_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299972928))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300792192))), name = tensor("layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_22_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300792320)))]; tensor var_36571_cast_fp16 = conv(bias = layers_22_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_36571_dilations_0, groups = var_36571_groups_0, pad = var_36571_pad_0, pad_type = var_36571_pad_type_0, strides = var_36571_strides_0, weight = layers_22_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_177_cast_fp16)[name = tensor("op_36571_cast_fp16")]; tensor var_36577_pad_type_0 = const()[name = tensor("op_36577_pad_type_0"), val = tensor("valid")]; tensor var_36577_strides_0 = const()[name = tensor("op_36577_strides_0"), val = tensor([1, 1])]; tensor var_36577_pad_0 = const()[name = tensor("op_36577_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36577_dilations_0 = const()[name = tensor("op_36577_dilations_0"), val = tensor([1, 1])]; tensor var_36577_groups_0 = const()[name = tensor("op_36577_groups_0"), val = tensor(1)]; tensor layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300807232))), name = tensor("layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(300794944))), shape = tensor([1280, 1280, 1, 1])]; tensor var_36577_cast_fp16 = conv(dilations = var_36577_dilations_0, groups = var_36577_groups_0, pad = var_36577_pad_0, pad_type = var_36577_pad_type_0, strides = var_36577_strides_0, weight = layers_22_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_177_cast_fp16)[name = tensor("op_36577_cast_fp16")]; tensor obj_91_cast_fp16 = add(x = var_36571_cast_fp16, y = var_36577_cast_fp16)[name = tensor("obj_91_cast_fp16")]; tensor inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_91_cast_fp16")]; tensor out_91_axes_0 = const()[name = tensor("out_91_axes_0"), val = tensor([1])]; tensor var_36588_to_fp16 = const()[name = tensor("op_36588_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_91_cast_fp16 = layer_norm(axes = out_91_axes_0, epsilon = var_36588_to_fp16, x = inputs_91_cast_fp16)[name = tensor("out_91_cast_fp16")]; tensor input_179_gamma_0_to_fp16 = const()[name = tensor("input_179_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301012096)))]; tensor input_179_beta_0_to_fp16 = const()[name = tensor("input_179_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301014720)))]; tensor input_179_epsilon_0_to_fp16 = const()[name = tensor("input_179_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_179_cast_fp16 = batch_norm(beta = input_179_beta_0_to_fp16, epsilon = input_179_epsilon_0_to_fp16, gamma = input_179_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_91_cast_fp16)[name = tensor("input_179_cast_fp16")]; tensor var_36606_pad_type_0 = const()[name = tensor("op_36606_pad_type_0"), val = tensor("valid")]; tensor var_36606_strides_0 = const()[name = tensor("op_36606_strides_0"), val = tensor([1, 1])]; tensor var_36606_pad_0 = const()[name = tensor("op_36606_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36606_dilations_0 = const()[name = tensor("op_36606_dilations_0"), val = tensor([1, 1])]; tensor var_36606_groups_0 = const()[name = tensor("op_36606_groups_0"), val = tensor(1)]; tensor layers_22_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301017344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304294208))), name = tensor("layers_22_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_22_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304294336)))]; tensor var_36606_cast_fp16 = conv(bias = layers_22_fc1_inlier_module_bias_to_fp16, dilations = var_36606_dilations_0, groups = var_36606_groups_0, pad = var_36606_pad_0, pad_type = var_36606_pad_type_0, strides = var_36606_strides_0, weight = layers_22_fc1_inlier_module_weight_to_fp16_palettized, x = input_179_cast_fp16)[name = tensor("op_36606_cast_fp16")]; tensor var_36612_pad_type_0 = const()[name = tensor("op_36612_pad_type_0"), val = tensor("valid")]; tensor var_36612_strides_0 = const()[name = tensor("op_36612_strides_0"), val = tensor([1, 1])]; tensor var_36612_pad_0 = const()[name = tensor("op_36612_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36612_dilations_0 = const()[name = tensor("op_36612_dilations_0"), val = tensor([1, 1])]; tensor var_36612_groups_0 = const()[name = tensor("op_36612_groups_0"), val = tensor(1)]; tensor layers_22_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304372736))), name = tensor("layers_22_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(304304640))), shape = tensor([5120, 1280, 1, 1])]; tensor var_36612_cast_fp16 = conv(dilations = var_36612_dilations_0, groups = var_36612_groups_0, pad = var_36612_pad_0, pad_type = var_36612_pad_type_0, strides = var_36612_strides_0, weight = layers_22_fc1_outlier_module_weight_to_fp16_sparsified, x = input_179_cast_fp16)[name = tensor("op_36612_cast_fp16")]; tensor input_181_cast_fp16 = add(x = var_36606_cast_fp16, y = var_36612_cast_fp16)[name = tensor("input_181_cast_fp16")]; tensor input_183_mode_0 = const()[name = tensor("input_183_mode_0"), val = tensor("EXACT")]; tensor input_183_cast_fp16 = gelu(mode = input_183_mode_0, x = input_181_cast_fp16)[name = tensor("input_183_cast_fp16")]; tensor var_36623_pad_type_0 = const()[name = tensor("op_36623_pad_type_0"), val = tensor("valid")]; tensor var_36623_strides_0 = const()[name = tensor("op_36623_strides_0"), val = tensor([1, 1])]; tensor var_36623_pad_0 = const()[name = tensor("op_36623_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36623_dilations_0 = const()[name = tensor("op_36623_dilations_0"), val = tensor([1, 1])]; tensor var_36623_groups_0 = const()[name = tensor("op_36623_groups_0"), val = tensor(1)]; tensor layers_22_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(305192000))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308468864))), name = tensor("layers_22_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_22_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_22_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308468992)))]; tensor var_36623_cast_fp16 = conv(bias = layers_22_fc2_inlier_module_bias_to_fp16, dilations = var_36623_dilations_0, groups = var_36623_groups_0, pad = var_36623_pad_0, pad_type = var_36623_pad_type_0, strides = var_36623_strides_0, weight = layers_22_fc2_inlier_module_weight_to_fp16_palettized, x = input_183_cast_fp16)[name = tensor("op_36623_cast_fp16")]; tensor var_36629_pad_type_0 = const()[name = tensor("op_36629_pad_type_0"), val = tensor("valid")]; tensor var_36629_strides_0 = const()[name = tensor("op_36629_strides_0"), val = tensor([1, 1])]; tensor var_36629_pad_0 = const()[name = tensor("op_36629_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36629_dilations_0 = const()[name = tensor("op_36629_dilations_0"), val = tensor([1, 1])]; tensor var_36629_groups_0 = const()[name = tensor("op_36629_groups_0"), val = tensor(1)]; tensor layers_22_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308533376))), name = tensor("layers_22_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(308471616))), shape = tensor([1280, 5120, 1, 1])]; tensor var_36629_cast_fp16 = conv(dilations = var_36629_dilations_0, groups = var_36629_groups_0, pad = var_36629_pad_0, pad_type = var_36629_pad_type_0, strides = var_36629_strides_0, weight = layers_22_fc2_outlier_module_weight_to_fp16_sparsified, x = input_183_cast_fp16)[name = tensor("op_36629_cast_fp16")]; tensor hidden_states_49_cast_fp16 = add(x = var_36623_cast_fp16, y = var_36629_cast_fp16)[name = tensor("hidden_states_49_cast_fp16")]; tensor inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = hidden_states_49_cast_fp16)[name = tensor("inputs_93_cast_fp16")]; tensor var_36635 = const()[name = tensor("op_36635"), val = tensor(3)]; tensor var_36660 = const()[name = tensor("op_36660"), val = tensor(1)]; tensor out_93_axes_0 = const()[name = tensor("out_93_axes_0"), val = tensor([1])]; tensor var_36677_to_fp16 = const()[name = tensor("op_36677_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_93_cast_fp16 = layer_norm(axes = out_93_axes_0, epsilon = var_36677_to_fp16, x = inputs_93_cast_fp16)[name = tensor("out_93_cast_fp16")]; tensor obj_93_gamma_0_to_fp16 = const()[name = tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309352640)))]; tensor obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309355264)))]; tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_93_cast_fp16)[name = tensor("obj_93_cast_fp16")]; tensor var_36699_pad_type_0 = const()[name = tensor("op_36699_pad_type_0"), val = tensor("valid")]; tensor var_36699_strides_0 = const()[name = tensor("op_36699_strides_0"), val = tensor([1, 1])]; tensor var_36699_pad_0 = const()[name = tensor("op_36699_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36699_dilations_0 = const()[name = tensor("op_36699_dilations_0"), val = tensor([1, 1])]; tensor var_36699_groups_0 = const()[name = tensor("op_36699_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(309357888))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310177152))), name = tensor("layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_23_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310177280)))]; tensor var_36699_cast_fp16 = conv(bias = layers_23_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_36699_dilations_0, groups = var_36699_groups_0, pad = var_36699_pad_0, pad_type = var_36699_pad_type_0, strides = var_36699_strides_0, weight = layers_23_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_36699_cast_fp16")]; tensor var_36705_pad_type_0 = const()[name = tensor("op_36705_pad_type_0"), val = tensor("valid")]; tensor var_36705_strides_0 = const()[name = tensor("op_36705_strides_0"), val = tensor([1, 1])]; tensor var_36705_pad_0 = const()[name = tensor("op_36705_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36705_dilations_0 = const()[name = tensor("op_36705_dilations_0"), val = tensor([1, 1])]; tensor var_36705_groups_0 = const()[name = tensor("op_36705_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310208448))), name = tensor("layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310179904))), shape = tensor([1280, 1280, 1, 1])]; tensor var_36705_cast_fp16 = conv(dilations = var_36705_dilations_0, groups = var_36705_groups_0, pad = var_36705_pad_0, pad_type = var_36705_pad_type_0, strides = var_36705_strides_0, weight = layers_23_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_36705_cast_fp16")]; tensor query_47_cast_fp16 = add(x = var_36699_cast_fp16, y = var_36705_cast_fp16)[name = tensor("query_47_cast_fp16")]; tensor var_36714_pad_type_0 = const()[name = tensor("op_36714_pad_type_0"), val = tensor("valid")]; tensor var_36714_strides_0 = const()[name = tensor("op_36714_strides_0"), val = tensor([1, 1])]; tensor var_36714_pad_0 = const()[name = tensor("op_36714_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36714_dilations_0 = const()[name = tensor("op_36714_dilations_0"), val = tensor([1, 1])]; tensor var_36714_groups_0 = const()[name = tensor("op_36714_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(310413312))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311232576))), name = tensor("layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_36714_cast_fp16 = conv(dilations = var_36714_dilations_0, groups = var_36714_groups_0, pad = var_36714_pad_0, pad_type = var_36714_pad_type_0, strides = var_36714_strides_0, weight = layers_23_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_36714_cast_fp16")]; tensor var_36720_pad_type_0 = const()[name = tensor("op_36720_pad_type_0"), val = tensor("valid")]; tensor var_36720_strides_0 = const()[name = tensor("op_36720_strides_0"), val = tensor([1, 1])]; tensor var_36720_pad_0 = const()[name = tensor("op_36720_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36720_dilations_0 = const()[name = tensor("op_36720_dilations_0"), val = tensor([1, 1])]; tensor var_36720_groups_0 = const()[name = tensor("op_36720_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311257088))), name = tensor("layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311232704))), shape = tensor([1280, 1280, 1, 1])]; tensor var_36720_cast_fp16 = conv(dilations = var_36720_dilations_0, groups = var_36720_groups_0, pad = var_36720_pad_0, pad_type = var_36720_pad_type_0, strides = var_36720_strides_0, weight = layers_23_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_36720_cast_fp16")]; tensor key_47_cast_fp16 = add(x = var_36714_cast_fp16, y = var_36720_cast_fp16)[name = tensor("key_47_cast_fp16")]; tensor var_36730_pad_type_0 = const()[name = tensor("op_36730_pad_type_0"), val = tensor("valid")]; tensor var_36730_strides_0 = const()[name = tensor("op_36730_strides_0"), val = tensor([1, 1])]; tensor var_36730_pad_0 = const()[name = tensor("op_36730_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36730_dilations_0 = const()[name = tensor("op_36730_dilations_0"), val = tensor([1, 1])]; tensor var_36730_groups_0 = const()[name = tensor("op_36730_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(311461952))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312281216))), name = tensor("layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_23_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312281344)))]; tensor var_36730_cast_fp16 = conv(bias = layers_23_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_36730_dilations_0, groups = var_36730_groups_0, pad = var_36730_pad_0, pad_type = var_36730_pad_type_0, strides = var_36730_strides_0, weight = layers_23_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_93_cast_fp16)[name = tensor("op_36730_cast_fp16")]; tensor var_36736_pad_type_0 = const()[name = tensor("op_36736_pad_type_0"), val = tensor("valid")]; tensor var_36736_strides_0 = const()[name = tensor("op_36736_strides_0"), val = tensor([1, 1])]; tensor var_36736_pad_0 = const()[name = tensor("op_36736_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_36736_dilations_0 = const()[name = tensor("op_36736_dilations_0"), val = tensor([1, 1])]; tensor var_36736_groups_0 = const()[name = tensor("op_36736_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312297152))), name = tensor("layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312283968))), shape = tensor([1280, 1280, 1, 1])]; tensor var_36736_cast_fp16 = conv(dilations = var_36736_dilations_0, groups = var_36736_groups_0, pad = var_36736_pad_0, pad_type = var_36736_pad_type_0, strides = var_36736_strides_0, weight = layers_23_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_93_cast_fp16)[name = tensor("op_36736_cast_fp16")]; tensor value_47_cast_fp16 = add(x = var_36730_cast_fp16, y = var_36736_cast_fp16)[name = tensor("value_47_cast_fp16")]; tensor var_36742_begin_0 = const()[name = tensor("op_36742_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36742_end_0 = const()[name = tensor("op_36742_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_36742_end_mask_0 = const()[name = tensor("op_36742_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36742_cast_fp16 = slice_by_index(begin = var_36742_begin_0, end = var_36742_end_0, end_mask = var_36742_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36742_cast_fp16")]; tensor var_36746_begin_0 = const()[name = tensor("op_36746_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_36746_end_0 = const()[name = tensor("op_36746_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_36746_end_mask_0 = const()[name = tensor("op_36746_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36746_cast_fp16 = slice_by_index(begin = var_36746_begin_0, end = var_36746_end_0, end_mask = var_36746_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36746_cast_fp16")]; tensor var_36750_begin_0 = const()[name = tensor("op_36750_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_36750_end_0 = const()[name = tensor("op_36750_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_36750_end_mask_0 = const()[name = tensor("op_36750_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36750_cast_fp16 = slice_by_index(begin = var_36750_begin_0, end = var_36750_end_0, end_mask = var_36750_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36750_cast_fp16")]; tensor var_36754_begin_0 = const()[name = tensor("op_36754_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_36754_end_0 = const()[name = tensor("op_36754_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_36754_end_mask_0 = const()[name = tensor("op_36754_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36754_cast_fp16 = slice_by_index(begin = var_36754_begin_0, end = var_36754_end_0, end_mask = var_36754_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36754_cast_fp16")]; tensor var_36758_begin_0 = const()[name = tensor("op_36758_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_36758_end_0 = const()[name = tensor("op_36758_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_36758_end_mask_0 = const()[name = tensor("op_36758_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36758_cast_fp16 = slice_by_index(begin = var_36758_begin_0, end = var_36758_end_0, end_mask = var_36758_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36758_cast_fp16")]; tensor var_36762_begin_0 = const()[name = tensor("op_36762_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_36762_end_0 = const()[name = tensor("op_36762_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_36762_end_mask_0 = const()[name = tensor("op_36762_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36762_cast_fp16 = slice_by_index(begin = var_36762_begin_0, end = var_36762_end_0, end_mask = var_36762_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36762_cast_fp16")]; tensor var_36766_begin_0 = const()[name = tensor("op_36766_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_36766_end_0 = const()[name = tensor("op_36766_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_36766_end_mask_0 = const()[name = tensor("op_36766_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36766_cast_fp16 = slice_by_index(begin = var_36766_begin_0, end = var_36766_end_0, end_mask = var_36766_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36766_cast_fp16")]; tensor var_36770_begin_0 = const()[name = tensor("op_36770_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_36770_end_0 = const()[name = tensor("op_36770_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_36770_end_mask_0 = const()[name = tensor("op_36770_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36770_cast_fp16 = slice_by_index(begin = var_36770_begin_0, end = var_36770_end_0, end_mask = var_36770_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36770_cast_fp16")]; tensor var_36774_begin_0 = const()[name = tensor("op_36774_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_36774_end_0 = const()[name = tensor("op_36774_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_36774_end_mask_0 = const()[name = tensor("op_36774_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36774_cast_fp16 = slice_by_index(begin = var_36774_begin_0, end = var_36774_end_0, end_mask = var_36774_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36774_cast_fp16")]; tensor var_36778_begin_0 = const()[name = tensor("op_36778_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_36778_end_0 = const()[name = tensor("op_36778_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_36778_end_mask_0 = const()[name = tensor("op_36778_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36778_cast_fp16 = slice_by_index(begin = var_36778_begin_0, end = var_36778_end_0, end_mask = var_36778_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36778_cast_fp16")]; tensor var_36782_begin_0 = const()[name = tensor("op_36782_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_36782_end_0 = const()[name = tensor("op_36782_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_36782_end_mask_0 = const()[name = tensor("op_36782_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36782_cast_fp16 = slice_by_index(begin = var_36782_begin_0, end = var_36782_end_0, end_mask = var_36782_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36782_cast_fp16")]; tensor var_36786_begin_0 = const()[name = tensor("op_36786_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_36786_end_0 = const()[name = tensor("op_36786_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_36786_end_mask_0 = const()[name = tensor("op_36786_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36786_cast_fp16 = slice_by_index(begin = var_36786_begin_0, end = var_36786_end_0, end_mask = var_36786_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36786_cast_fp16")]; tensor var_36790_begin_0 = const()[name = tensor("op_36790_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_36790_end_0 = const()[name = tensor("op_36790_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_36790_end_mask_0 = const()[name = tensor("op_36790_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36790_cast_fp16 = slice_by_index(begin = var_36790_begin_0, end = var_36790_end_0, end_mask = var_36790_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36790_cast_fp16")]; tensor var_36794_begin_0 = const()[name = tensor("op_36794_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_36794_end_0 = const()[name = tensor("op_36794_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_36794_end_mask_0 = const()[name = tensor("op_36794_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36794_cast_fp16 = slice_by_index(begin = var_36794_begin_0, end = var_36794_end_0, end_mask = var_36794_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36794_cast_fp16")]; tensor var_36798_begin_0 = const()[name = tensor("op_36798_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_36798_end_0 = const()[name = tensor("op_36798_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_36798_end_mask_0 = const()[name = tensor("op_36798_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36798_cast_fp16 = slice_by_index(begin = var_36798_begin_0, end = var_36798_end_0, end_mask = var_36798_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36798_cast_fp16")]; tensor var_36802_begin_0 = const()[name = tensor("op_36802_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_36802_end_0 = const()[name = tensor("op_36802_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_36802_end_mask_0 = const()[name = tensor("op_36802_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36802_cast_fp16 = slice_by_index(begin = var_36802_begin_0, end = var_36802_end_0, end_mask = var_36802_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36802_cast_fp16")]; tensor var_36806_begin_0 = const()[name = tensor("op_36806_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_36806_end_0 = const()[name = tensor("op_36806_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_36806_end_mask_0 = const()[name = tensor("op_36806_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36806_cast_fp16 = slice_by_index(begin = var_36806_begin_0, end = var_36806_end_0, end_mask = var_36806_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36806_cast_fp16")]; tensor var_36810_begin_0 = const()[name = tensor("op_36810_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_36810_end_0 = const()[name = tensor("op_36810_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_36810_end_mask_0 = const()[name = tensor("op_36810_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36810_cast_fp16 = slice_by_index(begin = var_36810_begin_0, end = var_36810_end_0, end_mask = var_36810_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36810_cast_fp16")]; tensor var_36814_begin_0 = const()[name = tensor("op_36814_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_36814_end_0 = const()[name = tensor("op_36814_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_36814_end_mask_0 = const()[name = tensor("op_36814_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36814_cast_fp16 = slice_by_index(begin = var_36814_begin_0, end = var_36814_end_0, end_mask = var_36814_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36814_cast_fp16")]; tensor var_36818_begin_0 = const()[name = tensor("op_36818_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_36818_end_0 = const()[name = tensor("op_36818_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_36818_end_mask_0 = const()[name = tensor("op_36818_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_36818_cast_fp16 = slice_by_index(begin = var_36818_begin_0, end = var_36818_end_0, end_mask = var_36818_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_36818_cast_fp16")]; tensor var_36827_begin_0 = const()[name = tensor("op_36827_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36827_end_0 = const()[name = tensor("op_36827_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_36827_end_mask_0 = const()[name = tensor("op_36827_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36827_cast_fp16 = slice_by_index(begin = var_36827_begin_0, end = var_36827_end_0, end_mask = var_36827_end_mask_0, x = var_36742_cast_fp16)[name = tensor("op_36827_cast_fp16")]; tensor var_36834_begin_0 = const()[name = tensor("op_36834_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_36834_end_0 = const()[name = tensor("op_36834_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_36834_end_mask_0 = const()[name = tensor("op_36834_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36834_cast_fp16 = slice_by_index(begin = var_36834_begin_0, end = var_36834_end_0, end_mask = var_36834_end_mask_0, x = var_36742_cast_fp16)[name = tensor("op_36834_cast_fp16")]; tensor var_36841_begin_0 = const()[name = tensor("op_36841_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_36841_end_0 = const()[name = tensor("op_36841_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_36841_end_mask_0 = const()[name = tensor("op_36841_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36841_cast_fp16 = slice_by_index(begin = var_36841_begin_0, end = var_36841_end_0, end_mask = var_36841_end_mask_0, x = var_36742_cast_fp16)[name = tensor("op_36841_cast_fp16")]; tensor var_36848_begin_0 = const()[name = tensor("op_36848_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_36848_end_0 = const()[name = tensor("op_36848_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_36848_end_mask_0 = const()[name = tensor("op_36848_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36848_cast_fp16 = slice_by_index(begin = var_36848_begin_0, end = var_36848_end_0, end_mask = var_36848_end_mask_0, x = var_36742_cast_fp16)[name = tensor("op_36848_cast_fp16")]; tensor var_36855_begin_0 = const()[name = tensor("op_36855_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36855_end_0 = const()[name = tensor("op_36855_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_36855_end_mask_0 = const()[name = tensor("op_36855_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36855_cast_fp16 = slice_by_index(begin = var_36855_begin_0, end = var_36855_end_0, end_mask = var_36855_end_mask_0, x = var_36746_cast_fp16)[name = tensor("op_36855_cast_fp16")]; tensor var_36862_begin_0 = const()[name = tensor("op_36862_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_36862_end_0 = const()[name = tensor("op_36862_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_36862_end_mask_0 = const()[name = tensor("op_36862_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36862_cast_fp16 = slice_by_index(begin = var_36862_begin_0, end = var_36862_end_0, end_mask = var_36862_end_mask_0, x = var_36746_cast_fp16)[name = tensor("op_36862_cast_fp16")]; tensor var_36869_begin_0 = const()[name = tensor("op_36869_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_36869_end_0 = const()[name = tensor("op_36869_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_36869_end_mask_0 = const()[name = tensor("op_36869_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36869_cast_fp16 = slice_by_index(begin = var_36869_begin_0, end = var_36869_end_0, end_mask = var_36869_end_mask_0, x = var_36746_cast_fp16)[name = tensor("op_36869_cast_fp16")]; tensor var_36876_begin_0 = const()[name = tensor("op_36876_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_36876_end_0 = const()[name = tensor("op_36876_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_36876_end_mask_0 = const()[name = tensor("op_36876_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36876_cast_fp16 = slice_by_index(begin = var_36876_begin_0, end = var_36876_end_0, end_mask = var_36876_end_mask_0, x = var_36746_cast_fp16)[name = tensor("op_36876_cast_fp16")]; tensor var_36883_begin_0 = const()[name = tensor("op_36883_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36883_end_0 = const()[name = tensor("op_36883_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_36883_end_mask_0 = const()[name = tensor("op_36883_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36883_cast_fp16 = slice_by_index(begin = var_36883_begin_0, end = var_36883_end_0, end_mask = var_36883_end_mask_0, x = var_36750_cast_fp16)[name = tensor("op_36883_cast_fp16")]; tensor var_36890_begin_0 = const()[name = tensor("op_36890_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_36890_end_0 = const()[name = tensor("op_36890_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_36890_end_mask_0 = const()[name = tensor("op_36890_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36890_cast_fp16 = slice_by_index(begin = var_36890_begin_0, end = var_36890_end_0, end_mask = var_36890_end_mask_0, x = var_36750_cast_fp16)[name = tensor("op_36890_cast_fp16")]; tensor var_36897_begin_0 = const()[name = tensor("op_36897_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_36897_end_0 = const()[name = tensor("op_36897_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_36897_end_mask_0 = const()[name = tensor("op_36897_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36897_cast_fp16 = slice_by_index(begin = var_36897_begin_0, end = var_36897_end_0, end_mask = var_36897_end_mask_0, x = var_36750_cast_fp16)[name = tensor("op_36897_cast_fp16")]; tensor var_36904_begin_0 = const()[name = tensor("op_36904_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_36904_end_0 = const()[name = tensor("op_36904_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_36904_end_mask_0 = const()[name = tensor("op_36904_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36904_cast_fp16 = slice_by_index(begin = var_36904_begin_0, end = var_36904_end_0, end_mask = var_36904_end_mask_0, x = var_36750_cast_fp16)[name = tensor("op_36904_cast_fp16")]; tensor var_36911_begin_0 = const()[name = tensor("op_36911_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36911_end_0 = const()[name = tensor("op_36911_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_36911_end_mask_0 = const()[name = tensor("op_36911_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36911_cast_fp16 = slice_by_index(begin = var_36911_begin_0, end = var_36911_end_0, end_mask = var_36911_end_mask_0, x = var_36754_cast_fp16)[name = tensor("op_36911_cast_fp16")]; tensor var_36918_begin_0 = const()[name = tensor("op_36918_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_36918_end_0 = const()[name = tensor("op_36918_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_36918_end_mask_0 = const()[name = tensor("op_36918_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36918_cast_fp16 = slice_by_index(begin = var_36918_begin_0, end = var_36918_end_0, end_mask = var_36918_end_mask_0, x = var_36754_cast_fp16)[name = tensor("op_36918_cast_fp16")]; tensor var_36925_begin_0 = const()[name = tensor("op_36925_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_36925_end_0 = const()[name = tensor("op_36925_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_36925_end_mask_0 = const()[name = tensor("op_36925_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36925_cast_fp16 = slice_by_index(begin = var_36925_begin_0, end = var_36925_end_0, end_mask = var_36925_end_mask_0, x = var_36754_cast_fp16)[name = tensor("op_36925_cast_fp16")]; tensor var_36932_begin_0 = const()[name = tensor("op_36932_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_36932_end_0 = const()[name = tensor("op_36932_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_36932_end_mask_0 = const()[name = tensor("op_36932_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36932_cast_fp16 = slice_by_index(begin = var_36932_begin_0, end = var_36932_end_0, end_mask = var_36932_end_mask_0, x = var_36754_cast_fp16)[name = tensor("op_36932_cast_fp16")]; tensor var_36939_begin_0 = const()[name = tensor("op_36939_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36939_end_0 = const()[name = tensor("op_36939_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_36939_end_mask_0 = const()[name = tensor("op_36939_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36939_cast_fp16 = slice_by_index(begin = var_36939_begin_0, end = var_36939_end_0, end_mask = var_36939_end_mask_0, x = var_36758_cast_fp16)[name = tensor("op_36939_cast_fp16")]; tensor var_36946_begin_0 = const()[name = tensor("op_36946_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_36946_end_0 = const()[name = tensor("op_36946_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_36946_end_mask_0 = const()[name = tensor("op_36946_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36946_cast_fp16 = slice_by_index(begin = var_36946_begin_0, end = var_36946_end_0, end_mask = var_36946_end_mask_0, x = var_36758_cast_fp16)[name = tensor("op_36946_cast_fp16")]; tensor var_36953_begin_0 = const()[name = tensor("op_36953_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_36953_end_0 = const()[name = tensor("op_36953_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_36953_end_mask_0 = const()[name = tensor("op_36953_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36953_cast_fp16 = slice_by_index(begin = var_36953_begin_0, end = var_36953_end_0, end_mask = var_36953_end_mask_0, x = var_36758_cast_fp16)[name = tensor("op_36953_cast_fp16")]; tensor var_36960_begin_0 = const()[name = tensor("op_36960_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_36960_end_0 = const()[name = tensor("op_36960_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_36960_end_mask_0 = const()[name = tensor("op_36960_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36960_cast_fp16 = slice_by_index(begin = var_36960_begin_0, end = var_36960_end_0, end_mask = var_36960_end_mask_0, x = var_36758_cast_fp16)[name = tensor("op_36960_cast_fp16")]; tensor var_36967_begin_0 = const()[name = tensor("op_36967_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36967_end_0 = const()[name = tensor("op_36967_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_36967_end_mask_0 = const()[name = tensor("op_36967_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36967_cast_fp16 = slice_by_index(begin = var_36967_begin_0, end = var_36967_end_0, end_mask = var_36967_end_mask_0, x = var_36762_cast_fp16)[name = tensor("op_36967_cast_fp16")]; tensor var_36974_begin_0 = const()[name = tensor("op_36974_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_36974_end_0 = const()[name = tensor("op_36974_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_36974_end_mask_0 = const()[name = tensor("op_36974_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36974_cast_fp16 = slice_by_index(begin = var_36974_begin_0, end = var_36974_end_0, end_mask = var_36974_end_mask_0, x = var_36762_cast_fp16)[name = tensor("op_36974_cast_fp16")]; tensor var_36981_begin_0 = const()[name = tensor("op_36981_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_36981_end_0 = const()[name = tensor("op_36981_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_36981_end_mask_0 = const()[name = tensor("op_36981_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36981_cast_fp16 = slice_by_index(begin = var_36981_begin_0, end = var_36981_end_0, end_mask = var_36981_end_mask_0, x = var_36762_cast_fp16)[name = tensor("op_36981_cast_fp16")]; tensor var_36988_begin_0 = const()[name = tensor("op_36988_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_36988_end_0 = const()[name = tensor("op_36988_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_36988_end_mask_0 = const()[name = tensor("op_36988_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36988_cast_fp16 = slice_by_index(begin = var_36988_begin_0, end = var_36988_end_0, end_mask = var_36988_end_mask_0, x = var_36762_cast_fp16)[name = tensor("op_36988_cast_fp16")]; tensor var_36995_begin_0 = const()[name = tensor("op_36995_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_36995_end_0 = const()[name = tensor("op_36995_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_36995_end_mask_0 = const()[name = tensor("op_36995_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_36995_cast_fp16 = slice_by_index(begin = var_36995_begin_0, end = var_36995_end_0, end_mask = var_36995_end_mask_0, x = var_36766_cast_fp16)[name = tensor("op_36995_cast_fp16")]; tensor var_37002_begin_0 = const()[name = tensor("op_37002_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37002_end_0 = const()[name = tensor("op_37002_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37002_end_mask_0 = const()[name = tensor("op_37002_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37002_cast_fp16 = slice_by_index(begin = var_37002_begin_0, end = var_37002_end_0, end_mask = var_37002_end_mask_0, x = var_36766_cast_fp16)[name = tensor("op_37002_cast_fp16")]; tensor var_37009_begin_0 = const()[name = tensor("op_37009_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37009_end_0 = const()[name = tensor("op_37009_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37009_end_mask_0 = const()[name = tensor("op_37009_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37009_cast_fp16 = slice_by_index(begin = var_37009_begin_0, end = var_37009_end_0, end_mask = var_37009_end_mask_0, x = var_36766_cast_fp16)[name = tensor("op_37009_cast_fp16")]; tensor var_37016_begin_0 = const()[name = tensor("op_37016_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37016_end_0 = const()[name = tensor("op_37016_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37016_end_mask_0 = const()[name = tensor("op_37016_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37016_cast_fp16 = slice_by_index(begin = var_37016_begin_0, end = var_37016_end_0, end_mask = var_37016_end_mask_0, x = var_36766_cast_fp16)[name = tensor("op_37016_cast_fp16")]; tensor var_37023_begin_0 = const()[name = tensor("op_37023_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37023_end_0 = const()[name = tensor("op_37023_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37023_end_mask_0 = const()[name = tensor("op_37023_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37023_cast_fp16 = slice_by_index(begin = var_37023_begin_0, end = var_37023_end_0, end_mask = var_37023_end_mask_0, x = var_36770_cast_fp16)[name = tensor("op_37023_cast_fp16")]; tensor var_37030_begin_0 = const()[name = tensor("op_37030_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37030_end_0 = const()[name = tensor("op_37030_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37030_end_mask_0 = const()[name = tensor("op_37030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37030_cast_fp16 = slice_by_index(begin = var_37030_begin_0, end = var_37030_end_0, end_mask = var_37030_end_mask_0, x = var_36770_cast_fp16)[name = tensor("op_37030_cast_fp16")]; tensor var_37037_begin_0 = const()[name = tensor("op_37037_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37037_end_0 = const()[name = tensor("op_37037_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37037_end_mask_0 = const()[name = tensor("op_37037_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37037_cast_fp16 = slice_by_index(begin = var_37037_begin_0, end = var_37037_end_0, end_mask = var_37037_end_mask_0, x = var_36770_cast_fp16)[name = tensor("op_37037_cast_fp16")]; tensor var_37044_begin_0 = const()[name = tensor("op_37044_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37044_end_0 = const()[name = tensor("op_37044_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37044_end_mask_0 = const()[name = tensor("op_37044_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37044_cast_fp16 = slice_by_index(begin = var_37044_begin_0, end = var_37044_end_0, end_mask = var_37044_end_mask_0, x = var_36770_cast_fp16)[name = tensor("op_37044_cast_fp16")]; tensor var_37051_begin_0 = const()[name = tensor("op_37051_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37051_end_0 = const()[name = tensor("op_37051_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37051_end_mask_0 = const()[name = tensor("op_37051_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37051_cast_fp16 = slice_by_index(begin = var_37051_begin_0, end = var_37051_end_0, end_mask = var_37051_end_mask_0, x = var_36774_cast_fp16)[name = tensor("op_37051_cast_fp16")]; tensor var_37058_begin_0 = const()[name = tensor("op_37058_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37058_end_0 = const()[name = tensor("op_37058_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37058_end_mask_0 = const()[name = tensor("op_37058_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37058_cast_fp16 = slice_by_index(begin = var_37058_begin_0, end = var_37058_end_0, end_mask = var_37058_end_mask_0, x = var_36774_cast_fp16)[name = tensor("op_37058_cast_fp16")]; tensor var_37065_begin_0 = const()[name = tensor("op_37065_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37065_end_0 = const()[name = tensor("op_37065_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37065_end_mask_0 = const()[name = tensor("op_37065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37065_cast_fp16 = slice_by_index(begin = var_37065_begin_0, end = var_37065_end_0, end_mask = var_37065_end_mask_0, x = var_36774_cast_fp16)[name = tensor("op_37065_cast_fp16")]; tensor var_37072_begin_0 = const()[name = tensor("op_37072_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37072_end_0 = const()[name = tensor("op_37072_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37072_end_mask_0 = const()[name = tensor("op_37072_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37072_cast_fp16 = slice_by_index(begin = var_37072_begin_0, end = var_37072_end_0, end_mask = var_37072_end_mask_0, x = var_36774_cast_fp16)[name = tensor("op_37072_cast_fp16")]; tensor var_37079_begin_0 = const()[name = tensor("op_37079_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37079_end_0 = const()[name = tensor("op_37079_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37079_end_mask_0 = const()[name = tensor("op_37079_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37079_cast_fp16 = slice_by_index(begin = var_37079_begin_0, end = var_37079_end_0, end_mask = var_37079_end_mask_0, x = var_36778_cast_fp16)[name = tensor("op_37079_cast_fp16")]; tensor var_37086_begin_0 = const()[name = tensor("op_37086_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37086_end_0 = const()[name = tensor("op_37086_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37086_end_mask_0 = const()[name = tensor("op_37086_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37086_cast_fp16 = slice_by_index(begin = var_37086_begin_0, end = var_37086_end_0, end_mask = var_37086_end_mask_0, x = var_36778_cast_fp16)[name = tensor("op_37086_cast_fp16")]; tensor var_37093_begin_0 = const()[name = tensor("op_37093_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37093_end_0 = const()[name = tensor("op_37093_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37093_end_mask_0 = const()[name = tensor("op_37093_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37093_cast_fp16 = slice_by_index(begin = var_37093_begin_0, end = var_37093_end_0, end_mask = var_37093_end_mask_0, x = var_36778_cast_fp16)[name = tensor("op_37093_cast_fp16")]; tensor var_37100_begin_0 = const()[name = tensor("op_37100_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37100_end_0 = const()[name = tensor("op_37100_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37100_end_mask_0 = const()[name = tensor("op_37100_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37100_cast_fp16 = slice_by_index(begin = var_37100_begin_0, end = var_37100_end_0, end_mask = var_37100_end_mask_0, x = var_36778_cast_fp16)[name = tensor("op_37100_cast_fp16")]; tensor var_37107_begin_0 = const()[name = tensor("op_37107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37107_end_0 = const()[name = tensor("op_37107_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37107_end_mask_0 = const()[name = tensor("op_37107_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37107_cast_fp16 = slice_by_index(begin = var_37107_begin_0, end = var_37107_end_0, end_mask = var_37107_end_mask_0, x = var_36782_cast_fp16)[name = tensor("op_37107_cast_fp16")]; tensor var_37114_begin_0 = const()[name = tensor("op_37114_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37114_end_0 = const()[name = tensor("op_37114_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37114_end_mask_0 = const()[name = tensor("op_37114_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37114_cast_fp16 = slice_by_index(begin = var_37114_begin_0, end = var_37114_end_0, end_mask = var_37114_end_mask_0, x = var_36782_cast_fp16)[name = tensor("op_37114_cast_fp16")]; tensor var_37121_begin_0 = const()[name = tensor("op_37121_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37121_end_0 = const()[name = tensor("op_37121_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37121_end_mask_0 = const()[name = tensor("op_37121_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37121_cast_fp16 = slice_by_index(begin = var_37121_begin_0, end = var_37121_end_0, end_mask = var_37121_end_mask_0, x = var_36782_cast_fp16)[name = tensor("op_37121_cast_fp16")]; tensor var_37128_begin_0 = const()[name = tensor("op_37128_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37128_end_0 = const()[name = tensor("op_37128_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37128_end_mask_0 = const()[name = tensor("op_37128_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37128_cast_fp16 = slice_by_index(begin = var_37128_begin_0, end = var_37128_end_0, end_mask = var_37128_end_mask_0, x = var_36782_cast_fp16)[name = tensor("op_37128_cast_fp16")]; tensor var_37135_begin_0 = const()[name = tensor("op_37135_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37135_end_0 = const()[name = tensor("op_37135_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37135_end_mask_0 = const()[name = tensor("op_37135_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37135_cast_fp16 = slice_by_index(begin = var_37135_begin_0, end = var_37135_end_0, end_mask = var_37135_end_mask_0, x = var_36786_cast_fp16)[name = tensor("op_37135_cast_fp16")]; tensor var_37142_begin_0 = const()[name = tensor("op_37142_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37142_end_0 = const()[name = tensor("op_37142_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37142_end_mask_0 = const()[name = tensor("op_37142_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37142_cast_fp16 = slice_by_index(begin = var_37142_begin_0, end = var_37142_end_0, end_mask = var_37142_end_mask_0, x = var_36786_cast_fp16)[name = tensor("op_37142_cast_fp16")]; tensor var_37149_begin_0 = const()[name = tensor("op_37149_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37149_end_0 = const()[name = tensor("op_37149_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37149_end_mask_0 = const()[name = tensor("op_37149_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37149_cast_fp16 = slice_by_index(begin = var_37149_begin_0, end = var_37149_end_0, end_mask = var_37149_end_mask_0, x = var_36786_cast_fp16)[name = tensor("op_37149_cast_fp16")]; tensor var_37156_begin_0 = const()[name = tensor("op_37156_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37156_end_0 = const()[name = tensor("op_37156_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37156_end_mask_0 = const()[name = tensor("op_37156_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37156_cast_fp16 = slice_by_index(begin = var_37156_begin_0, end = var_37156_end_0, end_mask = var_37156_end_mask_0, x = var_36786_cast_fp16)[name = tensor("op_37156_cast_fp16")]; tensor var_37163_begin_0 = const()[name = tensor("op_37163_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37163_end_0 = const()[name = tensor("op_37163_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37163_end_mask_0 = const()[name = tensor("op_37163_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37163_cast_fp16 = slice_by_index(begin = var_37163_begin_0, end = var_37163_end_0, end_mask = var_37163_end_mask_0, x = var_36790_cast_fp16)[name = tensor("op_37163_cast_fp16")]; tensor var_37170_begin_0 = const()[name = tensor("op_37170_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37170_end_0 = const()[name = tensor("op_37170_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37170_end_mask_0 = const()[name = tensor("op_37170_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37170_cast_fp16 = slice_by_index(begin = var_37170_begin_0, end = var_37170_end_0, end_mask = var_37170_end_mask_0, x = var_36790_cast_fp16)[name = tensor("op_37170_cast_fp16")]; tensor var_37177_begin_0 = const()[name = tensor("op_37177_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37177_end_0 = const()[name = tensor("op_37177_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37177_end_mask_0 = const()[name = tensor("op_37177_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37177_cast_fp16 = slice_by_index(begin = var_37177_begin_0, end = var_37177_end_0, end_mask = var_37177_end_mask_0, x = var_36790_cast_fp16)[name = tensor("op_37177_cast_fp16")]; tensor var_37184_begin_0 = const()[name = tensor("op_37184_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37184_end_0 = const()[name = tensor("op_37184_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37184_end_mask_0 = const()[name = tensor("op_37184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37184_cast_fp16 = slice_by_index(begin = var_37184_begin_0, end = var_37184_end_0, end_mask = var_37184_end_mask_0, x = var_36790_cast_fp16)[name = tensor("op_37184_cast_fp16")]; tensor var_37191_begin_0 = const()[name = tensor("op_37191_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37191_end_0 = const()[name = tensor("op_37191_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37191_end_mask_0 = const()[name = tensor("op_37191_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37191_cast_fp16 = slice_by_index(begin = var_37191_begin_0, end = var_37191_end_0, end_mask = var_37191_end_mask_0, x = var_36794_cast_fp16)[name = tensor("op_37191_cast_fp16")]; tensor var_37198_begin_0 = const()[name = tensor("op_37198_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37198_end_0 = const()[name = tensor("op_37198_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37198_end_mask_0 = const()[name = tensor("op_37198_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37198_cast_fp16 = slice_by_index(begin = var_37198_begin_0, end = var_37198_end_0, end_mask = var_37198_end_mask_0, x = var_36794_cast_fp16)[name = tensor("op_37198_cast_fp16")]; tensor var_37205_begin_0 = const()[name = tensor("op_37205_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37205_end_0 = const()[name = tensor("op_37205_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37205_end_mask_0 = const()[name = tensor("op_37205_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37205_cast_fp16 = slice_by_index(begin = var_37205_begin_0, end = var_37205_end_0, end_mask = var_37205_end_mask_0, x = var_36794_cast_fp16)[name = tensor("op_37205_cast_fp16")]; tensor var_37212_begin_0 = const()[name = tensor("op_37212_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37212_end_0 = const()[name = tensor("op_37212_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37212_end_mask_0 = const()[name = tensor("op_37212_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37212_cast_fp16 = slice_by_index(begin = var_37212_begin_0, end = var_37212_end_0, end_mask = var_37212_end_mask_0, x = var_36794_cast_fp16)[name = tensor("op_37212_cast_fp16")]; tensor var_37219_begin_0 = const()[name = tensor("op_37219_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37219_end_0 = const()[name = tensor("op_37219_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37219_end_mask_0 = const()[name = tensor("op_37219_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37219_cast_fp16 = slice_by_index(begin = var_37219_begin_0, end = var_37219_end_0, end_mask = var_37219_end_mask_0, x = var_36798_cast_fp16)[name = tensor("op_37219_cast_fp16")]; tensor var_37226_begin_0 = const()[name = tensor("op_37226_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37226_end_0 = const()[name = tensor("op_37226_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37226_end_mask_0 = const()[name = tensor("op_37226_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37226_cast_fp16 = slice_by_index(begin = var_37226_begin_0, end = var_37226_end_0, end_mask = var_37226_end_mask_0, x = var_36798_cast_fp16)[name = tensor("op_37226_cast_fp16")]; tensor var_37233_begin_0 = const()[name = tensor("op_37233_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37233_end_0 = const()[name = tensor("op_37233_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37233_end_mask_0 = const()[name = tensor("op_37233_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37233_cast_fp16 = slice_by_index(begin = var_37233_begin_0, end = var_37233_end_0, end_mask = var_37233_end_mask_0, x = var_36798_cast_fp16)[name = tensor("op_37233_cast_fp16")]; tensor var_37240_begin_0 = const()[name = tensor("op_37240_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37240_end_0 = const()[name = tensor("op_37240_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37240_end_mask_0 = const()[name = tensor("op_37240_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37240_cast_fp16 = slice_by_index(begin = var_37240_begin_0, end = var_37240_end_0, end_mask = var_37240_end_mask_0, x = var_36798_cast_fp16)[name = tensor("op_37240_cast_fp16")]; tensor var_37247_begin_0 = const()[name = tensor("op_37247_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37247_end_0 = const()[name = tensor("op_37247_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37247_end_mask_0 = const()[name = tensor("op_37247_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37247_cast_fp16 = slice_by_index(begin = var_37247_begin_0, end = var_37247_end_0, end_mask = var_37247_end_mask_0, x = var_36802_cast_fp16)[name = tensor("op_37247_cast_fp16")]; tensor var_37254_begin_0 = const()[name = tensor("op_37254_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37254_end_0 = const()[name = tensor("op_37254_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37254_end_mask_0 = const()[name = tensor("op_37254_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37254_cast_fp16 = slice_by_index(begin = var_37254_begin_0, end = var_37254_end_0, end_mask = var_37254_end_mask_0, x = var_36802_cast_fp16)[name = tensor("op_37254_cast_fp16")]; tensor var_37261_begin_0 = const()[name = tensor("op_37261_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37261_end_0 = const()[name = tensor("op_37261_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37261_end_mask_0 = const()[name = tensor("op_37261_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37261_cast_fp16 = slice_by_index(begin = var_37261_begin_0, end = var_37261_end_0, end_mask = var_37261_end_mask_0, x = var_36802_cast_fp16)[name = tensor("op_37261_cast_fp16")]; tensor var_37268_begin_0 = const()[name = tensor("op_37268_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37268_end_0 = const()[name = tensor("op_37268_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37268_end_mask_0 = const()[name = tensor("op_37268_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37268_cast_fp16 = slice_by_index(begin = var_37268_begin_0, end = var_37268_end_0, end_mask = var_37268_end_mask_0, x = var_36802_cast_fp16)[name = tensor("op_37268_cast_fp16")]; tensor var_37275_begin_0 = const()[name = tensor("op_37275_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37275_end_0 = const()[name = tensor("op_37275_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37275_end_mask_0 = const()[name = tensor("op_37275_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37275_cast_fp16 = slice_by_index(begin = var_37275_begin_0, end = var_37275_end_0, end_mask = var_37275_end_mask_0, x = var_36806_cast_fp16)[name = tensor("op_37275_cast_fp16")]; tensor var_37282_begin_0 = const()[name = tensor("op_37282_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37282_end_0 = const()[name = tensor("op_37282_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37282_end_mask_0 = const()[name = tensor("op_37282_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37282_cast_fp16 = slice_by_index(begin = var_37282_begin_0, end = var_37282_end_0, end_mask = var_37282_end_mask_0, x = var_36806_cast_fp16)[name = tensor("op_37282_cast_fp16")]; tensor var_37289_begin_0 = const()[name = tensor("op_37289_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37289_end_0 = const()[name = tensor("op_37289_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37289_end_mask_0 = const()[name = tensor("op_37289_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37289_cast_fp16 = slice_by_index(begin = var_37289_begin_0, end = var_37289_end_0, end_mask = var_37289_end_mask_0, x = var_36806_cast_fp16)[name = tensor("op_37289_cast_fp16")]; tensor var_37296_begin_0 = const()[name = tensor("op_37296_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37296_end_0 = const()[name = tensor("op_37296_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37296_end_mask_0 = const()[name = tensor("op_37296_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37296_cast_fp16 = slice_by_index(begin = var_37296_begin_0, end = var_37296_end_0, end_mask = var_37296_end_mask_0, x = var_36806_cast_fp16)[name = tensor("op_37296_cast_fp16")]; tensor var_37303_begin_0 = const()[name = tensor("op_37303_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37303_end_0 = const()[name = tensor("op_37303_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37303_end_mask_0 = const()[name = tensor("op_37303_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37303_cast_fp16 = slice_by_index(begin = var_37303_begin_0, end = var_37303_end_0, end_mask = var_37303_end_mask_0, x = var_36810_cast_fp16)[name = tensor("op_37303_cast_fp16")]; tensor var_37310_begin_0 = const()[name = tensor("op_37310_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37310_end_0 = const()[name = tensor("op_37310_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37310_end_mask_0 = const()[name = tensor("op_37310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37310_cast_fp16 = slice_by_index(begin = var_37310_begin_0, end = var_37310_end_0, end_mask = var_37310_end_mask_0, x = var_36810_cast_fp16)[name = tensor("op_37310_cast_fp16")]; tensor var_37317_begin_0 = const()[name = tensor("op_37317_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37317_end_0 = const()[name = tensor("op_37317_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37317_end_mask_0 = const()[name = tensor("op_37317_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37317_cast_fp16 = slice_by_index(begin = var_37317_begin_0, end = var_37317_end_0, end_mask = var_37317_end_mask_0, x = var_36810_cast_fp16)[name = tensor("op_37317_cast_fp16")]; tensor var_37324_begin_0 = const()[name = tensor("op_37324_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37324_end_0 = const()[name = tensor("op_37324_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37324_end_mask_0 = const()[name = tensor("op_37324_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37324_cast_fp16 = slice_by_index(begin = var_37324_begin_0, end = var_37324_end_0, end_mask = var_37324_end_mask_0, x = var_36810_cast_fp16)[name = tensor("op_37324_cast_fp16")]; tensor var_37331_begin_0 = const()[name = tensor("op_37331_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37331_end_0 = const()[name = tensor("op_37331_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37331_end_mask_0 = const()[name = tensor("op_37331_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37331_cast_fp16 = slice_by_index(begin = var_37331_begin_0, end = var_37331_end_0, end_mask = var_37331_end_mask_0, x = var_36814_cast_fp16)[name = tensor("op_37331_cast_fp16")]; tensor var_37338_begin_0 = const()[name = tensor("op_37338_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37338_end_0 = const()[name = tensor("op_37338_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37338_end_mask_0 = const()[name = tensor("op_37338_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37338_cast_fp16 = slice_by_index(begin = var_37338_begin_0, end = var_37338_end_0, end_mask = var_37338_end_mask_0, x = var_36814_cast_fp16)[name = tensor("op_37338_cast_fp16")]; tensor var_37345_begin_0 = const()[name = tensor("op_37345_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37345_end_0 = const()[name = tensor("op_37345_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37345_end_mask_0 = const()[name = tensor("op_37345_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37345_cast_fp16 = slice_by_index(begin = var_37345_begin_0, end = var_37345_end_0, end_mask = var_37345_end_mask_0, x = var_36814_cast_fp16)[name = tensor("op_37345_cast_fp16")]; tensor var_37352_begin_0 = const()[name = tensor("op_37352_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37352_end_0 = const()[name = tensor("op_37352_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37352_end_mask_0 = const()[name = tensor("op_37352_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37352_cast_fp16 = slice_by_index(begin = var_37352_begin_0, end = var_37352_end_0, end_mask = var_37352_end_mask_0, x = var_36814_cast_fp16)[name = tensor("op_37352_cast_fp16")]; tensor var_37359_begin_0 = const()[name = tensor("op_37359_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37359_end_0 = const()[name = tensor("op_37359_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_37359_end_mask_0 = const()[name = tensor("op_37359_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37359_cast_fp16 = slice_by_index(begin = var_37359_begin_0, end = var_37359_end_0, end_mask = var_37359_end_mask_0, x = var_36818_cast_fp16)[name = tensor("op_37359_cast_fp16")]; tensor var_37366_begin_0 = const()[name = tensor("op_37366_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_37366_end_0 = const()[name = tensor("op_37366_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_37366_end_mask_0 = const()[name = tensor("op_37366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37366_cast_fp16 = slice_by_index(begin = var_37366_begin_0, end = var_37366_end_0, end_mask = var_37366_end_mask_0, x = var_36818_cast_fp16)[name = tensor("op_37366_cast_fp16")]; tensor var_37373_begin_0 = const()[name = tensor("op_37373_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_37373_end_0 = const()[name = tensor("op_37373_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_37373_end_mask_0 = const()[name = tensor("op_37373_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37373_cast_fp16 = slice_by_index(begin = var_37373_begin_0, end = var_37373_end_0, end_mask = var_37373_end_mask_0, x = var_36818_cast_fp16)[name = tensor("op_37373_cast_fp16")]; tensor var_37380_begin_0 = const()[name = tensor("op_37380_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_37380_end_0 = const()[name = tensor("op_37380_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37380_end_mask_0 = const()[name = tensor("op_37380_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37380_cast_fp16 = slice_by_index(begin = var_37380_begin_0, end = var_37380_end_0, end_mask = var_37380_end_mask_0, x = var_36818_cast_fp16)[name = tensor("op_37380_cast_fp16")]; tensor k_47_perm_0 = const()[name = tensor("k_47_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_37385_begin_0 = const()[name = tensor("op_37385_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37385_end_0 = const()[name = tensor("op_37385_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_37385_end_mask_0 = const()[name = tensor("op_37385_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_47_cast_fp16 = transpose(perm = k_47_perm_0, x = key_47_cast_fp16)[name = tensor("transpose_8")]; tensor var_37385_cast_fp16 = slice_by_index(begin = var_37385_begin_0, end = var_37385_end_0, end_mask = var_37385_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37385_cast_fp16")]; tensor var_37389_begin_0 = const()[name = tensor("op_37389_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_37389_end_0 = const()[name = tensor("op_37389_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_37389_end_mask_0 = const()[name = tensor("op_37389_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37389_cast_fp16 = slice_by_index(begin = var_37389_begin_0, end = var_37389_end_0, end_mask = var_37389_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37389_cast_fp16")]; tensor var_37393_begin_0 = const()[name = tensor("op_37393_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_37393_end_0 = const()[name = tensor("op_37393_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_37393_end_mask_0 = const()[name = tensor("op_37393_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37393_cast_fp16 = slice_by_index(begin = var_37393_begin_0, end = var_37393_end_0, end_mask = var_37393_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37393_cast_fp16")]; tensor var_37397_begin_0 = const()[name = tensor("op_37397_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_37397_end_0 = const()[name = tensor("op_37397_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_37397_end_mask_0 = const()[name = tensor("op_37397_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37397_cast_fp16 = slice_by_index(begin = var_37397_begin_0, end = var_37397_end_0, end_mask = var_37397_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37397_cast_fp16")]; tensor var_37401_begin_0 = const()[name = tensor("op_37401_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_37401_end_0 = const()[name = tensor("op_37401_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_37401_end_mask_0 = const()[name = tensor("op_37401_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37401_cast_fp16 = slice_by_index(begin = var_37401_begin_0, end = var_37401_end_0, end_mask = var_37401_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37401_cast_fp16")]; tensor var_37405_begin_0 = const()[name = tensor("op_37405_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_37405_end_0 = const()[name = tensor("op_37405_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_37405_end_mask_0 = const()[name = tensor("op_37405_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37405_cast_fp16 = slice_by_index(begin = var_37405_begin_0, end = var_37405_end_0, end_mask = var_37405_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37405_cast_fp16")]; tensor var_37409_begin_0 = const()[name = tensor("op_37409_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_37409_end_0 = const()[name = tensor("op_37409_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_37409_end_mask_0 = const()[name = tensor("op_37409_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37409_cast_fp16 = slice_by_index(begin = var_37409_begin_0, end = var_37409_end_0, end_mask = var_37409_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37409_cast_fp16")]; tensor var_37413_begin_0 = const()[name = tensor("op_37413_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_37413_end_0 = const()[name = tensor("op_37413_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_37413_end_mask_0 = const()[name = tensor("op_37413_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37413_cast_fp16 = slice_by_index(begin = var_37413_begin_0, end = var_37413_end_0, end_mask = var_37413_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37413_cast_fp16")]; tensor var_37417_begin_0 = const()[name = tensor("op_37417_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_37417_end_0 = const()[name = tensor("op_37417_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_37417_end_mask_0 = const()[name = tensor("op_37417_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37417_cast_fp16 = slice_by_index(begin = var_37417_begin_0, end = var_37417_end_0, end_mask = var_37417_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37417_cast_fp16")]; tensor var_37421_begin_0 = const()[name = tensor("op_37421_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_37421_end_0 = const()[name = tensor("op_37421_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_37421_end_mask_0 = const()[name = tensor("op_37421_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37421_cast_fp16 = slice_by_index(begin = var_37421_begin_0, end = var_37421_end_0, end_mask = var_37421_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37421_cast_fp16")]; tensor var_37425_begin_0 = const()[name = tensor("op_37425_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_37425_end_0 = const()[name = tensor("op_37425_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_37425_end_mask_0 = const()[name = tensor("op_37425_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37425_cast_fp16 = slice_by_index(begin = var_37425_begin_0, end = var_37425_end_0, end_mask = var_37425_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37425_cast_fp16")]; tensor var_37429_begin_0 = const()[name = tensor("op_37429_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_37429_end_0 = const()[name = tensor("op_37429_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_37429_end_mask_0 = const()[name = tensor("op_37429_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37429_cast_fp16 = slice_by_index(begin = var_37429_begin_0, end = var_37429_end_0, end_mask = var_37429_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37429_cast_fp16")]; tensor var_37433_begin_0 = const()[name = tensor("op_37433_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_37433_end_0 = const()[name = tensor("op_37433_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_37433_end_mask_0 = const()[name = tensor("op_37433_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37433_cast_fp16 = slice_by_index(begin = var_37433_begin_0, end = var_37433_end_0, end_mask = var_37433_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37433_cast_fp16")]; tensor var_37437_begin_0 = const()[name = tensor("op_37437_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_37437_end_0 = const()[name = tensor("op_37437_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_37437_end_mask_0 = const()[name = tensor("op_37437_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37437_cast_fp16 = slice_by_index(begin = var_37437_begin_0, end = var_37437_end_0, end_mask = var_37437_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37437_cast_fp16")]; tensor var_37441_begin_0 = const()[name = tensor("op_37441_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_37441_end_0 = const()[name = tensor("op_37441_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_37441_end_mask_0 = const()[name = tensor("op_37441_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37441_cast_fp16 = slice_by_index(begin = var_37441_begin_0, end = var_37441_end_0, end_mask = var_37441_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37441_cast_fp16")]; tensor var_37445_begin_0 = const()[name = tensor("op_37445_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_37445_end_0 = const()[name = tensor("op_37445_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_37445_end_mask_0 = const()[name = tensor("op_37445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37445_cast_fp16 = slice_by_index(begin = var_37445_begin_0, end = var_37445_end_0, end_mask = var_37445_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37445_cast_fp16")]; tensor var_37449_begin_0 = const()[name = tensor("op_37449_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_37449_end_0 = const()[name = tensor("op_37449_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_37449_end_mask_0 = const()[name = tensor("op_37449_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37449_cast_fp16 = slice_by_index(begin = var_37449_begin_0, end = var_37449_end_0, end_mask = var_37449_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37449_cast_fp16")]; tensor var_37453_begin_0 = const()[name = tensor("op_37453_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_37453_end_0 = const()[name = tensor("op_37453_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_37453_end_mask_0 = const()[name = tensor("op_37453_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37453_cast_fp16 = slice_by_index(begin = var_37453_begin_0, end = var_37453_end_0, end_mask = var_37453_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37453_cast_fp16")]; tensor var_37457_begin_0 = const()[name = tensor("op_37457_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_37457_end_0 = const()[name = tensor("op_37457_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_37457_end_mask_0 = const()[name = tensor("op_37457_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37457_cast_fp16 = slice_by_index(begin = var_37457_begin_0, end = var_37457_end_0, end_mask = var_37457_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37457_cast_fp16")]; tensor var_37461_begin_0 = const()[name = tensor("op_37461_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_37461_end_0 = const()[name = tensor("op_37461_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_37461_end_mask_0 = const()[name = tensor("op_37461_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_37461_cast_fp16 = slice_by_index(begin = var_37461_begin_0, end = var_37461_end_0, end_mask = var_37461_end_mask_0, x = k_47_cast_fp16)[name = tensor("op_37461_cast_fp16")]; tensor var_37463_begin_0 = const()[name = tensor("op_37463_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_37463_end_0 = const()[name = tensor("op_37463_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_37463_end_mask_0 = const()[name = tensor("op_37463_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37463_cast_fp16 = slice_by_index(begin = var_37463_begin_0, end = var_37463_end_0, end_mask = var_37463_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37463_cast_fp16")]; tensor var_37467_begin_0 = const()[name = tensor("op_37467_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_37467_end_0 = const()[name = tensor("op_37467_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_37467_end_mask_0 = const()[name = tensor("op_37467_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37467_cast_fp16 = slice_by_index(begin = var_37467_begin_0, end = var_37467_end_0, end_mask = var_37467_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37467_cast_fp16")]; tensor var_37471_begin_0 = const()[name = tensor("op_37471_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_37471_end_0 = const()[name = tensor("op_37471_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_37471_end_mask_0 = const()[name = tensor("op_37471_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37471_cast_fp16 = slice_by_index(begin = var_37471_begin_0, end = var_37471_end_0, end_mask = var_37471_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37471_cast_fp16")]; tensor var_37475_begin_0 = const()[name = tensor("op_37475_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_37475_end_0 = const()[name = tensor("op_37475_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_37475_end_mask_0 = const()[name = tensor("op_37475_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37475_cast_fp16 = slice_by_index(begin = var_37475_begin_0, end = var_37475_end_0, end_mask = var_37475_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37475_cast_fp16")]; tensor var_37479_begin_0 = const()[name = tensor("op_37479_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_37479_end_0 = const()[name = tensor("op_37479_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_37479_end_mask_0 = const()[name = tensor("op_37479_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37479_cast_fp16 = slice_by_index(begin = var_37479_begin_0, end = var_37479_end_0, end_mask = var_37479_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37479_cast_fp16")]; tensor var_37483_begin_0 = const()[name = tensor("op_37483_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_37483_end_0 = const()[name = tensor("op_37483_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_37483_end_mask_0 = const()[name = tensor("op_37483_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37483_cast_fp16 = slice_by_index(begin = var_37483_begin_0, end = var_37483_end_0, end_mask = var_37483_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37483_cast_fp16")]; tensor var_37487_begin_0 = const()[name = tensor("op_37487_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_37487_end_0 = const()[name = tensor("op_37487_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_37487_end_mask_0 = const()[name = tensor("op_37487_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37487_cast_fp16 = slice_by_index(begin = var_37487_begin_0, end = var_37487_end_0, end_mask = var_37487_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37487_cast_fp16")]; tensor var_37491_begin_0 = const()[name = tensor("op_37491_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_37491_end_0 = const()[name = tensor("op_37491_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_37491_end_mask_0 = const()[name = tensor("op_37491_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37491_cast_fp16 = slice_by_index(begin = var_37491_begin_0, end = var_37491_end_0, end_mask = var_37491_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37491_cast_fp16")]; tensor var_37495_begin_0 = const()[name = tensor("op_37495_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_37495_end_0 = const()[name = tensor("op_37495_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_37495_end_mask_0 = const()[name = tensor("op_37495_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37495_cast_fp16 = slice_by_index(begin = var_37495_begin_0, end = var_37495_end_0, end_mask = var_37495_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37495_cast_fp16")]; tensor var_37499_begin_0 = const()[name = tensor("op_37499_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_37499_end_0 = const()[name = tensor("op_37499_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_37499_end_mask_0 = const()[name = tensor("op_37499_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37499_cast_fp16 = slice_by_index(begin = var_37499_begin_0, end = var_37499_end_0, end_mask = var_37499_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37499_cast_fp16")]; tensor var_37503_begin_0 = const()[name = tensor("op_37503_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_37503_end_0 = const()[name = tensor("op_37503_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_37503_end_mask_0 = const()[name = tensor("op_37503_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37503_cast_fp16 = slice_by_index(begin = var_37503_begin_0, end = var_37503_end_0, end_mask = var_37503_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37503_cast_fp16")]; tensor var_37507_begin_0 = const()[name = tensor("op_37507_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_37507_end_0 = const()[name = tensor("op_37507_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_37507_end_mask_0 = const()[name = tensor("op_37507_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37507_cast_fp16 = slice_by_index(begin = var_37507_begin_0, end = var_37507_end_0, end_mask = var_37507_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37507_cast_fp16")]; tensor var_37511_begin_0 = const()[name = tensor("op_37511_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_37511_end_0 = const()[name = tensor("op_37511_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_37511_end_mask_0 = const()[name = tensor("op_37511_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37511_cast_fp16 = slice_by_index(begin = var_37511_begin_0, end = var_37511_end_0, end_mask = var_37511_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37511_cast_fp16")]; tensor var_37515_begin_0 = const()[name = tensor("op_37515_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_37515_end_0 = const()[name = tensor("op_37515_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_37515_end_mask_0 = const()[name = tensor("op_37515_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37515_cast_fp16 = slice_by_index(begin = var_37515_begin_0, end = var_37515_end_0, end_mask = var_37515_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37515_cast_fp16")]; tensor var_37519_begin_0 = const()[name = tensor("op_37519_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_37519_end_0 = const()[name = tensor("op_37519_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_37519_end_mask_0 = const()[name = tensor("op_37519_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37519_cast_fp16 = slice_by_index(begin = var_37519_begin_0, end = var_37519_end_0, end_mask = var_37519_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37519_cast_fp16")]; tensor var_37523_begin_0 = const()[name = tensor("op_37523_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_37523_end_0 = const()[name = tensor("op_37523_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_37523_end_mask_0 = const()[name = tensor("op_37523_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37523_cast_fp16 = slice_by_index(begin = var_37523_begin_0, end = var_37523_end_0, end_mask = var_37523_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37523_cast_fp16")]; tensor var_37527_begin_0 = const()[name = tensor("op_37527_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_37527_end_0 = const()[name = tensor("op_37527_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_37527_end_mask_0 = const()[name = tensor("op_37527_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37527_cast_fp16 = slice_by_index(begin = var_37527_begin_0, end = var_37527_end_0, end_mask = var_37527_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37527_cast_fp16")]; tensor var_37531_begin_0 = const()[name = tensor("op_37531_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_37531_end_0 = const()[name = tensor("op_37531_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_37531_end_mask_0 = const()[name = tensor("op_37531_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37531_cast_fp16 = slice_by_index(begin = var_37531_begin_0, end = var_37531_end_0, end_mask = var_37531_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37531_cast_fp16")]; tensor var_37535_begin_0 = const()[name = tensor("op_37535_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_37535_end_0 = const()[name = tensor("op_37535_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_37535_end_mask_0 = const()[name = tensor("op_37535_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37535_cast_fp16 = slice_by_index(begin = var_37535_begin_0, end = var_37535_end_0, end_mask = var_37535_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37535_cast_fp16")]; tensor var_37539_begin_0 = const()[name = tensor("op_37539_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_37539_end_0 = const()[name = tensor("op_37539_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_37539_end_mask_0 = const()[name = tensor("op_37539_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_37539_cast_fp16 = slice_by_index(begin = var_37539_begin_0, end = var_37539_end_0, end_mask = var_37539_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_37539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3681_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3681_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3681_equation_0, values = (var_37385_cast_fp16, var_36827_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3681_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3683_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3683_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3683_equation_0, values = (var_37385_cast_fp16, var_36834_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3683_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3685_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3685_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3685_equation_0, values = (var_37385_cast_fp16, var_36841_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3685_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3687_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3687_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3687_equation_0, values = (var_37385_cast_fp16, var_36848_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3687_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3689_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3689_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3689_equation_0, values = (var_37389_cast_fp16, var_36855_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3689_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3691_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3691_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3691_equation_0, values = (var_37389_cast_fp16, var_36862_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3691_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3693_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3693_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3693_equation_0, values = (var_37389_cast_fp16, var_36869_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3693_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3695_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3695_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3695_equation_0, values = (var_37389_cast_fp16, var_36876_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3695_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3697_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3697_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3697_equation_0, values = (var_37393_cast_fp16, var_36883_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3697_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3699_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3699_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3699_equation_0, values = (var_37393_cast_fp16, var_36890_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3699_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3701_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3701_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3701_equation_0, values = (var_37393_cast_fp16, var_36897_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3703_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3703_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3703_equation_0, values = (var_37393_cast_fp16, var_36904_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3703_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3705_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3705_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3705_equation_0, values = (var_37397_cast_fp16, var_36911_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3705_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3707_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3707_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3707_equation_0, values = (var_37397_cast_fp16, var_36918_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3707_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3709_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3709_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3709_equation_0, values = (var_37397_cast_fp16, var_36925_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3709_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3711_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3711_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3711_equation_0, values = (var_37397_cast_fp16, var_36932_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3711_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3713_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3713_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3713_equation_0, values = (var_37401_cast_fp16, var_36939_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3713_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3715_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3715_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3715_equation_0, values = (var_37401_cast_fp16, var_36946_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3715_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3717_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3717_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3717_equation_0, values = (var_37401_cast_fp16, var_36953_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3717_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3719_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3719_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3719_equation_0, values = (var_37401_cast_fp16, var_36960_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3719_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3721_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3721_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3721_equation_0, values = (var_37405_cast_fp16, var_36967_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3721_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3723_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3723_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3723_equation_0, values = (var_37405_cast_fp16, var_36974_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3723_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3725_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3725_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3725_equation_0, values = (var_37405_cast_fp16, var_36981_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3725_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3727_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3727_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3727_equation_0, values = (var_37405_cast_fp16, var_36988_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3727_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3729_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3729_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3729_equation_0, values = (var_37409_cast_fp16, var_36995_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3729_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3731_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3731_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3731_equation_0, values = (var_37409_cast_fp16, var_37002_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3731_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3733_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3733_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3733_equation_0, values = (var_37409_cast_fp16, var_37009_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3733_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3735_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3735_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3735_equation_0, values = (var_37409_cast_fp16, var_37016_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3735_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3737_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3737_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3737_equation_0, values = (var_37413_cast_fp16, var_37023_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3737_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3739_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3739_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3739_equation_0, values = (var_37413_cast_fp16, var_37030_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3739_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3741_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3741_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3741_equation_0, values = (var_37413_cast_fp16, var_37037_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3741_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3743_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3743_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3743_equation_0, values = (var_37413_cast_fp16, var_37044_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3743_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3745_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3745_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3745_equation_0, values = (var_37417_cast_fp16, var_37051_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3745_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3747_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3747_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3747_equation_0, values = (var_37417_cast_fp16, var_37058_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3747_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3749_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3749_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3749_equation_0, values = (var_37417_cast_fp16, var_37065_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3749_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3751_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3751_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3751_equation_0, values = (var_37417_cast_fp16, var_37072_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3751_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3753_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3753_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3753_equation_0, values = (var_37421_cast_fp16, var_37079_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3753_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3755_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3755_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3755_equation_0, values = (var_37421_cast_fp16, var_37086_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3755_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3757_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3757_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3757_equation_0, values = (var_37421_cast_fp16, var_37093_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3757_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3759_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3759_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3759_equation_0, values = (var_37421_cast_fp16, var_37100_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3759_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3761_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3761_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3761_equation_0, values = (var_37425_cast_fp16, var_37107_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3761_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3763_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3763_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3763_equation_0, values = (var_37425_cast_fp16, var_37114_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3763_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3765_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3765_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3765_equation_0, values = (var_37425_cast_fp16, var_37121_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3765_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3767_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3767_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3767_equation_0, values = (var_37425_cast_fp16, var_37128_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3767_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3769_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3769_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3769_equation_0, values = (var_37429_cast_fp16, var_37135_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3769_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3771_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3771_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3771_equation_0, values = (var_37429_cast_fp16, var_37142_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3771_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3773_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3773_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3773_equation_0, values = (var_37429_cast_fp16, var_37149_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3773_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3775_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3775_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3775_equation_0, values = (var_37429_cast_fp16, var_37156_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3775_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3777_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3777_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3777_equation_0, values = (var_37433_cast_fp16, var_37163_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3777_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3779_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3779_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3779_equation_0, values = (var_37433_cast_fp16, var_37170_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3779_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3781_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3781_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3781_equation_0, values = (var_37433_cast_fp16, var_37177_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3781_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3783_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3783_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3783_equation_0, values = (var_37433_cast_fp16, var_37184_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3783_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3785_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3785_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3785_equation_0, values = (var_37437_cast_fp16, var_37191_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3785_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3787_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3787_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3787_equation_0, values = (var_37437_cast_fp16, var_37198_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3787_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3789_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3789_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3789_equation_0, values = (var_37437_cast_fp16, var_37205_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3789_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3791_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3791_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3791_equation_0, values = (var_37437_cast_fp16, var_37212_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3791_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3793_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3793_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3793_equation_0, values = (var_37441_cast_fp16, var_37219_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3793_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3795_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3795_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3795_equation_0, values = (var_37441_cast_fp16, var_37226_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3795_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3797_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3797_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3797_equation_0, values = (var_37441_cast_fp16, var_37233_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3797_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3799_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3799_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3799_equation_0, values = (var_37441_cast_fp16, var_37240_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3799_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3801_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3801_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3801_equation_0, values = (var_37445_cast_fp16, var_37247_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3801_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3803_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3803_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3803_equation_0, values = (var_37445_cast_fp16, var_37254_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3803_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3805_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3805_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3805_equation_0, values = (var_37445_cast_fp16, var_37261_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3805_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3807_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3807_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3807_equation_0, values = (var_37445_cast_fp16, var_37268_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3807_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3809_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3809_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3809_equation_0, values = (var_37449_cast_fp16, var_37275_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3809_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3811_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3811_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3811_equation_0, values = (var_37449_cast_fp16, var_37282_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3811_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3813_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3813_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3813_equation_0, values = (var_37449_cast_fp16, var_37289_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3813_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3815_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3815_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3815_equation_0, values = (var_37449_cast_fp16, var_37296_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3815_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3817_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3817_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3817_equation_0, values = (var_37453_cast_fp16, var_37303_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3817_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3819_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3819_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3819_equation_0, values = (var_37453_cast_fp16, var_37310_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3819_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3821_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3821_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3821_equation_0, values = (var_37453_cast_fp16, var_37317_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3821_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3823_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3823_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3823_equation_0, values = (var_37453_cast_fp16, var_37324_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3823_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3825_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3825_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3825_equation_0, values = (var_37457_cast_fp16, var_37331_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3825_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3827_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3827_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3827_equation_0, values = (var_37457_cast_fp16, var_37338_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3827_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3829_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3829_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3829_equation_0, values = (var_37457_cast_fp16, var_37345_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3829_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3831_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3831_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3831_equation_0, values = (var_37457_cast_fp16, var_37352_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3831_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3833_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3833_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3833_equation_0, values = (var_37461_cast_fp16, var_37359_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3833_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3835_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3835_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3835_equation_0, values = (var_37461_cast_fp16, var_37366_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3835_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3837_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3837_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3837_equation_0, values = (var_37461_cast_fp16, var_37373_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3837_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3839_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3839_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3839_equation_0, values = (var_37461_cast_fp16, var_37380_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3839_cast_fp16")]; tensor var_37702_to_fp16 = const()[name = tensor("op_37702_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3681_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3681_cast_fp16, y = var_37702_to_fp16)[name = tensor("aw_chunk_3681_cast_fp16")]; tensor var_37704_to_fp16 = const()[name = tensor("op_37704_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3683_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3683_cast_fp16, y = var_37704_to_fp16)[name = tensor("aw_chunk_3683_cast_fp16")]; tensor var_37706_to_fp16 = const()[name = tensor("op_37706_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3685_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3685_cast_fp16, y = var_37706_to_fp16)[name = tensor("aw_chunk_3685_cast_fp16")]; tensor var_37708_to_fp16 = const()[name = tensor("op_37708_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3687_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3687_cast_fp16, y = var_37708_to_fp16)[name = tensor("aw_chunk_3687_cast_fp16")]; tensor var_37710_to_fp16 = const()[name = tensor("op_37710_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3689_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3689_cast_fp16, y = var_37710_to_fp16)[name = tensor("aw_chunk_3689_cast_fp16")]; tensor var_37712_to_fp16 = const()[name = tensor("op_37712_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3691_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3691_cast_fp16, y = var_37712_to_fp16)[name = tensor("aw_chunk_3691_cast_fp16")]; tensor var_37714_to_fp16 = const()[name = tensor("op_37714_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3693_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3693_cast_fp16, y = var_37714_to_fp16)[name = tensor("aw_chunk_3693_cast_fp16")]; tensor var_37716_to_fp16 = const()[name = tensor("op_37716_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3695_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3695_cast_fp16, y = var_37716_to_fp16)[name = tensor("aw_chunk_3695_cast_fp16")]; tensor var_37718_to_fp16 = const()[name = tensor("op_37718_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3697_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3697_cast_fp16, y = var_37718_to_fp16)[name = tensor("aw_chunk_3697_cast_fp16")]; tensor var_37720_to_fp16 = const()[name = tensor("op_37720_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3699_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3699_cast_fp16, y = var_37720_to_fp16)[name = tensor("aw_chunk_3699_cast_fp16")]; tensor var_37722_to_fp16 = const()[name = tensor("op_37722_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3701_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3701_cast_fp16, y = var_37722_to_fp16)[name = tensor("aw_chunk_3701_cast_fp16")]; tensor var_37724_to_fp16 = const()[name = tensor("op_37724_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3703_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3703_cast_fp16, y = var_37724_to_fp16)[name = tensor("aw_chunk_3703_cast_fp16")]; tensor var_37726_to_fp16 = const()[name = tensor("op_37726_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3705_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3705_cast_fp16, y = var_37726_to_fp16)[name = tensor("aw_chunk_3705_cast_fp16")]; tensor var_37728_to_fp16 = const()[name = tensor("op_37728_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3707_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3707_cast_fp16, y = var_37728_to_fp16)[name = tensor("aw_chunk_3707_cast_fp16")]; tensor var_37730_to_fp16 = const()[name = tensor("op_37730_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3709_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3709_cast_fp16, y = var_37730_to_fp16)[name = tensor("aw_chunk_3709_cast_fp16")]; tensor var_37732_to_fp16 = const()[name = tensor("op_37732_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3711_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3711_cast_fp16, y = var_37732_to_fp16)[name = tensor("aw_chunk_3711_cast_fp16")]; tensor var_37734_to_fp16 = const()[name = tensor("op_37734_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3713_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3713_cast_fp16, y = var_37734_to_fp16)[name = tensor("aw_chunk_3713_cast_fp16")]; tensor var_37736_to_fp16 = const()[name = tensor("op_37736_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3715_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3715_cast_fp16, y = var_37736_to_fp16)[name = tensor("aw_chunk_3715_cast_fp16")]; tensor var_37738_to_fp16 = const()[name = tensor("op_37738_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3717_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3717_cast_fp16, y = var_37738_to_fp16)[name = tensor("aw_chunk_3717_cast_fp16")]; tensor var_37740_to_fp16 = const()[name = tensor("op_37740_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3719_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3719_cast_fp16, y = var_37740_to_fp16)[name = tensor("aw_chunk_3719_cast_fp16")]; tensor var_37742_to_fp16 = const()[name = tensor("op_37742_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3721_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3721_cast_fp16, y = var_37742_to_fp16)[name = tensor("aw_chunk_3721_cast_fp16")]; tensor var_37744_to_fp16 = const()[name = tensor("op_37744_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3723_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3723_cast_fp16, y = var_37744_to_fp16)[name = tensor("aw_chunk_3723_cast_fp16")]; tensor var_37746_to_fp16 = const()[name = tensor("op_37746_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3725_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3725_cast_fp16, y = var_37746_to_fp16)[name = tensor("aw_chunk_3725_cast_fp16")]; tensor var_37748_to_fp16 = const()[name = tensor("op_37748_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3727_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3727_cast_fp16, y = var_37748_to_fp16)[name = tensor("aw_chunk_3727_cast_fp16")]; tensor var_37750_to_fp16 = const()[name = tensor("op_37750_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3729_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3729_cast_fp16, y = var_37750_to_fp16)[name = tensor("aw_chunk_3729_cast_fp16")]; tensor var_37752_to_fp16 = const()[name = tensor("op_37752_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3731_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3731_cast_fp16, y = var_37752_to_fp16)[name = tensor("aw_chunk_3731_cast_fp16")]; tensor var_37754_to_fp16 = const()[name = tensor("op_37754_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3733_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3733_cast_fp16, y = var_37754_to_fp16)[name = tensor("aw_chunk_3733_cast_fp16")]; tensor var_37756_to_fp16 = const()[name = tensor("op_37756_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3735_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3735_cast_fp16, y = var_37756_to_fp16)[name = tensor("aw_chunk_3735_cast_fp16")]; tensor var_37758_to_fp16 = const()[name = tensor("op_37758_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3737_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3737_cast_fp16, y = var_37758_to_fp16)[name = tensor("aw_chunk_3737_cast_fp16")]; tensor var_37760_to_fp16 = const()[name = tensor("op_37760_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3739_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3739_cast_fp16, y = var_37760_to_fp16)[name = tensor("aw_chunk_3739_cast_fp16")]; tensor var_37762_to_fp16 = const()[name = tensor("op_37762_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3741_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3741_cast_fp16, y = var_37762_to_fp16)[name = tensor("aw_chunk_3741_cast_fp16")]; tensor var_37764_to_fp16 = const()[name = tensor("op_37764_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3743_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3743_cast_fp16, y = var_37764_to_fp16)[name = tensor("aw_chunk_3743_cast_fp16")]; tensor var_37766_to_fp16 = const()[name = tensor("op_37766_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3745_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3745_cast_fp16, y = var_37766_to_fp16)[name = tensor("aw_chunk_3745_cast_fp16")]; tensor var_37768_to_fp16 = const()[name = tensor("op_37768_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3747_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3747_cast_fp16, y = var_37768_to_fp16)[name = tensor("aw_chunk_3747_cast_fp16")]; tensor var_37770_to_fp16 = const()[name = tensor("op_37770_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3749_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3749_cast_fp16, y = var_37770_to_fp16)[name = tensor("aw_chunk_3749_cast_fp16")]; tensor var_37772_to_fp16 = const()[name = tensor("op_37772_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3751_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3751_cast_fp16, y = var_37772_to_fp16)[name = tensor("aw_chunk_3751_cast_fp16")]; tensor var_37774_to_fp16 = const()[name = tensor("op_37774_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3753_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3753_cast_fp16, y = var_37774_to_fp16)[name = tensor("aw_chunk_3753_cast_fp16")]; tensor var_37776_to_fp16 = const()[name = tensor("op_37776_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3755_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3755_cast_fp16, y = var_37776_to_fp16)[name = tensor("aw_chunk_3755_cast_fp16")]; tensor var_37778_to_fp16 = const()[name = tensor("op_37778_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3757_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3757_cast_fp16, y = var_37778_to_fp16)[name = tensor("aw_chunk_3757_cast_fp16")]; tensor var_37780_to_fp16 = const()[name = tensor("op_37780_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3759_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3759_cast_fp16, y = var_37780_to_fp16)[name = tensor("aw_chunk_3759_cast_fp16")]; tensor var_37782_to_fp16 = const()[name = tensor("op_37782_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3761_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3761_cast_fp16, y = var_37782_to_fp16)[name = tensor("aw_chunk_3761_cast_fp16")]; tensor var_37784_to_fp16 = const()[name = tensor("op_37784_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3763_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3763_cast_fp16, y = var_37784_to_fp16)[name = tensor("aw_chunk_3763_cast_fp16")]; tensor var_37786_to_fp16 = const()[name = tensor("op_37786_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3765_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3765_cast_fp16, y = var_37786_to_fp16)[name = tensor("aw_chunk_3765_cast_fp16")]; tensor var_37788_to_fp16 = const()[name = tensor("op_37788_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3767_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3767_cast_fp16, y = var_37788_to_fp16)[name = tensor("aw_chunk_3767_cast_fp16")]; tensor var_37790_to_fp16 = const()[name = tensor("op_37790_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3769_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3769_cast_fp16, y = var_37790_to_fp16)[name = tensor("aw_chunk_3769_cast_fp16")]; tensor var_37792_to_fp16 = const()[name = tensor("op_37792_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3771_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3771_cast_fp16, y = var_37792_to_fp16)[name = tensor("aw_chunk_3771_cast_fp16")]; tensor var_37794_to_fp16 = const()[name = tensor("op_37794_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3773_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3773_cast_fp16, y = var_37794_to_fp16)[name = tensor("aw_chunk_3773_cast_fp16")]; tensor var_37796_to_fp16 = const()[name = tensor("op_37796_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3775_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3775_cast_fp16, y = var_37796_to_fp16)[name = tensor("aw_chunk_3775_cast_fp16")]; tensor var_37798_to_fp16 = const()[name = tensor("op_37798_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3777_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3777_cast_fp16, y = var_37798_to_fp16)[name = tensor("aw_chunk_3777_cast_fp16")]; tensor var_37800_to_fp16 = const()[name = tensor("op_37800_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3779_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3779_cast_fp16, y = var_37800_to_fp16)[name = tensor("aw_chunk_3779_cast_fp16")]; tensor var_37802_to_fp16 = const()[name = tensor("op_37802_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3781_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3781_cast_fp16, y = var_37802_to_fp16)[name = tensor("aw_chunk_3781_cast_fp16")]; tensor var_37804_to_fp16 = const()[name = tensor("op_37804_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3783_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3783_cast_fp16, y = var_37804_to_fp16)[name = tensor("aw_chunk_3783_cast_fp16")]; tensor var_37806_to_fp16 = const()[name = tensor("op_37806_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3785_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3785_cast_fp16, y = var_37806_to_fp16)[name = tensor("aw_chunk_3785_cast_fp16")]; tensor var_37808_to_fp16 = const()[name = tensor("op_37808_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3787_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3787_cast_fp16, y = var_37808_to_fp16)[name = tensor("aw_chunk_3787_cast_fp16")]; tensor var_37810_to_fp16 = const()[name = tensor("op_37810_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3789_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3789_cast_fp16, y = var_37810_to_fp16)[name = tensor("aw_chunk_3789_cast_fp16")]; tensor var_37812_to_fp16 = const()[name = tensor("op_37812_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3791_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3791_cast_fp16, y = var_37812_to_fp16)[name = tensor("aw_chunk_3791_cast_fp16")]; tensor var_37814_to_fp16 = const()[name = tensor("op_37814_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3793_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3793_cast_fp16, y = var_37814_to_fp16)[name = tensor("aw_chunk_3793_cast_fp16")]; tensor var_37816_to_fp16 = const()[name = tensor("op_37816_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3795_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3795_cast_fp16, y = var_37816_to_fp16)[name = tensor("aw_chunk_3795_cast_fp16")]; tensor var_37818_to_fp16 = const()[name = tensor("op_37818_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3797_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3797_cast_fp16, y = var_37818_to_fp16)[name = tensor("aw_chunk_3797_cast_fp16")]; tensor var_37820_to_fp16 = const()[name = tensor("op_37820_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3799_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3799_cast_fp16, y = var_37820_to_fp16)[name = tensor("aw_chunk_3799_cast_fp16")]; tensor var_37822_to_fp16 = const()[name = tensor("op_37822_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3801_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3801_cast_fp16, y = var_37822_to_fp16)[name = tensor("aw_chunk_3801_cast_fp16")]; tensor var_37824_to_fp16 = const()[name = tensor("op_37824_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3803_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3803_cast_fp16, y = var_37824_to_fp16)[name = tensor("aw_chunk_3803_cast_fp16")]; tensor var_37826_to_fp16 = const()[name = tensor("op_37826_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3805_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3805_cast_fp16, y = var_37826_to_fp16)[name = tensor("aw_chunk_3805_cast_fp16")]; tensor var_37828_to_fp16 = const()[name = tensor("op_37828_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3807_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3807_cast_fp16, y = var_37828_to_fp16)[name = tensor("aw_chunk_3807_cast_fp16")]; tensor var_37830_to_fp16 = const()[name = tensor("op_37830_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3809_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3809_cast_fp16, y = var_37830_to_fp16)[name = tensor("aw_chunk_3809_cast_fp16")]; tensor var_37832_to_fp16 = const()[name = tensor("op_37832_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3811_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3811_cast_fp16, y = var_37832_to_fp16)[name = tensor("aw_chunk_3811_cast_fp16")]; tensor var_37834_to_fp16 = const()[name = tensor("op_37834_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3813_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3813_cast_fp16, y = var_37834_to_fp16)[name = tensor("aw_chunk_3813_cast_fp16")]; tensor var_37836_to_fp16 = const()[name = tensor("op_37836_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3815_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3815_cast_fp16, y = var_37836_to_fp16)[name = tensor("aw_chunk_3815_cast_fp16")]; tensor var_37838_to_fp16 = const()[name = tensor("op_37838_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3817_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3817_cast_fp16, y = var_37838_to_fp16)[name = tensor("aw_chunk_3817_cast_fp16")]; tensor var_37840_to_fp16 = const()[name = tensor("op_37840_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3819_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3819_cast_fp16, y = var_37840_to_fp16)[name = tensor("aw_chunk_3819_cast_fp16")]; tensor var_37842_to_fp16 = const()[name = tensor("op_37842_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3821_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3821_cast_fp16, y = var_37842_to_fp16)[name = tensor("aw_chunk_3821_cast_fp16")]; tensor var_37844_to_fp16 = const()[name = tensor("op_37844_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3823_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3823_cast_fp16, y = var_37844_to_fp16)[name = tensor("aw_chunk_3823_cast_fp16")]; tensor var_37846_to_fp16 = const()[name = tensor("op_37846_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3825_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3825_cast_fp16, y = var_37846_to_fp16)[name = tensor("aw_chunk_3825_cast_fp16")]; tensor var_37848_to_fp16 = const()[name = tensor("op_37848_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3827_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3827_cast_fp16, y = var_37848_to_fp16)[name = tensor("aw_chunk_3827_cast_fp16")]; tensor var_37850_to_fp16 = const()[name = tensor("op_37850_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3829_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3829_cast_fp16, y = var_37850_to_fp16)[name = tensor("aw_chunk_3829_cast_fp16")]; tensor var_37852_to_fp16 = const()[name = tensor("op_37852_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3831_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3831_cast_fp16, y = var_37852_to_fp16)[name = tensor("aw_chunk_3831_cast_fp16")]; tensor var_37854_to_fp16 = const()[name = tensor("op_37854_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3833_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3833_cast_fp16, y = var_37854_to_fp16)[name = tensor("aw_chunk_3833_cast_fp16")]; tensor var_37856_to_fp16 = const()[name = tensor("op_37856_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3835_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3835_cast_fp16, y = var_37856_to_fp16)[name = tensor("aw_chunk_3835_cast_fp16")]; tensor var_37858_to_fp16 = const()[name = tensor("op_37858_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3837_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3837_cast_fp16, y = var_37858_to_fp16)[name = tensor("aw_chunk_3837_cast_fp16")]; tensor var_37860_to_fp16 = const()[name = tensor("op_37860_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3839_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3839_cast_fp16, y = var_37860_to_fp16)[name = tensor("aw_chunk_3839_cast_fp16")]; tensor var_37862_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3681_cast_fp16)[name = tensor("op_37862_cast_fp16")]; tensor var_37863_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3683_cast_fp16)[name = tensor("op_37863_cast_fp16")]; tensor var_37864_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3685_cast_fp16)[name = tensor("op_37864_cast_fp16")]; tensor var_37865_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3687_cast_fp16)[name = tensor("op_37865_cast_fp16")]; tensor var_37866_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3689_cast_fp16)[name = tensor("op_37866_cast_fp16")]; tensor var_37867_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3691_cast_fp16)[name = tensor("op_37867_cast_fp16")]; tensor var_37868_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3693_cast_fp16)[name = tensor("op_37868_cast_fp16")]; tensor var_37869_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3695_cast_fp16)[name = tensor("op_37869_cast_fp16")]; tensor var_37870_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3697_cast_fp16)[name = tensor("op_37870_cast_fp16")]; tensor var_37871_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3699_cast_fp16)[name = tensor("op_37871_cast_fp16")]; tensor var_37872_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3701_cast_fp16)[name = tensor("op_37872_cast_fp16")]; tensor var_37873_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3703_cast_fp16)[name = tensor("op_37873_cast_fp16")]; tensor var_37874_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3705_cast_fp16)[name = tensor("op_37874_cast_fp16")]; tensor var_37875_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3707_cast_fp16)[name = tensor("op_37875_cast_fp16")]; tensor var_37876_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3709_cast_fp16)[name = tensor("op_37876_cast_fp16")]; tensor var_37877_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3711_cast_fp16)[name = tensor("op_37877_cast_fp16")]; tensor var_37878_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3713_cast_fp16)[name = tensor("op_37878_cast_fp16")]; tensor var_37879_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3715_cast_fp16)[name = tensor("op_37879_cast_fp16")]; tensor var_37880_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3717_cast_fp16)[name = tensor("op_37880_cast_fp16")]; tensor var_37881_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3719_cast_fp16)[name = tensor("op_37881_cast_fp16")]; tensor var_37882_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3721_cast_fp16)[name = tensor("op_37882_cast_fp16")]; tensor var_37883_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3723_cast_fp16)[name = tensor("op_37883_cast_fp16")]; tensor var_37884_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3725_cast_fp16)[name = tensor("op_37884_cast_fp16")]; tensor var_37885_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3727_cast_fp16)[name = tensor("op_37885_cast_fp16")]; tensor var_37886_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3729_cast_fp16)[name = tensor("op_37886_cast_fp16")]; tensor var_37887_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3731_cast_fp16)[name = tensor("op_37887_cast_fp16")]; tensor var_37888_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3733_cast_fp16)[name = tensor("op_37888_cast_fp16")]; tensor var_37889_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3735_cast_fp16)[name = tensor("op_37889_cast_fp16")]; tensor var_37890_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3737_cast_fp16)[name = tensor("op_37890_cast_fp16")]; tensor var_37891_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3739_cast_fp16)[name = tensor("op_37891_cast_fp16")]; tensor var_37892_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3741_cast_fp16)[name = tensor("op_37892_cast_fp16")]; tensor var_37893_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3743_cast_fp16)[name = tensor("op_37893_cast_fp16")]; tensor var_37894_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3745_cast_fp16)[name = tensor("op_37894_cast_fp16")]; tensor var_37895_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3747_cast_fp16)[name = tensor("op_37895_cast_fp16")]; tensor var_37896_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3749_cast_fp16)[name = tensor("op_37896_cast_fp16")]; tensor var_37897_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3751_cast_fp16)[name = tensor("op_37897_cast_fp16")]; tensor var_37898_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3753_cast_fp16)[name = tensor("op_37898_cast_fp16")]; tensor var_37899_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3755_cast_fp16)[name = tensor("op_37899_cast_fp16")]; tensor var_37900_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3757_cast_fp16)[name = tensor("op_37900_cast_fp16")]; tensor var_37901_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3759_cast_fp16)[name = tensor("op_37901_cast_fp16")]; tensor var_37902_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3761_cast_fp16)[name = tensor("op_37902_cast_fp16")]; tensor var_37903_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3763_cast_fp16)[name = tensor("op_37903_cast_fp16")]; tensor var_37904_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3765_cast_fp16)[name = tensor("op_37904_cast_fp16")]; tensor var_37905_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3767_cast_fp16)[name = tensor("op_37905_cast_fp16")]; tensor var_37906_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3769_cast_fp16)[name = tensor("op_37906_cast_fp16")]; tensor var_37907_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3771_cast_fp16)[name = tensor("op_37907_cast_fp16")]; tensor var_37908_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3773_cast_fp16)[name = tensor("op_37908_cast_fp16")]; tensor var_37909_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3775_cast_fp16)[name = tensor("op_37909_cast_fp16")]; tensor var_37910_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3777_cast_fp16)[name = tensor("op_37910_cast_fp16")]; tensor var_37911_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3779_cast_fp16)[name = tensor("op_37911_cast_fp16")]; tensor var_37912_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3781_cast_fp16)[name = tensor("op_37912_cast_fp16")]; tensor var_37913_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3783_cast_fp16)[name = tensor("op_37913_cast_fp16")]; tensor var_37914_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3785_cast_fp16)[name = tensor("op_37914_cast_fp16")]; tensor var_37915_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3787_cast_fp16)[name = tensor("op_37915_cast_fp16")]; tensor var_37916_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3789_cast_fp16)[name = tensor("op_37916_cast_fp16")]; tensor var_37917_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3791_cast_fp16)[name = tensor("op_37917_cast_fp16")]; tensor var_37918_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3793_cast_fp16)[name = tensor("op_37918_cast_fp16")]; tensor var_37919_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3795_cast_fp16)[name = tensor("op_37919_cast_fp16")]; tensor var_37920_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3797_cast_fp16)[name = tensor("op_37920_cast_fp16")]; tensor var_37921_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3799_cast_fp16)[name = tensor("op_37921_cast_fp16")]; tensor var_37922_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3801_cast_fp16)[name = tensor("op_37922_cast_fp16")]; tensor var_37923_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3803_cast_fp16)[name = tensor("op_37923_cast_fp16")]; tensor var_37924_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3805_cast_fp16)[name = tensor("op_37924_cast_fp16")]; tensor var_37925_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3807_cast_fp16)[name = tensor("op_37925_cast_fp16")]; tensor var_37926_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3809_cast_fp16)[name = tensor("op_37926_cast_fp16")]; tensor var_37927_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3811_cast_fp16)[name = tensor("op_37927_cast_fp16")]; tensor var_37928_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3813_cast_fp16)[name = tensor("op_37928_cast_fp16")]; tensor var_37929_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3815_cast_fp16)[name = tensor("op_37929_cast_fp16")]; tensor var_37930_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3817_cast_fp16)[name = tensor("op_37930_cast_fp16")]; tensor var_37931_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3819_cast_fp16)[name = tensor("op_37931_cast_fp16")]; tensor var_37932_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3821_cast_fp16)[name = tensor("op_37932_cast_fp16")]; tensor var_37933_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3823_cast_fp16)[name = tensor("op_37933_cast_fp16")]; tensor var_37934_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3825_cast_fp16)[name = tensor("op_37934_cast_fp16")]; tensor var_37935_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3827_cast_fp16)[name = tensor("op_37935_cast_fp16")]; tensor var_37936_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3829_cast_fp16)[name = tensor("op_37936_cast_fp16")]; tensor var_37937_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3831_cast_fp16)[name = tensor("op_37937_cast_fp16")]; tensor var_37938_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3833_cast_fp16)[name = tensor("op_37938_cast_fp16")]; tensor var_37939_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3835_cast_fp16)[name = tensor("op_37939_cast_fp16")]; tensor var_37940_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3837_cast_fp16)[name = tensor("op_37940_cast_fp16")]; tensor var_37941_cast_fp16 = softmax(axis = var_36660, x = aw_chunk_3839_cast_fp16)[name = tensor("op_37941_cast_fp16")]; tensor var_37943_equation_0 = const()[name = tensor("op_37943_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37943_cast_fp16 = einsum(equation = var_37943_equation_0, values = (var_37463_cast_fp16, var_37862_cast_fp16))[name = tensor("op_37943_cast_fp16")]; tensor var_37945_equation_0 = const()[name = tensor("op_37945_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37945_cast_fp16 = einsum(equation = var_37945_equation_0, values = (var_37463_cast_fp16, var_37863_cast_fp16))[name = tensor("op_37945_cast_fp16")]; tensor var_37947_equation_0 = const()[name = tensor("op_37947_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37947_cast_fp16 = einsum(equation = var_37947_equation_0, values = (var_37463_cast_fp16, var_37864_cast_fp16))[name = tensor("op_37947_cast_fp16")]; tensor var_37949_equation_0 = const()[name = tensor("op_37949_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37949_cast_fp16 = einsum(equation = var_37949_equation_0, values = (var_37463_cast_fp16, var_37865_cast_fp16))[name = tensor("op_37949_cast_fp16")]; tensor var_37951_equation_0 = const()[name = tensor("op_37951_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37951_cast_fp16 = einsum(equation = var_37951_equation_0, values = (var_37467_cast_fp16, var_37866_cast_fp16))[name = tensor("op_37951_cast_fp16")]; tensor var_37953_equation_0 = const()[name = tensor("op_37953_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37953_cast_fp16 = einsum(equation = var_37953_equation_0, values = (var_37467_cast_fp16, var_37867_cast_fp16))[name = tensor("op_37953_cast_fp16")]; tensor var_37955_equation_0 = const()[name = tensor("op_37955_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37955_cast_fp16 = einsum(equation = var_37955_equation_0, values = (var_37467_cast_fp16, var_37868_cast_fp16))[name = tensor("op_37955_cast_fp16")]; tensor var_37957_equation_0 = const()[name = tensor("op_37957_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37957_cast_fp16 = einsum(equation = var_37957_equation_0, values = (var_37467_cast_fp16, var_37869_cast_fp16))[name = tensor("op_37957_cast_fp16")]; tensor var_37959_equation_0 = const()[name = tensor("op_37959_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37959_cast_fp16 = einsum(equation = var_37959_equation_0, values = (var_37471_cast_fp16, var_37870_cast_fp16))[name = tensor("op_37959_cast_fp16")]; tensor var_37961_equation_0 = const()[name = tensor("op_37961_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37961_cast_fp16 = einsum(equation = var_37961_equation_0, values = (var_37471_cast_fp16, var_37871_cast_fp16))[name = tensor("op_37961_cast_fp16")]; tensor var_37963_equation_0 = const()[name = tensor("op_37963_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37963_cast_fp16 = einsum(equation = var_37963_equation_0, values = (var_37471_cast_fp16, var_37872_cast_fp16))[name = tensor("op_37963_cast_fp16")]; tensor var_37965_equation_0 = const()[name = tensor("op_37965_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37965_cast_fp16 = einsum(equation = var_37965_equation_0, values = (var_37471_cast_fp16, var_37873_cast_fp16))[name = tensor("op_37965_cast_fp16")]; tensor var_37967_equation_0 = const()[name = tensor("op_37967_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37967_cast_fp16 = einsum(equation = var_37967_equation_0, values = (var_37475_cast_fp16, var_37874_cast_fp16))[name = tensor("op_37967_cast_fp16")]; tensor var_37969_equation_0 = const()[name = tensor("op_37969_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37969_cast_fp16 = einsum(equation = var_37969_equation_0, values = (var_37475_cast_fp16, var_37875_cast_fp16))[name = tensor("op_37969_cast_fp16")]; tensor var_37971_equation_0 = const()[name = tensor("op_37971_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37971_cast_fp16 = einsum(equation = var_37971_equation_0, values = (var_37475_cast_fp16, var_37876_cast_fp16))[name = tensor("op_37971_cast_fp16")]; tensor var_37973_equation_0 = const()[name = tensor("op_37973_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37973_cast_fp16 = einsum(equation = var_37973_equation_0, values = (var_37475_cast_fp16, var_37877_cast_fp16))[name = tensor("op_37973_cast_fp16")]; tensor var_37975_equation_0 = const()[name = tensor("op_37975_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37975_cast_fp16 = einsum(equation = var_37975_equation_0, values = (var_37479_cast_fp16, var_37878_cast_fp16))[name = tensor("op_37975_cast_fp16")]; tensor var_37977_equation_0 = const()[name = tensor("op_37977_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37977_cast_fp16 = einsum(equation = var_37977_equation_0, values = (var_37479_cast_fp16, var_37879_cast_fp16))[name = tensor("op_37977_cast_fp16")]; tensor var_37979_equation_0 = const()[name = tensor("op_37979_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37979_cast_fp16 = einsum(equation = var_37979_equation_0, values = (var_37479_cast_fp16, var_37880_cast_fp16))[name = tensor("op_37979_cast_fp16")]; tensor var_37981_equation_0 = const()[name = tensor("op_37981_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37981_cast_fp16 = einsum(equation = var_37981_equation_0, values = (var_37479_cast_fp16, var_37881_cast_fp16))[name = tensor("op_37981_cast_fp16")]; tensor var_37983_equation_0 = const()[name = tensor("op_37983_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37983_cast_fp16 = einsum(equation = var_37983_equation_0, values = (var_37483_cast_fp16, var_37882_cast_fp16))[name = tensor("op_37983_cast_fp16")]; tensor var_37985_equation_0 = const()[name = tensor("op_37985_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37985_cast_fp16 = einsum(equation = var_37985_equation_0, values = (var_37483_cast_fp16, var_37883_cast_fp16))[name = tensor("op_37985_cast_fp16")]; tensor var_37987_equation_0 = const()[name = tensor("op_37987_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37987_cast_fp16 = einsum(equation = var_37987_equation_0, values = (var_37483_cast_fp16, var_37884_cast_fp16))[name = tensor("op_37987_cast_fp16")]; tensor var_37989_equation_0 = const()[name = tensor("op_37989_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37989_cast_fp16 = einsum(equation = var_37989_equation_0, values = (var_37483_cast_fp16, var_37885_cast_fp16))[name = tensor("op_37989_cast_fp16")]; tensor var_37991_equation_0 = const()[name = tensor("op_37991_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37991_cast_fp16 = einsum(equation = var_37991_equation_0, values = (var_37487_cast_fp16, var_37886_cast_fp16))[name = tensor("op_37991_cast_fp16")]; tensor var_37993_equation_0 = const()[name = tensor("op_37993_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37993_cast_fp16 = einsum(equation = var_37993_equation_0, values = (var_37487_cast_fp16, var_37887_cast_fp16))[name = tensor("op_37993_cast_fp16")]; tensor var_37995_equation_0 = const()[name = tensor("op_37995_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37995_cast_fp16 = einsum(equation = var_37995_equation_0, values = (var_37487_cast_fp16, var_37888_cast_fp16))[name = tensor("op_37995_cast_fp16")]; tensor var_37997_equation_0 = const()[name = tensor("op_37997_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37997_cast_fp16 = einsum(equation = var_37997_equation_0, values = (var_37487_cast_fp16, var_37889_cast_fp16))[name = tensor("op_37997_cast_fp16")]; tensor var_37999_equation_0 = const()[name = tensor("op_37999_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_37999_cast_fp16 = einsum(equation = var_37999_equation_0, values = (var_37491_cast_fp16, var_37890_cast_fp16))[name = tensor("op_37999_cast_fp16")]; tensor var_38001_equation_0 = const()[name = tensor("op_38001_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38001_cast_fp16 = einsum(equation = var_38001_equation_0, values = (var_37491_cast_fp16, var_37891_cast_fp16))[name = tensor("op_38001_cast_fp16")]; tensor var_38003_equation_0 = const()[name = tensor("op_38003_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38003_cast_fp16 = einsum(equation = var_38003_equation_0, values = (var_37491_cast_fp16, var_37892_cast_fp16))[name = tensor("op_38003_cast_fp16")]; tensor var_38005_equation_0 = const()[name = tensor("op_38005_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38005_cast_fp16 = einsum(equation = var_38005_equation_0, values = (var_37491_cast_fp16, var_37893_cast_fp16))[name = tensor("op_38005_cast_fp16")]; tensor var_38007_equation_0 = const()[name = tensor("op_38007_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38007_cast_fp16 = einsum(equation = var_38007_equation_0, values = (var_37495_cast_fp16, var_37894_cast_fp16))[name = tensor("op_38007_cast_fp16")]; tensor var_38009_equation_0 = const()[name = tensor("op_38009_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38009_cast_fp16 = einsum(equation = var_38009_equation_0, values = (var_37495_cast_fp16, var_37895_cast_fp16))[name = tensor("op_38009_cast_fp16")]; tensor var_38011_equation_0 = const()[name = tensor("op_38011_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38011_cast_fp16 = einsum(equation = var_38011_equation_0, values = (var_37495_cast_fp16, var_37896_cast_fp16))[name = tensor("op_38011_cast_fp16")]; tensor var_38013_equation_0 = const()[name = tensor("op_38013_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38013_cast_fp16 = einsum(equation = var_38013_equation_0, values = (var_37495_cast_fp16, var_37897_cast_fp16))[name = tensor("op_38013_cast_fp16")]; tensor var_38015_equation_0 = const()[name = tensor("op_38015_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38015_cast_fp16 = einsum(equation = var_38015_equation_0, values = (var_37499_cast_fp16, var_37898_cast_fp16))[name = tensor("op_38015_cast_fp16")]; tensor var_38017_equation_0 = const()[name = tensor("op_38017_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38017_cast_fp16 = einsum(equation = var_38017_equation_0, values = (var_37499_cast_fp16, var_37899_cast_fp16))[name = tensor("op_38017_cast_fp16")]; tensor var_38019_equation_0 = const()[name = tensor("op_38019_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38019_cast_fp16 = einsum(equation = var_38019_equation_0, values = (var_37499_cast_fp16, var_37900_cast_fp16))[name = tensor("op_38019_cast_fp16")]; tensor var_38021_equation_0 = const()[name = tensor("op_38021_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38021_cast_fp16 = einsum(equation = var_38021_equation_0, values = (var_37499_cast_fp16, var_37901_cast_fp16))[name = tensor("op_38021_cast_fp16")]; tensor var_38023_equation_0 = const()[name = tensor("op_38023_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38023_cast_fp16 = einsum(equation = var_38023_equation_0, values = (var_37503_cast_fp16, var_37902_cast_fp16))[name = tensor("op_38023_cast_fp16")]; tensor var_38025_equation_0 = const()[name = tensor("op_38025_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38025_cast_fp16 = einsum(equation = var_38025_equation_0, values = (var_37503_cast_fp16, var_37903_cast_fp16))[name = tensor("op_38025_cast_fp16")]; tensor var_38027_equation_0 = const()[name = tensor("op_38027_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38027_cast_fp16 = einsum(equation = var_38027_equation_0, values = (var_37503_cast_fp16, var_37904_cast_fp16))[name = tensor("op_38027_cast_fp16")]; tensor var_38029_equation_0 = const()[name = tensor("op_38029_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38029_cast_fp16 = einsum(equation = var_38029_equation_0, values = (var_37503_cast_fp16, var_37905_cast_fp16))[name = tensor("op_38029_cast_fp16")]; tensor var_38031_equation_0 = const()[name = tensor("op_38031_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38031_cast_fp16 = einsum(equation = var_38031_equation_0, values = (var_37507_cast_fp16, var_37906_cast_fp16))[name = tensor("op_38031_cast_fp16")]; tensor var_38033_equation_0 = const()[name = tensor("op_38033_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38033_cast_fp16 = einsum(equation = var_38033_equation_0, values = (var_37507_cast_fp16, var_37907_cast_fp16))[name = tensor("op_38033_cast_fp16")]; tensor var_38035_equation_0 = const()[name = tensor("op_38035_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38035_cast_fp16 = einsum(equation = var_38035_equation_0, values = (var_37507_cast_fp16, var_37908_cast_fp16))[name = tensor("op_38035_cast_fp16")]; tensor var_38037_equation_0 = const()[name = tensor("op_38037_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38037_cast_fp16 = einsum(equation = var_38037_equation_0, values = (var_37507_cast_fp16, var_37909_cast_fp16))[name = tensor("op_38037_cast_fp16")]; tensor var_38039_equation_0 = const()[name = tensor("op_38039_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38039_cast_fp16 = einsum(equation = var_38039_equation_0, values = (var_37511_cast_fp16, var_37910_cast_fp16))[name = tensor("op_38039_cast_fp16")]; tensor var_38041_equation_0 = const()[name = tensor("op_38041_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38041_cast_fp16 = einsum(equation = var_38041_equation_0, values = (var_37511_cast_fp16, var_37911_cast_fp16))[name = tensor("op_38041_cast_fp16")]; tensor var_38043_equation_0 = const()[name = tensor("op_38043_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38043_cast_fp16 = einsum(equation = var_38043_equation_0, values = (var_37511_cast_fp16, var_37912_cast_fp16))[name = tensor("op_38043_cast_fp16")]; tensor var_38045_equation_0 = const()[name = tensor("op_38045_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38045_cast_fp16 = einsum(equation = var_38045_equation_0, values = (var_37511_cast_fp16, var_37913_cast_fp16))[name = tensor("op_38045_cast_fp16")]; tensor var_38047_equation_0 = const()[name = tensor("op_38047_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38047_cast_fp16 = einsum(equation = var_38047_equation_0, values = (var_37515_cast_fp16, var_37914_cast_fp16))[name = tensor("op_38047_cast_fp16")]; tensor var_38049_equation_0 = const()[name = tensor("op_38049_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38049_cast_fp16 = einsum(equation = var_38049_equation_0, values = (var_37515_cast_fp16, var_37915_cast_fp16))[name = tensor("op_38049_cast_fp16")]; tensor var_38051_equation_0 = const()[name = tensor("op_38051_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38051_cast_fp16 = einsum(equation = var_38051_equation_0, values = (var_37515_cast_fp16, var_37916_cast_fp16))[name = tensor("op_38051_cast_fp16")]; tensor var_38053_equation_0 = const()[name = tensor("op_38053_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38053_cast_fp16 = einsum(equation = var_38053_equation_0, values = (var_37515_cast_fp16, var_37917_cast_fp16))[name = tensor("op_38053_cast_fp16")]; tensor var_38055_equation_0 = const()[name = tensor("op_38055_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38055_cast_fp16 = einsum(equation = var_38055_equation_0, values = (var_37519_cast_fp16, var_37918_cast_fp16))[name = tensor("op_38055_cast_fp16")]; tensor var_38057_equation_0 = const()[name = tensor("op_38057_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38057_cast_fp16 = einsum(equation = var_38057_equation_0, values = (var_37519_cast_fp16, var_37919_cast_fp16))[name = tensor("op_38057_cast_fp16")]; tensor var_38059_equation_0 = const()[name = tensor("op_38059_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38059_cast_fp16 = einsum(equation = var_38059_equation_0, values = (var_37519_cast_fp16, var_37920_cast_fp16))[name = tensor("op_38059_cast_fp16")]; tensor var_38061_equation_0 = const()[name = tensor("op_38061_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38061_cast_fp16 = einsum(equation = var_38061_equation_0, values = (var_37519_cast_fp16, var_37921_cast_fp16))[name = tensor("op_38061_cast_fp16")]; tensor var_38063_equation_0 = const()[name = tensor("op_38063_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38063_cast_fp16 = einsum(equation = var_38063_equation_0, values = (var_37523_cast_fp16, var_37922_cast_fp16))[name = tensor("op_38063_cast_fp16")]; tensor var_38065_equation_0 = const()[name = tensor("op_38065_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38065_cast_fp16 = einsum(equation = var_38065_equation_0, values = (var_37523_cast_fp16, var_37923_cast_fp16))[name = tensor("op_38065_cast_fp16")]; tensor var_38067_equation_0 = const()[name = tensor("op_38067_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38067_cast_fp16 = einsum(equation = var_38067_equation_0, values = (var_37523_cast_fp16, var_37924_cast_fp16))[name = tensor("op_38067_cast_fp16")]; tensor var_38069_equation_0 = const()[name = tensor("op_38069_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38069_cast_fp16 = einsum(equation = var_38069_equation_0, values = (var_37523_cast_fp16, var_37925_cast_fp16))[name = tensor("op_38069_cast_fp16")]; tensor var_38071_equation_0 = const()[name = tensor("op_38071_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38071_cast_fp16 = einsum(equation = var_38071_equation_0, values = (var_37527_cast_fp16, var_37926_cast_fp16))[name = tensor("op_38071_cast_fp16")]; tensor var_38073_equation_0 = const()[name = tensor("op_38073_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38073_cast_fp16 = einsum(equation = var_38073_equation_0, values = (var_37527_cast_fp16, var_37927_cast_fp16))[name = tensor("op_38073_cast_fp16")]; tensor var_38075_equation_0 = const()[name = tensor("op_38075_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38075_cast_fp16 = einsum(equation = var_38075_equation_0, values = (var_37527_cast_fp16, var_37928_cast_fp16))[name = tensor("op_38075_cast_fp16")]; tensor var_38077_equation_0 = const()[name = tensor("op_38077_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38077_cast_fp16 = einsum(equation = var_38077_equation_0, values = (var_37527_cast_fp16, var_37929_cast_fp16))[name = tensor("op_38077_cast_fp16")]; tensor var_38079_equation_0 = const()[name = tensor("op_38079_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38079_cast_fp16 = einsum(equation = var_38079_equation_0, values = (var_37531_cast_fp16, var_37930_cast_fp16))[name = tensor("op_38079_cast_fp16")]; tensor var_38081_equation_0 = const()[name = tensor("op_38081_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38081_cast_fp16 = einsum(equation = var_38081_equation_0, values = (var_37531_cast_fp16, var_37931_cast_fp16))[name = tensor("op_38081_cast_fp16")]; tensor var_38083_equation_0 = const()[name = tensor("op_38083_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38083_cast_fp16 = einsum(equation = var_38083_equation_0, values = (var_37531_cast_fp16, var_37932_cast_fp16))[name = tensor("op_38083_cast_fp16")]; tensor var_38085_equation_0 = const()[name = tensor("op_38085_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38085_cast_fp16 = einsum(equation = var_38085_equation_0, values = (var_37531_cast_fp16, var_37933_cast_fp16))[name = tensor("op_38085_cast_fp16")]; tensor var_38087_equation_0 = const()[name = tensor("op_38087_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38087_cast_fp16 = einsum(equation = var_38087_equation_0, values = (var_37535_cast_fp16, var_37934_cast_fp16))[name = tensor("op_38087_cast_fp16")]; tensor var_38089_equation_0 = const()[name = tensor("op_38089_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38089_cast_fp16 = einsum(equation = var_38089_equation_0, values = (var_37535_cast_fp16, var_37935_cast_fp16))[name = tensor("op_38089_cast_fp16")]; tensor var_38091_equation_0 = const()[name = tensor("op_38091_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38091_cast_fp16 = einsum(equation = var_38091_equation_0, values = (var_37535_cast_fp16, var_37936_cast_fp16))[name = tensor("op_38091_cast_fp16")]; tensor var_38093_equation_0 = const()[name = tensor("op_38093_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38093_cast_fp16 = einsum(equation = var_38093_equation_0, values = (var_37535_cast_fp16, var_37937_cast_fp16))[name = tensor("op_38093_cast_fp16")]; tensor var_38095_equation_0 = const()[name = tensor("op_38095_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38095_cast_fp16 = einsum(equation = var_38095_equation_0, values = (var_37539_cast_fp16, var_37938_cast_fp16))[name = tensor("op_38095_cast_fp16")]; tensor var_38097_equation_0 = const()[name = tensor("op_38097_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38097_cast_fp16 = einsum(equation = var_38097_equation_0, values = (var_37539_cast_fp16, var_37939_cast_fp16))[name = tensor("op_38097_cast_fp16")]; tensor var_38099_equation_0 = const()[name = tensor("op_38099_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38099_cast_fp16 = einsum(equation = var_38099_equation_0, values = (var_37539_cast_fp16, var_37940_cast_fp16))[name = tensor("op_38099_cast_fp16")]; tensor var_38101_equation_0 = const()[name = tensor("op_38101_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_38101_cast_fp16 = einsum(equation = var_38101_equation_0, values = (var_37539_cast_fp16, var_37941_cast_fp16))[name = tensor("op_38101_cast_fp16")]; tensor var_38103_interleave_0 = const()[name = tensor("op_38103_interleave_0"), val = tensor(false)]; tensor var_38103_cast_fp16 = concat(axis = var_36635, interleave = var_38103_interleave_0, values = (var_37943_cast_fp16, var_37945_cast_fp16, var_37947_cast_fp16, var_37949_cast_fp16))[name = tensor("op_38103_cast_fp16")]; tensor var_38105_interleave_0 = const()[name = tensor("op_38105_interleave_0"), val = tensor(false)]; tensor var_38105_cast_fp16 = concat(axis = var_36635, interleave = var_38105_interleave_0, values = (var_37951_cast_fp16, var_37953_cast_fp16, var_37955_cast_fp16, var_37957_cast_fp16))[name = tensor("op_38105_cast_fp16")]; tensor var_38107_interleave_0 = const()[name = tensor("op_38107_interleave_0"), val = tensor(false)]; tensor var_38107_cast_fp16 = concat(axis = var_36635, interleave = var_38107_interleave_0, values = (var_37959_cast_fp16, var_37961_cast_fp16, var_37963_cast_fp16, var_37965_cast_fp16))[name = tensor("op_38107_cast_fp16")]; tensor var_38109_interleave_0 = const()[name = tensor("op_38109_interleave_0"), val = tensor(false)]; tensor var_38109_cast_fp16 = concat(axis = var_36635, interleave = var_38109_interleave_0, values = (var_37967_cast_fp16, var_37969_cast_fp16, var_37971_cast_fp16, var_37973_cast_fp16))[name = tensor("op_38109_cast_fp16")]; tensor var_38111_interleave_0 = const()[name = tensor("op_38111_interleave_0"), val = tensor(false)]; tensor var_38111_cast_fp16 = concat(axis = var_36635, interleave = var_38111_interleave_0, values = (var_37975_cast_fp16, var_37977_cast_fp16, var_37979_cast_fp16, var_37981_cast_fp16))[name = tensor("op_38111_cast_fp16")]; tensor var_38113_interleave_0 = const()[name = tensor("op_38113_interleave_0"), val = tensor(false)]; tensor var_38113_cast_fp16 = concat(axis = var_36635, interleave = var_38113_interleave_0, values = (var_37983_cast_fp16, var_37985_cast_fp16, var_37987_cast_fp16, var_37989_cast_fp16))[name = tensor("op_38113_cast_fp16")]; tensor var_38115_interleave_0 = const()[name = tensor("op_38115_interleave_0"), val = tensor(false)]; tensor var_38115_cast_fp16 = concat(axis = var_36635, interleave = var_38115_interleave_0, values = (var_37991_cast_fp16, var_37993_cast_fp16, var_37995_cast_fp16, var_37997_cast_fp16))[name = tensor("op_38115_cast_fp16")]; tensor var_38117_interleave_0 = const()[name = tensor("op_38117_interleave_0"), val = tensor(false)]; tensor var_38117_cast_fp16 = concat(axis = var_36635, interleave = var_38117_interleave_0, values = (var_37999_cast_fp16, var_38001_cast_fp16, var_38003_cast_fp16, var_38005_cast_fp16))[name = tensor("op_38117_cast_fp16")]; tensor var_38119_interleave_0 = const()[name = tensor("op_38119_interleave_0"), val = tensor(false)]; tensor var_38119_cast_fp16 = concat(axis = var_36635, interleave = var_38119_interleave_0, values = (var_38007_cast_fp16, var_38009_cast_fp16, var_38011_cast_fp16, var_38013_cast_fp16))[name = tensor("op_38119_cast_fp16")]; tensor var_38121_interleave_0 = const()[name = tensor("op_38121_interleave_0"), val = tensor(false)]; tensor var_38121_cast_fp16 = concat(axis = var_36635, interleave = var_38121_interleave_0, values = (var_38015_cast_fp16, var_38017_cast_fp16, var_38019_cast_fp16, var_38021_cast_fp16))[name = tensor("op_38121_cast_fp16")]; tensor var_38123_interleave_0 = const()[name = tensor("op_38123_interleave_0"), val = tensor(false)]; tensor var_38123_cast_fp16 = concat(axis = var_36635, interleave = var_38123_interleave_0, values = (var_38023_cast_fp16, var_38025_cast_fp16, var_38027_cast_fp16, var_38029_cast_fp16))[name = tensor("op_38123_cast_fp16")]; tensor var_38125_interleave_0 = const()[name = tensor("op_38125_interleave_0"), val = tensor(false)]; tensor var_38125_cast_fp16 = concat(axis = var_36635, interleave = var_38125_interleave_0, values = (var_38031_cast_fp16, var_38033_cast_fp16, var_38035_cast_fp16, var_38037_cast_fp16))[name = tensor("op_38125_cast_fp16")]; tensor var_38127_interleave_0 = const()[name = tensor("op_38127_interleave_0"), val = tensor(false)]; tensor var_38127_cast_fp16 = concat(axis = var_36635, interleave = var_38127_interleave_0, values = (var_38039_cast_fp16, var_38041_cast_fp16, var_38043_cast_fp16, var_38045_cast_fp16))[name = tensor("op_38127_cast_fp16")]; tensor var_38129_interleave_0 = const()[name = tensor("op_38129_interleave_0"), val = tensor(false)]; tensor var_38129_cast_fp16 = concat(axis = var_36635, interleave = var_38129_interleave_0, values = (var_38047_cast_fp16, var_38049_cast_fp16, var_38051_cast_fp16, var_38053_cast_fp16))[name = tensor("op_38129_cast_fp16")]; tensor var_38131_interleave_0 = const()[name = tensor("op_38131_interleave_0"), val = tensor(false)]; tensor var_38131_cast_fp16 = concat(axis = var_36635, interleave = var_38131_interleave_0, values = (var_38055_cast_fp16, var_38057_cast_fp16, var_38059_cast_fp16, var_38061_cast_fp16))[name = tensor("op_38131_cast_fp16")]; tensor var_38133_interleave_0 = const()[name = tensor("op_38133_interleave_0"), val = tensor(false)]; tensor var_38133_cast_fp16 = concat(axis = var_36635, interleave = var_38133_interleave_0, values = (var_38063_cast_fp16, var_38065_cast_fp16, var_38067_cast_fp16, var_38069_cast_fp16))[name = tensor("op_38133_cast_fp16")]; tensor var_38135_interleave_0 = const()[name = tensor("op_38135_interleave_0"), val = tensor(false)]; tensor var_38135_cast_fp16 = concat(axis = var_36635, interleave = var_38135_interleave_0, values = (var_38071_cast_fp16, var_38073_cast_fp16, var_38075_cast_fp16, var_38077_cast_fp16))[name = tensor("op_38135_cast_fp16")]; tensor var_38137_interleave_0 = const()[name = tensor("op_38137_interleave_0"), val = tensor(false)]; tensor var_38137_cast_fp16 = concat(axis = var_36635, interleave = var_38137_interleave_0, values = (var_38079_cast_fp16, var_38081_cast_fp16, var_38083_cast_fp16, var_38085_cast_fp16))[name = tensor("op_38137_cast_fp16")]; tensor var_38139_interleave_0 = const()[name = tensor("op_38139_interleave_0"), val = tensor(false)]; tensor var_38139_cast_fp16 = concat(axis = var_36635, interleave = var_38139_interleave_0, values = (var_38087_cast_fp16, var_38089_cast_fp16, var_38091_cast_fp16, var_38093_cast_fp16))[name = tensor("op_38139_cast_fp16")]; tensor var_38141_interleave_0 = const()[name = tensor("op_38141_interleave_0"), val = tensor(false)]; tensor var_38141_cast_fp16 = concat(axis = var_36635, interleave = var_38141_interleave_0, values = (var_38095_cast_fp16, var_38097_cast_fp16, var_38099_cast_fp16, var_38101_cast_fp16))[name = tensor("op_38141_cast_fp16")]; tensor input_185_interleave_0 = const()[name = tensor("input_185_interleave_0"), val = tensor(false)]; tensor input_185_cast_fp16 = concat(axis = var_36660, interleave = input_185_interleave_0, values = (var_38103_cast_fp16, var_38105_cast_fp16, var_38107_cast_fp16, var_38109_cast_fp16, var_38111_cast_fp16, var_38113_cast_fp16, var_38115_cast_fp16, var_38117_cast_fp16, var_38119_cast_fp16, var_38121_cast_fp16, var_38123_cast_fp16, var_38125_cast_fp16, var_38127_cast_fp16, var_38129_cast_fp16, var_38131_cast_fp16, var_38133_cast_fp16, var_38135_cast_fp16, var_38137_cast_fp16, var_38139_cast_fp16, var_38141_cast_fp16))[name = tensor("input_185_cast_fp16")]; tensor var_38152_pad_type_0 = const()[name = tensor("op_38152_pad_type_0"), val = tensor("valid")]; tensor var_38152_strides_0 = const()[name = tensor("op_38152_strides_0"), val = tensor([1, 1])]; tensor var_38152_pad_0 = const()[name = tensor("op_38152_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38152_dilations_0 = const()[name = tensor("op_38152_dilations_0"), val = tensor([1, 1])]; tensor var_38152_groups_0 = const()[name = tensor("op_38152_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(312502016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313321280))), name = tensor("layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_23_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313321408)))]; tensor var_38152_cast_fp16 = conv(bias = layers_23_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_38152_dilations_0, groups = var_38152_groups_0, pad = var_38152_pad_0, pad_type = var_38152_pad_type_0, strides = var_38152_strides_0, weight = layers_23_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_185_cast_fp16)[name = tensor("op_38152_cast_fp16")]; tensor var_38158_pad_type_0 = const()[name = tensor("op_38158_pad_type_0"), val = tensor("valid")]; tensor var_38158_strides_0 = const()[name = tensor("op_38158_strides_0"), val = tensor([1, 1])]; tensor var_38158_pad_0 = const()[name = tensor("op_38158_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38158_dilations_0 = const()[name = tensor("op_38158_dilations_0"), val = tensor([1, 1])]; tensor var_38158_groups_0 = const()[name = tensor("op_38158_groups_0"), val = tensor(1)]; tensor layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313337280))), name = tensor("layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313324032))), shape = tensor([1280, 1280, 1, 1])]; tensor var_38158_cast_fp16 = conv(dilations = var_38158_dilations_0, groups = var_38158_groups_0, pad = var_38158_pad_0, pad_type = var_38158_pad_type_0, strides = var_38158_strides_0, weight = layers_23_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_185_cast_fp16)[name = tensor("op_38158_cast_fp16")]; tensor obj_95_cast_fp16 = add(x = var_38152_cast_fp16, y = var_38158_cast_fp16)[name = tensor("obj_95_cast_fp16")]; tensor inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = obj_95_cast_fp16)[name = tensor("inputs_95_cast_fp16")]; tensor out_95_axes_0 = const()[name = tensor("out_95_axes_0"), val = tensor([1])]; tensor var_38169_to_fp16 = const()[name = tensor("op_38169_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_95_cast_fp16 = layer_norm(axes = out_95_axes_0, epsilon = var_38169_to_fp16, x = inputs_95_cast_fp16)[name = tensor("out_95_cast_fp16")]; tensor input_187_gamma_0_to_fp16 = const()[name = tensor("input_187_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313542144)))]; tensor input_187_beta_0_to_fp16 = const()[name = tensor("input_187_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313544768)))]; tensor input_187_epsilon_0_to_fp16 = const()[name = tensor("input_187_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_187_cast_fp16 = batch_norm(beta = input_187_beta_0_to_fp16, epsilon = input_187_epsilon_0_to_fp16, gamma = input_187_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_95_cast_fp16)[name = tensor("input_187_cast_fp16")]; tensor var_38187_pad_type_0 = const()[name = tensor("op_38187_pad_type_0"), val = tensor("valid")]; tensor var_38187_strides_0 = const()[name = tensor("op_38187_strides_0"), val = tensor([1, 1])]; tensor var_38187_pad_0 = const()[name = tensor("op_38187_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38187_dilations_0 = const()[name = tensor("op_38187_dilations_0"), val = tensor([1, 1])]; tensor var_38187_groups_0 = const()[name = tensor("op_38187_groups_0"), val = tensor(1)]; tensor layers_23_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(313547392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316824256))), name = tensor("layers_23_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_23_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316824384)))]; tensor var_38187_cast_fp16 = conv(bias = layers_23_fc1_inlier_module_bias_to_fp16, dilations = var_38187_dilations_0, groups = var_38187_groups_0, pad = var_38187_pad_0, pad_type = var_38187_pad_type_0, strides = var_38187_strides_0, weight = layers_23_fc1_inlier_module_weight_to_fp16_palettized, x = input_187_cast_fp16)[name = tensor("op_38187_cast_fp16")]; tensor var_38193_pad_type_0 = const()[name = tensor("op_38193_pad_type_0"), val = tensor("valid")]; tensor var_38193_strides_0 = const()[name = tensor("op_38193_strides_0"), val = tensor([1, 1])]; tensor var_38193_pad_0 = const()[name = tensor("op_38193_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38193_dilations_0 = const()[name = tensor("op_38193_dilations_0"), val = tensor([1, 1])]; tensor var_38193_groups_0 = const()[name = tensor("op_38193_groups_0"), val = tensor(1)]; tensor layers_23_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316896960))), name = tensor("layers_23_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316834688))), shape = tensor([5120, 1280, 1, 1])]; tensor var_38193_cast_fp16 = conv(dilations = var_38193_dilations_0, groups = var_38193_groups_0, pad = var_38193_pad_0, pad_type = var_38193_pad_type_0, strides = var_38193_strides_0, weight = layers_23_fc1_outlier_module_weight_to_fp16_sparsified, x = input_187_cast_fp16)[name = tensor("op_38193_cast_fp16")]; tensor input_189_cast_fp16 = add(x = var_38187_cast_fp16, y = var_38193_cast_fp16)[name = tensor("input_189_cast_fp16")]; tensor input_191_mode_0 = const()[name = tensor("input_191_mode_0"), val = tensor("EXACT")]; tensor input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor("input_191_cast_fp16")]; tensor var_38204_pad_type_0 = const()[name = tensor("op_38204_pad_type_0"), val = tensor("valid")]; tensor var_38204_strides_0 = const()[name = tensor("op_38204_strides_0"), val = tensor([1, 1])]; tensor var_38204_pad_0 = const()[name = tensor("op_38204_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38204_dilations_0 = const()[name = tensor("op_38204_dilations_0"), val = tensor([1, 1])]; tensor var_38204_groups_0 = const()[name = tensor("op_38204_groups_0"), val = tensor(1)]; tensor layers_23_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(317716224))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320993088))), name = tensor("layers_23_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_23_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_23_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320993216)))]; tensor var_38204_cast_fp16 = conv(bias = layers_23_fc2_inlier_module_bias_to_fp16, dilations = var_38204_dilations_0, groups = var_38204_groups_0, pad = var_38204_pad_0, pad_type = var_38204_pad_type_0, strides = var_38204_strides_0, weight = layers_23_fc2_inlier_module_weight_to_fp16_palettized, x = input_191_cast_fp16)[name = tensor("op_38204_cast_fp16")]; tensor var_38210_pad_type_0 = const()[name = tensor("op_38210_pad_type_0"), val = tensor("valid")]; tensor var_38210_strides_0 = const()[name = tensor("op_38210_strides_0"), val = tensor([1, 1])]; tensor var_38210_pad_0 = const()[name = tensor("op_38210_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38210_dilations_0 = const()[name = tensor("op_38210_dilations_0"), val = tensor([1, 1])]; tensor var_38210_groups_0 = const()[name = tensor("op_38210_groups_0"), val = tensor(1)]; tensor layers_23_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321052864))), name = tensor("layers_23_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(320995840))), shape = tensor([1280, 5120, 1, 1])]; tensor var_38210_cast_fp16 = conv(dilations = var_38210_dilations_0, groups = var_38210_groups_0, pad = var_38210_pad_0, pad_type = var_38210_pad_type_0, strides = var_38210_strides_0, weight = layers_23_fc2_outlier_module_weight_to_fp16_sparsified, x = input_191_cast_fp16)[name = tensor("op_38210_cast_fp16")]; tensor hidden_states_51_cast_fp16 = add(x = var_38204_cast_fp16, y = var_38210_cast_fp16)[name = tensor("hidden_states_51_cast_fp16")]; tensor inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = hidden_states_51_cast_fp16)[name = tensor("inputs_97_cast_fp16")]; tensor var_38216 = const()[name = tensor("op_38216"), val = tensor(3)]; tensor var_38241 = const()[name = tensor("op_38241"), val = tensor(1)]; tensor out_97_axes_0 = const()[name = tensor("out_97_axes_0"), val = tensor([1])]; tensor var_38258_to_fp16 = const()[name = tensor("op_38258_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_97_cast_fp16 = layer_norm(axes = out_97_axes_0, epsilon = var_38258_to_fp16, x = inputs_97_cast_fp16)[name = tensor("out_97_cast_fp16")]; tensor obj_97_gamma_0_to_fp16 = const()[name = tensor("obj_97_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321872128)))]; tensor obj_97_beta_0_to_fp16 = const()[name = tensor("obj_97_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321874752)))]; tensor obj_97_epsilon_0_to_fp16 = const()[name = tensor("obj_97_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_97_cast_fp16)[name = tensor("obj_97_cast_fp16")]; tensor var_38280_pad_type_0 = const()[name = tensor("op_38280_pad_type_0"), val = tensor("valid")]; tensor var_38280_strides_0 = const()[name = tensor("op_38280_strides_0"), val = tensor([1, 1])]; tensor var_38280_pad_0 = const()[name = tensor("op_38280_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38280_dilations_0 = const()[name = tensor("op_38280_dilations_0"), val = tensor([1, 1])]; tensor var_38280_groups_0 = const()[name = tensor("op_38280_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(321877376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322696640))), name = tensor("layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_24_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322696768)))]; tensor var_38280_cast_fp16 = conv(bias = layers_24_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_38280_dilations_0, groups = var_38280_groups_0, pad = var_38280_pad_0, pad_type = var_38280_pad_type_0, strides = var_38280_strides_0, weight = layers_24_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = tensor("op_38280_cast_fp16")]; tensor var_38286_pad_type_0 = const()[name = tensor("op_38286_pad_type_0"), val = tensor("valid")]; tensor var_38286_strides_0 = const()[name = tensor("op_38286_strides_0"), val = tensor([1, 1])]; tensor var_38286_pad_0 = const()[name = tensor("op_38286_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38286_dilations_0 = const()[name = tensor("op_38286_dilations_0"), val = tensor([1, 1])]; tensor var_38286_groups_0 = const()[name = tensor("op_38286_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322733056))), name = tensor("layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322699392))), shape = tensor([1280, 1280, 1, 1])]; tensor var_38286_cast_fp16 = conv(dilations = var_38286_dilations_0, groups = var_38286_groups_0, pad = var_38286_pad_0, pad_type = var_38286_pad_type_0, strides = var_38286_strides_0, weight = layers_24_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = tensor("op_38286_cast_fp16")]; tensor query_49_cast_fp16 = add(x = var_38280_cast_fp16, y = var_38286_cast_fp16)[name = tensor("query_49_cast_fp16")]; tensor var_38295_pad_type_0 = const()[name = tensor("op_38295_pad_type_0"), val = tensor("valid")]; tensor var_38295_strides_0 = const()[name = tensor("op_38295_strides_0"), val = tensor([1, 1])]; tensor var_38295_pad_0 = const()[name = tensor("op_38295_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38295_dilations_0 = const()[name = tensor("op_38295_dilations_0"), val = tensor([1, 1])]; tensor var_38295_groups_0 = const()[name = tensor("op_38295_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(322937920))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323757184))), name = tensor("layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_38295_cast_fp16 = conv(dilations = var_38295_dilations_0, groups = var_38295_groups_0, pad = var_38295_pad_0, pad_type = var_38295_pad_type_0, strides = var_38295_strides_0, weight = layers_24_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = tensor("op_38295_cast_fp16")]; tensor var_38301_pad_type_0 = const()[name = tensor("op_38301_pad_type_0"), val = tensor("valid")]; tensor var_38301_strides_0 = const()[name = tensor("op_38301_strides_0"), val = tensor([1, 1])]; tensor var_38301_pad_0 = const()[name = tensor("op_38301_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38301_dilations_0 = const()[name = tensor("op_38301_dilations_0"), val = tensor([1, 1])]; tensor var_38301_groups_0 = const()[name = tensor("op_38301_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323783744))), name = tensor("layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323757312))), shape = tensor([1280, 1280, 1, 1])]; tensor var_38301_cast_fp16 = conv(dilations = var_38301_dilations_0, groups = var_38301_groups_0, pad = var_38301_pad_0, pad_type = var_38301_pad_type_0, strides = var_38301_strides_0, weight = layers_24_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = tensor("op_38301_cast_fp16")]; tensor key_49_cast_fp16 = add(x = var_38295_cast_fp16, y = var_38301_cast_fp16)[name = tensor("key_49_cast_fp16")]; tensor var_38311_pad_type_0 = const()[name = tensor("op_38311_pad_type_0"), val = tensor("valid")]; tensor var_38311_strides_0 = const()[name = tensor("op_38311_strides_0"), val = tensor([1, 1])]; tensor var_38311_pad_0 = const()[name = tensor("op_38311_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38311_dilations_0 = const()[name = tensor("op_38311_dilations_0"), val = tensor([1, 1])]; tensor var_38311_groups_0 = const()[name = tensor("op_38311_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(323988608))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324807872))), name = tensor("layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_24_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324808000)))]; tensor var_38311_cast_fp16 = conv(bias = layers_24_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_38311_dilations_0, groups = var_38311_groups_0, pad = var_38311_pad_0, pad_type = var_38311_pad_type_0, strides = var_38311_strides_0, weight = layers_24_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_97_cast_fp16)[name = tensor("op_38311_cast_fp16")]; tensor var_38317_pad_type_0 = const()[name = tensor("op_38317_pad_type_0"), val = tensor("valid")]; tensor var_38317_strides_0 = const()[name = tensor("op_38317_strides_0"), val = tensor([1, 1])]; tensor var_38317_pad_0 = const()[name = tensor("op_38317_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_38317_dilations_0 = const()[name = tensor("op_38317_dilations_0"), val = tensor([1, 1])]; tensor var_38317_groups_0 = const()[name = tensor("op_38317_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324822080))), name = tensor("layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(324810624))), shape = tensor([1280, 1280, 1, 1])]; tensor var_38317_cast_fp16 = conv(dilations = var_38317_dilations_0, groups = var_38317_groups_0, pad = var_38317_pad_0, pad_type = var_38317_pad_type_0, strides = var_38317_strides_0, weight = layers_24_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_97_cast_fp16)[name = tensor("op_38317_cast_fp16")]; tensor value_49_cast_fp16 = add(x = var_38311_cast_fp16, y = var_38317_cast_fp16)[name = tensor("value_49_cast_fp16")]; tensor var_38323_begin_0 = const()[name = tensor("op_38323_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38323_end_0 = const()[name = tensor("op_38323_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38323_end_mask_0 = const()[name = tensor("op_38323_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38323_cast_fp16 = slice_by_index(begin = var_38323_begin_0, end = var_38323_end_0, end_mask = var_38323_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38323_cast_fp16")]; tensor var_38327_begin_0 = const()[name = tensor("op_38327_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_38327_end_0 = const()[name = tensor("op_38327_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_38327_end_mask_0 = const()[name = tensor("op_38327_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38327_cast_fp16 = slice_by_index(begin = var_38327_begin_0, end = var_38327_end_0, end_mask = var_38327_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38327_cast_fp16")]; tensor var_38331_begin_0 = const()[name = tensor("op_38331_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_38331_end_0 = const()[name = tensor("op_38331_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_38331_end_mask_0 = const()[name = tensor("op_38331_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38331_cast_fp16 = slice_by_index(begin = var_38331_begin_0, end = var_38331_end_0, end_mask = var_38331_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38331_cast_fp16")]; tensor var_38335_begin_0 = const()[name = tensor("op_38335_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_38335_end_0 = const()[name = tensor("op_38335_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_38335_end_mask_0 = const()[name = tensor("op_38335_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38335_cast_fp16 = slice_by_index(begin = var_38335_begin_0, end = var_38335_end_0, end_mask = var_38335_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38335_cast_fp16")]; tensor var_38339_begin_0 = const()[name = tensor("op_38339_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_38339_end_0 = const()[name = tensor("op_38339_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_38339_end_mask_0 = const()[name = tensor("op_38339_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38339_cast_fp16 = slice_by_index(begin = var_38339_begin_0, end = var_38339_end_0, end_mask = var_38339_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38339_cast_fp16")]; tensor var_38343_begin_0 = const()[name = tensor("op_38343_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_38343_end_0 = const()[name = tensor("op_38343_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_38343_end_mask_0 = const()[name = tensor("op_38343_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38343_cast_fp16 = slice_by_index(begin = var_38343_begin_0, end = var_38343_end_0, end_mask = var_38343_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38343_cast_fp16")]; tensor var_38347_begin_0 = const()[name = tensor("op_38347_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_38347_end_0 = const()[name = tensor("op_38347_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_38347_end_mask_0 = const()[name = tensor("op_38347_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38347_cast_fp16 = slice_by_index(begin = var_38347_begin_0, end = var_38347_end_0, end_mask = var_38347_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38347_cast_fp16")]; tensor var_38351_begin_0 = const()[name = tensor("op_38351_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_38351_end_0 = const()[name = tensor("op_38351_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_38351_end_mask_0 = const()[name = tensor("op_38351_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38351_cast_fp16 = slice_by_index(begin = var_38351_begin_0, end = var_38351_end_0, end_mask = var_38351_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38351_cast_fp16")]; tensor var_38355_begin_0 = const()[name = tensor("op_38355_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_38355_end_0 = const()[name = tensor("op_38355_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_38355_end_mask_0 = const()[name = tensor("op_38355_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38355_cast_fp16 = slice_by_index(begin = var_38355_begin_0, end = var_38355_end_0, end_mask = var_38355_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38355_cast_fp16")]; tensor var_38359_begin_0 = const()[name = tensor("op_38359_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_38359_end_0 = const()[name = tensor("op_38359_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_38359_end_mask_0 = const()[name = tensor("op_38359_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38359_cast_fp16 = slice_by_index(begin = var_38359_begin_0, end = var_38359_end_0, end_mask = var_38359_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38359_cast_fp16")]; tensor var_38363_begin_0 = const()[name = tensor("op_38363_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_38363_end_0 = const()[name = tensor("op_38363_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_38363_end_mask_0 = const()[name = tensor("op_38363_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38363_cast_fp16 = slice_by_index(begin = var_38363_begin_0, end = var_38363_end_0, end_mask = var_38363_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38363_cast_fp16")]; tensor var_38367_begin_0 = const()[name = tensor("op_38367_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_38367_end_0 = const()[name = tensor("op_38367_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_38367_end_mask_0 = const()[name = tensor("op_38367_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38367_cast_fp16 = slice_by_index(begin = var_38367_begin_0, end = var_38367_end_0, end_mask = var_38367_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38367_cast_fp16")]; tensor var_38371_begin_0 = const()[name = tensor("op_38371_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_38371_end_0 = const()[name = tensor("op_38371_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_38371_end_mask_0 = const()[name = tensor("op_38371_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38371_cast_fp16 = slice_by_index(begin = var_38371_begin_0, end = var_38371_end_0, end_mask = var_38371_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38371_cast_fp16")]; tensor var_38375_begin_0 = const()[name = tensor("op_38375_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_38375_end_0 = const()[name = tensor("op_38375_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_38375_end_mask_0 = const()[name = tensor("op_38375_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38375_cast_fp16 = slice_by_index(begin = var_38375_begin_0, end = var_38375_end_0, end_mask = var_38375_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38375_cast_fp16")]; tensor var_38379_begin_0 = const()[name = tensor("op_38379_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_38379_end_0 = const()[name = tensor("op_38379_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_38379_end_mask_0 = const()[name = tensor("op_38379_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38379_cast_fp16 = slice_by_index(begin = var_38379_begin_0, end = var_38379_end_0, end_mask = var_38379_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38379_cast_fp16")]; tensor var_38383_begin_0 = const()[name = tensor("op_38383_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_38383_end_0 = const()[name = tensor("op_38383_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_38383_end_mask_0 = const()[name = tensor("op_38383_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38383_cast_fp16 = slice_by_index(begin = var_38383_begin_0, end = var_38383_end_0, end_mask = var_38383_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38383_cast_fp16")]; tensor var_38387_begin_0 = const()[name = tensor("op_38387_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_38387_end_0 = const()[name = tensor("op_38387_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_38387_end_mask_0 = const()[name = tensor("op_38387_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38387_cast_fp16 = slice_by_index(begin = var_38387_begin_0, end = var_38387_end_0, end_mask = var_38387_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38387_cast_fp16")]; tensor var_38391_begin_0 = const()[name = tensor("op_38391_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_38391_end_0 = const()[name = tensor("op_38391_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_38391_end_mask_0 = const()[name = tensor("op_38391_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38391_cast_fp16 = slice_by_index(begin = var_38391_begin_0, end = var_38391_end_0, end_mask = var_38391_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38391_cast_fp16")]; tensor var_38395_begin_0 = const()[name = tensor("op_38395_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_38395_end_0 = const()[name = tensor("op_38395_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_38395_end_mask_0 = const()[name = tensor("op_38395_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38395_cast_fp16 = slice_by_index(begin = var_38395_begin_0, end = var_38395_end_0, end_mask = var_38395_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38395_cast_fp16")]; tensor var_38399_begin_0 = const()[name = tensor("op_38399_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_38399_end_0 = const()[name = tensor("op_38399_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_38399_end_mask_0 = const()[name = tensor("op_38399_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_38399_cast_fp16 = slice_by_index(begin = var_38399_begin_0, end = var_38399_end_0, end_mask = var_38399_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_38399_cast_fp16")]; tensor var_38408_begin_0 = const()[name = tensor("op_38408_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38408_end_0 = const()[name = tensor("op_38408_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38408_end_mask_0 = const()[name = tensor("op_38408_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38408_cast_fp16 = slice_by_index(begin = var_38408_begin_0, end = var_38408_end_0, end_mask = var_38408_end_mask_0, x = var_38323_cast_fp16)[name = tensor("op_38408_cast_fp16")]; tensor var_38415_begin_0 = const()[name = tensor("op_38415_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38415_end_0 = const()[name = tensor("op_38415_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38415_end_mask_0 = const()[name = tensor("op_38415_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38415_cast_fp16 = slice_by_index(begin = var_38415_begin_0, end = var_38415_end_0, end_mask = var_38415_end_mask_0, x = var_38323_cast_fp16)[name = tensor("op_38415_cast_fp16")]; tensor var_38422_begin_0 = const()[name = tensor("op_38422_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38422_end_0 = const()[name = tensor("op_38422_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38422_end_mask_0 = const()[name = tensor("op_38422_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38422_cast_fp16 = slice_by_index(begin = var_38422_begin_0, end = var_38422_end_0, end_mask = var_38422_end_mask_0, x = var_38323_cast_fp16)[name = tensor("op_38422_cast_fp16")]; tensor var_38429_begin_0 = const()[name = tensor("op_38429_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38429_end_0 = const()[name = tensor("op_38429_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38429_end_mask_0 = const()[name = tensor("op_38429_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38429_cast_fp16 = slice_by_index(begin = var_38429_begin_0, end = var_38429_end_0, end_mask = var_38429_end_mask_0, x = var_38323_cast_fp16)[name = tensor("op_38429_cast_fp16")]; tensor var_38436_begin_0 = const()[name = tensor("op_38436_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38436_end_0 = const()[name = tensor("op_38436_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38436_end_mask_0 = const()[name = tensor("op_38436_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38436_cast_fp16 = slice_by_index(begin = var_38436_begin_0, end = var_38436_end_0, end_mask = var_38436_end_mask_0, x = var_38327_cast_fp16)[name = tensor("op_38436_cast_fp16")]; tensor var_38443_begin_0 = const()[name = tensor("op_38443_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38443_end_0 = const()[name = tensor("op_38443_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38443_end_mask_0 = const()[name = tensor("op_38443_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38443_cast_fp16 = slice_by_index(begin = var_38443_begin_0, end = var_38443_end_0, end_mask = var_38443_end_mask_0, x = var_38327_cast_fp16)[name = tensor("op_38443_cast_fp16")]; tensor var_38450_begin_0 = const()[name = tensor("op_38450_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38450_end_0 = const()[name = tensor("op_38450_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38450_end_mask_0 = const()[name = tensor("op_38450_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38450_cast_fp16 = slice_by_index(begin = var_38450_begin_0, end = var_38450_end_0, end_mask = var_38450_end_mask_0, x = var_38327_cast_fp16)[name = tensor("op_38450_cast_fp16")]; tensor var_38457_begin_0 = const()[name = tensor("op_38457_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38457_end_0 = const()[name = tensor("op_38457_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38457_end_mask_0 = const()[name = tensor("op_38457_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38457_cast_fp16 = slice_by_index(begin = var_38457_begin_0, end = var_38457_end_0, end_mask = var_38457_end_mask_0, x = var_38327_cast_fp16)[name = tensor("op_38457_cast_fp16")]; tensor var_38464_begin_0 = const()[name = tensor("op_38464_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38464_end_0 = const()[name = tensor("op_38464_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38464_end_mask_0 = const()[name = tensor("op_38464_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38464_cast_fp16 = slice_by_index(begin = var_38464_begin_0, end = var_38464_end_0, end_mask = var_38464_end_mask_0, x = var_38331_cast_fp16)[name = tensor("op_38464_cast_fp16")]; tensor var_38471_begin_0 = const()[name = tensor("op_38471_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38471_end_0 = const()[name = tensor("op_38471_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38471_end_mask_0 = const()[name = tensor("op_38471_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38471_cast_fp16 = slice_by_index(begin = var_38471_begin_0, end = var_38471_end_0, end_mask = var_38471_end_mask_0, x = var_38331_cast_fp16)[name = tensor("op_38471_cast_fp16")]; tensor var_38478_begin_0 = const()[name = tensor("op_38478_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38478_end_0 = const()[name = tensor("op_38478_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38478_end_mask_0 = const()[name = tensor("op_38478_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38478_cast_fp16 = slice_by_index(begin = var_38478_begin_0, end = var_38478_end_0, end_mask = var_38478_end_mask_0, x = var_38331_cast_fp16)[name = tensor("op_38478_cast_fp16")]; tensor var_38485_begin_0 = const()[name = tensor("op_38485_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38485_end_0 = const()[name = tensor("op_38485_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38485_end_mask_0 = const()[name = tensor("op_38485_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38485_cast_fp16 = slice_by_index(begin = var_38485_begin_0, end = var_38485_end_0, end_mask = var_38485_end_mask_0, x = var_38331_cast_fp16)[name = tensor("op_38485_cast_fp16")]; tensor var_38492_begin_0 = const()[name = tensor("op_38492_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38492_end_0 = const()[name = tensor("op_38492_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38492_end_mask_0 = const()[name = tensor("op_38492_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38492_cast_fp16 = slice_by_index(begin = var_38492_begin_0, end = var_38492_end_0, end_mask = var_38492_end_mask_0, x = var_38335_cast_fp16)[name = tensor("op_38492_cast_fp16")]; tensor var_38499_begin_0 = const()[name = tensor("op_38499_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38499_end_0 = const()[name = tensor("op_38499_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38499_end_mask_0 = const()[name = tensor("op_38499_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38499_cast_fp16 = slice_by_index(begin = var_38499_begin_0, end = var_38499_end_0, end_mask = var_38499_end_mask_0, x = var_38335_cast_fp16)[name = tensor("op_38499_cast_fp16")]; tensor var_38506_begin_0 = const()[name = tensor("op_38506_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38506_end_0 = const()[name = tensor("op_38506_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38506_end_mask_0 = const()[name = tensor("op_38506_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38506_cast_fp16 = slice_by_index(begin = var_38506_begin_0, end = var_38506_end_0, end_mask = var_38506_end_mask_0, x = var_38335_cast_fp16)[name = tensor("op_38506_cast_fp16")]; tensor var_38513_begin_0 = const()[name = tensor("op_38513_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38513_end_0 = const()[name = tensor("op_38513_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38513_end_mask_0 = const()[name = tensor("op_38513_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38513_cast_fp16 = slice_by_index(begin = var_38513_begin_0, end = var_38513_end_0, end_mask = var_38513_end_mask_0, x = var_38335_cast_fp16)[name = tensor("op_38513_cast_fp16")]; tensor var_38520_begin_0 = const()[name = tensor("op_38520_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38520_end_0 = const()[name = tensor("op_38520_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38520_end_mask_0 = const()[name = tensor("op_38520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38520_cast_fp16 = slice_by_index(begin = var_38520_begin_0, end = var_38520_end_0, end_mask = var_38520_end_mask_0, x = var_38339_cast_fp16)[name = tensor("op_38520_cast_fp16")]; tensor var_38527_begin_0 = const()[name = tensor("op_38527_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38527_end_0 = const()[name = tensor("op_38527_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38527_end_mask_0 = const()[name = tensor("op_38527_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38527_cast_fp16 = slice_by_index(begin = var_38527_begin_0, end = var_38527_end_0, end_mask = var_38527_end_mask_0, x = var_38339_cast_fp16)[name = tensor("op_38527_cast_fp16")]; tensor var_38534_begin_0 = const()[name = tensor("op_38534_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38534_end_0 = const()[name = tensor("op_38534_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38534_end_mask_0 = const()[name = tensor("op_38534_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38534_cast_fp16 = slice_by_index(begin = var_38534_begin_0, end = var_38534_end_0, end_mask = var_38534_end_mask_0, x = var_38339_cast_fp16)[name = tensor("op_38534_cast_fp16")]; tensor var_38541_begin_0 = const()[name = tensor("op_38541_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38541_end_0 = const()[name = tensor("op_38541_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38541_end_mask_0 = const()[name = tensor("op_38541_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38541_cast_fp16 = slice_by_index(begin = var_38541_begin_0, end = var_38541_end_0, end_mask = var_38541_end_mask_0, x = var_38339_cast_fp16)[name = tensor("op_38541_cast_fp16")]; tensor var_38548_begin_0 = const()[name = tensor("op_38548_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38548_end_0 = const()[name = tensor("op_38548_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38548_end_mask_0 = const()[name = tensor("op_38548_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38548_cast_fp16 = slice_by_index(begin = var_38548_begin_0, end = var_38548_end_0, end_mask = var_38548_end_mask_0, x = var_38343_cast_fp16)[name = tensor("op_38548_cast_fp16")]; tensor var_38555_begin_0 = const()[name = tensor("op_38555_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38555_end_0 = const()[name = tensor("op_38555_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38555_end_mask_0 = const()[name = tensor("op_38555_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38555_cast_fp16 = slice_by_index(begin = var_38555_begin_0, end = var_38555_end_0, end_mask = var_38555_end_mask_0, x = var_38343_cast_fp16)[name = tensor("op_38555_cast_fp16")]; tensor var_38562_begin_0 = const()[name = tensor("op_38562_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38562_end_0 = const()[name = tensor("op_38562_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38562_end_mask_0 = const()[name = tensor("op_38562_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38562_cast_fp16 = slice_by_index(begin = var_38562_begin_0, end = var_38562_end_0, end_mask = var_38562_end_mask_0, x = var_38343_cast_fp16)[name = tensor("op_38562_cast_fp16")]; tensor var_38569_begin_0 = const()[name = tensor("op_38569_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38569_end_0 = const()[name = tensor("op_38569_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38569_end_mask_0 = const()[name = tensor("op_38569_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38569_cast_fp16 = slice_by_index(begin = var_38569_begin_0, end = var_38569_end_0, end_mask = var_38569_end_mask_0, x = var_38343_cast_fp16)[name = tensor("op_38569_cast_fp16")]; tensor var_38576_begin_0 = const()[name = tensor("op_38576_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38576_end_0 = const()[name = tensor("op_38576_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38576_end_mask_0 = const()[name = tensor("op_38576_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38576_cast_fp16 = slice_by_index(begin = var_38576_begin_0, end = var_38576_end_0, end_mask = var_38576_end_mask_0, x = var_38347_cast_fp16)[name = tensor("op_38576_cast_fp16")]; tensor var_38583_begin_0 = const()[name = tensor("op_38583_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38583_end_0 = const()[name = tensor("op_38583_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38583_end_mask_0 = const()[name = tensor("op_38583_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38583_cast_fp16 = slice_by_index(begin = var_38583_begin_0, end = var_38583_end_0, end_mask = var_38583_end_mask_0, x = var_38347_cast_fp16)[name = tensor("op_38583_cast_fp16")]; tensor var_38590_begin_0 = const()[name = tensor("op_38590_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38590_end_0 = const()[name = tensor("op_38590_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38590_end_mask_0 = const()[name = tensor("op_38590_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38590_cast_fp16 = slice_by_index(begin = var_38590_begin_0, end = var_38590_end_0, end_mask = var_38590_end_mask_0, x = var_38347_cast_fp16)[name = tensor("op_38590_cast_fp16")]; tensor var_38597_begin_0 = const()[name = tensor("op_38597_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38597_end_0 = const()[name = tensor("op_38597_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38597_end_mask_0 = const()[name = tensor("op_38597_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38597_cast_fp16 = slice_by_index(begin = var_38597_begin_0, end = var_38597_end_0, end_mask = var_38597_end_mask_0, x = var_38347_cast_fp16)[name = tensor("op_38597_cast_fp16")]; tensor var_38604_begin_0 = const()[name = tensor("op_38604_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38604_end_0 = const()[name = tensor("op_38604_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38604_end_mask_0 = const()[name = tensor("op_38604_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38604_cast_fp16 = slice_by_index(begin = var_38604_begin_0, end = var_38604_end_0, end_mask = var_38604_end_mask_0, x = var_38351_cast_fp16)[name = tensor("op_38604_cast_fp16")]; tensor var_38611_begin_0 = const()[name = tensor("op_38611_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38611_end_0 = const()[name = tensor("op_38611_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38611_end_mask_0 = const()[name = tensor("op_38611_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38611_cast_fp16 = slice_by_index(begin = var_38611_begin_0, end = var_38611_end_0, end_mask = var_38611_end_mask_0, x = var_38351_cast_fp16)[name = tensor("op_38611_cast_fp16")]; tensor var_38618_begin_0 = const()[name = tensor("op_38618_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38618_end_0 = const()[name = tensor("op_38618_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38618_end_mask_0 = const()[name = tensor("op_38618_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38618_cast_fp16 = slice_by_index(begin = var_38618_begin_0, end = var_38618_end_0, end_mask = var_38618_end_mask_0, x = var_38351_cast_fp16)[name = tensor("op_38618_cast_fp16")]; tensor var_38625_begin_0 = const()[name = tensor("op_38625_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38625_end_0 = const()[name = tensor("op_38625_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38625_end_mask_0 = const()[name = tensor("op_38625_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38625_cast_fp16 = slice_by_index(begin = var_38625_begin_0, end = var_38625_end_0, end_mask = var_38625_end_mask_0, x = var_38351_cast_fp16)[name = tensor("op_38625_cast_fp16")]; tensor var_38632_begin_0 = const()[name = tensor("op_38632_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38632_end_0 = const()[name = tensor("op_38632_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38632_end_mask_0 = const()[name = tensor("op_38632_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38632_cast_fp16 = slice_by_index(begin = var_38632_begin_0, end = var_38632_end_0, end_mask = var_38632_end_mask_0, x = var_38355_cast_fp16)[name = tensor("op_38632_cast_fp16")]; tensor var_38639_begin_0 = const()[name = tensor("op_38639_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38639_end_0 = const()[name = tensor("op_38639_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38639_end_mask_0 = const()[name = tensor("op_38639_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38639_cast_fp16 = slice_by_index(begin = var_38639_begin_0, end = var_38639_end_0, end_mask = var_38639_end_mask_0, x = var_38355_cast_fp16)[name = tensor("op_38639_cast_fp16")]; tensor var_38646_begin_0 = const()[name = tensor("op_38646_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38646_end_0 = const()[name = tensor("op_38646_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38646_end_mask_0 = const()[name = tensor("op_38646_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38646_cast_fp16 = slice_by_index(begin = var_38646_begin_0, end = var_38646_end_0, end_mask = var_38646_end_mask_0, x = var_38355_cast_fp16)[name = tensor("op_38646_cast_fp16")]; tensor var_38653_begin_0 = const()[name = tensor("op_38653_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38653_end_0 = const()[name = tensor("op_38653_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38653_end_mask_0 = const()[name = tensor("op_38653_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38653_cast_fp16 = slice_by_index(begin = var_38653_begin_0, end = var_38653_end_0, end_mask = var_38653_end_mask_0, x = var_38355_cast_fp16)[name = tensor("op_38653_cast_fp16")]; tensor var_38660_begin_0 = const()[name = tensor("op_38660_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38660_end_0 = const()[name = tensor("op_38660_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38660_end_mask_0 = const()[name = tensor("op_38660_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38660_cast_fp16 = slice_by_index(begin = var_38660_begin_0, end = var_38660_end_0, end_mask = var_38660_end_mask_0, x = var_38359_cast_fp16)[name = tensor("op_38660_cast_fp16")]; tensor var_38667_begin_0 = const()[name = tensor("op_38667_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38667_end_0 = const()[name = tensor("op_38667_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38667_end_mask_0 = const()[name = tensor("op_38667_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38667_cast_fp16 = slice_by_index(begin = var_38667_begin_0, end = var_38667_end_0, end_mask = var_38667_end_mask_0, x = var_38359_cast_fp16)[name = tensor("op_38667_cast_fp16")]; tensor var_38674_begin_0 = const()[name = tensor("op_38674_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38674_end_0 = const()[name = tensor("op_38674_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38674_end_mask_0 = const()[name = tensor("op_38674_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38674_cast_fp16 = slice_by_index(begin = var_38674_begin_0, end = var_38674_end_0, end_mask = var_38674_end_mask_0, x = var_38359_cast_fp16)[name = tensor("op_38674_cast_fp16")]; tensor var_38681_begin_0 = const()[name = tensor("op_38681_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38681_end_0 = const()[name = tensor("op_38681_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38681_end_mask_0 = const()[name = tensor("op_38681_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38681_cast_fp16 = slice_by_index(begin = var_38681_begin_0, end = var_38681_end_0, end_mask = var_38681_end_mask_0, x = var_38359_cast_fp16)[name = tensor("op_38681_cast_fp16")]; tensor var_38688_begin_0 = const()[name = tensor("op_38688_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38688_end_0 = const()[name = tensor("op_38688_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38688_end_mask_0 = const()[name = tensor("op_38688_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38688_cast_fp16 = slice_by_index(begin = var_38688_begin_0, end = var_38688_end_0, end_mask = var_38688_end_mask_0, x = var_38363_cast_fp16)[name = tensor("op_38688_cast_fp16")]; tensor var_38695_begin_0 = const()[name = tensor("op_38695_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38695_end_0 = const()[name = tensor("op_38695_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38695_end_mask_0 = const()[name = tensor("op_38695_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38695_cast_fp16 = slice_by_index(begin = var_38695_begin_0, end = var_38695_end_0, end_mask = var_38695_end_mask_0, x = var_38363_cast_fp16)[name = tensor("op_38695_cast_fp16")]; tensor var_38702_begin_0 = const()[name = tensor("op_38702_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38702_end_0 = const()[name = tensor("op_38702_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38702_end_mask_0 = const()[name = tensor("op_38702_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38702_cast_fp16 = slice_by_index(begin = var_38702_begin_0, end = var_38702_end_0, end_mask = var_38702_end_mask_0, x = var_38363_cast_fp16)[name = tensor("op_38702_cast_fp16")]; tensor var_38709_begin_0 = const()[name = tensor("op_38709_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38709_end_0 = const()[name = tensor("op_38709_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38709_end_mask_0 = const()[name = tensor("op_38709_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38709_cast_fp16 = slice_by_index(begin = var_38709_begin_0, end = var_38709_end_0, end_mask = var_38709_end_mask_0, x = var_38363_cast_fp16)[name = tensor("op_38709_cast_fp16")]; tensor var_38716_begin_0 = const()[name = tensor("op_38716_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38716_end_0 = const()[name = tensor("op_38716_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38716_end_mask_0 = const()[name = tensor("op_38716_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38716_cast_fp16 = slice_by_index(begin = var_38716_begin_0, end = var_38716_end_0, end_mask = var_38716_end_mask_0, x = var_38367_cast_fp16)[name = tensor("op_38716_cast_fp16")]; tensor var_38723_begin_0 = const()[name = tensor("op_38723_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38723_end_0 = const()[name = tensor("op_38723_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38723_end_mask_0 = const()[name = tensor("op_38723_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38723_cast_fp16 = slice_by_index(begin = var_38723_begin_0, end = var_38723_end_0, end_mask = var_38723_end_mask_0, x = var_38367_cast_fp16)[name = tensor("op_38723_cast_fp16")]; tensor var_38730_begin_0 = const()[name = tensor("op_38730_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38730_end_0 = const()[name = tensor("op_38730_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38730_end_mask_0 = const()[name = tensor("op_38730_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38730_cast_fp16 = slice_by_index(begin = var_38730_begin_0, end = var_38730_end_0, end_mask = var_38730_end_mask_0, x = var_38367_cast_fp16)[name = tensor("op_38730_cast_fp16")]; tensor var_38737_begin_0 = const()[name = tensor("op_38737_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38737_end_0 = const()[name = tensor("op_38737_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38737_end_mask_0 = const()[name = tensor("op_38737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38737_cast_fp16 = slice_by_index(begin = var_38737_begin_0, end = var_38737_end_0, end_mask = var_38737_end_mask_0, x = var_38367_cast_fp16)[name = tensor("op_38737_cast_fp16")]; tensor var_38744_begin_0 = const()[name = tensor("op_38744_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38744_end_0 = const()[name = tensor("op_38744_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38744_end_mask_0 = const()[name = tensor("op_38744_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38744_cast_fp16 = slice_by_index(begin = var_38744_begin_0, end = var_38744_end_0, end_mask = var_38744_end_mask_0, x = var_38371_cast_fp16)[name = tensor("op_38744_cast_fp16")]; tensor var_38751_begin_0 = const()[name = tensor("op_38751_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38751_end_0 = const()[name = tensor("op_38751_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38751_end_mask_0 = const()[name = tensor("op_38751_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38751_cast_fp16 = slice_by_index(begin = var_38751_begin_0, end = var_38751_end_0, end_mask = var_38751_end_mask_0, x = var_38371_cast_fp16)[name = tensor("op_38751_cast_fp16")]; tensor var_38758_begin_0 = const()[name = tensor("op_38758_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38758_end_0 = const()[name = tensor("op_38758_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38758_end_mask_0 = const()[name = tensor("op_38758_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38758_cast_fp16 = slice_by_index(begin = var_38758_begin_0, end = var_38758_end_0, end_mask = var_38758_end_mask_0, x = var_38371_cast_fp16)[name = tensor("op_38758_cast_fp16")]; tensor var_38765_begin_0 = const()[name = tensor("op_38765_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38765_end_0 = const()[name = tensor("op_38765_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38765_end_mask_0 = const()[name = tensor("op_38765_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38765_cast_fp16 = slice_by_index(begin = var_38765_begin_0, end = var_38765_end_0, end_mask = var_38765_end_mask_0, x = var_38371_cast_fp16)[name = tensor("op_38765_cast_fp16")]; tensor var_38772_begin_0 = const()[name = tensor("op_38772_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38772_end_0 = const()[name = tensor("op_38772_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38772_end_mask_0 = const()[name = tensor("op_38772_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38772_cast_fp16 = slice_by_index(begin = var_38772_begin_0, end = var_38772_end_0, end_mask = var_38772_end_mask_0, x = var_38375_cast_fp16)[name = tensor("op_38772_cast_fp16")]; tensor var_38779_begin_0 = const()[name = tensor("op_38779_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38779_end_0 = const()[name = tensor("op_38779_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38779_end_mask_0 = const()[name = tensor("op_38779_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38779_cast_fp16 = slice_by_index(begin = var_38779_begin_0, end = var_38779_end_0, end_mask = var_38779_end_mask_0, x = var_38375_cast_fp16)[name = tensor("op_38779_cast_fp16")]; tensor var_38786_begin_0 = const()[name = tensor("op_38786_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38786_end_0 = const()[name = tensor("op_38786_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38786_end_mask_0 = const()[name = tensor("op_38786_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38786_cast_fp16 = slice_by_index(begin = var_38786_begin_0, end = var_38786_end_0, end_mask = var_38786_end_mask_0, x = var_38375_cast_fp16)[name = tensor("op_38786_cast_fp16")]; tensor var_38793_begin_0 = const()[name = tensor("op_38793_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38793_end_0 = const()[name = tensor("op_38793_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38793_end_mask_0 = const()[name = tensor("op_38793_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38793_cast_fp16 = slice_by_index(begin = var_38793_begin_0, end = var_38793_end_0, end_mask = var_38793_end_mask_0, x = var_38375_cast_fp16)[name = tensor("op_38793_cast_fp16")]; tensor var_38800_begin_0 = const()[name = tensor("op_38800_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38800_end_0 = const()[name = tensor("op_38800_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38800_end_mask_0 = const()[name = tensor("op_38800_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38800_cast_fp16 = slice_by_index(begin = var_38800_begin_0, end = var_38800_end_0, end_mask = var_38800_end_mask_0, x = var_38379_cast_fp16)[name = tensor("op_38800_cast_fp16")]; tensor var_38807_begin_0 = const()[name = tensor("op_38807_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38807_end_0 = const()[name = tensor("op_38807_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38807_end_mask_0 = const()[name = tensor("op_38807_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38807_cast_fp16 = slice_by_index(begin = var_38807_begin_0, end = var_38807_end_0, end_mask = var_38807_end_mask_0, x = var_38379_cast_fp16)[name = tensor("op_38807_cast_fp16")]; tensor var_38814_begin_0 = const()[name = tensor("op_38814_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38814_end_0 = const()[name = tensor("op_38814_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38814_end_mask_0 = const()[name = tensor("op_38814_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38814_cast_fp16 = slice_by_index(begin = var_38814_begin_0, end = var_38814_end_0, end_mask = var_38814_end_mask_0, x = var_38379_cast_fp16)[name = tensor("op_38814_cast_fp16")]; tensor var_38821_begin_0 = const()[name = tensor("op_38821_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38821_end_0 = const()[name = tensor("op_38821_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38821_end_mask_0 = const()[name = tensor("op_38821_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38821_cast_fp16 = slice_by_index(begin = var_38821_begin_0, end = var_38821_end_0, end_mask = var_38821_end_mask_0, x = var_38379_cast_fp16)[name = tensor("op_38821_cast_fp16")]; tensor var_38828_begin_0 = const()[name = tensor("op_38828_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38828_end_0 = const()[name = tensor("op_38828_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38828_end_mask_0 = const()[name = tensor("op_38828_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38828_cast_fp16 = slice_by_index(begin = var_38828_begin_0, end = var_38828_end_0, end_mask = var_38828_end_mask_0, x = var_38383_cast_fp16)[name = tensor("op_38828_cast_fp16")]; tensor var_38835_begin_0 = const()[name = tensor("op_38835_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38835_end_0 = const()[name = tensor("op_38835_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38835_end_mask_0 = const()[name = tensor("op_38835_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38835_cast_fp16 = slice_by_index(begin = var_38835_begin_0, end = var_38835_end_0, end_mask = var_38835_end_mask_0, x = var_38383_cast_fp16)[name = tensor("op_38835_cast_fp16")]; tensor var_38842_begin_0 = const()[name = tensor("op_38842_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38842_end_0 = const()[name = tensor("op_38842_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38842_end_mask_0 = const()[name = tensor("op_38842_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38842_cast_fp16 = slice_by_index(begin = var_38842_begin_0, end = var_38842_end_0, end_mask = var_38842_end_mask_0, x = var_38383_cast_fp16)[name = tensor("op_38842_cast_fp16")]; tensor var_38849_begin_0 = const()[name = tensor("op_38849_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38849_end_0 = const()[name = tensor("op_38849_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38849_end_mask_0 = const()[name = tensor("op_38849_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38849_cast_fp16 = slice_by_index(begin = var_38849_begin_0, end = var_38849_end_0, end_mask = var_38849_end_mask_0, x = var_38383_cast_fp16)[name = tensor("op_38849_cast_fp16")]; tensor var_38856_begin_0 = const()[name = tensor("op_38856_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38856_end_0 = const()[name = tensor("op_38856_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38856_end_mask_0 = const()[name = tensor("op_38856_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38856_cast_fp16 = slice_by_index(begin = var_38856_begin_0, end = var_38856_end_0, end_mask = var_38856_end_mask_0, x = var_38387_cast_fp16)[name = tensor("op_38856_cast_fp16")]; tensor var_38863_begin_0 = const()[name = tensor("op_38863_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38863_end_0 = const()[name = tensor("op_38863_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38863_end_mask_0 = const()[name = tensor("op_38863_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38863_cast_fp16 = slice_by_index(begin = var_38863_begin_0, end = var_38863_end_0, end_mask = var_38863_end_mask_0, x = var_38387_cast_fp16)[name = tensor("op_38863_cast_fp16")]; tensor var_38870_begin_0 = const()[name = tensor("op_38870_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38870_end_0 = const()[name = tensor("op_38870_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38870_end_mask_0 = const()[name = tensor("op_38870_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38870_cast_fp16 = slice_by_index(begin = var_38870_begin_0, end = var_38870_end_0, end_mask = var_38870_end_mask_0, x = var_38387_cast_fp16)[name = tensor("op_38870_cast_fp16")]; tensor var_38877_begin_0 = const()[name = tensor("op_38877_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38877_end_0 = const()[name = tensor("op_38877_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38877_end_mask_0 = const()[name = tensor("op_38877_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38877_cast_fp16 = slice_by_index(begin = var_38877_begin_0, end = var_38877_end_0, end_mask = var_38877_end_mask_0, x = var_38387_cast_fp16)[name = tensor("op_38877_cast_fp16")]; tensor var_38884_begin_0 = const()[name = tensor("op_38884_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38884_end_0 = const()[name = tensor("op_38884_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38884_end_mask_0 = const()[name = tensor("op_38884_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38884_cast_fp16 = slice_by_index(begin = var_38884_begin_0, end = var_38884_end_0, end_mask = var_38884_end_mask_0, x = var_38391_cast_fp16)[name = tensor("op_38884_cast_fp16")]; tensor var_38891_begin_0 = const()[name = tensor("op_38891_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38891_end_0 = const()[name = tensor("op_38891_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38891_end_mask_0 = const()[name = tensor("op_38891_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38891_cast_fp16 = slice_by_index(begin = var_38891_begin_0, end = var_38891_end_0, end_mask = var_38891_end_mask_0, x = var_38391_cast_fp16)[name = tensor("op_38891_cast_fp16")]; tensor var_38898_begin_0 = const()[name = tensor("op_38898_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38898_end_0 = const()[name = tensor("op_38898_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38898_end_mask_0 = const()[name = tensor("op_38898_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38898_cast_fp16 = slice_by_index(begin = var_38898_begin_0, end = var_38898_end_0, end_mask = var_38898_end_mask_0, x = var_38391_cast_fp16)[name = tensor("op_38898_cast_fp16")]; tensor var_38905_begin_0 = const()[name = tensor("op_38905_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38905_end_0 = const()[name = tensor("op_38905_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38905_end_mask_0 = const()[name = tensor("op_38905_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38905_cast_fp16 = slice_by_index(begin = var_38905_begin_0, end = var_38905_end_0, end_mask = var_38905_end_mask_0, x = var_38391_cast_fp16)[name = tensor("op_38905_cast_fp16")]; tensor var_38912_begin_0 = const()[name = tensor("op_38912_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38912_end_0 = const()[name = tensor("op_38912_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38912_end_mask_0 = const()[name = tensor("op_38912_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38912_cast_fp16 = slice_by_index(begin = var_38912_begin_0, end = var_38912_end_0, end_mask = var_38912_end_mask_0, x = var_38395_cast_fp16)[name = tensor("op_38912_cast_fp16")]; tensor var_38919_begin_0 = const()[name = tensor("op_38919_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38919_end_0 = const()[name = tensor("op_38919_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38919_end_mask_0 = const()[name = tensor("op_38919_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38919_cast_fp16 = slice_by_index(begin = var_38919_begin_0, end = var_38919_end_0, end_mask = var_38919_end_mask_0, x = var_38395_cast_fp16)[name = tensor("op_38919_cast_fp16")]; tensor var_38926_begin_0 = const()[name = tensor("op_38926_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38926_end_0 = const()[name = tensor("op_38926_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38926_end_mask_0 = const()[name = tensor("op_38926_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38926_cast_fp16 = slice_by_index(begin = var_38926_begin_0, end = var_38926_end_0, end_mask = var_38926_end_mask_0, x = var_38395_cast_fp16)[name = tensor("op_38926_cast_fp16")]; tensor var_38933_begin_0 = const()[name = tensor("op_38933_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38933_end_0 = const()[name = tensor("op_38933_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38933_end_mask_0 = const()[name = tensor("op_38933_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38933_cast_fp16 = slice_by_index(begin = var_38933_begin_0, end = var_38933_end_0, end_mask = var_38933_end_mask_0, x = var_38395_cast_fp16)[name = tensor("op_38933_cast_fp16")]; tensor var_38940_begin_0 = const()[name = tensor("op_38940_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38940_end_0 = const()[name = tensor("op_38940_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_38940_end_mask_0 = const()[name = tensor("op_38940_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38940_cast_fp16 = slice_by_index(begin = var_38940_begin_0, end = var_38940_end_0, end_mask = var_38940_end_mask_0, x = var_38399_cast_fp16)[name = tensor("op_38940_cast_fp16")]; tensor var_38947_begin_0 = const()[name = tensor("op_38947_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_38947_end_0 = const()[name = tensor("op_38947_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_38947_end_mask_0 = const()[name = tensor("op_38947_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38947_cast_fp16 = slice_by_index(begin = var_38947_begin_0, end = var_38947_end_0, end_mask = var_38947_end_mask_0, x = var_38399_cast_fp16)[name = tensor("op_38947_cast_fp16")]; tensor var_38954_begin_0 = const()[name = tensor("op_38954_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_38954_end_0 = const()[name = tensor("op_38954_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_38954_end_mask_0 = const()[name = tensor("op_38954_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38954_cast_fp16 = slice_by_index(begin = var_38954_begin_0, end = var_38954_end_0, end_mask = var_38954_end_mask_0, x = var_38399_cast_fp16)[name = tensor("op_38954_cast_fp16")]; tensor var_38961_begin_0 = const()[name = tensor("op_38961_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_38961_end_0 = const()[name = tensor("op_38961_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_38961_end_mask_0 = const()[name = tensor("op_38961_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38961_cast_fp16 = slice_by_index(begin = var_38961_begin_0, end = var_38961_end_0, end_mask = var_38961_end_mask_0, x = var_38399_cast_fp16)[name = tensor("op_38961_cast_fp16")]; tensor k_49_perm_0 = const()[name = tensor("k_49_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_38966_begin_0 = const()[name = tensor("op_38966_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_38966_end_0 = const()[name = tensor("op_38966_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_38966_end_mask_0 = const()[name = tensor("op_38966_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_49_cast_fp16 = transpose(perm = k_49_perm_0, x = key_49_cast_fp16)[name = tensor("transpose_7")]; tensor var_38966_cast_fp16 = slice_by_index(begin = var_38966_begin_0, end = var_38966_end_0, end_mask = var_38966_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38966_cast_fp16")]; tensor var_38970_begin_0 = const()[name = tensor("op_38970_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_38970_end_0 = const()[name = tensor("op_38970_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_38970_end_mask_0 = const()[name = tensor("op_38970_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38970_cast_fp16 = slice_by_index(begin = var_38970_begin_0, end = var_38970_end_0, end_mask = var_38970_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38970_cast_fp16")]; tensor var_38974_begin_0 = const()[name = tensor("op_38974_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_38974_end_0 = const()[name = tensor("op_38974_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_38974_end_mask_0 = const()[name = tensor("op_38974_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38974_cast_fp16 = slice_by_index(begin = var_38974_begin_0, end = var_38974_end_0, end_mask = var_38974_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38974_cast_fp16")]; tensor var_38978_begin_0 = const()[name = tensor("op_38978_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_38978_end_0 = const()[name = tensor("op_38978_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_38978_end_mask_0 = const()[name = tensor("op_38978_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38978_cast_fp16 = slice_by_index(begin = var_38978_begin_0, end = var_38978_end_0, end_mask = var_38978_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38978_cast_fp16")]; tensor var_38982_begin_0 = const()[name = tensor("op_38982_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_38982_end_0 = const()[name = tensor("op_38982_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_38982_end_mask_0 = const()[name = tensor("op_38982_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38982_cast_fp16 = slice_by_index(begin = var_38982_begin_0, end = var_38982_end_0, end_mask = var_38982_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38982_cast_fp16")]; tensor var_38986_begin_0 = const()[name = tensor("op_38986_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_38986_end_0 = const()[name = tensor("op_38986_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_38986_end_mask_0 = const()[name = tensor("op_38986_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38986_cast_fp16 = slice_by_index(begin = var_38986_begin_0, end = var_38986_end_0, end_mask = var_38986_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38986_cast_fp16")]; tensor var_38990_begin_0 = const()[name = tensor("op_38990_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_38990_end_0 = const()[name = tensor("op_38990_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_38990_end_mask_0 = const()[name = tensor("op_38990_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38990_cast_fp16 = slice_by_index(begin = var_38990_begin_0, end = var_38990_end_0, end_mask = var_38990_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38990_cast_fp16")]; tensor var_38994_begin_0 = const()[name = tensor("op_38994_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_38994_end_0 = const()[name = tensor("op_38994_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_38994_end_mask_0 = const()[name = tensor("op_38994_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38994_cast_fp16 = slice_by_index(begin = var_38994_begin_0, end = var_38994_end_0, end_mask = var_38994_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38994_cast_fp16")]; tensor var_38998_begin_0 = const()[name = tensor("op_38998_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_38998_end_0 = const()[name = tensor("op_38998_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_38998_end_mask_0 = const()[name = tensor("op_38998_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_38998_cast_fp16 = slice_by_index(begin = var_38998_begin_0, end = var_38998_end_0, end_mask = var_38998_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_38998_cast_fp16")]; tensor var_39002_begin_0 = const()[name = tensor("op_39002_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_39002_end_0 = const()[name = tensor("op_39002_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_39002_end_mask_0 = const()[name = tensor("op_39002_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39002_cast_fp16 = slice_by_index(begin = var_39002_begin_0, end = var_39002_end_0, end_mask = var_39002_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39002_cast_fp16")]; tensor var_39006_begin_0 = const()[name = tensor("op_39006_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_39006_end_0 = const()[name = tensor("op_39006_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_39006_end_mask_0 = const()[name = tensor("op_39006_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39006_cast_fp16 = slice_by_index(begin = var_39006_begin_0, end = var_39006_end_0, end_mask = var_39006_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39006_cast_fp16")]; tensor var_39010_begin_0 = const()[name = tensor("op_39010_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_39010_end_0 = const()[name = tensor("op_39010_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_39010_end_mask_0 = const()[name = tensor("op_39010_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39010_cast_fp16 = slice_by_index(begin = var_39010_begin_0, end = var_39010_end_0, end_mask = var_39010_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39010_cast_fp16")]; tensor var_39014_begin_0 = const()[name = tensor("op_39014_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_39014_end_0 = const()[name = tensor("op_39014_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_39014_end_mask_0 = const()[name = tensor("op_39014_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39014_cast_fp16 = slice_by_index(begin = var_39014_begin_0, end = var_39014_end_0, end_mask = var_39014_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39014_cast_fp16")]; tensor var_39018_begin_0 = const()[name = tensor("op_39018_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_39018_end_0 = const()[name = tensor("op_39018_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_39018_end_mask_0 = const()[name = tensor("op_39018_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39018_cast_fp16 = slice_by_index(begin = var_39018_begin_0, end = var_39018_end_0, end_mask = var_39018_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39018_cast_fp16")]; tensor var_39022_begin_0 = const()[name = tensor("op_39022_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_39022_end_0 = const()[name = tensor("op_39022_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_39022_end_mask_0 = const()[name = tensor("op_39022_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39022_cast_fp16 = slice_by_index(begin = var_39022_begin_0, end = var_39022_end_0, end_mask = var_39022_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39022_cast_fp16")]; tensor var_39026_begin_0 = const()[name = tensor("op_39026_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_39026_end_0 = const()[name = tensor("op_39026_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_39026_end_mask_0 = const()[name = tensor("op_39026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39026_cast_fp16 = slice_by_index(begin = var_39026_begin_0, end = var_39026_end_0, end_mask = var_39026_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39026_cast_fp16")]; tensor var_39030_begin_0 = const()[name = tensor("op_39030_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_39030_end_0 = const()[name = tensor("op_39030_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_39030_end_mask_0 = const()[name = tensor("op_39030_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39030_cast_fp16 = slice_by_index(begin = var_39030_begin_0, end = var_39030_end_0, end_mask = var_39030_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39030_cast_fp16")]; tensor var_39034_begin_0 = const()[name = tensor("op_39034_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_39034_end_0 = const()[name = tensor("op_39034_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_39034_end_mask_0 = const()[name = tensor("op_39034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39034_cast_fp16 = slice_by_index(begin = var_39034_begin_0, end = var_39034_end_0, end_mask = var_39034_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39034_cast_fp16")]; tensor var_39038_begin_0 = const()[name = tensor("op_39038_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_39038_end_0 = const()[name = tensor("op_39038_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_39038_end_mask_0 = const()[name = tensor("op_39038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39038_cast_fp16 = slice_by_index(begin = var_39038_begin_0, end = var_39038_end_0, end_mask = var_39038_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39038_cast_fp16")]; tensor var_39042_begin_0 = const()[name = tensor("op_39042_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_39042_end_0 = const()[name = tensor("op_39042_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_39042_end_mask_0 = const()[name = tensor("op_39042_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39042_cast_fp16 = slice_by_index(begin = var_39042_begin_0, end = var_39042_end_0, end_mask = var_39042_end_mask_0, x = k_49_cast_fp16)[name = tensor("op_39042_cast_fp16")]; tensor var_39044_begin_0 = const()[name = tensor("op_39044_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_39044_end_0 = const()[name = tensor("op_39044_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_39044_end_mask_0 = const()[name = tensor("op_39044_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39044_cast_fp16 = slice_by_index(begin = var_39044_begin_0, end = var_39044_end_0, end_mask = var_39044_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39044_cast_fp16")]; tensor var_39048_begin_0 = const()[name = tensor("op_39048_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_39048_end_0 = const()[name = tensor("op_39048_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_39048_end_mask_0 = const()[name = tensor("op_39048_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39048_cast_fp16 = slice_by_index(begin = var_39048_begin_0, end = var_39048_end_0, end_mask = var_39048_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39048_cast_fp16")]; tensor var_39052_begin_0 = const()[name = tensor("op_39052_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_39052_end_0 = const()[name = tensor("op_39052_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_39052_end_mask_0 = const()[name = tensor("op_39052_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39052_cast_fp16 = slice_by_index(begin = var_39052_begin_0, end = var_39052_end_0, end_mask = var_39052_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39052_cast_fp16")]; tensor var_39056_begin_0 = const()[name = tensor("op_39056_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_39056_end_0 = const()[name = tensor("op_39056_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_39056_end_mask_0 = const()[name = tensor("op_39056_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39056_cast_fp16 = slice_by_index(begin = var_39056_begin_0, end = var_39056_end_0, end_mask = var_39056_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39056_cast_fp16")]; tensor var_39060_begin_0 = const()[name = tensor("op_39060_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_39060_end_0 = const()[name = tensor("op_39060_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_39060_end_mask_0 = const()[name = tensor("op_39060_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39060_cast_fp16 = slice_by_index(begin = var_39060_begin_0, end = var_39060_end_0, end_mask = var_39060_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39060_cast_fp16")]; tensor var_39064_begin_0 = const()[name = tensor("op_39064_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_39064_end_0 = const()[name = tensor("op_39064_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_39064_end_mask_0 = const()[name = tensor("op_39064_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39064_cast_fp16 = slice_by_index(begin = var_39064_begin_0, end = var_39064_end_0, end_mask = var_39064_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39064_cast_fp16")]; tensor var_39068_begin_0 = const()[name = tensor("op_39068_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_39068_end_0 = const()[name = tensor("op_39068_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_39068_end_mask_0 = const()[name = tensor("op_39068_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39068_cast_fp16 = slice_by_index(begin = var_39068_begin_0, end = var_39068_end_0, end_mask = var_39068_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39068_cast_fp16")]; tensor var_39072_begin_0 = const()[name = tensor("op_39072_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_39072_end_0 = const()[name = tensor("op_39072_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_39072_end_mask_0 = const()[name = tensor("op_39072_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39072_cast_fp16 = slice_by_index(begin = var_39072_begin_0, end = var_39072_end_0, end_mask = var_39072_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39072_cast_fp16")]; tensor var_39076_begin_0 = const()[name = tensor("op_39076_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_39076_end_0 = const()[name = tensor("op_39076_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_39076_end_mask_0 = const()[name = tensor("op_39076_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39076_cast_fp16 = slice_by_index(begin = var_39076_begin_0, end = var_39076_end_0, end_mask = var_39076_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39076_cast_fp16")]; tensor var_39080_begin_0 = const()[name = tensor("op_39080_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_39080_end_0 = const()[name = tensor("op_39080_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_39080_end_mask_0 = const()[name = tensor("op_39080_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39080_cast_fp16 = slice_by_index(begin = var_39080_begin_0, end = var_39080_end_0, end_mask = var_39080_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39080_cast_fp16")]; tensor var_39084_begin_0 = const()[name = tensor("op_39084_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_39084_end_0 = const()[name = tensor("op_39084_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_39084_end_mask_0 = const()[name = tensor("op_39084_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39084_cast_fp16 = slice_by_index(begin = var_39084_begin_0, end = var_39084_end_0, end_mask = var_39084_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39084_cast_fp16")]; tensor var_39088_begin_0 = const()[name = tensor("op_39088_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_39088_end_0 = const()[name = tensor("op_39088_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_39088_end_mask_0 = const()[name = tensor("op_39088_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39088_cast_fp16 = slice_by_index(begin = var_39088_begin_0, end = var_39088_end_0, end_mask = var_39088_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39088_cast_fp16")]; tensor var_39092_begin_0 = const()[name = tensor("op_39092_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_39092_end_0 = const()[name = tensor("op_39092_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_39092_end_mask_0 = const()[name = tensor("op_39092_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39092_cast_fp16 = slice_by_index(begin = var_39092_begin_0, end = var_39092_end_0, end_mask = var_39092_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39092_cast_fp16")]; tensor var_39096_begin_0 = const()[name = tensor("op_39096_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_39096_end_0 = const()[name = tensor("op_39096_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_39096_end_mask_0 = const()[name = tensor("op_39096_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39096_cast_fp16 = slice_by_index(begin = var_39096_begin_0, end = var_39096_end_0, end_mask = var_39096_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39096_cast_fp16")]; tensor var_39100_begin_0 = const()[name = tensor("op_39100_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_39100_end_0 = const()[name = tensor("op_39100_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_39100_end_mask_0 = const()[name = tensor("op_39100_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39100_cast_fp16 = slice_by_index(begin = var_39100_begin_0, end = var_39100_end_0, end_mask = var_39100_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39100_cast_fp16")]; tensor var_39104_begin_0 = const()[name = tensor("op_39104_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_39104_end_0 = const()[name = tensor("op_39104_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_39104_end_mask_0 = const()[name = tensor("op_39104_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39104_cast_fp16 = slice_by_index(begin = var_39104_begin_0, end = var_39104_end_0, end_mask = var_39104_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39104_cast_fp16")]; tensor var_39108_begin_0 = const()[name = tensor("op_39108_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_39108_end_0 = const()[name = tensor("op_39108_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_39108_end_mask_0 = const()[name = tensor("op_39108_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39108_cast_fp16 = slice_by_index(begin = var_39108_begin_0, end = var_39108_end_0, end_mask = var_39108_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39108_cast_fp16")]; tensor var_39112_begin_0 = const()[name = tensor("op_39112_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_39112_end_0 = const()[name = tensor("op_39112_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_39112_end_mask_0 = const()[name = tensor("op_39112_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39112_cast_fp16 = slice_by_index(begin = var_39112_begin_0, end = var_39112_end_0, end_mask = var_39112_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39112_cast_fp16")]; tensor var_39116_begin_0 = const()[name = tensor("op_39116_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_39116_end_0 = const()[name = tensor("op_39116_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_39116_end_mask_0 = const()[name = tensor("op_39116_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39116_cast_fp16 = slice_by_index(begin = var_39116_begin_0, end = var_39116_end_0, end_mask = var_39116_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39116_cast_fp16")]; tensor var_39120_begin_0 = const()[name = tensor("op_39120_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_39120_end_0 = const()[name = tensor("op_39120_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_39120_end_mask_0 = const()[name = tensor("op_39120_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39120_cast_fp16 = slice_by_index(begin = var_39120_begin_0, end = var_39120_end_0, end_mask = var_39120_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_39120_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3841_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3841_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3841_equation_0, values = (var_38966_cast_fp16, var_38408_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3841_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3843_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3843_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3843_equation_0, values = (var_38966_cast_fp16, var_38415_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3843_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3845_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3845_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3845_equation_0, values = (var_38966_cast_fp16, var_38422_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3845_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3847_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3847_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3847_equation_0, values = (var_38966_cast_fp16, var_38429_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3847_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3849_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3849_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3849_equation_0, values = (var_38970_cast_fp16, var_38436_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3849_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3851_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3851_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3851_equation_0, values = (var_38970_cast_fp16, var_38443_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3851_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3853_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3853_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3853_equation_0, values = (var_38970_cast_fp16, var_38450_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3853_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3855_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3855_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3855_equation_0, values = (var_38970_cast_fp16, var_38457_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3855_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3857_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3857_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3857_equation_0, values = (var_38974_cast_fp16, var_38464_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3857_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3859_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3859_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3859_equation_0, values = (var_38974_cast_fp16, var_38471_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3859_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3861_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3861_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3861_equation_0, values = (var_38974_cast_fp16, var_38478_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3861_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3863_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3863_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3863_equation_0, values = (var_38974_cast_fp16, var_38485_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3863_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3865_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3865_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3865_equation_0, values = (var_38978_cast_fp16, var_38492_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3865_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3867_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3867_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3867_equation_0, values = (var_38978_cast_fp16, var_38499_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3867_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3869_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3869_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3869_equation_0, values = (var_38978_cast_fp16, var_38506_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3869_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3871_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3871_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3871_equation_0, values = (var_38978_cast_fp16, var_38513_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3871_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3873_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3873_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3873_equation_0, values = (var_38982_cast_fp16, var_38520_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3873_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3875_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3875_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3875_equation_0, values = (var_38982_cast_fp16, var_38527_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3875_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3877_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3877_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3877_equation_0, values = (var_38982_cast_fp16, var_38534_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3877_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3879_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3879_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3879_equation_0, values = (var_38982_cast_fp16, var_38541_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3879_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3881_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3881_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3881_equation_0, values = (var_38986_cast_fp16, var_38548_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3881_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3883_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3883_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3883_equation_0, values = (var_38986_cast_fp16, var_38555_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3883_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3885_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3885_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3885_equation_0, values = (var_38986_cast_fp16, var_38562_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3885_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3887_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3887_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3887_equation_0, values = (var_38986_cast_fp16, var_38569_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3887_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3889_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3889_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3889_equation_0, values = (var_38990_cast_fp16, var_38576_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3889_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3891_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3891_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3891_equation_0, values = (var_38990_cast_fp16, var_38583_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3891_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3893_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3893_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3893_equation_0, values = (var_38990_cast_fp16, var_38590_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3893_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3895_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3895_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3895_equation_0, values = (var_38990_cast_fp16, var_38597_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3895_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3897_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3897_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3897_equation_0, values = (var_38994_cast_fp16, var_38604_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3897_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3899_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3899_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3899_equation_0, values = (var_38994_cast_fp16, var_38611_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3899_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3901_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3901_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3901_equation_0, values = (var_38994_cast_fp16, var_38618_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3901_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3903_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3903_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3903_equation_0, values = (var_38994_cast_fp16, var_38625_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3903_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3905_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3905_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3905_equation_0, values = (var_38998_cast_fp16, var_38632_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3905_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3907_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3907_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3907_equation_0, values = (var_38998_cast_fp16, var_38639_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3907_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3909_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3909_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3909_equation_0, values = (var_38998_cast_fp16, var_38646_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3909_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3911_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3911_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3911_equation_0, values = (var_38998_cast_fp16, var_38653_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3911_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3913_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3913_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3913_equation_0, values = (var_39002_cast_fp16, var_38660_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3913_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3915_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3915_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3915_equation_0, values = (var_39002_cast_fp16, var_38667_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3915_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3917_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3917_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3917_equation_0, values = (var_39002_cast_fp16, var_38674_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3917_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3919_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3919_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3919_equation_0, values = (var_39002_cast_fp16, var_38681_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3919_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3921_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3921_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3921_equation_0, values = (var_39006_cast_fp16, var_38688_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3921_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3923_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3923_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3923_equation_0, values = (var_39006_cast_fp16, var_38695_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3923_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3925_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3925_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3925_equation_0, values = (var_39006_cast_fp16, var_38702_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3925_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3927_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3927_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3927_equation_0, values = (var_39006_cast_fp16, var_38709_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3927_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3929_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3929_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3929_equation_0, values = (var_39010_cast_fp16, var_38716_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3929_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3931_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3931_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3931_equation_0, values = (var_39010_cast_fp16, var_38723_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3931_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3933_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3933_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3933_equation_0, values = (var_39010_cast_fp16, var_38730_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3933_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3935_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3935_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3935_equation_0, values = (var_39010_cast_fp16, var_38737_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3935_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3937_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3937_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3937_equation_0, values = (var_39014_cast_fp16, var_38744_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3937_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3939_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3939_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3939_equation_0, values = (var_39014_cast_fp16, var_38751_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3939_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3941_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3941_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3941_equation_0, values = (var_39014_cast_fp16, var_38758_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3941_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3943_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3943_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3943_equation_0, values = (var_39014_cast_fp16, var_38765_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3943_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3945_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3945_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3945_equation_0, values = (var_39018_cast_fp16, var_38772_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3945_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3947_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3947_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3947_equation_0, values = (var_39018_cast_fp16, var_38779_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3947_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3949_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3949_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3949_equation_0, values = (var_39018_cast_fp16, var_38786_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3949_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3951_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3951_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3951_equation_0, values = (var_39018_cast_fp16, var_38793_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3951_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3953_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3953_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3953_equation_0, values = (var_39022_cast_fp16, var_38800_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3953_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3955_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3955_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3955_equation_0, values = (var_39022_cast_fp16, var_38807_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3955_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3957_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3957_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3957_equation_0, values = (var_39022_cast_fp16, var_38814_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3957_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3959_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3959_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3959_equation_0, values = (var_39022_cast_fp16, var_38821_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3959_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3961_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3961_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3961_equation_0, values = (var_39026_cast_fp16, var_38828_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3961_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3963_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3963_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3963_equation_0, values = (var_39026_cast_fp16, var_38835_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3963_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3965_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3965_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3965_equation_0, values = (var_39026_cast_fp16, var_38842_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3965_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3967_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3967_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3967_equation_0, values = (var_39026_cast_fp16, var_38849_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3967_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3969_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3969_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3969_equation_0, values = (var_39030_cast_fp16, var_38856_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3969_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3971_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3971_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3971_equation_0, values = (var_39030_cast_fp16, var_38863_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3971_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3973_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3973_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3973_equation_0, values = (var_39030_cast_fp16, var_38870_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3973_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3975_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3975_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3975_equation_0, values = (var_39030_cast_fp16, var_38877_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3975_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3977_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3977_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3977_equation_0, values = (var_39034_cast_fp16, var_38884_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3977_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3979_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3979_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3979_equation_0, values = (var_39034_cast_fp16, var_38891_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3979_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3981_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3981_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3981_equation_0, values = (var_39034_cast_fp16, var_38898_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3981_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3983_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3983_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3983_equation_0, values = (var_39034_cast_fp16, var_38905_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3983_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3985_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3985_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3985_equation_0, values = (var_39038_cast_fp16, var_38912_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3985_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3987_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3987_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3987_equation_0, values = (var_39038_cast_fp16, var_38919_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3987_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3989_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3989_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3989_equation_0, values = (var_39038_cast_fp16, var_38926_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3989_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3991_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3991_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3991_equation_0, values = (var_39038_cast_fp16, var_38933_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3991_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3993_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3993_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3993_equation_0, values = (var_39042_cast_fp16, var_38940_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3993_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3995_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3995_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3995_equation_0, values = (var_39042_cast_fp16, var_38947_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3995_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3997_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3997_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3997_equation_0, values = (var_39042_cast_fp16, var_38954_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3997_cast_fp16")]; tensor _SplitHeadsQ__mh_w_3999_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_3999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_3999_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_3999_equation_0, values = (var_39042_cast_fp16, var_38961_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_3999_cast_fp16")]; tensor var_39283_to_fp16 = const()[name = tensor("op_39283_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3841_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3841_cast_fp16, y = var_39283_to_fp16)[name = tensor("aw_chunk_3841_cast_fp16")]; tensor var_39285_to_fp16 = const()[name = tensor("op_39285_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3843_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3843_cast_fp16, y = var_39285_to_fp16)[name = tensor("aw_chunk_3843_cast_fp16")]; tensor var_39287_to_fp16 = const()[name = tensor("op_39287_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3845_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3845_cast_fp16, y = var_39287_to_fp16)[name = tensor("aw_chunk_3845_cast_fp16")]; tensor var_39289_to_fp16 = const()[name = tensor("op_39289_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3847_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3847_cast_fp16, y = var_39289_to_fp16)[name = tensor("aw_chunk_3847_cast_fp16")]; tensor var_39291_to_fp16 = const()[name = tensor("op_39291_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3849_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3849_cast_fp16, y = var_39291_to_fp16)[name = tensor("aw_chunk_3849_cast_fp16")]; tensor var_39293_to_fp16 = const()[name = tensor("op_39293_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3851_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3851_cast_fp16, y = var_39293_to_fp16)[name = tensor("aw_chunk_3851_cast_fp16")]; tensor var_39295_to_fp16 = const()[name = tensor("op_39295_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3853_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3853_cast_fp16, y = var_39295_to_fp16)[name = tensor("aw_chunk_3853_cast_fp16")]; tensor var_39297_to_fp16 = const()[name = tensor("op_39297_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3855_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3855_cast_fp16, y = var_39297_to_fp16)[name = tensor("aw_chunk_3855_cast_fp16")]; tensor var_39299_to_fp16 = const()[name = tensor("op_39299_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3857_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3857_cast_fp16, y = var_39299_to_fp16)[name = tensor("aw_chunk_3857_cast_fp16")]; tensor var_39301_to_fp16 = const()[name = tensor("op_39301_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3859_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3859_cast_fp16, y = var_39301_to_fp16)[name = tensor("aw_chunk_3859_cast_fp16")]; tensor var_39303_to_fp16 = const()[name = tensor("op_39303_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3861_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3861_cast_fp16, y = var_39303_to_fp16)[name = tensor("aw_chunk_3861_cast_fp16")]; tensor var_39305_to_fp16 = const()[name = tensor("op_39305_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3863_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3863_cast_fp16, y = var_39305_to_fp16)[name = tensor("aw_chunk_3863_cast_fp16")]; tensor var_39307_to_fp16 = const()[name = tensor("op_39307_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3865_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3865_cast_fp16, y = var_39307_to_fp16)[name = tensor("aw_chunk_3865_cast_fp16")]; tensor var_39309_to_fp16 = const()[name = tensor("op_39309_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3867_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3867_cast_fp16, y = var_39309_to_fp16)[name = tensor("aw_chunk_3867_cast_fp16")]; tensor var_39311_to_fp16 = const()[name = tensor("op_39311_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3869_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3869_cast_fp16, y = var_39311_to_fp16)[name = tensor("aw_chunk_3869_cast_fp16")]; tensor var_39313_to_fp16 = const()[name = tensor("op_39313_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3871_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3871_cast_fp16, y = var_39313_to_fp16)[name = tensor("aw_chunk_3871_cast_fp16")]; tensor var_39315_to_fp16 = const()[name = tensor("op_39315_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3873_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3873_cast_fp16, y = var_39315_to_fp16)[name = tensor("aw_chunk_3873_cast_fp16")]; tensor var_39317_to_fp16 = const()[name = tensor("op_39317_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3875_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3875_cast_fp16, y = var_39317_to_fp16)[name = tensor("aw_chunk_3875_cast_fp16")]; tensor var_39319_to_fp16 = const()[name = tensor("op_39319_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3877_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3877_cast_fp16, y = var_39319_to_fp16)[name = tensor("aw_chunk_3877_cast_fp16")]; tensor var_39321_to_fp16 = const()[name = tensor("op_39321_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3879_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3879_cast_fp16, y = var_39321_to_fp16)[name = tensor("aw_chunk_3879_cast_fp16")]; tensor var_39323_to_fp16 = const()[name = tensor("op_39323_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3881_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3881_cast_fp16, y = var_39323_to_fp16)[name = tensor("aw_chunk_3881_cast_fp16")]; tensor var_39325_to_fp16 = const()[name = tensor("op_39325_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3883_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3883_cast_fp16, y = var_39325_to_fp16)[name = tensor("aw_chunk_3883_cast_fp16")]; tensor var_39327_to_fp16 = const()[name = tensor("op_39327_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3885_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3885_cast_fp16, y = var_39327_to_fp16)[name = tensor("aw_chunk_3885_cast_fp16")]; tensor var_39329_to_fp16 = const()[name = tensor("op_39329_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3887_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3887_cast_fp16, y = var_39329_to_fp16)[name = tensor("aw_chunk_3887_cast_fp16")]; tensor var_39331_to_fp16 = const()[name = tensor("op_39331_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3889_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3889_cast_fp16, y = var_39331_to_fp16)[name = tensor("aw_chunk_3889_cast_fp16")]; tensor var_39333_to_fp16 = const()[name = tensor("op_39333_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3891_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3891_cast_fp16, y = var_39333_to_fp16)[name = tensor("aw_chunk_3891_cast_fp16")]; tensor var_39335_to_fp16 = const()[name = tensor("op_39335_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3893_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3893_cast_fp16, y = var_39335_to_fp16)[name = tensor("aw_chunk_3893_cast_fp16")]; tensor var_39337_to_fp16 = const()[name = tensor("op_39337_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3895_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3895_cast_fp16, y = var_39337_to_fp16)[name = tensor("aw_chunk_3895_cast_fp16")]; tensor var_39339_to_fp16 = const()[name = tensor("op_39339_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3897_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3897_cast_fp16, y = var_39339_to_fp16)[name = tensor("aw_chunk_3897_cast_fp16")]; tensor var_39341_to_fp16 = const()[name = tensor("op_39341_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3899_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3899_cast_fp16, y = var_39341_to_fp16)[name = tensor("aw_chunk_3899_cast_fp16")]; tensor var_39343_to_fp16 = const()[name = tensor("op_39343_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3901_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3901_cast_fp16, y = var_39343_to_fp16)[name = tensor("aw_chunk_3901_cast_fp16")]; tensor var_39345_to_fp16 = const()[name = tensor("op_39345_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3903_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3903_cast_fp16, y = var_39345_to_fp16)[name = tensor("aw_chunk_3903_cast_fp16")]; tensor var_39347_to_fp16 = const()[name = tensor("op_39347_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3905_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3905_cast_fp16, y = var_39347_to_fp16)[name = tensor("aw_chunk_3905_cast_fp16")]; tensor var_39349_to_fp16 = const()[name = tensor("op_39349_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3907_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3907_cast_fp16, y = var_39349_to_fp16)[name = tensor("aw_chunk_3907_cast_fp16")]; tensor var_39351_to_fp16 = const()[name = tensor("op_39351_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3909_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3909_cast_fp16, y = var_39351_to_fp16)[name = tensor("aw_chunk_3909_cast_fp16")]; tensor var_39353_to_fp16 = const()[name = tensor("op_39353_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3911_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3911_cast_fp16, y = var_39353_to_fp16)[name = tensor("aw_chunk_3911_cast_fp16")]; tensor var_39355_to_fp16 = const()[name = tensor("op_39355_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3913_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3913_cast_fp16, y = var_39355_to_fp16)[name = tensor("aw_chunk_3913_cast_fp16")]; tensor var_39357_to_fp16 = const()[name = tensor("op_39357_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3915_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3915_cast_fp16, y = var_39357_to_fp16)[name = tensor("aw_chunk_3915_cast_fp16")]; tensor var_39359_to_fp16 = const()[name = tensor("op_39359_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3917_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3917_cast_fp16, y = var_39359_to_fp16)[name = tensor("aw_chunk_3917_cast_fp16")]; tensor var_39361_to_fp16 = const()[name = tensor("op_39361_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3919_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3919_cast_fp16, y = var_39361_to_fp16)[name = tensor("aw_chunk_3919_cast_fp16")]; tensor var_39363_to_fp16 = const()[name = tensor("op_39363_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3921_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3921_cast_fp16, y = var_39363_to_fp16)[name = tensor("aw_chunk_3921_cast_fp16")]; tensor var_39365_to_fp16 = const()[name = tensor("op_39365_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3923_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3923_cast_fp16, y = var_39365_to_fp16)[name = tensor("aw_chunk_3923_cast_fp16")]; tensor var_39367_to_fp16 = const()[name = tensor("op_39367_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3925_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3925_cast_fp16, y = var_39367_to_fp16)[name = tensor("aw_chunk_3925_cast_fp16")]; tensor var_39369_to_fp16 = const()[name = tensor("op_39369_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3927_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3927_cast_fp16, y = var_39369_to_fp16)[name = tensor("aw_chunk_3927_cast_fp16")]; tensor var_39371_to_fp16 = const()[name = tensor("op_39371_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3929_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3929_cast_fp16, y = var_39371_to_fp16)[name = tensor("aw_chunk_3929_cast_fp16")]; tensor var_39373_to_fp16 = const()[name = tensor("op_39373_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3931_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3931_cast_fp16, y = var_39373_to_fp16)[name = tensor("aw_chunk_3931_cast_fp16")]; tensor var_39375_to_fp16 = const()[name = tensor("op_39375_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3933_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3933_cast_fp16, y = var_39375_to_fp16)[name = tensor("aw_chunk_3933_cast_fp16")]; tensor var_39377_to_fp16 = const()[name = tensor("op_39377_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3935_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3935_cast_fp16, y = var_39377_to_fp16)[name = tensor("aw_chunk_3935_cast_fp16")]; tensor var_39379_to_fp16 = const()[name = tensor("op_39379_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3937_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3937_cast_fp16, y = var_39379_to_fp16)[name = tensor("aw_chunk_3937_cast_fp16")]; tensor var_39381_to_fp16 = const()[name = tensor("op_39381_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3939_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3939_cast_fp16, y = var_39381_to_fp16)[name = tensor("aw_chunk_3939_cast_fp16")]; tensor var_39383_to_fp16 = const()[name = tensor("op_39383_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3941_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3941_cast_fp16, y = var_39383_to_fp16)[name = tensor("aw_chunk_3941_cast_fp16")]; tensor var_39385_to_fp16 = const()[name = tensor("op_39385_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3943_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3943_cast_fp16, y = var_39385_to_fp16)[name = tensor("aw_chunk_3943_cast_fp16")]; tensor var_39387_to_fp16 = const()[name = tensor("op_39387_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3945_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3945_cast_fp16, y = var_39387_to_fp16)[name = tensor("aw_chunk_3945_cast_fp16")]; tensor var_39389_to_fp16 = const()[name = tensor("op_39389_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3947_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3947_cast_fp16, y = var_39389_to_fp16)[name = tensor("aw_chunk_3947_cast_fp16")]; tensor var_39391_to_fp16 = const()[name = tensor("op_39391_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3949_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3949_cast_fp16, y = var_39391_to_fp16)[name = tensor("aw_chunk_3949_cast_fp16")]; tensor var_39393_to_fp16 = const()[name = tensor("op_39393_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3951_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3951_cast_fp16, y = var_39393_to_fp16)[name = tensor("aw_chunk_3951_cast_fp16")]; tensor var_39395_to_fp16 = const()[name = tensor("op_39395_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3953_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3953_cast_fp16, y = var_39395_to_fp16)[name = tensor("aw_chunk_3953_cast_fp16")]; tensor var_39397_to_fp16 = const()[name = tensor("op_39397_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3955_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3955_cast_fp16, y = var_39397_to_fp16)[name = tensor("aw_chunk_3955_cast_fp16")]; tensor var_39399_to_fp16 = const()[name = tensor("op_39399_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3957_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3957_cast_fp16, y = var_39399_to_fp16)[name = tensor("aw_chunk_3957_cast_fp16")]; tensor var_39401_to_fp16 = const()[name = tensor("op_39401_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3959_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3959_cast_fp16, y = var_39401_to_fp16)[name = tensor("aw_chunk_3959_cast_fp16")]; tensor var_39403_to_fp16 = const()[name = tensor("op_39403_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3961_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3961_cast_fp16, y = var_39403_to_fp16)[name = tensor("aw_chunk_3961_cast_fp16")]; tensor var_39405_to_fp16 = const()[name = tensor("op_39405_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3963_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3963_cast_fp16, y = var_39405_to_fp16)[name = tensor("aw_chunk_3963_cast_fp16")]; tensor var_39407_to_fp16 = const()[name = tensor("op_39407_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3965_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3965_cast_fp16, y = var_39407_to_fp16)[name = tensor("aw_chunk_3965_cast_fp16")]; tensor var_39409_to_fp16 = const()[name = tensor("op_39409_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3967_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3967_cast_fp16, y = var_39409_to_fp16)[name = tensor("aw_chunk_3967_cast_fp16")]; tensor var_39411_to_fp16 = const()[name = tensor("op_39411_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3969_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3969_cast_fp16, y = var_39411_to_fp16)[name = tensor("aw_chunk_3969_cast_fp16")]; tensor var_39413_to_fp16 = const()[name = tensor("op_39413_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3971_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3971_cast_fp16, y = var_39413_to_fp16)[name = tensor("aw_chunk_3971_cast_fp16")]; tensor var_39415_to_fp16 = const()[name = tensor("op_39415_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3973_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3973_cast_fp16, y = var_39415_to_fp16)[name = tensor("aw_chunk_3973_cast_fp16")]; tensor var_39417_to_fp16 = const()[name = tensor("op_39417_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3975_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3975_cast_fp16, y = var_39417_to_fp16)[name = tensor("aw_chunk_3975_cast_fp16")]; tensor var_39419_to_fp16 = const()[name = tensor("op_39419_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3977_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3977_cast_fp16, y = var_39419_to_fp16)[name = tensor("aw_chunk_3977_cast_fp16")]; tensor var_39421_to_fp16 = const()[name = tensor("op_39421_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3979_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3979_cast_fp16, y = var_39421_to_fp16)[name = tensor("aw_chunk_3979_cast_fp16")]; tensor var_39423_to_fp16 = const()[name = tensor("op_39423_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3981_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3981_cast_fp16, y = var_39423_to_fp16)[name = tensor("aw_chunk_3981_cast_fp16")]; tensor var_39425_to_fp16 = const()[name = tensor("op_39425_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3983_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3983_cast_fp16, y = var_39425_to_fp16)[name = tensor("aw_chunk_3983_cast_fp16")]; tensor var_39427_to_fp16 = const()[name = tensor("op_39427_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3985_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3985_cast_fp16, y = var_39427_to_fp16)[name = tensor("aw_chunk_3985_cast_fp16")]; tensor var_39429_to_fp16 = const()[name = tensor("op_39429_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3987_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3987_cast_fp16, y = var_39429_to_fp16)[name = tensor("aw_chunk_3987_cast_fp16")]; tensor var_39431_to_fp16 = const()[name = tensor("op_39431_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3989_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3989_cast_fp16, y = var_39431_to_fp16)[name = tensor("aw_chunk_3989_cast_fp16")]; tensor var_39433_to_fp16 = const()[name = tensor("op_39433_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3991_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3991_cast_fp16, y = var_39433_to_fp16)[name = tensor("aw_chunk_3991_cast_fp16")]; tensor var_39435_to_fp16 = const()[name = tensor("op_39435_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3993_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3993_cast_fp16, y = var_39435_to_fp16)[name = tensor("aw_chunk_3993_cast_fp16")]; tensor var_39437_to_fp16 = const()[name = tensor("op_39437_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3995_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3995_cast_fp16, y = var_39437_to_fp16)[name = tensor("aw_chunk_3995_cast_fp16")]; tensor var_39439_to_fp16 = const()[name = tensor("op_39439_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3997_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3997_cast_fp16, y = var_39439_to_fp16)[name = tensor("aw_chunk_3997_cast_fp16")]; tensor var_39441_to_fp16 = const()[name = tensor("op_39441_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_3999_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_3999_cast_fp16, y = var_39441_to_fp16)[name = tensor("aw_chunk_3999_cast_fp16")]; tensor var_39443_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3841_cast_fp16)[name = tensor("op_39443_cast_fp16")]; tensor var_39444_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3843_cast_fp16)[name = tensor("op_39444_cast_fp16")]; tensor var_39445_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3845_cast_fp16)[name = tensor("op_39445_cast_fp16")]; tensor var_39446_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3847_cast_fp16)[name = tensor("op_39446_cast_fp16")]; tensor var_39447_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3849_cast_fp16)[name = tensor("op_39447_cast_fp16")]; tensor var_39448_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3851_cast_fp16)[name = tensor("op_39448_cast_fp16")]; tensor var_39449_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3853_cast_fp16)[name = tensor("op_39449_cast_fp16")]; tensor var_39450_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3855_cast_fp16)[name = tensor("op_39450_cast_fp16")]; tensor var_39451_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3857_cast_fp16)[name = tensor("op_39451_cast_fp16")]; tensor var_39452_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3859_cast_fp16)[name = tensor("op_39452_cast_fp16")]; tensor var_39453_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3861_cast_fp16)[name = tensor("op_39453_cast_fp16")]; tensor var_39454_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3863_cast_fp16)[name = tensor("op_39454_cast_fp16")]; tensor var_39455_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3865_cast_fp16)[name = tensor("op_39455_cast_fp16")]; tensor var_39456_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3867_cast_fp16)[name = tensor("op_39456_cast_fp16")]; tensor var_39457_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3869_cast_fp16)[name = tensor("op_39457_cast_fp16")]; tensor var_39458_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3871_cast_fp16)[name = tensor("op_39458_cast_fp16")]; tensor var_39459_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3873_cast_fp16)[name = tensor("op_39459_cast_fp16")]; tensor var_39460_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3875_cast_fp16)[name = tensor("op_39460_cast_fp16")]; tensor var_39461_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3877_cast_fp16)[name = tensor("op_39461_cast_fp16")]; tensor var_39462_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3879_cast_fp16)[name = tensor("op_39462_cast_fp16")]; tensor var_39463_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3881_cast_fp16)[name = tensor("op_39463_cast_fp16")]; tensor var_39464_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3883_cast_fp16)[name = tensor("op_39464_cast_fp16")]; tensor var_39465_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3885_cast_fp16)[name = tensor("op_39465_cast_fp16")]; tensor var_39466_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3887_cast_fp16)[name = tensor("op_39466_cast_fp16")]; tensor var_39467_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3889_cast_fp16)[name = tensor("op_39467_cast_fp16")]; tensor var_39468_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3891_cast_fp16)[name = tensor("op_39468_cast_fp16")]; tensor var_39469_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3893_cast_fp16)[name = tensor("op_39469_cast_fp16")]; tensor var_39470_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3895_cast_fp16)[name = tensor("op_39470_cast_fp16")]; tensor var_39471_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3897_cast_fp16)[name = tensor("op_39471_cast_fp16")]; tensor var_39472_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3899_cast_fp16)[name = tensor("op_39472_cast_fp16")]; tensor var_39473_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3901_cast_fp16)[name = tensor("op_39473_cast_fp16")]; tensor var_39474_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3903_cast_fp16)[name = tensor("op_39474_cast_fp16")]; tensor var_39475_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3905_cast_fp16)[name = tensor("op_39475_cast_fp16")]; tensor var_39476_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3907_cast_fp16)[name = tensor("op_39476_cast_fp16")]; tensor var_39477_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3909_cast_fp16)[name = tensor("op_39477_cast_fp16")]; tensor var_39478_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3911_cast_fp16)[name = tensor("op_39478_cast_fp16")]; tensor var_39479_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3913_cast_fp16)[name = tensor("op_39479_cast_fp16")]; tensor var_39480_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3915_cast_fp16)[name = tensor("op_39480_cast_fp16")]; tensor var_39481_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3917_cast_fp16)[name = tensor("op_39481_cast_fp16")]; tensor var_39482_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3919_cast_fp16)[name = tensor("op_39482_cast_fp16")]; tensor var_39483_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3921_cast_fp16)[name = tensor("op_39483_cast_fp16")]; tensor var_39484_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3923_cast_fp16)[name = tensor("op_39484_cast_fp16")]; tensor var_39485_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3925_cast_fp16)[name = tensor("op_39485_cast_fp16")]; tensor var_39486_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3927_cast_fp16)[name = tensor("op_39486_cast_fp16")]; tensor var_39487_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3929_cast_fp16)[name = tensor("op_39487_cast_fp16")]; tensor var_39488_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3931_cast_fp16)[name = tensor("op_39488_cast_fp16")]; tensor var_39489_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3933_cast_fp16)[name = tensor("op_39489_cast_fp16")]; tensor var_39490_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3935_cast_fp16)[name = tensor("op_39490_cast_fp16")]; tensor var_39491_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3937_cast_fp16)[name = tensor("op_39491_cast_fp16")]; tensor var_39492_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3939_cast_fp16)[name = tensor("op_39492_cast_fp16")]; tensor var_39493_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3941_cast_fp16)[name = tensor("op_39493_cast_fp16")]; tensor var_39494_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3943_cast_fp16)[name = tensor("op_39494_cast_fp16")]; tensor var_39495_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3945_cast_fp16)[name = tensor("op_39495_cast_fp16")]; tensor var_39496_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3947_cast_fp16)[name = tensor("op_39496_cast_fp16")]; tensor var_39497_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3949_cast_fp16)[name = tensor("op_39497_cast_fp16")]; tensor var_39498_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3951_cast_fp16)[name = tensor("op_39498_cast_fp16")]; tensor var_39499_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3953_cast_fp16)[name = tensor("op_39499_cast_fp16")]; tensor var_39500_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3955_cast_fp16)[name = tensor("op_39500_cast_fp16")]; tensor var_39501_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3957_cast_fp16)[name = tensor("op_39501_cast_fp16")]; tensor var_39502_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3959_cast_fp16)[name = tensor("op_39502_cast_fp16")]; tensor var_39503_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3961_cast_fp16)[name = tensor("op_39503_cast_fp16")]; tensor var_39504_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3963_cast_fp16)[name = tensor("op_39504_cast_fp16")]; tensor var_39505_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3965_cast_fp16)[name = tensor("op_39505_cast_fp16")]; tensor var_39506_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3967_cast_fp16)[name = tensor("op_39506_cast_fp16")]; tensor var_39507_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3969_cast_fp16)[name = tensor("op_39507_cast_fp16")]; tensor var_39508_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3971_cast_fp16)[name = tensor("op_39508_cast_fp16")]; tensor var_39509_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3973_cast_fp16)[name = tensor("op_39509_cast_fp16")]; tensor var_39510_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3975_cast_fp16)[name = tensor("op_39510_cast_fp16")]; tensor var_39511_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3977_cast_fp16)[name = tensor("op_39511_cast_fp16")]; tensor var_39512_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3979_cast_fp16)[name = tensor("op_39512_cast_fp16")]; tensor var_39513_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3981_cast_fp16)[name = tensor("op_39513_cast_fp16")]; tensor var_39514_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3983_cast_fp16)[name = tensor("op_39514_cast_fp16")]; tensor var_39515_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3985_cast_fp16)[name = tensor("op_39515_cast_fp16")]; tensor var_39516_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3987_cast_fp16)[name = tensor("op_39516_cast_fp16")]; tensor var_39517_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3989_cast_fp16)[name = tensor("op_39517_cast_fp16")]; tensor var_39518_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3991_cast_fp16)[name = tensor("op_39518_cast_fp16")]; tensor var_39519_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3993_cast_fp16)[name = tensor("op_39519_cast_fp16")]; tensor var_39520_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3995_cast_fp16)[name = tensor("op_39520_cast_fp16")]; tensor var_39521_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3997_cast_fp16)[name = tensor("op_39521_cast_fp16")]; tensor var_39522_cast_fp16 = softmax(axis = var_38241, x = aw_chunk_3999_cast_fp16)[name = tensor("op_39522_cast_fp16")]; tensor var_39524_equation_0 = const()[name = tensor("op_39524_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39524_cast_fp16 = einsum(equation = var_39524_equation_0, values = (var_39044_cast_fp16, var_39443_cast_fp16))[name = tensor("op_39524_cast_fp16")]; tensor var_39526_equation_0 = const()[name = tensor("op_39526_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39526_cast_fp16 = einsum(equation = var_39526_equation_0, values = (var_39044_cast_fp16, var_39444_cast_fp16))[name = tensor("op_39526_cast_fp16")]; tensor var_39528_equation_0 = const()[name = tensor("op_39528_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39528_cast_fp16 = einsum(equation = var_39528_equation_0, values = (var_39044_cast_fp16, var_39445_cast_fp16))[name = tensor("op_39528_cast_fp16")]; tensor var_39530_equation_0 = const()[name = tensor("op_39530_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39530_cast_fp16 = einsum(equation = var_39530_equation_0, values = (var_39044_cast_fp16, var_39446_cast_fp16))[name = tensor("op_39530_cast_fp16")]; tensor var_39532_equation_0 = const()[name = tensor("op_39532_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39532_cast_fp16 = einsum(equation = var_39532_equation_0, values = (var_39048_cast_fp16, var_39447_cast_fp16))[name = tensor("op_39532_cast_fp16")]; tensor var_39534_equation_0 = const()[name = tensor("op_39534_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39534_cast_fp16 = einsum(equation = var_39534_equation_0, values = (var_39048_cast_fp16, var_39448_cast_fp16))[name = tensor("op_39534_cast_fp16")]; tensor var_39536_equation_0 = const()[name = tensor("op_39536_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39536_cast_fp16 = einsum(equation = var_39536_equation_0, values = (var_39048_cast_fp16, var_39449_cast_fp16))[name = tensor("op_39536_cast_fp16")]; tensor var_39538_equation_0 = const()[name = tensor("op_39538_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39538_cast_fp16 = einsum(equation = var_39538_equation_0, values = (var_39048_cast_fp16, var_39450_cast_fp16))[name = tensor("op_39538_cast_fp16")]; tensor var_39540_equation_0 = const()[name = tensor("op_39540_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39540_cast_fp16 = einsum(equation = var_39540_equation_0, values = (var_39052_cast_fp16, var_39451_cast_fp16))[name = tensor("op_39540_cast_fp16")]; tensor var_39542_equation_0 = const()[name = tensor("op_39542_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39542_cast_fp16 = einsum(equation = var_39542_equation_0, values = (var_39052_cast_fp16, var_39452_cast_fp16))[name = tensor("op_39542_cast_fp16")]; tensor var_39544_equation_0 = const()[name = tensor("op_39544_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39544_cast_fp16 = einsum(equation = var_39544_equation_0, values = (var_39052_cast_fp16, var_39453_cast_fp16))[name = tensor("op_39544_cast_fp16")]; tensor var_39546_equation_0 = const()[name = tensor("op_39546_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39546_cast_fp16 = einsum(equation = var_39546_equation_0, values = (var_39052_cast_fp16, var_39454_cast_fp16))[name = tensor("op_39546_cast_fp16")]; tensor var_39548_equation_0 = const()[name = tensor("op_39548_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39548_cast_fp16 = einsum(equation = var_39548_equation_0, values = (var_39056_cast_fp16, var_39455_cast_fp16))[name = tensor("op_39548_cast_fp16")]; tensor var_39550_equation_0 = const()[name = tensor("op_39550_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39550_cast_fp16 = einsum(equation = var_39550_equation_0, values = (var_39056_cast_fp16, var_39456_cast_fp16))[name = tensor("op_39550_cast_fp16")]; tensor var_39552_equation_0 = const()[name = tensor("op_39552_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39552_cast_fp16 = einsum(equation = var_39552_equation_0, values = (var_39056_cast_fp16, var_39457_cast_fp16))[name = tensor("op_39552_cast_fp16")]; tensor var_39554_equation_0 = const()[name = tensor("op_39554_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39554_cast_fp16 = einsum(equation = var_39554_equation_0, values = (var_39056_cast_fp16, var_39458_cast_fp16))[name = tensor("op_39554_cast_fp16")]; tensor var_39556_equation_0 = const()[name = tensor("op_39556_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39556_cast_fp16 = einsum(equation = var_39556_equation_0, values = (var_39060_cast_fp16, var_39459_cast_fp16))[name = tensor("op_39556_cast_fp16")]; tensor var_39558_equation_0 = const()[name = tensor("op_39558_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39558_cast_fp16 = einsum(equation = var_39558_equation_0, values = (var_39060_cast_fp16, var_39460_cast_fp16))[name = tensor("op_39558_cast_fp16")]; tensor var_39560_equation_0 = const()[name = tensor("op_39560_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39560_cast_fp16 = einsum(equation = var_39560_equation_0, values = (var_39060_cast_fp16, var_39461_cast_fp16))[name = tensor("op_39560_cast_fp16")]; tensor var_39562_equation_0 = const()[name = tensor("op_39562_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39562_cast_fp16 = einsum(equation = var_39562_equation_0, values = (var_39060_cast_fp16, var_39462_cast_fp16))[name = tensor("op_39562_cast_fp16")]; tensor var_39564_equation_0 = const()[name = tensor("op_39564_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39564_cast_fp16 = einsum(equation = var_39564_equation_0, values = (var_39064_cast_fp16, var_39463_cast_fp16))[name = tensor("op_39564_cast_fp16")]; tensor var_39566_equation_0 = const()[name = tensor("op_39566_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39566_cast_fp16 = einsum(equation = var_39566_equation_0, values = (var_39064_cast_fp16, var_39464_cast_fp16))[name = tensor("op_39566_cast_fp16")]; tensor var_39568_equation_0 = const()[name = tensor("op_39568_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39568_cast_fp16 = einsum(equation = var_39568_equation_0, values = (var_39064_cast_fp16, var_39465_cast_fp16))[name = tensor("op_39568_cast_fp16")]; tensor var_39570_equation_0 = const()[name = tensor("op_39570_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39570_cast_fp16 = einsum(equation = var_39570_equation_0, values = (var_39064_cast_fp16, var_39466_cast_fp16))[name = tensor("op_39570_cast_fp16")]; tensor var_39572_equation_0 = const()[name = tensor("op_39572_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39572_cast_fp16 = einsum(equation = var_39572_equation_0, values = (var_39068_cast_fp16, var_39467_cast_fp16))[name = tensor("op_39572_cast_fp16")]; tensor var_39574_equation_0 = const()[name = tensor("op_39574_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39574_cast_fp16 = einsum(equation = var_39574_equation_0, values = (var_39068_cast_fp16, var_39468_cast_fp16))[name = tensor("op_39574_cast_fp16")]; tensor var_39576_equation_0 = const()[name = tensor("op_39576_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39576_cast_fp16 = einsum(equation = var_39576_equation_0, values = (var_39068_cast_fp16, var_39469_cast_fp16))[name = tensor("op_39576_cast_fp16")]; tensor var_39578_equation_0 = const()[name = tensor("op_39578_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39578_cast_fp16 = einsum(equation = var_39578_equation_0, values = (var_39068_cast_fp16, var_39470_cast_fp16))[name = tensor("op_39578_cast_fp16")]; tensor var_39580_equation_0 = const()[name = tensor("op_39580_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39580_cast_fp16 = einsum(equation = var_39580_equation_0, values = (var_39072_cast_fp16, var_39471_cast_fp16))[name = tensor("op_39580_cast_fp16")]; tensor var_39582_equation_0 = const()[name = tensor("op_39582_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39582_cast_fp16 = einsum(equation = var_39582_equation_0, values = (var_39072_cast_fp16, var_39472_cast_fp16))[name = tensor("op_39582_cast_fp16")]; tensor var_39584_equation_0 = const()[name = tensor("op_39584_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39584_cast_fp16 = einsum(equation = var_39584_equation_0, values = (var_39072_cast_fp16, var_39473_cast_fp16))[name = tensor("op_39584_cast_fp16")]; tensor var_39586_equation_0 = const()[name = tensor("op_39586_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39586_cast_fp16 = einsum(equation = var_39586_equation_0, values = (var_39072_cast_fp16, var_39474_cast_fp16))[name = tensor("op_39586_cast_fp16")]; tensor var_39588_equation_0 = const()[name = tensor("op_39588_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39588_cast_fp16 = einsum(equation = var_39588_equation_0, values = (var_39076_cast_fp16, var_39475_cast_fp16))[name = tensor("op_39588_cast_fp16")]; tensor var_39590_equation_0 = const()[name = tensor("op_39590_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39590_cast_fp16 = einsum(equation = var_39590_equation_0, values = (var_39076_cast_fp16, var_39476_cast_fp16))[name = tensor("op_39590_cast_fp16")]; tensor var_39592_equation_0 = const()[name = tensor("op_39592_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39592_cast_fp16 = einsum(equation = var_39592_equation_0, values = (var_39076_cast_fp16, var_39477_cast_fp16))[name = tensor("op_39592_cast_fp16")]; tensor var_39594_equation_0 = const()[name = tensor("op_39594_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39594_cast_fp16 = einsum(equation = var_39594_equation_0, values = (var_39076_cast_fp16, var_39478_cast_fp16))[name = tensor("op_39594_cast_fp16")]; tensor var_39596_equation_0 = const()[name = tensor("op_39596_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39596_cast_fp16 = einsum(equation = var_39596_equation_0, values = (var_39080_cast_fp16, var_39479_cast_fp16))[name = tensor("op_39596_cast_fp16")]; tensor var_39598_equation_0 = const()[name = tensor("op_39598_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39598_cast_fp16 = einsum(equation = var_39598_equation_0, values = (var_39080_cast_fp16, var_39480_cast_fp16))[name = tensor("op_39598_cast_fp16")]; tensor var_39600_equation_0 = const()[name = tensor("op_39600_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39600_cast_fp16 = einsum(equation = var_39600_equation_0, values = (var_39080_cast_fp16, var_39481_cast_fp16))[name = tensor("op_39600_cast_fp16")]; tensor var_39602_equation_0 = const()[name = tensor("op_39602_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39602_cast_fp16 = einsum(equation = var_39602_equation_0, values = (var_39080_cast_fp16, var_39482_cast_fp16))[name = tensor("op_39602_cast_fp16")]; tensor var_39604_equation_0 = const()[name = tensor("op_39604_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39604_cast_fp16 = einsum(equation = var_39604_equation_0, values = (var_39084_cast_fp16, var_39483_cast_fp16))[name = tensor("op_39604_cast_fp16")]; tensor var_39606_equation_0 = const()[name = tensor("op_39606_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39606_cast_fp16 = einsum(equation = var_39606_equation_0, values = (var_39084_cast_fp16, var_39484_cast_fp16))[name = tensor("op_39606_cast_fp16")]; tensor var_39608_equation_0 = const()[name = tensor("op_39608_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39608_cast_fp16 = einsum(equation = var_39608_equation_0, values = (var_39084_cast_fp16, var_39485_cast_fp16))[name = tensor("op_39608_cast_fp16")]; tensor var_39610_equation_0 = const()[name = tensor("op_39610_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39610_cast_fp16 = einsum(equation = var_39610_equation_0, values = (var_39084_cast_fp16, var_39486_cast_fp16))[name = tensor("op_39610_cast_fp16")]; tensor var_39612_equation_0 = const()[name = tensor("op_39612_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39612_cast_fp16 = einsum(equation = var_39612_equation_0, values = (var_39088_cast_fp16, var_39487_cast_fp16))[name = tensor("op_39612_cast_fp16")]; tensor var_39614_equation_0 = const()[name = tensor("op_39614_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39614_cast_fp16 = einsum(equation = var_39614_equation_0, values = (var_39088_cast_fp16, var_39488_cast_fp16))[name = tensor("op_39614_cast_fp16")]; tensor var_39616_equation_0 = const()[name = tensor("op_39616_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39616_cast_fp16 = einsum(equation = var_39616_equation_0, values = (var_39088_cast_fp16, var_39489_cast_fp16))[name = tensor("op_39616_cast_fp16")]; tensor var_39618_equation_0 = const()[name = tensor("op_39618_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39618_cast_fp16 = einsum(equation = var_39618_equation_0, values = (var_39088_cast_fp16, var_39490_cast_fp16))[name = tensor("op_39618_cast_fp16")]; tensor var_39620_equation_0 = const()[name = tensor("op_39620_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39620_cast_fp16 = einsum(equation = var_39620_equation_0, values = (var_39092_cast_fp16, var_39491_cast_fp16))[name = tensor("op_39620_cast_fp16")]; tensor var_39622_equation_0 = const()[name = tensor("op_39622_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39622_cast_fp16 = einsum(equation = var_39622_equation_0, values = (var_39092_cast_fp16, var_39492_cast_fp16))[name = tensor("op_39622_cast_fp16")]; tensor var_39624_equation_0 = const()[name = tensor("op_39624_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39624_cast_fp16 = einsum(equation = var_39624_equation_0, values = (var_39092_cast_fp16, var_39493_cast_fp16))[name = tensor("op_39624_cast_fp16")]; tensor var_39626_equation_0 = const()[name = tensor("op_39626_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39626_cast_fp16 = einsum(equation = var_39626_equation_0, values = (var_39092_cast_fp16, var_39494_cast_fp16))[name = tensor("op_39626_cast_fp16")]; tensor var_39628_equation_0 = const()[name = tensor("op_39628_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39628_cast_fp16 = einsum(equation = var_39628_equation_0, values = (var_39096_cast_fp16, var_39495_cast_fp16))[name = tensor("op_39628_cast_fp16")]; tensor var_39630_equation_0 = const()[name = tensor("op_39630_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39630_cast_fp16 = einsum(equation = var_39630_equation_0, values = (var_39096_cast_fp16, var_39496_cast_fp16))[name = tensor("op_39630_cast_fp16")]; tensor var_39632_equation_0 = const()[name = tensor("op_39632_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39632_cast_fp16 = einsum(equation = var_39632_equation_0, values = (var_39096_cast_fp16, var_39497_cast_fp16))[name = tensor("op_39632_cast_fp16")]; tensor var_39634_equation_0 = const()[name = tensor("op_39634_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39634_cast_fp16 = einsum(equation = var_39634_equation_0, values = (var_39096_cast_fp16, var_39498_cast_fp16))[name = tensor("op_39634_cast_fp16")]; tensor var_39636_equation_0 = const()[name = tensor("op_39636_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39636_cast_fp16 = einsum(equation = var_39636_equation_0, values = (var_39100_cast_fp16, var_39499_cast_fp16))[name = tensor("op_39636_cast_fp16")]; tensor var_39638_equation_0 = const()[name = tensor("op_39638_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39638_cast_fp16 = einsum(equation = var_39638_equation_0, values = (var_39100_cast_fp16, var_39500_cast_fp16))[name = tensor("op_39638_cast_fp16")]; tensor var_39640_equation_0 = const()[name = tensor("op_39640_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39640_cast_fp16 = einsum(equation = var_39640_equation_0, values = (var_39100_cast_fp16, var_39501_cast_fp16))[name = tensor("op_39640_cast_fp16")]; tensor var_39642_equation_0 = const()[name = tensor("op_39642_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39642_cast_fp16 = einsum(equation = var_39642_equation_0, values = (var_39100_cast_fp16, var_39502_cast_fp16))[name = tensor("op_39642_cast_fp16")]; tensor var_39644_equation_0 = const()[name = tensor("op_39644_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39644_cast_fp16 = einsum(equation = var_39644_equation_0, values = (var_39104_cast_fp16, var_39503_cast_fp16))[name = tensor("op_39644_cast_fp16")]; tensor var_39646_equation_0 = const()[name = tensor("op_39646_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39646_cast_fp16 = einsum(equation = var_39646_equation_0, values = (var_39104_cast_fp16, var_39504_cast_fp16))[name = tensor("op_39646_cast_fp16")]; tensor var_39648_equation_0 = const()[name = tensor("op_39648_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39648_cast_fp16 = einsum(equation = var_39648_equation_0, values = (var_39104_cast_fp16, var_39505_cast_fp16))[name = tensor("op_39648_cast_fp16")]; tensor var_39650_equation_0 = const()[name = tensor("op_39650_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39650_cast_fp16 = einsum(equation = var_39650_equation_0, values = (var_39104_cast_fp16, var_39506_cast_fp16))[name = tensor("op_39650_cast_fp16")]; tensor var_39652_equation_0 = const()[name = tensor("op_39652_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39652_cast_fp16 = einsum(equation = var_39652_equation_0, values = (var_39108_cast_fp16, var_39507_cast_fp16))[name = tensor("op_39652_cast_fp16")]; tensor var_39654_equation_0 = const()[name = tensor("op_39654_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39654_cast_fp16 = einsum(equation = var_39654_equation_0, values = (var_39108_cast_fp16, var_39508_cast_fp16))[name = tensor("op_39654_cast_fp16")]; tensor var_39656_equation_0 = const()[name = tensor("op_39656_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39656_cast_fp16 = einsum(equation = var_39656_equation_0, values = (var_39108_cast_fp16, var_39509_cast_fp16))[name = tensor("op_39656_cast_fp16")]; tensor var_39658_equation_0 = const()[name = tensor("op_39658_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39658_cast_fp16 = einsum(equation = var_39658_equation_0, values = (var_39108_cast_fp16, var_39510_cast_fp16))[name = tensor("op_39658_cast_fp16")]; tensor var_39660_equation_0 = const()[name = tensor("op_39660_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39660_cast_fp16 = einsum(equation = var_39660_equation_0, values = (var_39112_cast_fp16, var_39511_cast_fp16))[name = tensor("op_39660_cast_fp16")]; tensor var_39662_equation_0 = const()[name = tensor("op_39662_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39662_cast_fp16 = einsum(equation = var_39662_equation_0, values = (var_39112_cast_fp16, var_39512_cast_fp16))[name = tensor("op_39662_cast_fp16")]; tensor var_39664_equation_0 = const()[name = tensor("op_39664_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39664_cast_fp16 = einsum(equation = var_39664_equation_0, values = (var_39112_cast_fp16, var_39513_cast_fp16))[name = tensor("op_39664_cast_fp16")]; tensor var_39666_equation_0 = const()[name = tensor("op_39666_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39666_cast_fp16 = einsum(equation = var_39666_equation_0, values = (var_39112_cast_fp16, var_39514_cast_fp16))[name = tensor("op_39666_cast_fp16")]; tensor var_39668_equation_0 = const()[name = tensor("op_39668_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39668_cast_fp16 = einsum(equation = var_39668_equation_0, values = (var_39116_cast_fp16, var_39515_cast_fp16))[name = tensor("op_39668_cast_fp16")]; tensor var_39670_equation_0 = const()[name = tensor("op_39670_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39670_cast_fp16 = einsum(equation = var_39670_equation_0, values = (var_39116_cast_fp16, var_39516_cast_fp16))[name = tensor("op_39670_cast_fp16")]; tensor var_39672_equation_0 = const()[name = tensor("op_39672_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39672_cast_fp16 = einsum(equation = var_39672_equation_0, values = (var_39116_cast_fp16, var_39517_cast_fp16))[name = tensor("op_39672_cast_fp16")]; tensor var_39674_equation_0 = const()[name = tensor("op_39674_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39674_cast_fp16 = einsum(equation = var_39674_equation_0, values = (var_39116_cast_fp16, var_39518_cast_fp16))[name = tensor("op_39674_cast_fp16")]; tensor var_39676_equation_0 = const()[name = tensor("op_39676_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39676_cast_fp16 = einsum(equation = var_39676_equation_0, values = (var_39120_cast_fp16, var_39519_cast_fp16))[name = tensor("op_39676_cast_fp16")]; tensor var_39678_equation_0 = const()[name = tensor("op_39678_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39678_cast_fp16 = einsum(equation = var_39678_equation_0, values = (var_39120_cast_fp16, var_39520_cast_fp16))[name = tensor("op_39678_cast_fp16")]; tensor var_39680_equation_0 = const()[name = tensor("op_39680_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39680_cast_fp16 = einsum(equation = var_39680_equation_0, values = (var_39120_cast_fp16, var_39521_cast_fp16))[name = tensor("op_39680_cast_fp16")]; tensor var_39682_equation_0 = const()[name = tensor("op_39682_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_39682_cast_fp16 = einsum(equation = var_39682_equation_0, values = (var_39120_cast_fp16, var_39522_cast_fp16))[name = tensor("op_39682_cast_fp16")]; tensor var_39684_interleave_0 = const()[name = tensor("op_39684_interleave_0"), val = tensor(false)]; tensor var_39684_cast_fp16 = concat(axis = var_38216, interleave = var_39684_interleave_0, values = (var_39524_cast_fp16, var_39526_cast_fp16, var_39528_cast_fp16, var_39530_cast_fp16))[name = tensor("op_39684_cast_fp16")]; tensor var_39686_interleave_0 = const()[name = tensor("op_39686_interleave_0"), val = tensor(false)]; tensor var_39686_cast_fp16 = concat(axis = var_38216, interleave = var_39686_interleave_0, values = (var_39532_cast_fp16, var_39534_cast_fp16, var_39536_cast_fp16, var_39538_cast_fp16))[name = tensor("op_39686_cast_fp16")]; tensor var_39688_interleave_0 = const()[name = tensor("op_39688_interleave_0"), val = tensor(false)]; tensor var_39688_cast_fp16 = concat(axis = var_38216, interleave = var_39688_interleave_0, values = (var_39540_cast_fp16, var_39542_cast_fp16, var_39544_cast_fp16, var_39546_cast_fp16))[name = tensor("op_39688_cast_fp16")]; tensor var_39690_interleave_0 = const()[name = tensor("op_39690_interleave_0"), val = tensor(false)]; tensor var_39690_cast_fp16 = concat(axis = var_38216, interleave = var_39690_interleave_0, values = (var_39548_cast_fp16, var_39550_cast_fp16, var_39552_cast_fp16, var_39554_cast_fp16))[name = tensor("op_39690_cast_fp16")]; tensor var_39692_interleave_0 = const()[name = tensor("op_39692_interleave_0"), val = tensor(false)]; tensor var_39692_cast_fp16 = concat(axis = var_38216, interleave = var_39692_interleave_0, values = (var_39556_cast_fp16, var_39558_cast_fp16, var_39560_cast_fp16, var_39562_cast_fp16))[name = tensor("op_39692_cast_fp16")]; tensor var_39694_interleave_0 = const()[name = tensor("op_39694_interleave_0"), val = tensor(false)]; tensor var_39694_cast_fp16 = concat(axis = var_38216, interleave = var_39694_interleave_0, values = (var_39564_cast_fp16, var_39566_cast_fp16, var_39568_cast_fp16, var_39570_cast_fp16))[name = tensor("op_39694_cast_fp16")]; tensor var_39696_interleave_0 = const()[name = tensor("op_39696_interleave_0"), val = tensor(false)]; tensor var_39696_cast_fp16 = concat(axis = var_38216, interleave = var_39696_interleave_0, values = (var_39572_cast_fp16, var_39574_cast_fp16, var_39576_cast_fp16, var_39578_cast_fp16))[name = tensor("op_39696_cast_fp16")]; tensor var_39698_interleave_0 = const()[name = tensor("op_39698_interleave_0"), val = tensor(false)]; tensor var_39698_cast_fp16 = concat(axis = var_38216, interleave = var_39698_interleave_0, values = (var_39580_cast_fp16, var_39582_cast_fp16, var_39584_cast_fp16, var_39586_cast_fp16))[name = tensor("op_39698_cast_fp16")]; tensor var_39700_interleave_0 = const()[name = tensor("op_39700_interleave_0"), val = tensor(false)]; tensor var_39700_cast_fp16 = concat(axis = var_38216, interleave = var_39700_interleave_0, values = (var_39588_cast_fp16, var_39590_cast_fp16, var_39592_cast_fp16, var_39594_cast_fp16))[name = tensor("op_39700_cast_fp16")]; tensor var_39702_interleave_0 = const()[name = tensor("op_39702_interleave_0"), val = tensor(false)]; tensor var_39702_cast_fp16 = concat(axis = var_38216, interleave = var_39702_interleave_0, values = (var_39596_cast_fp16, var_39598_cast_fp16, var_39600_cast_fp16, var_39602_cast_fp16))[name = tensor("op_39702_cast_fp16")]; tensor var_39704_interleave_0 = const()[name = tensor("op_39704_interleave_0"), val = tensor(false)]; tensor var_39704_cast_fp16 = concat(axis = var_38216, interleave = var_39704_interleave_0, values = (var_39604_cast_fp16, var_39606_cast_fp16, var_39608_cast_fp16, var_39610_cast_fp16))[name = tensor("op_39704_cast_fp16")]; tensor var_39706_interleave_0 = const()[name = tensor("op_39706_interleave_0"), val = tensor(false)]; tensor var_39706_cast_fp16 = concat(axis = var_38216, interleave = var_39706_interleave_0, values = (var_39612_cast_fp16, var_39614_cast_fp16, var_39616_cast_fp16, var_39618_cast_fp16))[name = tensor("op_39706_cast_fp16")]; tensor var_39708_interleave_0 = const()[name = tensor("op_39708_interleave_0"), val = tensor(false)]; tensor var_39708_cast_fp16 = concat(axis = var_38216, interleave = var_39708_interleave_0, values = (var_39620_cast_fp16, var_39622_cast_fp16, var_39624_cast_fp16, var_39626_cast_fp16))[name = tensor("op_39708_cast_fp16")]; tensor var_39710_interleave_0 = const()[name = tensor("op_39710_interleave_0"), val = tensor(false)]; tensor var_39710_cast_fp16 = concat(axis = var_38216, interleave = var_39710_interleave_0, values = (var_39628_cast_fp16, var_39630_cast_fp16, var_39632_cast_fp16, var_39634_cast_fp16))[name = tensor("op_39710_cast_fp16")]; tensor var_39712_interleave_0 = const()[name = tensor("op_39712_interleave_0"), val = tensor(false)]; tensor var_39712_cast_fp16 = concat(axis = var_38216, interleave = var_39712_interleave_0, values = (var_39636_cast_fp16, var_39638_cast_fp16, var_39640_cast_fp16, var_39642_cast_fp16))[name = tensor("op_39712_cast_fp16")]; tensor var_39714_interleave_0 = const()[name = tensor("op_39714_interleave_0"), val = tensor(false)]; tensor var_39714_cast_fp16 = concat(axis = var_38216, interleave = var_39714_interleave_0, values = (var_39644_cast_fp16, var_39646_cast_fp16, var_39648_cast_fp16, var_39650_cast_fp16))[name = tensor("op_39714_cast_fp16")]; tensor var_39716_interleave_0 = const()[name = tensor("op_39716_interleave_0"), val = tensor(false)]; tensor var_39716_cast_fp16 = concat(axis = var_38216, interleave = var_39716_interleave_0, values = (var_39652_cast_fp16, var_39654_cast_fp16, var_39656_cast_fp16, var_39658_cast_fp16))[name = tensor("op_39716_cast_fp16")]; tensor var_39718_interleave_0 = const()[name = tensor("op_39718_interleave_0"), val = tensor(false)]; tensor var_39718_cast_fp16 = concat(axis = var_38216, interleave = var_39718_interleave_0, values = (var_39660_cast_fp16, var_39662_cast_fp16, var_39664_cast_fp16, var_39666_cast_fp16))[name = tensor("op_39718_cast_fp16")]; tensor var_39720_interleave_0 = const()[name = tensor("op_39720_interleave_0"), val = tensor(false)]; tensor var_39720_cast_fp16 = concat(axis = var_38216, interleave = var_39720_interleave_0, values = (var_39668_cast_fp16, var_39670_cast_fp16, var_39672_cast_fp16, var_39674_cast_fp16))[name = tensor("op_39720_cast_fp16")]; tensor var_39722_interleave_0 = const()[name = tensor("op_39722_interleave_0"), val = tensor(false)]; tensor var_39722_cast_fp16 = concat(axis = var_38216, interleave = var_39722_interleave_0, values = (var_39676_cast_fp16, var_39678_cast_fp16, var_39680_cast_fp16, var_39682_cast_fp16))[name = tensor("op_39722_cast_fp16")]; tensor input_193_interleave_0 = const()[name = tensor("input_193_interleave_0"), val = tensor(false)]; tensor input_193_cast_fp16 = concat(axis = var_38241, interleave = input_193_interleave_0, values = (var_39684_cast_fp16, var_39686_cast_fp16, var_39688_cast_fp16, var_39690_cast_fp16, var_39692_cast_fp16, var_39694_cast_fp16, var_39696_cast_fp16, var_39698_cast_fp16, var_39700_cast_fp16, var_39702_cast_fp16, var_39704_cast_fp16, var_39706_cast_fp16, var_39708_cast_fp16, var_39710_cast_fp16, var_39712_cast_fp16, var_39714_cast_fp16, var_39716_cast_fp16, var_39718_cast_fp16, var_39720_cast_fp16, var_39722_cast_fp16))[name = tensor("input_193_cast_fp16")]; tensor var_39733_pad_type_0 = const()[name = tensor("op_39733_pad_type_0"), val = tensor("valid")]; tensor var_39733_strides_0 = const()[name = tensor("op_39733_strides_0"), val = tensor([1, 1])]; tensor var_39733_pad_0 = const()[name = tensor("op_39733_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39733_dilations_0 = const()[name = tensor("op_39733_dilations_0"), val = tensor([1, 1])]; tensor var_39733_groups_0 = const()[name = tensor("op_39733_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325026944))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325846208))), name = tensor("layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_24_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325846336)))]; tensor var_39733_cast_fp16 = conv(bias = layers_24_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_39733_dilations_0, groups = var_39733_groups_0, pad = var_39733_pad_0, pad_type = var_39733_pad_type_0, strides = var_39733_strides_0, weight = layers_24_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_193_cast_fp16)[name = tensor("op_39733_cast_fp16")]; tensor var_39739_pad_type_0 = const()[name = tensor("op_39739_pad_type_0"), val = tensor("valid")]; tensor var_39739_strides_0 = const()[name = tensor("op_39739_strides_0"), val = tensor([1, 1])]; tensor var_39739_pad_0 = const()[name = tensor("op_39739_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39739_dilations_0 = const()[name = tensor("op_39739_dilations_0"), val = tensor([1, 1])]; tensor var_39739_groups_0 = const()[name = tensor("op_39739_groups_0"), val = tensor(1)]; tensor layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325859712))), name = tensor("layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(325848960))), shape = tensor([1280, 1280, 1, 1])]; tensor var_39739_cast_fp16 = conv(dilations = var_39739_dilations_0, groups = var_39739_groups_0, pad = var_39739_pad_0, pad_type = var_39739_pad_type_0, strides = var_39739_strides_0, weight = layers_24_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_193_cast_fp16)[name = tensor("op_39739_cast_fp16")]; tensor obj_99_cast_fp16 = add(x = var_39733_cast_fp16, y = var_39739_cast_fp16)[name = tensor("obj_99_cast_fp16")]; tensor inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = obj_99_cast_fp16)[name = tensor("inputs_99_cast_fp16")]; tensor out_99_axes_0 = const()[name = tensor("out_99_axes_0"), val = tensor([1])]; tensor var_39750_to_fp16 = const()[name = tensor("op_39750_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_99_cast_fp16 = layer_norm(axes = out_99_axes_0, epsilon = var_39750_to_fp16, x = inputs_99_cast_fp16)[name = tensor("out_99_cast_fp16")]; tensor input_195_gamma_0_to_fp16 = const()[name = tensor("input_195_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326064576)))]; tensor input_195_beta_0_to_fp16 = const()[name = tensor("input_195_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326067200)))]; tensor input_195_epsilon_0_to_fp16 = const()[name = tensor("input_195_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_195_cast_fp16 = batch_norm(beta = input_195_beta_0_to_fp16, epsilon = input_195_epsilon_0_to_fp16, gamma = input_195_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_99_cast_fp16)[name = tensor("input_195_cast_fp16")]; tensor var_39768_pad_type_0 = const()[name = tensor("op_39768_pad_type_0"), val = tensor("valid")]; tensor var_39768_strides_0 = const()[name = tensor("op_39768_strides_0"), val = tensor([1, 1])]; tensor var_39768_pad_0 = const()[name = tensor("op_39768_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39768_dilations_0 = const()[name = tensor("op_39768_dilations_0"), val = tensor([1, 1])]; tensor var_39768_groups_0 = const()[name = tensor("op_39768_groups_0"), val = tensor(1)]; tensor layers_24_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(326069824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329346688))), name = tensor("layers_24_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_24_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329346816)))]; tensor var_39768_cast_fp16 = conv(bias = layers_24_fc1_inlier_module_bias_to_fp16, dilations = var_39768_dilations_0, groups = var_39768_groups_0, pad = var_39768_pad_0, pad_type = var_39768_pad_type_0, strides = var_39768_strides_0, weight = layers_24_fc1_inlier_module_weight_to_fp16_palettized, x = input_195_cast_fp16)[name = tensor("op_39768_cast_fp16")]; tensor var_39774_pad_type_0 = const()[name = tensor("op_39774_pad_type_0"), val = tensor("valid")]; tensor var_39774_strides_0 = const()[name = tensor("op_39774_strides_0"), val = tensor([1, 1])]; tensor var_39774_pad_0 = const()[name = tensor("op_39774_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39774_dilations_0 = const()[name = tensor("op_39774_dilations_0"), val = tensor([1, 1])]; tensor var_39774_groups_0 = const()[name = tensor("op_39774_groups_0"), val = tensor(1)]; tensor layers_24_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329417984))), name = tensor("layers_24_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329357120))), shape = tensor([5120, 1280, 1, 1])]; tensor var_39774_cast_fp16 = conv(dilations = var_39774_dilations_0, groups = var_39774_groups_0, pad = var_39774_pad_0, pad_type = var_39774_pad_type_0, strides = var_39774_strides_0, weight = layers_24_fc1_outlier_module_weight_to_fp16_sparsified, x = input_195_cast_fp16)[name = tensor("op_39774_cast_fp16")]; tensor input_197_cast_fp16 = add(x = var_39768_cast_fp16, y = var_39774_cast_fp16)[name = tensor("input_197_cast_fp16")]; tensor input_199_mode_0 = const()[name = tensor("input_199_mode_0"), val = tensor("EXACT")]; tensor input_199_cast_fp16 = gelu(mode = input_199_mode_0, x = input_197_cast_fp16)[name = tensor("input_199_cast_fp16")]; tensor var_39785_pad_type_0 = const()[name = tensor("op_39785_pad_type_0"), val = tensor("valid")]; tensor var_39785_strides_0 = const()[name = tensor("op_39785_strides_0"), val = tensor([1, 1])]; tensor var_39785_pad_0 = const()[name = tensor("op_39785_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39785_dilations_0 = const()[name = tensor("op_39785_dilations_0"), val = tensor([1, 1])]; tensor var_39785_groups_0 = const()[name = tensor("op_39785_groups_0"), val = tensor(1)]; tensor layers_24_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(330237248))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333514112))), name = tensor("layers_24_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_24_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_24_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333514240)))]; tensor var_39785_cast_fp16 = conv(bias = layers_24_fc2_inlier_module_bias_to_fp16, dilations = var_39785_dilations_0, groups = var_39785_groups_0, pad = var_39785_pad_0, pad_type = var_39785_pad_type_0, strides = var_39785_strides_0, weight = layers_24_fc2_inlier_module_weight_to_fp16_palettized, x = input_199_cast_fp16)[name = tensor("op_39785_cast_fp16")]; tensor var_39791_pad_type_0 = const()[name = tensor("op_39791_pad_type_0"), val = tensor("valid")]; tensor var_39791_strides_0 = const()[name = tensor("op_39791_strides_0"), val = tensor([1, 1])]; tensor var_39791_pad_0 = const()[name = tensor("op_39791_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39791_dilations_0 = const()[name = tensor("op_39791_dilations_0"), val = tensor([1, 1])]; tensor var_39791_groups_0 = const()[name = tensor("op_39791_groups_0"), val = tensor(1)]; tensor layers_24_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333569216))), name = tensor("layers_24_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(333516864))), shape = tensor([1280, 5120, 1, 1])]; tensor var_39791_cast_fp16 = conv(dilations = var_39791_dilations_0, groups = var_39791_groups_0, pad = var_39791_pad_0, pad_type = var_39791_pad_type_0, strides = var_39791_strides_0, weight = layers_24_fc2_outlier_module_weight_to_fp16_sparsified, x = input_199_cast_fp16)[name = tensor("op_39791_cast_fp16")]; tensor hidden_states_53_cast_fp16 = add(x = var_39785_cast_fp16, y = var_39791_cast_fp16)[name = tensor("hidden_states_53_cast_fp16")]; tensor inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = hidden_states_53_cast_fp16)[name = tensor("inputs_101_cast_fp16")]; tensor var_39797 = const()[name = tensor("op_39797"), val = tensor(3)]; tensor var_39822 = const()[name = tensor("op_39822"), val = tensor(1)]; tensor out_101_axes_0 = const()[name = tensor("out_101_axes_0"), val = tensor([1])]; tensor var_39839_to_fp16 = const()[name = tensor("op_39839_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_101_cast_fp16 = layer_norm(axes = out_101_axes_0, epsilon = var_39839_to_fp16, x = inputs_101_cast_fp16)[name = tensor("out_101_cast_fp16")]; tensor obj_101_gamma_0_to_fp16 = const()[name = tensor("obj_101_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334388480)))]; tensor obj_101_beta_0_to_fp16 = const()[name = tensor("obj_101_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334391104)))]; tensor obj_101_epsilon_0_to_fp16 = const()[name = tensor("obj_101_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_101_cast_fp16 = batch_norm(beta = obj_101_beta_0_to_fp16, epsilon = obj_101_epsilon_0_to_fp16, gamma = obj_101_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_101_cast_fp16)[name = tensor("obj_101_cast_fp16")]; tensor var_39861_pad_type_0 = const()[name = tensor("op_39861_pad_type_0"), val = tensor("valid")]; tensor var_39861_strides_0 = const()[name = tensor("op_39861_strides_0"), val = tensor([1, 1])]; tensor var_39861_pad_0 = const()[name = tensor("op_39861_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39861_dilations_0 = const()[name = tensor("op_39861_dilations_0"), val = tensor([1, 1])]; tensor var_39861_groups_0 = const()[name = tensor("op_39861_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(334393728))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335212992))), name = tensor("layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_25_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335213120)))]; tensor var_39861_cast_fp16 = conv(bias = layers_25_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_39861_dilations_0, groups = var_39861_groups_0, pad = var_39861_pad_0, pad_type = var_39861_pad_type_0, strides = var_39861_strides_0, weight = layers_25_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = tensor("op_39861_cast_fp16")]; tensor var_39867_pad_type_0 = const()[name = tensor("op_39867_pad_type_0"), val = tensor("valid")]; tensor var_39867_strides_0 = const()[name = tensor("op_39867_strides_0"), val = tensor([1, 1])]; tensor var_39867_pad_0 = const()[name = tensor("op_39867_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39867_dilations_0 = const()[name = tensor("op_39867_dilations_0"), val = tensor([1, 1])]; tensor var_39867_groups_0 = const()[name = tensor("op_39867_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335253376))), name = tensor("layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335215744))), shape = tensor([1280, 1280, 1, 1])]; tensor var_39867_cast_fp16 = conv(dilations = var_39867_dilations_0, groups = var_39867_groups_0, pad = var_39867_pad_0, pad_type = var_39867_pad_type_0, strides = var_39867_strides_0, weight = layers_25_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = tensor("op_39867_cast_fp16")]; tensor query_51_cast_fp16 = add(x = var_39861_cast_fp16, y = var_39867_cast_fp16)[name = tensor("query_51_cast_fp16")]; tensor var_39876_pad_type_0 = const()[name = tensor("op_39876_pad_type_0"), val = tensor("valid")]; tensor var_39876_strides_0 = const()[name = tensor("op_39876_strides_0"), val = tensor([1, 1])]; tensor var_39876_pad_0 = const()[name = tensor("op_39876_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39876_dilations_0 = const()[name = tensor("op_39876_dilations_0"), val = tensor([1, 1])]; tensor var_39876_groups_0 = const()[name = tensor("op_39876_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335458240))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336277504))), name = tensor("layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_39876_cast_fp16 = conv(dilations = var_39876_dilations_0, groups = var_39876_groups_0, pad = var_39876_pad_0, pad_type = var_39876_pad_type_0, strides = var_39876_strides_0, weight = layers_25_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = tensor("op_39876_cast_fp16")]; tensor var_39882_pad_type_0 = const()[name = tensor("op_39882_pad_type_0"), val = tensor("valid")]; tensor var_39882_strides_0 = const()[name = tensor("op_39882_strides_0"), val = tensor([1, 1])]; tensor var_39882_pad_0 = const()[name = tensor("op_39882_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39882_dilations_0 = const()[name = tensor("op_39882_dilations_0"), val = tensor([1, 1])]; tensor var_39882_groups_0 = const()[name = tensor("op_39882_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336302912))), name = tensor("layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336277632))), shape = tensor([1280, 1280, 1, 1])]; tensor var_39882_cast_fp16 = conv(dilations = var_39882_dilations_0, groups = var_39882_groups_0, pad = var_39882_pad_0, pad_type = var_39882_pad_type_0, strides = var_39882_strides_0, weight = layers_25_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = tensor("op_39882_cast_fp16")]; tensor key_51_cast_fp16 = add(x = var_39876_cast_fp16, y = var_39882_cast_fp16)[name = tensor("key_51_cast_fp16")]; tensor var_39892_pad_type_0 = const()[name = tensor("op_39892_pad_type_0"), val = tensor("valid")]; tensor var_39892_strides_0 = const()[name = tensor("op_39892_strides_0"), val = tensor([1, 1])]; tensor var_39892_pad_0 = const()[name = tensor("op_39892_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39892_dilations_0 = const()[name = tensor("op_39892_dilations_0"), val = tensor([1, 1])]; tensor var_39892_groups_0 = const()[name = tensor("op_39892_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(336507776))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337327040))), name = tensor("layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_25_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337327168)))]; tensor var_39892_cast_fp16 = conv(bias = layers_25_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_39892_dilations_0, groups = var_39892_groups_0, pad = var_39892_pad_0, pad_type = var_39892_pad_type_0, strides = var_39892_strides_0, weight = layers_25_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_101_cast_fp16)[name = tensor("op_39892_cast_fp16")]; tensor var_39898_pad_type_0 = const()[name = tensor("op_39898_pad_type_0"), val = tensor("valid")]; tensor var_39898_strides_0 = const()[name = tensor("op_39898_strides_0"), val = tensor([1, 1])]; tensor var_39898_pad_0 = const()[name = tensor("op_39898_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_39898_dilations_0 = const()[name = tensor("op_39898_dilations_0"), val = tensor([1, 1])]; tensor var_39898_groups_0 = const()[name = tensor("op_39898_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337341760))), name = tensor("layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337329792))), shape = tensor([1280, 1280, 1, 1])]; tensor var_39898_cast_fp16 = conv(dilations = var_39898_dilations_0, groups = var_39898_groups_0, pad = var_39898_pad_0, pad_type = var_39898_pad_type_0, strides = var_39898_strides_0, weight = layers_25_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_101_cast_fp16)[name = tensor("op_39898_cast_fp16")]; tensor value_51_cast_fp16 = add(x = var_39892_cast_fp16, y = var_39898_cast_fp16)[name = tensor("value_51_cast_fp16")]; tensor var_39904_begin_0 = const()[name = tensor("op_39904_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_39904_end_0 = const()[name = tensor("op_39904_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_39904_end_mask_0 = const()[name = tensor("op_39904_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39904_cast_fp16 = slice_by_index(begin = var_39904_begin_0, end = var_39904_end_0, end_mask = var_39904_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39904_cast_fp16")]; tensor var_39908_begin_0 = const()[name = tensor("op_39908_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_39908_end_0 = const()[name = tensor("op_39908_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_39908_end_mask_0 = const()[name = tensor("op_39908_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39908_cast_fp16 = slice_by_index(begin = var_39908_begin_0, end = var_39908_end_0, end_mask = var_39908_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39908_cast_fp16")]; tensor var_39912_begin_0 = const()[name = tensor("op_39912_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_39912_end_0 = const()[name = tensor("op_39912_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_39912_end_mask_0 = const()[name = tensor("op_39912_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39912_cast_fp16 = slice_by_index(begin = var_39912_begin_0, end = var_39912_end_0, end_mask = var_39912_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39912_cast_fp16")]; tensor var_39916_begin_0 = const()[name = tensor("op_39916_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_39916_end_0 = const()[name = tensor("op_39916_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_39916_end_mask_0 = const()[name = tensor("op_39916_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39916_cast_fp16 = slice_by_index(begin = var_39916_begin_0, end = var_39916_end_0, end_mask = var_39916_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39916_cast_fp16")]; tensor var_39920_begin_0 = const()[name = tensor("op_39920_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_39920_end_0 = const()[name = tensor("op_39920_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_39920_end_mask_0 = const()[name = tensor("op_39920_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39920_cast_fp16 = slice_by_index(begin = var_39920_begin_0, end = var_39920_end_0, end_mask = var_39920_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39920_cast_fp16")]; tensor var_39924_begin_0 = const()[name = tensor("op_39924_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_39924_end_0 = const()[name = tensor("op_39924_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_39924_end_mask_0 = const()[name = tensor("op_39924_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39924_cast_fp16 = slice_by_index(begin = var_39924_begin_0, end = var_39924_end_0, end_mask = var_39924_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39924_cast_fp16")]; tensor var_39928_begin_0 = const()[name = tensor("op_39928_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_39928_end_0 = const()[name = tensor("op_39928_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_39928_end_mask_0 = const()[name = tensor("op_39928_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39928_cast_fp16 = slice_by_index(begin = var_39928_begin_0, end = var_39928_end_0, end_mask = var_39928_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39928_cast_fp16")]; tensor var_39932_begin_0 = const()[name = tensor("op_39932_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_39932_end_0 = const()[name = tensor("op_39932_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_39932_end_mask_0 = const()[name = tensor("op_39932_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39932_cast_fp16 = slice_by_index(begin = var_39932_begin_0, end = var_39932_end_0, end_mask = var_39932_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39932_cast_fp16")]; tensor var_39936_begin_0 = const()[name = tensor("op_39936_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_39936_end_0 = const()[name = tensor("op_39936_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_39936_end_mask_0 = const()[name = tensor("op_39936_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39936_cast_fp16 = slice_by_index(begin = var_39936_begin_0, end = var_39936_end_0, end_mask = var_39936_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39936_cast_fp16")]; tensor var_39940_begin_0 = const()[name = tensor("op_39940_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_39940_end_0 = const()[name = tensor("op_39940_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_39940_end_mask_0 = const()[name = tensor("op_39940_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39940_cast_fp16 = slice_by_index(begin = var_39940_begin_0, end = var_39940_end_0, end_mask = var_39940_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39940_cast_fp16")]; tensor var_39944_begin_0 = const()[name = tensor("op_39944_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_39944_end_0 = const()[name = tensor("op_39944_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_39944_end_mask_0 = const()[name = tensor("op_39944_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39944_cast_fp16 = slice_by_index(begin = var_39944_begin_0, end = var_39944_end_0, end_mask = var_39944_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39944_cast_fp16")]; tensor var_39948_begin_0 = const()[name = tensor("op_39948_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_39948_end_0 = const()[name = tensor("op_39948_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_39948_end_mask_0 = const()[name = tensor("op_39948_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39948_cast_fp16 = slice_by_index(begin = var_39948_begin_0, end = var_39948_end_0, end_mask = var_39948_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39948_cast_fp16")]; tensor var_39952_begin_0 = const()[name = tensor("op_39952_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_39952_end_0 = const()[name = tensor("op_39952_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_39952_end_mask_0 = const()[name = tensor("op_39952_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39952_cast_fp16 = slice_by_index(begin = var_39952_begin_0, end = var_39952_end_0, end_mask = var_39952_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39952_cast_fp16")]; tensor var_39956_begin_0 = const()[name = tensor("op_39956_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_39956_end_0 = const()[name = tensor("op_39956_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_39956_end_mask_0 = const()[name = tensor("op_39956_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39956_cast_fp16 = slice_by_index(begin = var_39956_begin_0, end = var_39956_end_0, end_mask = var_39956_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39956_cast_fp16")]; tensor var_39960_begin_0 = const()[name = tensor("op_39960_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_39960_end_0 = const()[name = tensor("op_39960_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_39960_end_mask_0 = const()[name = tensor("op_39960_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39960_cast_fp16 = slice_by_index(begin = var_39960_begin_0, end = var_39960_end_0, end_mask = var_39960_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39960_cast_fp16")]; tensor var_39964_begin_0 = const()[name = tensor("op_39964_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_39964_end_0 = const()[name = tensor("op_39964_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_39964_end_mask_0 = const()[name = tensor("op_39964_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39964_cast_fp16 = slice_by_index(begin = var_39964_begin_0, end = var_39964_end_0, end_mask = var_39964_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39964_cast_fp16")]; tensor var_39968_begin_0 = const()[name = tensor("op_39968_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_39968_end_0 = const()[name = tensor("op_39968_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_39968_end_mask_0 = const()[name = tensor("op_39968_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39968_cast_fp16 = slice_by_index(begin = var_39968_begin_0, end = var_39968_end_0, end_mask = var_39968_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39968_cast_fp16")]; tensor var_39972_begin_0 = const()[name = tensor("op_39972_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_39972_end_0 = const()[name = tensor("op_39972_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_39972_end_mask_0 = const()[name = tensor("op_39972_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39972_cast_fp16 = slice_by_index(begin = var_39972_begin_0, end = var_39972_end_0, end_mask = var_39972_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39972_cast_fp16")]; tensor var_39976_begin_0 = const()[name = tensor("op_39976_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_39976_end_0 = const()[name = tensor("op_39976_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_39976_end_mask_0 = const()[name = tensor("op_39976_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39976_cast_fp16 = slice_by_index(begin = var_39976_begin_0, end = var_39976_end_0, end_mask = var_39976_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39976_cast_fp16")]; tensor var_39980_begin_0 = const()[name = tensor("op_39980_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_39980_end_0 = const()[name = tensor("op_39980_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_39980_end_mask_0 = const()[name = tensor("op_39980_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_39980_cast_fp16 = slice_by_index(begin = var_39980_begin_0, end = var_39980_end_0, end_mask = var_39980_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_39980_cast_fp16")]; tensor var_39989_begin_0 = const()[name = tensor("op_39989_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_39989_end_0 = const()[name = tensor("op_39989_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_39989_end_mask_0 = const()[name = tensor("op_39989_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39989_cast_fp16 = slice_by_index(begin = var_39989_begin_0, end = var_39989_end_0, end_mask = var_39989_end_mask_0, x = var_39904_cast_fp16)[name = tensor("op_39989_cast_fp16")]; tensor var_39996_begin_0 = const()[name = tensor("op_39996_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_39996_end_0 = const()[name = tensor("op_39996_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_39996_end_mask_0 = const()[name = tensor("op_39996_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_39996_cast_fp16 = slice_by_index(begin = var_39996_begin_0, end = var_39996_end_0, end_mask = var_39996_end_mask_0, x = var_39904_cast_fp16)[name = tensor("op_39996_cast_fp16")]; tensor var_40003_begin_0 = const()[name = tensor("op_40003_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40003_end_0 = const()[name = tensor("op_40003_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40003_end_mask_0 = const()[name = tensor("op_40003_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40003_cast_fp16 = slice_by_index(begin = var_40003_begin_0, end = var_40003_end_0, end_mask = var_40003_end_mask_0, x = var_39904_cast_fp16)[name = tensor("op_40003_cast_fp16")]; tensor var_40010_begin_0 = const()[name = tensor("op_40010_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40010_end_0 = const()[name = tensor("op_40010_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40010_end_mask_0 = const()[name = tensor("op_40010_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40010_cast_fp16 = slice_by_index(begin = var_40010_begin_0, end = var_40010_end_0, end_mask = var_40010_end_mask_0, x = var_39904_cast_fp16)[name = tensor("op_40010_cast_fp16")]; tensor var_40017_begin_0 = const()[name = tensor("op_40017_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40017_end_0 = const()[name = tensor("op_40017_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40017_end_mask_0 = const()[name = tensor("op_40017_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40017_cast_fp16 = slice_by_index(begin = var_40017_begin_0, end = var_40017_end_0, end_mask = var_40017_end_mask_0, x = var_39908_cast_fp16)[name = tensor("op_40017_cast_fp16")]; tensor var_40024_begin_0 = const()[name = tensor("op_40024_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40024_end_0 = const()[name = tensor("op_40024_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40024_end_mask_0 = const()[name = tensor("op_40024_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40024_cast_fp16 = slice_by_index(begin = var_40024_begin_0, end = var_40024_end_0, end_mask = var_40024_end_mask_0, x = var_39908_cast_fp16)[name = tensor("op_40024_cast_fp16")]; tensor var_40031_begin_0 = const()[name = tensor("op_40031_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40031_end_0 = const()[name = tensor("op_40031_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40031_end_mask_0 = const()[name = tensor("op_40031_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40031_cast_fp16 = slice_by_index(begin = var_40031_begin_0, end = var_40031_end_0, end_mask = var_40031_end_mask_0, x = var_39908_cast_fp16)[name = tensor("op_40031_cast_fp16")]; tensor var_40038_begin_0 = const()[name = tensor("op_40038_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40038_end_0 = const()[name = tensor("op_40038_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40038_end_mask_0 = const()[name = tensor("op_40038_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40038_cast_fp16 = slice_by_index(begin = var_40038_begin_0, end = var_40038_end_0, end_mask = var_40038_end_mask_0, x = var_39908_cast_fp16)[name = tensor("op_40038_cast_fp16")]; tensor var_40045_begin_0 = const()[name = tensor("op_40045_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40045_end_0 = const()[name = tensor("op_40045_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40045_end_mask_0 = const()[name = tensor("op_40045_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40045_cast_fp16 = slice_by_index(begin = var_40045_begin_0, end = var_40045_end_0, end_mask = var_40045_end_mask_0, x = var_39912_cast_fp16)[name = tensor("op_40045_cast_fp16")]; tensor var_40052_begin_0 = const()[name = tensor("op_40052_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40052_end_0 = const()[name = tensor("op_40052_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40052_end_mask_0 = const()[name = tensor("op_40052_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40052_cast_fp16 = slice_by_index(begin = var_40052_begin_0, end = var_40052_end_0, end_mask = var_40052_end_mask_0, x = var_39912_cast_fp16)[name = tensor("op_40052_cast_fp16")]; tensor var_40059_begin_0 = const()[name = tensor("op_40059_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40059_end_0 = const()[name = tensor("op_40059_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40059_end_mask_0 = const()[name = tensor("op_40059_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40059_cast_fp16 = slice_by_index(begin = var_40059_begin_0, end = var_40059_end_0, end_mask = var_40059_end_mask_0, x = var_39912_cast_fp16)[name = tensor("op_40059_cast_fp16")]; tensor var_40066_begin_0 = const()[name = tensor("op_40066_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40066_end_0 = const()[name = tensor("op_40066_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40066_end_mask_0 = const()[name = tensor("op_40066_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40066_cast_fp16 = slice_by_index(begin = var_40066_begin_0, end = var_40066_end_0, end_mask = var_40066_end_mask_0, x = var_39912_cast_fp16)[name = tensor("op_40066_cast_fp16")]; tensor var_40073_begin_0 = const()[name = tensor("op_40073_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40073_end_0 = const()[name = tensor("op_40073_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40073_end_mask_0 = const()[name = tensor("op_40073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40073_cast_fp16 = slice_by_index(begin = var_40073_begin_0, end = var_40073_end_0, end_mask = var_40073_end_mask_0, x = var_39916_cast_fp16)[name = tensor("op_40073_cast_fp16")]; tensor var_40080_begin_0 = const()[name = tensor("op_40080_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40080_end_0 = const()[name = tensor("op_40080_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40080_end_mask_0 = const()[name = tensor("op_40080_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40080_cast_fp16 = slice_by_index(begin = var_40080_begin_0, end = var_40080_end_0, end_mask = var_40080_end_mask_0, x = var_39916_cast_fp16)[name = tensor("op_40080_cast_fp16")]; tensor var_40087_begin_0 = const()[name = tensor("op_40087_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40087_end_0 = const()[name = tensor("op_40087_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40087_end_mask_0 = const()[name = tensor("op_40087_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40087_cast_fp16 = slice_by_index(begin = var_40087_begin_0, end = var_40087_end_0, end_mask = var_40087_end_mask_0, x = var_39916_cast_fp16)[name = tensor("op_40087_cast_fp16")]; tensor var_40094_begin_0 = const()[name = tensor("op_40094_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40094_end_0 = const()[name = tensor("op_40094_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40094_end_mask_0 = const()[name = tensor("op_40094_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40094_cast_fp16 = slice_by_index(begin = var_40094_begin_0, end = var_40094_end_0, end_mask = var_40094_end_mask_0, x = var_39916_cast_fp16)[name = tensor("op_40094_cast_fp16")]; tensor var_40101_begin_0 = const()[name = tensor("op_40101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40101_end_0 = const()[name = tensor("op_40101_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40101_end_mask_0 = const()[name = tensor("op_40101_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40101_cast_fp16 = slice_by_index(begin = var_40101_begin_0, end = var_40101_end_0, end_mask = var_40101_end_mask_0, x = var_39920_cast_fp16)[name = tensor("op_40101_cast_fp16")]; tensor var_40108_begin_0 = const()[name = tensor("op_40108_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40108_end_0 = const()[name = tensor("op_40108_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40108_end_mask_0 = const()[name = tensor("op_40108_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40108_cast_fp16 = slice_by_index(begin = var_40108_begin_0, end = var_40108_end_0, end_mask = var_40108_end_mask_0, x = var_39920_cast_fp16)[name = tensor("op_40108_cast_fp16")]; tensor var_40115_begin_0 = const()[name = tensor("op_40115_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40115_end_0 = const()[name = tensor("op_40115_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40115_end_mask_0 = const()[name = tensor("op_40115_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40115_cast_fp16 = slice_by_index(begin = var_40115_begin_0, end = var_40115_end_0, end_mask = var_40115_end_mask_0, x = var_39920_cast_fp16)[name = tensor("op_40115_cast_fp16")]; tensor var_40122_begin_0 = const()[name = tensor("op_40122_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40122_end_0 = const()[name = tensor("op_40122_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40122_end_mask_0 = const()[name = tensor("op_40122_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40122_cast_fp16 = slice_by_index(begin = var_40122_begin_0, end = var_40122_end_0, end_mask = var_40122_end_mask_0, x = var_39920_cast_fp16)[name = tensor("op_40122_cast_fp16")]; tensor var_40129_begin_0 = const()[name = tensor("op_40129_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40129_end_0 = const()[name = tensor("op_40129_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40129_end_mask_0 = const()[name = tensor("op_40129_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40129_cast_fp16 = slice_by_index(begin = var_40129_begin_0, end = var_40129_end_0, end_mask = var_40129_end_mask_0, x = var_39924_cast_fp16)[name = tensor("op_40129_cast_fp16")]; tensor var_40136_begin_0 = const()[name = tensor("op_40136_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40136_end_0 = const()[name = tensor("op_40136_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40136_end_mask_0 = const()[name = tensor("op_40136_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40136_cast_fp16 = slice_by_index(begin = var_40136_begin_0, end = var_40136_end_0, end_mask = var_40136_end_mask_0, x = var_39924_cast_fp16)[name = tensor("op_40136_cast_fp16")]; tensor var_40143_begin_0 = const()[name = tensor("op_40143_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40143_end_0 = const()[name = tensor("op_40143_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40143_end_mask_0 = const()[name = tensor("op_40143_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40143_cast_fp16 = slice_by_index(begin = var_40143_begin_0, end = var_40143_end_0, end_mask = var_40143_end_mask_0, x = var_39924_cast_fp16)[name = tensor("op_40143_cast_fp16")]; tensor var_40150_begin_0 = const()[name = tensor("op_40150_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40150_end_0 = const()[name = tensor("op_40150_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40150_end_mask_0 = const()[name = tensor("op_40150_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40150_cast_fp16 = slice_by_index(begin = var_40150_begin_0, end = var_40150_end_0, end_mask = var_40150_end_mask_0, x = var_39924_cast_fp16)[name = tensor("op_40150_cast_fp16")]; tensor var_40157_begin_0 = const()[name = tensor("op_40157_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40157_end_0 = const()[name = tensor("op_40157_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40157_end_mask_0 = const()[name = tensor("op_40157_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40157_cast_fp16 = slice_by_index(begin = var_40157_begin_0, end = var_40157_end_0, end_mask = var_40157_end_mask_0, x = var_39928_cast_fp16)[name = tensor("op_40157_cast_fp16")]; tensor var_40164_begin_0 = const()[name = tensor("op_40164_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40164_end_0 = const()[name = tensor("op_40164_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40164_end_mask_0 = const()[name = tensor("op_40164_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40164_cast_fp16 = slice_by_index(begin = var_40164_begin_0, end = var_40164_end_0, end_mask = var_40164_end_mask_0, x = var_39928_cast_fp16)[name = tensor("op_40164_cast_fp16")]; tensor var_40171_begin_0 = const()[name = tensor("op_40171_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40171_end_0 = const()[name = tensor("op_40171_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40171_end_mask_0 = const()[name = tensor("op_40171_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40171_cast_fp16 = slice_by_index(begin = var_40171_begin_0, end = var_40171_end_0, end_mask = var_40171_end_mask_0, x = var_39928_cast_fp16)[name = tensor("op_40171_cast_fp16")]; tensor var_40178_begin_0 = const()[name = tensor("op_40178_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40178_end_0 = const()[name = tensor("op_40178_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40178_end_mask_0 = const()[name = tensor("op_40178_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40178_cast_fp16 = slice_by_index(begin = var_40178_begin_0, end = var_40178_end_0, end_mask = var_40178_end_mask_0, x = var_39928_cast_fp16)[name = tensor("op_40178_cast_fp16")]; tensor var_40185_begin_0 = const()[name = tensor("op_40185_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40185_end_0 = const()[name = tensor("op_40185_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40185_end_mask_0 = const()[name = tensor("op_40185_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40185_cast_fp16 = slice_by_index(begin = var_40185_begin_0, end = var_40185_end_0, end_mask = var_40185_end_mask_0, x = var_39932_cast_fp16)[name = tensor("op_40185_cast_fp16")]; tensor var_40192_begin_0 = const()[name = tensor("op_40192_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40192_end_0 = const()[name = tensor("op_40192_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40192_end_mask_0 = const()[name = tensor("op_40192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40192_cast_fp16 = slice_by_index(begin = var_40192_begin_0, end = var_40192_end_0, end_mask = var_40192_end_mask_0, x = var_39932_cast_fp16)[name = tensor("op_40192_cast_fp16")]; tensor var_40199_begin_0 = const()[name = tensor("op_40199_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40199_end_0 = const()[name = tensor("op_40199_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40199_end_mask_0 = const()[name = tensor("op_40199_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40199_cast_fp16 = slice_by_index(begin = var_40199_begin_0, end = var_40199_end_0, end_mask = var_40199_end_mask_0, x = var_39932_cast_fp16)[name = tensor("op_40199_cast_fp16")]; tensor var_40206_begin_0 = const()[name = tensor("op_40206_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40206_end_0 = const()[name = tensor("op_40206_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40206_end_mask_0 = const()[name = tensor("op_40206_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40206_cast_fp16 = slice_by_index(begin = var_40206_begin_0, end = var_40206_end_0, end_mask = var_40206_end_mask_0, x = var_39932_cast_fp16)[name = tensor("op_40206_cast_fp16")]; tensor var_40213_begin_0 = const()[name = tensor("op_40213_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40213_end_0 = const()[name = tensor("op_40213_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40213_end_mask_0 = const()[name = tensor("op_40213_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40213_cast_fp16 = slice_by_index(begin = var_40213_begin_0, end = var_40213_end_0, end_mask = var_40213_end_mask_0, x = var_39936_cast_fp16)[name = tensor("op_40213_cast_fp16")]; tensor var_40220_begin_0 = const()[name = tensor("op_40220_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40220_end_0 = const()[name = tensor("op_40220_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40220_end_mask_0 = const()[name = tensor("op_40220_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40220_cast_fp16 = slice_by_index(begin = var_40220_begin_0, end = var_40220_end_0, end_mask = var_40220_end_mask_0, x = var_39936_cast_fp16)[name = tensor("op_40220_cast_fp16")]; tensor var_40227_begin_0 = const()[name = tensor("op_40227_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40227_end_0 = const()[name = tensor("op_40227_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40227_end_mask_0 = const()[name = tensor("op_40227_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40227_cast_fp16 = slice_by_index(begin = var_40227_begin_0, end = var_40227_end_0, end_mask = var_40227_end_mask_0, x = var_39936_cast_fp16)[name = tensor("op_40227_cast_fp16")]; tensor var_40234_begin_0 = const()[name = tensor("op_40234_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40234_end_0 = const()[name = tensor("op_40234_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40234_end_mask_0 = const()[name = tensor("op_40234_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40234_cast_fp16 = slice_by_index(begin = var_40234_begin_0, end = var_40234_end_0, end_mask = var_40234_end_mask_0, x = var_39936_cast_fp16)[name = tensor("op_40234_cast_fp16")]; tensor var_40241_begin_0 = const()[name = tensor("op_40241_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40241_end_0 = const()[name = tensor("op_40241_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40241_end_mask_0 = const()[name = tensor("op_40241_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40241_cast_fp16 = slice_by_index(begin = var_40241_begin_0, end = var_40241_end_0, end_mask = var_40241_end_mask_0, x = var_39940_cast_fp16)[name = tensor("op_40241_cast_fp16")]; tensor var_40248_begin_0 = const()[name = tensor("op_40248_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40248_end_0 = const()[name = tensor("op_40248_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40248_end_mask_0 = const()[name = tensor("op_40248_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40248_cast_fp16 = slice_by_index(begin = var_40248_begin_0, end = var_40248_end_0, end_mask = var_40248_end_mask_0, x = var_39940_cast_fp16)[name = tensor("op_40248_cast_fp16")]; tensor var_40255_begin_0 = const()[name = tensor("op_40255_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40255_end_0 = const()[name = tensor("op_40255_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40255_end_mask_0 = const()[name = tensor("op_40255_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40255_cast_fp16 = slice_by_index(begin = var_40255_begin_0, end = var_40255_end_0, end_mask = var_40255_end_mask_0, x = var_39940_cast_fp16)[name = tensor("op_40255_cast_fp16")]; tensor var_40262_begin_0 = const()[name = tensor("op_40262_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40262_end_0 = const()[name = tensor("op_40262_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40262_end_mask_0 = const()[name = tensor("op_40262_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40262_cast_fp16 = slice_by_index(begin = var_40262_begin_0, end = var_40262_end_0, end_mask = var_40262_end_mask_0, x = var_39940_cast_fp16)[name = tensor("op_40262_cast_fp16")]; tensor var_40269_begin_0 = const()[name = tensor("op_40269_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40269_end_0 = const()[name = tensor("op_40269_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40269_end_mask_0 = const()[name = tensor("op_40269_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40269_cast_fp16 = slice_by_index(begin = var_40269_begin_0, end = var_40269_end_0, end_mask = var_40269_end_mask_0, x = var_39944_cast_fp16)[name = tensor("op_40269_cast_fp16")]; tensor var_40276_begin_0 = const()[name = tensor("op_40276_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40276_end_0 = const()[name = tensor("op_40276_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40276_end_mask_0 = const()[name = tensor("op_40276_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40276_cast_fp16 = slice_by_index(begin = var_40276_begin_0, end = var_40276_end_0, end_mask = var_40276_end_mask_0, x = var_39944_cast_fp16)[name = tensor("op_40276_cast_fp16")]; tensor var_40283_begin_0 = const()[name = tensor("op_40283_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40283_end_0 = const()[name = tensor("op_40283_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40283_end_mask_0 = const()[name = tensor("op_40283_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40283_cast_fp16 = slice_by_index(begin = var_40283_begin_0, end = var_40283_end_0, end_mask = var_40283_end_mask_0, x = var_39944_cast_fp16)[name = tensor("op_40283_cast_fp16")]; tensor var_40290_begin_0 = const()[name = tensor("op_40290_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40290_end_0 = const()[name = tensor("op_40290_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40290_end_mask_0 = const()[name = tensor("op_40290_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40290_cast_fp16 = slice_by_index(begin = var_40290_begin_0, end = var_40290_end_0, end_mask = var_40290_end_mask_0, x = var_39944_cast_fp16)[name = tensor("op_40290_cast_fp16")]; tensor var_40297_begin_0 = const()[name = tensor("op_40297_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40297_end_0 = const()[name = tensor("op_40297_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40297_end_mask_0 = const()[name = tensor("op_40297_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40297_cast_fp16 = slice_by_index(begin = var_40297_begin_0, end = var_40297_end_0, end_mask = var_40297_end_mask_0, x = var_39948_cast_fp16)[name = tensor("op_40297_cast_fp16")]; tensor var_40304_begin_0 = const()[name = tensor("op_40304_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40304_end_0 = const()[name = tensor("op_40304_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40304_end_mask_0 = const()[name = tensor("op_40304_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40304_cast_fp16 = slice_by_index(begin = var_40304_begin_0, end = var_40304_end_0, end_mask = var_40304_end_mask_0, x = var_39948_cast_fp16)[name = tensor("op_40304_cast_fp16")]; tensor var_40311_begin_0 = const()[name = tensor("op_40311_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40311_end_0 = const()[name = tensor("op_40311_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40311_end_mask_0 = const()[name = tensor("op_40311_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40311_cast_fp16 = slice_by_index(begin = var_40311_begin_0, end = var_40311_end_0, end_mask = var_40311_end_mask_0, x = var_39948_cast_fp16)[name = tensor("op_40311_cast_fp16")]; tensor var_40318_begin_0 = const()[name = tensor("op_40318_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40318_end_0 = const()[name = tensor("op_40318_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40318_end_mask_0 = const()[name = tensor("op_40318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40318_cast_fp16 = slice_by_index(begin = var_40318_begin_0, end = var_40318_end_0, end_mask = var_40318_end_mask_0, x = var_39948_cast_fp16)[name = tensor("op_40318_cast_fp16")]; tensor var_40325_begin_0 = const()[name = tensor("op_40325_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40325_end_0 = const()[name = tensor("op_40325_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40325_end_mask_0 = const()[name = tensor("op_40325_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40325_cast_fp16 = slice_by_index(begin = var_40325_begin_0, end = var_40325_end_0, end_mask = var_40325_end_mask_0, x = var_39952_cast_fp16)[name = tensor("op_40325_cast_fp16")]; tensor var_40332_begin_0 = const()[name = tensor("op_40332_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40332_end_0 = const()[name = tensor("op_40332_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40332_end_mask_0 = const()[name = tensor("op_40332_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40332_cast_fp16 = slice_by_index(begin = var_40332_begin_0, end = var_40332_end_0, end_mask = var_40332_end_mask_0, x = var_39952_cast_fp16)[name = tensor("op_40332_cast_fp16")]; tensor var_40339_begin_0 = const()[name = tensor("op_40339_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40339_end_0 = const()[name = tensor("op_40339_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40339_end_mask_0 = const()[name = tensor("op_40339_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40339_cast_fp16 = slice_by_index(begin = var_40339_begin_0, end = var_40339_end_0, end_mask = var_40339_end_mask_0, x = var_39952_cast_fp16)[name = tensor("op_40339_cast_fp16")]; tensor var_40346_begin_0 = const()[name = tensor("op_40346_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40346_end_0 = const()[name = tensor("op_40346_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40346_end_mask_0 = const()[name = tensor("op_40346_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40346_cast_fp16 = slice_by_index(begin = var_40346_begin_0, end = var_40346_end_0, end_mask = var_40346_end_mask_0, x = var_39952_cast_fp16)[name = tensor("op_40346_cast_fp16")]; tensor var_40353_begin_0 = const()[name = tensor("op_40353_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40353_end_0 = const()[name = tensor("op_40353_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40353_end_mask_0 = const()[name = tensor("op_40353_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40353_cast_fp16 = slice_by_index(begin = var_40353_begin_0, end = var_40353_end_0, end_mask = var_40353_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40353_cast_fp16")]; tensor var_40360_begin_0 = const()[name = tensor("op_40360_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40360_end_0 = const()[name = tensor("op_40360_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40360_end_mask_0 = const()[name = tensor("op_40360_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40360_cast_fp16 = slice_by_index(begin = var_40360_begin_0, end = var_40360_end_0, end_mask = var_40360_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40360_cast_fp16")]; tensor var_40367_begin_0 = const()[name = tensor("op_40367_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40367_end_0 = const()[name = tensor("op_40367_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40367_end_mask_0 = const()[name = tensor("op_40367_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40367_cast_fp16 = slice_by_index(begin = var_40367_begin_0, end = var_40367_end_0, end_mask = var_40367_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40367_cast_fp16")]; tensor var_40374_begin_0 = const()[name = tensor("op_40374_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40374_end_0 = const()[name = tensor("op_40374_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40374_end_mask_0 = const()[name = tensor("op_40374_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40374_cast_fp16 = slice_by_index(begin = var_40374_begin_0, end = var_40374_end_0, end_mask = var_40374_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40374_cast_fp16")]; tensor var_40381_begin_0 = const()[name = tensor("op_40381_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40381_end_0 = const()[name = tensor("op_40381_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40381_end_mask_0 = const()[name = tensor("op_40381_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40381_cast_fp16 = slice_by_index(begin = var_40381_begin_0, end = var_40381_end_0, end_mask = var_40381_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40381_cast_fp16")]; tensor var_40388_begin_0 = const()[name = tensor("op_40388_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40388_end_0 = const()[name = tensor("op_40388_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40388_end_mask_0 = const()[name = tensor("op_40388_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40388_cast_fp16 = slice_by_index(begin = var_40388_begin_0, end = var_40388_end_0, end_mask = var_40388_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40388_cast_fp16")]; tensor var_40395_begin_0 = const()[name = tensor("op_40395_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40395_end_0 = const()[name = tensor("op_40395_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40395_end_mask_0 = const()[name = tensor("op_40395_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40395_cast_fp16 = slice_by_index(begin = var_40395_begin_0, end = var_40395_end_0, end_mask = var_40395_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40395_cast_fp16")]; tensor var_40402_begin_0 = const()[name = tensor("op_40402_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40402_end_0 = const()[name = tensor("op_40402_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40402_end_mask_0 = const()[name = tensor("op_40402_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40402_cast_fp16 = slice_by_index(begin = var_40402_begin_0, end = var_40402_end_0, end_mask = var_40402_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40402_cast_fp16")]; tensor var_40409_begin_0 = const()[name = tensor("op_40409_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40409_end_0 = const()[name = tensor("op_40409_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40409_end_mask_0 = const()[name = tensor("op_40409_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40409_cast_fp16 = slice_by_index(begin = var_40409_begin_0, end = var_40409_end_0, end_mask = var_40409_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40409_cast_fp16")]; tensor var_40416_begin_0 = const()[name = tensor("op_40416_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40416_end_0 = const()[name = tensor("op_40416_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40416_end_mask_0 = const()[name = tensor("op_40416_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40416_cast_fp16 = slice_by_index(begin = var_40416_begin_0, end = var_40416_end_0, end_mask = var_40416_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40416_cast_fp16")]; tensor var_40423_begin_0 = const()[name = tensor("op_40423_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40423_end_0 = const()[name = tensor("op_40423_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40423_end_mask_0 = const()[name = tensor("op_40423_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40423_cast_fp16 = slice_by_index(begin = var_40423_begin_0, end = var_40423_end_0, end_mask = var_40423_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40423_cast_fp16")]; tensor var_40430_begin_0 = const()[name = tensor("op_40430_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40430_end_0 = const()[name = tensor("op_40430_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40430_end_mask_0 = const()[name = tensor("op_40430_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40430_cast_fp16 = slice_by_index(begin = var_40430_begin_0, end = var_40430_end_0, end_mask = var_40430_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40430_cast_fp16")]; tensor var_40437_begin_0 = const()[name = tensor("op_40437_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40437_end_0 = const()[name = tensor("op_40437_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40437_end_mask_0 = const()[name = tensor("op_40437_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40437_cast_fp16 = slice_by_index(begin = var_40437_begin_0, end = var_40437_end_0, end_mask = var_40437_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40437_cast_fp16")]; tensor var_40444_begin_0 = const()[name = tensor("op_40444_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40444_end_0 = const()[name = tensor("op_40444_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40444_end_mask_0 = const()[name = tensor("op_40444_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40444_cast_fp16 = slice_by_index(begin = var_40444_begin_0, end = var_40444_end_0, end_mask = var_40444_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40444_cast_fp16")]; tensor var_40451_begin_0 = const()[name = tensor("op_40451_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40451_end_0 = const()[name = tensor("op_40451_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40451_end_mask_0 = const()[name = tensor("op_40451_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40451_cast_fp16 = slice_by_index(begin = var_40451_begin_0, end = var_40451_end_0, end_mask = var_40451_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40451_cast_fp16")]; tensor var_40458_begin_0 = const()[name = tensor("op_40458_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40458_end_0 = const()[name = tensor("op_40458_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40458_end_mask_0 = const()[name = tensor("op_40458_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40458_cast_fp16 = slice_by_index(begin = var_40458_begin_0, end = var_40458_end_0, end_mask = var_40458_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40458_cast_fp16")]; tensor var_40465_begin_0 = const()[name = tensor("op_40465_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40465_end_0 = const()[name = tensor("op_40465_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40465_end_mask_0 = const()[name = tensor("op_40465_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40465_cast_fp16 = slice_by_index(begin = var_40465_begin_0, end = var_40465_end_0, end_mask = var_40465_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40465_cast_fp16")]; tensor var_40472_begin_0 = const()[name = tensor("op_40472_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40472_end_0 = const()[name = tensor("op_40472_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40472_end_mask_0 = const()[name = tensor("op_40472_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40472_cast_fp16 = slice_by_index(begin = var_40472_begin_0, end = var_40472_end_0, end_mask = var_40472_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40472_cast_fp16")]; tensor var_40479_begin_0 = const()[name = tensor("op_40479_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40479_end_0 = const()[name = tensor("op_40479_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40479_end_mask_0 = const()[name = tensor("op_40479_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40479_cast_fp16 = slice_by_index(begin = var_40479_begin_0, end = var_40479_end_0, end_mask = var_40479_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40479_cast_fp16")]; tensor var_40486_begin_0 = const()[name = tensor("op_40486_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40486_end_0 = const()[name = tensor("op_40486_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40486_end_mask_0 = const()[name = tensor("op_40486_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40486_cast_fp16 = slice_by_index(begin = var_40486_begin_0, end = var_40486_end_0, end_mask = var_40486_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40486_cast_fp16")]; tensor var_40493_begin_0 = const()[name = tensor("op_40493_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40493_end_0 = const()[name = tensor("op_40493_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40493_end_mask_0 = const()[name = tensor("op_40493_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40493_cast_fp16 = slice_by_index(begin = var_40493_begin_0, end = var_40493_end_0, end_mask = var_40493_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40493_cast_fp16")]; tensor var_40500_begin_0 = const()[name = tensor("op_40500_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40500_end_0 = const()[name = tensor("op_40500_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40500_end_mask_0 = const()[name = tensor("op_40500_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40500_cast_fp16 = slice_by_index(begin = var_40500_begin_0, end = var_40500_end_0, end_mask = var_40500_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40500_cast_fp16")]; tensor var_40507_begin_0 = const()[name = tensor("op_40507_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40507_end_0 = const()[name = tensor("op_40507_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40507_end_mask_0 = const()[name = tensor("op_40507_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40507_cast_fp16 = slice_by_index(begin = var_40507_begin_0, end = var_40507_end_0, end_mask = var_40507_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40507_cast_fp16")]; tensor var_40514_begin_0 = const()[name = tensor("op_40514_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40514_end_0 = const()[name = tensor("op_40514_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40514_end_mask_0 = const()[name = tensor("op_40514_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40514_cast_fp16 = slice_by_index(begin = var_40514_begin_0, end = var_40514_end_0, end_mask = var_40514_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40514_cast_fp16")]; tensor var_40521_begin_0 = const()[name = tensor("op_40521_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40521_end_0 = const()[name = tensor("op_40521_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_40521_end_mask_0 = const()[name = tensor("op_40521_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40521_cast_fp16 = slice_by_index(begin = var_40521_begin_0, end = var_40521_end_0, end_mask = var_40521_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40521_cast_fp16")]; tensor var_40528_begin_0 = const()[name = tensor("op_40528_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_40528_end_0 = const()[name = tensor("op_40528_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_40528_end_mask_0 = const()[name = tensor("op_40528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40528_cast_fp16 = slice_by_index(begin = var_40528_begin_0, end = var_40528_end_0, end_mask = var_40528_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40528_cast_fp16")]; tensor var_40535_begin_0 = const()[name = tensor("op_40535_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_40535_end_0 = const()[name = tensor("op_40535_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_40535_end_mask_0 = const()[name = tensor("op_40535_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40535_cast_fp16 = slice_by_index(begin = var_40535_begin_0, end = var_40535_end_0, end_mask = var_40535_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40535_cast_fp16")]; tensor var_40542_begin_0 = const()[name = tensor("op_40542_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_40542_end_0 = const()[name = tensor("op_40542_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40542_end_mask_0 = const()[name = tensor("op_40542_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40542_cast_fp16 = slice_by_index(begin = var_40542_begin_0, end = var_40542_end_0, end_mask = var_40542_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40542_cast_fp16")]; tensor k_51_perm_0 = const()[name = tensor("k_51_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_40547_begin_0 = const()[name = tensor("op_40547_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40547_end_0 = const()[name = tensor("op_40547_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_40547_end_mask_0 = const()[name = tensor("op_40547_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_51_cast_fp16 = transpose(perm = k_51_perm_0, x = key_51_cast_fp16)[name = tensor("transpose_6")]; tensor var_40547_cast_fp16 = slice_by_index(begin = var_40547_begin_0, end = var_40547_end_0, end_mask = var_40547_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40547_cast_fp16")]; tensor var_40551_begin_0 = const()[name = tensor("op_40551_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_40551_end_0 = const()[name = tensor("op_40551_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_40551_end_mask_0 = const()[name = tensor("op_40551_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40551_cast_fp16 = slice_by_index(begin = var_40551_begin_0, end = var_40551_end_0, end_mask = var_40551_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40551_cast_fp16")]; tensor var_40555_begin_0 = const()[name = tensor("op_40555_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_40555_end_0 = const()[name = tensor("op_40555_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_40555_end_mask_0 = const()[name = tensor("op_40555_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40555_cast_fp16 = slice_by_index(begin = var_40555_begin_0, end = var_40555_end_0, end_mask = var_40555_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40555_cast_fp16")]; tensor var_40559_begin_0 = const()[name = tensor("op_40559_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_40559_end_0 = const()[name = tensor("op_40559_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_40559_end_mask_0 = const()[name = tensor("op_40559_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40559_cast_fp16 = slice_by_index(begin = var_40559_begin_0, end = var_40559_end_0, end_mask = var_40559_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40559_cast_fp16")]; tensor var_40563_begin_0 = const()[name = tensor("op_40563_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_40563_end_0 = const()[name = tensor("op_40563_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_40563_end_mask_0 = const()[name = tensor("op_40563_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40563_cast_fp16 = slice_by_index(begin = var_40563_begin_0, end = var_40563_end_0, end_mask = var_40563_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40563_cast_fp16")]; tensor var_40567_begin_0 = const()[name = tensor("op_40567_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_40567_end_0 = const()[name = tensor("op_40567_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_40567_end_mask_0 = const()[name = tensor("op_40567_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40567_cast_fp16 = slice_by_index(begin = var_40567_begin_0, end = var_40567_end_0, end_mask = var_40567_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40567_cast_fp16")]; tensor var_40571_begin_0 = const()[name = tensor("op_40571_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_40571_end_0 = const()[name = tensor("op_40571_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_40571_end_mask_0 = const()[name = tensor("op_40571_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40571_cast_fp16 = slice_by_index(begin = var_40571_begin_0, end = var_40571_end_0, end_mask = var_40571_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40571_cast_fp16")]; tensor var_40575_begin_0 = const()[name = tensor("op_40575_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_40575_end_0 = const()[name = tensor("op_40575_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_40575_end_mask_0 = const()[name = tensor("op_40575_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40575_cast_fp16 = slice_by_index(begin = var_40575_begin_0, end = var_40575_end_0, end_mask = var_40575_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40575_cast_fp16")]; tensor var_40579_begin_0 = const()[name = tensor("op_40579_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_40579_end_0 = const()[name = tensor("op_40579_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_40579_end_mask_0 = const()[name = tensor("op_40579_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40579_cast_fp16 = slice_by_index(begin = var_40579_begin_0, end = var_40579_end_0, end_mask = var_40579_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40579_cast_fp16")]; tensor var_40583_begin_0 = const()[name = tensor("op_40583_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_40583_end_0 = const()[name = tensor("op_40583_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_40583_end_mask_0 = const()[name = tensor("op_40583_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40583_cast_fp16 = slice_by_index(begin = var_40583_begin_0, end = var_40583_end_0, end_mask = var_40583_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40583_cast_fp16")]; tensor var_40587_begin_0 = const()[name = tensor("op_40587_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_40587_end_0 = const()[name = tensor("op_40587_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_40587_end_mask_0 = const()[name = tensor("op_40587_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40587_cast_fp16 = slice_by_index(begin = var_40587_begin_0, end = var_40587_end_0, end_mask = var_40587_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40587_cast_fp16")]; tensor var_40591_begin_0 = const()[name = tensor("op_40591_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_40591_end_0 = const()[name = tensor("op_40591_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_40591_end_mask_0 = const()[name = tensor("op_40591_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40591_cast_fp16 = slice_by_index(begin = var_40591_begin_0, end = var_40591_end_0, end_mask = var_40591_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40591_cast_fp16")]; tensor var_40595_begin_0 = const()[name = tensor("op_40595_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_40595_end_0 = const()[name = tensor("op_40595_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_40595_end_mask_0 = const()[name = tensor("op_40595_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40595_cast_fp16 = slice_by_index(begin = var_40595_begin_0, end = var_40595_end_0, end_mask = var_40595_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40595_cast_fp16")]; tensor var_40599_begin_0 = const()[name = tensor("op_40599_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_40599_end_0 = const()[name = tensor("op_40599_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_40599_end_mask_0 = const()[name = tensor("op_40599_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40599_cast_fp16 = slice_by_index(begin = var_40599_begin_0, end = var_40599_end_0, end_mask = var_40599_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40599_cast_fp16")]; tensor var_40603_begin_0 = const()[name = tensor("op_40603_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_40603_end_0 = const()[name = tensor("op_40603_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_40603_end_mask_0 = const()[name = tensor("op_40603_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40603_cast_fp16 = slice_by_index(begin = var_40603_begin_0, end = var_40603_end_0, end_mask = var_40603_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40603_cast_fp16")]; tensor var_40607_begin_0 = const()[name = tensor("op_40607_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_40607_end_0 = const()[name = tensor("op_40607_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_40607_end_mask_0 = const()[name = tensor("op_40607_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40607_cast_fp16 = slice_by_index(begin = var_40607_begin_0, end = var_40607_end_0, end_mask = var_40607_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40607_cast_fp16")]; tensor var_40611_begin_0 = const()[name = tensor("op_40611_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_40611_end_0 = const()[name = tensor("op_40611_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_40611_end_mask_0 = const()[name = tensor("op_40611_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40611_cast_fp16 = slice_by_index(begin = var_40611_begin_0, end = var_40611_end_0, end_mask = var_40611_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40611_cast_fp16")]; tensor var_40615_begin_0 = const()[name = tensor("op_40615_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_40615_end_0 = const()[name = tensor("op_40615_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_40615_end_mask_0 = const()[name = tensor("op_40615_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40615_cast_fp16 = slice_by_index(begin = var_40615_begin_0, end = var_40615_end_0, end_mask = var_40615_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40615_cast_fp16")]; tensor var_40619_begin_0 = const()[name = tensor("op_40619_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_40619_end_0 = const()[name = tensor("op_40619_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_40619_end_mask_0 = const()[name = tensor("op_40619_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40619_cast_fp16 = slice_by_index(begin = var_40619_begin_0, end = var_40619_end_0, end_mask = var_40619_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40619_cast_fp16")]; tensor var_40623_begin_0 = const()[name = tensor("op_40623_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_40623_end_0 = const()[name = tensor("op_40623_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_40623_end_mask_0 = const()[name = tensor("op_40623_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_40623_cast_fp16 = slice_by_index(begin = var_40623_begin_0, end = var_40623_end_0, end_mask = var_40623_end_mask_0, x = k_51_cast_fp16)[name = tensor("op_40623_cast_fp16")]; tensor var_40625_begin_0 = const()[name = tensor("op_40625_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_40625_end_0 = const()[name = tensor("op_40625_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_40625_end_mask_0 = const()[name = tensor("op_40625_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40625_cast_fp16 = slice_by_index(begin = var_40625_begin_0, end = var_40625_end_0, end_mask = var_40625_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40625_cast_fp16")]; tensor var_40629_begin_0 = const()[name = tensor("op_40629_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_40629_end_0 = const()[name = tensor("op_40629_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_40629_end_mask_0 = const()[name = tensor("op_40629_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40629_cast_fp16 = slice_by_index(begin = var_40629_begin_0, end = var_40629_end_0, end_mask = var_40629_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40629_cast_fp16")]; tensor var_40633_begin_0 = const()[name = tensor("op_40633_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_40633_end_0 = const()[name = tensor("op_40633_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_40633_end_mask_0 = const()[name = tensor("op_40633_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40633_cast_fp16 = slice_by_index(begin = var_40633_begin_0, end = var_40633_end_0, end_mask = var_40633_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40633_cast_fp16")]; tensor var_40637_begin_0 = const()[name = tensor("op_40637_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_40637_end_0 = const()[name = tensor("op_40637_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_40637_end_mask_0 = const()[name = tensor("op_40637_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40637_cast_fp16 = slice_by_index(begin = var_40637_begin_0, end = var_40637_end_0, end_mask = var_40637_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40637_cast_fp16")]; tensor var_40641_begin_0 = const()[name = tensor("op_40641_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_40641_end_0 = const()[name = tensor("op_40641_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_40641_end_mask_0 = const()[name = tensor("op_40641_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40641_cast_fp16 = slice_by_index(begin = var_40641_begin_0, end = var_40641_end_0, end_mask = var_40641_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40641_cast_fp16")]; tensor var_40645_begin_0 = const()[name = tensor("op_40645_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_40645_end_0 = const()[name = tensor("op_40645_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_40645_end_mask_0 = const()[name = tensor("op_40645_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40645_cast_fp16 = slice_by_index(begin = var_40645_begin_0, end = var_40645_end_0, end_mask = var_40645_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40645_cast_fp16")]; tensor var_40649_begin_0 = const()[name = tensor("op_40649_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_40649_end_0 = const()[name = tensor("op_40649_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_40649_end_mask_0 = const()[name = tensor("op_40649_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40649_cast_fp16 = slice_by_index(begin = var_40649_begin_0, end = var_40649_end_0, end_mask = var_40649_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40649_cast_fp16")]; tensor var_40653_begin_0 = const()[name = tensor("op_40653_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_40653_end_0 = const()[name = tensor("op_40653_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_40653_end_mask_0 = const()[name = tensor("op_40653_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40653_cast_fp16 = slice_by_index(begin = var_40653_begin_0, end = var_40653_end_0, end_mask = var_40653_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40653_cast_fp16")]; tensor var_40657_begin_0 = const()[name = tensor("op_40657_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_40657_end_0 = const()[name = tensor("op_40657_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_40657_end_mask_0 = const()[name = tensor("op_40657_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40657_cast_fp16 = slice_by_index(begin = var_40657_begin_0, end = var_40657_end_0, end_mask = var_40657_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40657_cast_fp16")]; tensor var_40661_begin_0 = const()[name = tensor("op_40661_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_40661_end_0 = const()[name = tensor("op_40661_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_40661_end_mask_0 = const()[name = tensor("op_40661_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40661_cast_fp16 = slice_by_index(begin = var_40661_begin_0, end = var_40661_end_0, end_mask = var_40661_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40661_cast_fp16")]; tensor var_40665_begin_0 = const()[name = tensor("op_40665_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_40665_end_0 = const()[name = tensor("op_40665_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_40665_end_mask_0 = const()[name = tensor("op_40665_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40665_cast_fp16 = slice_by_index(begin = var_40665_begin_0, end = var_40665_end_0, end_mask = var_40665_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40665_cast_fp16")]; tensor var_40669_begin_0 = const()[name = tensor("op_40669_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_40669_end_0 = const()[name = tensor("op_40669_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_40669_end_mask_0 = const()[name = tensor("op_40669_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40669_cast_fp16 = slice_by_index(begin = var_40669_begin_0, end = var_40669_end_0, end_mask = var_40669_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40669_cast_fp16")]; tensor var_40673_begin_0 = const()[name = tensor("op_40673_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_40673_end_0 = const()[name = tensor("op_40673_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_40673_end_mask_0 = const()[name = tensor("op_40673_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40673_cast_fp16 = slice_by_index(begin = var_40673_begin_0, end = var_40673_end_0, end_mask = var_40673_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40673_cast_fp16")]; tensor var_40677_begin_0 = const()[name = tensor("op_40677_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_40677_end_0 = const()[name = tensor("op_40677_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_40677_end_mask_0 = const()[name = tensor("op_40677_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40677_cast_fp16 = slice_by_index(begin = var_40677_begin_0, end = var_40677_end_0, end_mask = var_40677_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40677_cast_fp16")]; tensor var_40681_begin_0 = const()[name = tensor("op_40681_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_40681_end_0 = const()[name = tensor("op_40681_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_40681_end_mask_0 = const()[name = tensor("op_40681_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40681_cast_fp16 = slice_by_index(begin = var_40681_begin_0, end = var_40681_end_0, end_mask = var_40681_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40681_cast_fp16")]; tensor var_40685_begin_0 = const()[name = tensor("op_40685_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_40685_end_0 = const()[name = tensor("op_40685_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_40685_end_mask_0 = const()[name = tensor("op_40685_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40685_cast_fp16 = slice_by_index(begin = var_40685_begin_0, end = var_40685_end_0, end_mask = var_40685_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40685_cast_fp16")]; tensor var_40689_begin_0 = const()[name = tensor("op_40689_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_40689_end_0 = const()[name = tensor("op_40689_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_40689_end_mask_0 = const()[name = tensor("op_40689_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40689_cast_fp16 = slice_by_index(begin = var_40689_begin_0, end = var_40689_end_0, end_mask = var_40689_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40689_cast_fp16")]; tensor var_40693_begin_0 = const()[name = tensor("op_40693_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_40693_end_0 = const()[name = tensor("op_40693_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_40693_end_mask_0 = const()[name = tensor("op_40693_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40693_cast_fp16 = slice_by_index(begin = var_40693_begin_0, end = var_40693_end_0, end_mask = var_40693_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40693_cast_fp16")]; tensor var_40697_begin_0 = const()[name = tensor("op_40697_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_40697_end_0 = const()[name = tensor("op_40697_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_40697_end_mask_0 = const()[name = tensor("op_40697_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40697_cast_fp16 = slice_by_index(begin = var_40697_begin_0, end = var_40697_end_0, end_mask = var_40697_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40697_cast_fp16")]; tensor var_40701_begin_0 = const()[name = tensor("op_40701_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_40701_end_0 = const()[name = tensor("op_40701_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_40701_end_mask_0 = const()[name = tensor("op_40701_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_40701_cast_fp16 = slice_by_index(begin = var_40701_begin_0, end = var_40701_end_0, end_mask = var_40701_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_40701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4001_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4001_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4001_equation_0, values = (var_40547_cast_fp16, var_39989_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4001_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4003_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4003_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4003_equation_0, values = (var_40547_cast_fp16, var_39996_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4003_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4005_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4005_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4005_equation_0, values = (var_40547_cast_fp16, var_40003_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4005_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4007_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4007_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4007_equation_0, values = (var_40547_cast_fp16, var_40010_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4007_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4009_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4009_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4009_equation_0, values = (var_40551_cast_fp16, var_40017_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4009_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4011_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4011_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4011_equation_0, values = (var_40551_cast_fp16, var_40024_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4011_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4013_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4013_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4013_equation_0, values = (var_40551_cast_fp16, var_40031_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4013_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4015_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4015_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4015_equation_0, values = (var_40551_cast_fp16, var_40038_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4015_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4017_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4017_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4017_equation_0, values = (var_40555_cast_fp16, var_40045_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4017_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4019_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4019_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4019_equation_0, values = (var_40555_cast_fp16, var_40052_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4019_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4021_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4021_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4021_equation_0, values = (var_40555_cast_fp16, var_40059_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4021_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4023_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4023_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4023_equation_0, values = (var_40555_cast_fp16, var_40066_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4023_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4025_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4025_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4025_equation_0, values = (var_40559_cast_fp16, var_40073_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4025_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4027_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4027_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4027_equation_0, values = (var_40559_cast_fp16, var_40080_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4027_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4029_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4029_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4029_equation_0, values = (var_40559_cast_fp16, var_40087_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4029_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4031_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4031_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4031_equation_0, values = (var_40559_cast_fp16, var_40094_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4031_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4033_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4033_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4033_equation_0, values = (var_40563_cast_fp16, var_40101_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4033_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4035_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4035_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4035_equation_0, values = (var_40563_cast_fp16, var_40108_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4035_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4037_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4037_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4037_equation_0, values = (var_40563_cast_fp16, var_40115_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4037_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4039_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4039_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4039_equation_0, values = (var_40563_cast_fp16, var_40122_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4039_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4041_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4041_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4041_equation_0, values = (var_40567_cast_fp16, var_40129_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4041_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4043_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4043_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4043_equation_0, values = (var_40567_cast_fp16, var_40136_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4043_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4045_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4045_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4045_equation_0, values = (var_40567_cast_fp16, var_40143_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4045_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4047_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4047_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4047_equation_0, values = (var_40567_cast_fp16, var_40150_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4047_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4049_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4049_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4049_equation_0, values = (var_40571_cast_fp16, var_40157_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4049_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4051_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4051_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4051_equation_0, values = (var_40571_cast_fp16, var_40164_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4051_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4053_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4053_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4053_equation_0, values = (var_40571_cast_fp16, var_40171_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4053_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4055_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4055_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4055_equation_0, values = (var_40571_cast_fp16, var_40178_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4055_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4057_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4057_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4057_equation_0, values = (var_40575_cast_fp16, var_40185_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4057_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4059_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4059_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4059_equation_0, values = (var_40575_cast_fp16, var_40192_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4059_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4061_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4061_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4061_equation_0, values = (var_40575_cast_fp16, var_40199_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4061_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4063_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4063_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4063_equation_0, values = (var_40575_cast_fp16, var_40206_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4063_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4065_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4065_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4065_equation_0, values = (var_40579_cast_fp16, var_40213_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4065_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4067_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4067_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4067_equation_0, values = (var_40579_cast_fp16, var_40220_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4067_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4069_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4069_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4069_equation_0, values = (var_40579_cast_fp16, var_40227_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4069_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4071_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4071_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4071_equation_0, values = (var_40579_cast_fp16, var_40234_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4071_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4073_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4073_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4073_equation_0, values = (var_40583_cast_fp16, var_40241_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4073_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4075_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4075_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4075_equation_0, values = (var_40583_cast_fp16, var_40248_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4075_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4077_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4077_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4077_equation_0, values = (var_40583_cast_fp16, var_40255_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4077_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4079_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4079_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4079_equation_0, values = (var_40583_cast_fp16, var_40262_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4079_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4081_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4081_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4081_equation_0, values = (var_40587_cast_fp16, var_40269_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4081_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4083_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4083_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4083_equation_0, values = (var_40587_cast_fp16, var_40276_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4083_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4085_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4085_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4085_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4085_equation_0, values = (var_40587_cast_fp16, var_40283_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4085_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4087_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4087_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4087_equation_0, values = (var_40587_cast_fp16, var_40290_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4087_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4089_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4089_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4089_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4089_equation_0, values = (var_40591_cast_fp16, var_40297_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4089_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4091_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4091_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4091_equation_0, values = (var_40591_cast_fp16, var_40304_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4091_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4093_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4093_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4093_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4093_equation_0, values = (var_40591_cast_fp16, var_40311_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4093_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4095_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4095_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4095_equation_0, values = (var_40591_cast_fp16, var_40318_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4095_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4097_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4097_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4097_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4097_equation_0, values = (var_40595_cast_fp16, var_40325_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4097_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4099_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4099_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4099_equation_0, values = (var_40595_cast_fp16, var_40332_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4099_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4101_equation_0, values = (var_40595_cast_fp16, var_40339_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4103_equation_0, values = (var_40595_cast_fp16, var_40346_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4105_equation_0, values = (var_40599_cast_fp16, var_40353_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4107_equation_0, values = (var_40599_cast_fp16, var_40360_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4109_equation_0, values = (var_40599_cast_fp16, var_40367_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4111_equation_0, values = (var_40599_cast_fp16, var_40374_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4113_equation_0, values = (var_40603_cast_fp16, var_40381_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4115_equation_0, values = (var_40603_cast_fp16, var_40388_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4117_equation_0, values = (var_40603_cast_fp16, var_40395_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4119_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4119_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4119_equation_0, values = (var_40603_cast_fp16, var_40402_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4119_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4121_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4121_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4121_equation_0, values = (var_40607_cast_fp16, var_40409_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4121_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4123_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4123_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4123_equation_0, values = (var_40607_cast_fp16, var_40416_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4123_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4125_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4125_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4125_equation_0, values = (var_40607_cast_fp16, var_40423_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4125_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4127_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4127_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4127_equation_0, values = (var_40607_cast_fp16, var_40430_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4127_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4129_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4129_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4129_equation_0, values = (var_40611_cast_fp16, var_40437_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4129_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4131_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4131_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4131_equation_0, values = (var_40611_cast_fp16, var_40444_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4131_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4133_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4133_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4133_equation_0, values = (var_40611_cast_fp16, var_40451_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4133_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4135_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4135_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4135_equation_0, values = (var_40611_cast_fp16, var_40458_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4135_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4137_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4137_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4137_equation_0, values = (var_40615_cast_fp16, var_40465_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4137_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4139_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4139_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4139_equation_0, values = (var_40615_cast_fp16, var_40472_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4139_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4141_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4141_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4141_equation_0, values = (var_40615_cast_fp16, var_40479_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4141_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4143_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4143_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4143_equation_0, values = (var_40615_cast_fp16, var_40486_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4143_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4145_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4145_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4145_equation_0, values = (var_40619_cast_fp16, var_40493_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4145_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4147_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4147_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4147_equation_0, values = (var_40619_cast_fp16, var_40500_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4147_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4149_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4149_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4149_equation_0, values = (var_40619_cast_fp16, var_40507_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4149_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4151_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4151_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4151_equation_0, values = (var_40619_cast_fp16, var_40514_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4151_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4153_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4153_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4153_equation_0, values = (var_40623_cast_fp16, var_40521_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4153_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4155_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4155_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4155_equation_0, values = (var_40623_cast_fp16, var_40528_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4155_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4157_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4157_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4157_equation_0, values = (var_40623_cast_fp16, var_40535_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4157_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4159_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4159_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4159_equation_0, values = (var_40623_cast_fp16, var_40542_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4159_cast_fp16")]; tensor var_40864_to_fp16 = const()[name = tensor("op_40864_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4001_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4001_cast_fp16, y = var_40864_to_fp16)[name = tensor("aw_chunk_4001_cast_fp16")]; tensor var_40866_to_fp16 = const()[name = tensor("op_40866_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4003_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4003_cast_fp16, y = var_40866_to_fp16)[name = tensor("aw_chunk_4003_cast_fp16")]; tensor var_40868_to_fp16 = const()[name = tensor("op_40868_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4005_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4005_cast_fp16, y = var_40868_to_fp16)[name = tensor("aw_chunk_4005_cast_fp16")]; tensor var_40870_to_fp16 = const()[name = tensor("op_40870_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4007_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4007_cast_fp16, y = var_40870_to_fp16)[name = tensor("aw_chunk_4007_cast_fp16")]; tensor var_40872_to_fp16 = const()[name = tensor("op_40872_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4009_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4009_cast_fp16, y = var_40872_to_fp16)[name = tensor("aw_chunk_4009_cast_fp16")]; tensor var_40874_to_fp16 = const()[name = tensor("op_40874_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4011_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4011_cast_fp16, y = var_40874_to_fp16)[name = tensor("aw_chunk_4011_cast_fp16")]; tensor var_40876_to_fp16 = const()[name = tensor("op_40876_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4013_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4013_cast_fp16, y = var_40876_to_fp16)[name = tensor("aw_chunk_4013_cast_fp16")]; tensor var_40878_to_fp16 = const()[name = tensor("op_40878_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4015_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4015_cast_fp16, y = var_40878_to_fp16)[name = tensor("aw_chunk_4015_cast_fp16")]; tensor var_40880_to_fp16 = const()[name = tensor("op_40880_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4017_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4017_cast_fp16, y = var_40880_to_fp16)[name = tensor("aw_chunk_4017_cast_fp16")]; tensor var_40882_to_fp16 = const()[name = tensor("op_40882_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4019_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4019_cast_fp16, y = var_40882_to_fp16)[name = tensor("aw_chunk_4019_cast_fp16")]; tensor var_40884_to_fp16 = const()[name = tensor("op_40884_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4021_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4021_cast_fp16, y = var_40884_to_fp16)[name = tensor("aw_chunk_4021_cast_fp16")]; tensor var_40886_to_fp16 = const()[name = tensor("op_40886_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4023_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4023_cast_fp16, y = var_40886_to_fp16)[name = tensor("aw_chunk_4023_cast_fp16")]; tensor var_40888_to_fp16 = const()[name = tensor("op_40888_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4025_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4025_cast_fp16, y = var_40888_to_fp16)[name = tensor("aw_chunk_4025_cast_fp16")]; tensor var_40890_to_fp16 = const()[name = tensor("op_40890_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4027_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4027_cast_fp16, y = var_40890_to_fp16)[name = tensor("aw_chunk_4027_cast_fp16")]; tensor var_40892_to_fp16 = const()[name = tensor("op_40892_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4029_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4029_cast_fp16, y = var_40892_to_fp16)[name = tensor("aw_chunk_4029_cast_fp16")]; tensor var_40894_to_fp16 = const()[name = tensor("op_40894_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4031_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4031_cast_fp16, y = var_40894_to_fp16)[name = tensor("aw_chunk_4031_cast_fp16")]; tensor var_40896_to_fp16 = const()[name = tensor("op_40896_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4033_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4033_cast_fp16, y = var_40896_to_fp16)[name = tensor("aw_chunk_4033_cast_fp16")]; tensor var_40898_to_fp16 = const()[name = tensor("op_40898_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4035_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4035_cast_fp16, y = var_40898_to_fp16)[name = tensor("aw_chunk_4035_cast_fp16")]; tensor var_40900_to_fp16 = const()[name = tensor("op_40900_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4037_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4037_cast_fp16, y = var_40900_to_fp16)[name = tensor("aw_chunk_4037_cast_fp16")]; tensor var_40902_to_fp16 = const()[name = tensor("op_40902_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4039_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4039_cast_fp16, y = var_40902_to_fp16)[name = tensor("aw_chunk_4039_cast_fp16")]; tensor var_40904_to_fp16 = const()[name = tensor("op_40904_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4041_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4041_cast_fp16, y = var_40904_to_fp16)[name = tensor("aw_chunk_4041_cast_fp16")]; tensor var_40906_to_fp16 = const()[name = tensor("op_40906_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4043_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4043_cast_fp16, y = var_40906_to_fp16)[name = tensor("aw_chunk_4043_cast_fp16")]; tensor var_40908_to_fp16 = const()[name = tensor("op_40908_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4045_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4045_cast_fp16, y = var_40908_to_fp16)[name = tensor("aw_chunk_4045_cast_fp16")]; tensor var_40910_to_fp16 = const()[name = tensor("op_40910_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4047_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4047_cast_fp16, y = var_40910_to_fp16)[name = tensor("aw_chunk_4047_cast_fp16")]; tensor var_40912_to_fp16 = const()[name = tensor("op_40912_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4049_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4049_cast_fp16, y = var_40912_to_fp16)[name = tensor("aw_chunk_4049_cast_fp16")]; tensor var_40914_to_fp16 = const()[name = tensor("op_40914_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4051_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4051_cast_fp16, y = var_40914_to_fp16)[name = tensor("aw_chunk_4051_cast_fp16")]; tensor var_40916_to_fp16 = const()[name = tensor("op_40916_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4053_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4053_cast_fp16, y = var_40916_to_fp16)[name = tensor("aw_chunk_4053_cast_fp16")]; tensor var_40918_to_fp16 = const()[name = tensor("op_40918_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4055_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4055_cast_fp16, y = var_40918_to_fp16)[name = tensor("aw_chunk_4055_cast_fp16")]; tensor var_40920_to_fp16 = const()[name = tensor("op_40920_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4057_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4057_cast_fp16, y = var_40920_to_fp16)[name = tensor("aw_chunk_4057_cast_fp16")]; tensor var_40922_to_fp16 = const()[name = tensor("op_40922_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4059_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4059_cast_fp16, y = var_40922_to_fp16)[name = tensor("aw_chunk_4059_cast_fp16")]; tensor var_40924_to_fp16 = const()[name = tensor("op_40924_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4061_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4061_cast_fp16, y = var_40924_to_fp16)[name = tensor("aw_chunk_4061_cast_fp16")]; tensor var_40926_to_fp16 = const()[name = tensor("op_40926_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4063_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4063_cast_fp16, y = var_40926_to_fp16)[name = tensor("aw_chunk_4063_cast_fp16")]; tensor var_40928_to_fp16 = const()[name = tensor("op_40928_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4065_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4065_cast_fp16, y = var_40928_to_fp16)[name = tensor("aw_chunk_4065_cast_fp16")]; tensor var_40930_to_fp16 = const()[name = tensor("op_40930_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4067_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4067_cast_fp16, y = var_40930_to_fp16)[name = tensor("aw_chunk_4067_cast_fp16")]; tensor var_40932_to_fp16 = const()[name = tensor("op_40932_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4069_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4069_cast_fp16, y = var_40932_to_fp16)[name = tensor("aw_chunk_4069_cast_fp16")]; tensor var_40934_to_fp16 = const()[name = tensor("op_40934_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4071_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4071_cast_fp16, y = var_40934_to_fp16)[name = tensor("aw_chunk_4071_cast_fp16")]; tensor var_40936_to_fp16 = const()[name = tensor("op_40936_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4073_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4073_cast_fp16, y = var_40936_to_fp16)[name = tensor("aw_chunk_4073_cast_fp16")]; tensor var_40938_to_fp16 = const()[name = tensor("op_40938_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4075_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4075_cast_fp16, y = var_40938_to_fp16)[name = tensor("aw_chunk_4075_cast_fp16")]; tensor var_40940_to_fp16 = const()[name = tensor("op_40940_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4077_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4077_cast_fp16, y = var_40940_to_fp16)[name = tensor("aw_chunk_4077_cast_fp16")]; tensor var_40942_to_fp16 = const()[name = tensor("op_40942_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4079_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4079_cast_fp16, y = var_40942_to_fp16)[name = tensor("aw_chunk_4079_cast_fp16")]; tensor var_40944_to_fp16 = const()[name = tensor("op_40944_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4081_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4081_cast_fp16, y = var_40944_to_fp16)[name = tensor("aw_chunk_4081_cast_fp16")]; tensor var_40946_to_fp16 = const()[name = tensor("op_40946_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4083_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4083_cast_fp16, y = var_40946_to_fp16)[name = tensor("aw_chunk_4083_cast_fp16")]; tensor var_40948_to_fp16 = const()[name = tensor("op_40948_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4085_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4085_cast_fp16, y = var_40948_to_fp16)[name = tensor("aw_chunk_4085_cast_fp16")]; tensor var_40950_to_fp16 = const()[name = tensor("op_40950_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4087_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4087_cast_fp16, y = var_40950_to_fp16)[name = tensor("aw_chunk_4087_cast_fp16")]; tensor var_40952_to_fp16 = const()[name = tensor("op_40952_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4089_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4089_cast_fp16, y = var_40952_to_fp16)[name = tensor("aw_chunk_4089_cast_fp16")]; tensor var_40954_to_fp16 = const()[name = tensor("op_40954_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4091_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4091_cast_fp16, y = var_40954_to_fp16)[name = tensor("aw_chunk_4091_cast_fp16")]; tensor var_40956_to_fp16 = const()[name = tensor("op_40956_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4093_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4093_cast_fp16, y = var_40956_to_fp16)[name = tensor("aw_chunk_4093_cast_fp16")]; tensor var_40958_to_fp16 = const()[name = tensor("op_40958_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4095_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4095_cast_fp16, y = var_40958_to_fp16)[name = tensor("aw_chunk_4095_cast_fp16")]; tensor var_40960_to_fp16 = const()[name = tensor("op_40960_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4097_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4097_cast_fp16, y = var_40960_to_fp16)[name = tensor("aw_chunk_4097_cast_fp16")]; tensor var_40962_to_fp16 = const()[name = tensor("op_40962_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4099_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4099_cast_fp16, y = var_40962_to_fp16)[name = tensor("aw_chunk_4099_cast_fp16")]; tensor var_40964_to_fp16 = const()[name = tensor("op_40964_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4101_cast_fp16, y = var_40964_to_fp16)[name = tensor("aw_chunk_4101_cast_fp16")]; tensor var_40966_to_fp16 = const()[name = tensor("op_40966_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4103_cast_fp16, y = var_40966_to_fp16)[name = tensor("aw_chunk_4103_cast_fp16")]; tensor var_40968_to_fp16 = const()[name = tensor("op_40968_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4105_cast_fp16, y = var_40968_to_fp16)[name = tensor("aw_chunk_4105_cast_fp16")]; tensor var_40970_to_fp16 = const()[name = tensor("op_40970_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4107_cast_fp16, y = var_40970_to_fp16)[name = tensor("aw_chunk_4107_cast_fp16")]; tensor var_40972_to_fp16 = const()[name = tensor("op_40972_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4109_cast_fp16, y = var_40972_to_fp16)[name = tensor("aw_chunk_4109_cast_fp16")]; tensor var_40974_to_fp16 = const()[name = tensor("op_40974_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4111_cast_fp16, y = var_40974_to_fp16)[name = tensor("aw_chunk_4111_cast_fp16")]; tensor var_40976_to_fp16 = const()[name = tensor("op_40976_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4113_cast_fp16, y = var_40976_to_fp16)[name = tensor("aw_chunk_4113_cast_fp16")]; tensor var_40978_to_fp16 = const()[name = tensor("op_40978_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4115_cast_fp16, y = var_40978_to_fp16)[name = tensor("aw_chunk_4115_cast_fp16")]; tensor var_40980_to_fp16 = const()[name = tensor("op_40980_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4117_cast_fp16, y = var_40980_to_fp16)[name = tensor("aw_chunk_4117_cast_fp16")]; tensor var_40982_to_fp16 = const()[name = tensor("op_40982_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4119_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4119_cast_fp16, y = var_40982_to_fp16)[name = tensor("aw_chunk_4119_cast_fp16")]; tensor var_40984_to_fp16 = const()[name = tensor("op_40984_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4121_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4121_cast_fp16, y = var_40984_to_fp16)[name = tensor("aw_chunk_4121_cast_fp16")]; tensor var_40986_to_fp16 = const()[name = tensor("op_40986_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4123_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4123_cast_fp16, y = var_40986_to_fp16)[name = tensor("aw_chunk_4123_cast_fp16")]; tensor var_40988_to_fp16 = const()[name = tensor("op_40988_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4125_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4125_cast_fp16, y = var_40988_to_fp16)[name = tensor("aw_chunk_4125_cast_fp16")]; tensor var_40990_to_fp16 = const()[name = tensor("op_40990_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4127_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4127_cast_fp16, y = var_40990_to_fp16)[name = tensor("aw_chunk_4127_cast_fp16")]; tensor var_40992_to_fp16 = const()[name = tensor("op_40992_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4129_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4129_cast_fp16, y = var_40992_to_fp16)[name = tensor("aw_chunk_4129_cast_fp16")]; tensor var_40994_to_fp16 = const()[name = tensor("op_40994_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4131_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4131_cast_fp16, y = var_40994_to_fp16)[name = tensor("aw_chunk_4131_cast_fp16")]; tensor var_40996_to_fp16 = const()[name = tensor("op_40996_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4133_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4133_cast_fp16, y = var_40996_to_fp16)[name = tensor("aw_chunk_4133_cast_fp16")]; tensor var_40998_to_fp16 = const()[name = tensor("op_40998_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4135_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4135_cast_fp16, y = var_40998_to_fp16)[name = tensor("aw_chunk_4135_cast_fp16")]; tensor var_41000_to_fp16 = const()[name = tensor("op_41000_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4137_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4137_cast_fp16, y = var_41000_to_fp16)[name = tensor("aw_chunk_4137_cast_fp16")]; tensor var_41002_to_fp16 = const()[name = tensor("op_41002_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4139_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4139_cast_fp16, y = var_41002_to_fp16)[name = tensor("aw_chunk_4139_cast_fp16")]; tensor var_41004_to_fp16 = const()[name = tensor("op_41004_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4141_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4141_cast_fp16, y = var_41004_to_fp16)[name = tensor("aw_chunk_4141_cast_fp16")]; tensor var_41006_to_fp16 = const()[name = tensor("op_41006_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4143_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4143_cast_fp16, y = var_41006_to_fp16)[name = tensor("aw_chunk_4143_cast_fp16")]; tensor var_41008_to_fp16 = const()[name = tensor("op_41008_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4145_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4145_cast_fp16, y = var_41008_to_fp16)[name = tensor("aw_chunk_4145_cast_fp16")]; tensor var_41010_to_fp16 = const()[name = tensor("op_41010_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4147_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4147_cast_fp16, y = var_41010_to_fp16)[name = tensor("aw_chunk_4147_cast_fp16")]; tensor var_41012_to_fp16 = const()[name = tensor("op_41012_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4149_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4149_cast_fp16, y = var_41012_to_fp16)[name = tensor("aw_chunk_4149_cast_fp16")]; tensor var_41014_to_fp16 = const()[name = tensor("op_41014_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4151_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4151_cast_fp16, y = var_41014_to_fp16)[name = tensor("aw_chunk_4151_cast_fp16")]; tensor var_41016_to_fp16 = const()[name = tensor("op_41016_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4153_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4153_cast_fp16, y = var_41016_to_fp16)[name = tensor("aw_chunk_4153_cast_fp16")]; tensor var_41018_to_fp16 = const()[name = tensor("op_41018_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4155_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4155_cast_fp16, y = var_41018_to_fp16)[name = tensor("aw_chunk_4155_cast_fp16")]; tensor var_41020_to_fp16 = const()[name = tensor("op_41020_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4157_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4157_cast_fp16, y = var_41020_to_fp16)[name = tensor("aw_chunk_4157_cast_fp16")]; tensor var_41022_to_fp16 = const()[name = tensor("op_41022_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4159_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4159_cast_fp16, y = var_41022_to_fp16)[name = tensor("aw_chunk_4159_cast_fp16")]; tensor var_41024_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4001_cast_fp16)[name = tensor("op_41024_cast_fp16")]; tensor var_41025_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4003_cast_fp16)[name = tensor("op_41025_cast_fp16")]; tensor var_41026_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4005_cast_fp16)[name = tensor("op_41026_cast_fp16")]; tensor var_41027_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4007_cast_fp16)[name = tensor("op_41027_cast_fp16")]; tensor var_41028_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4009_cast_fp16)[name = tensor("op_41028_cast_fp16")]; tensor var_41029_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4011_cast_fp16)[name = tensor("op_41029_cast_fp16")]; tensor var_41030_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4013_cast_fp16)[name = tensor("op_41030_cast_fp16")]; tensor var_41031_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4015_cast_fp16)[name = tensor("op_41031_cast_fp16")]; tensor var_41032_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4017_cast_fp16)[name = tensor("op_41032_cast_fp16")]; tensor var_41033_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4019_cast_fp16)[name = tensor("op_41033_cast_fp16")]; tensor var_41034_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4021_cast_fp16)[name = tensor("op_41034_cast_fp16")]; tensor var_41035_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4023_cast_fp16)[name = tensor("op_41035_cast_fp16")]; tensor var_41036_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4025_cast_fp16)[name = tensor("op_41036_cast_fp16")]; tensor var_41037_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4027_cast_fp16)[name = tensor("op_41037_cast_fp16")]; tensor var_41038_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4029_cast_fp16)[name = tensor("op_41038_cast_fp16")]; tensor var_41039_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4031_cast_fp16)[name = tensor("op_41039_cast_fp16")]; tensor var_41040_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4033_cast_fp16)[name = tensor("op_41040_cast_fp16")]; tensor var_41041_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4035_cast_fp16)[name = tensor("op_41041_cast_fp16")]; tensor var_41042_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4037_cast_fp16)[name = tensor("op_41042_cast_fp16")]; tensor var_41043_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4039_cast_fp16)[name = tensor("op_41043_cast_fp16")]; tensor var_41044_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4041_cast_fp16)[name = tensor("op_41044_cast_fp16")]; tensor var_41045_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4043_cast_fp16)[name = tensor("op_41045_cast_fp16")]; tensor var_41046_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4045_cast_fp16)[name = tensor("op_41046_cast_fp16")]; tensor var_41047_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4047_cast_fp16)[name = tensor("op_41047_cast_fp16")]; tensor var_41048_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4049_cast_fp16)[name = tensor("op_41048_cast_fp16")]; tensor var_41049_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4051_cast_fp16)[name = tensor("op_41049_cast_fp16")]; tensor var_41050_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4053_cast_fp16)[name = tensor("op_41050_cast_fp16")]; tensor var_41051_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4055_cast_fp16)[name = tensor("op_41051_cast_fp16")]; tensor var_41052_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4057_cast_fp16)[name = tensor("op_41052_cast_fp16")]; tensor var_41053_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4059_cast_fp16)[name = tensor("op_41053_cast_fp16")]; tensor var_41054_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4061_cast_fp16)[name = tensor("op_41054_cast_fp16")]; tensor var_41055_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4063_cast_fp16)[name = tensor("op_41055_cast_fp16")]; tensor var_41056_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4065_cast_fp16)[name = tensor("op_41056_cast_fp16")]; tensor var_41057_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4067_cast_fp16)[name = tensor("op_41057_cast_fp16")]; tensor var_41058_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4069_cast_fp16)[name = tensor("op_41058_cast_fp16")]; tensor var_41059_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4071_cast_fp16)[name = tensor("op_41059_cast_fp16")]; tensor var_41060_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4073_cast_fp16)[name = tensor("op_41060_cast_fp16")]; tensor var_41061_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4075_cast_fp16)[name = tensor("op_41061_cast_fp16")]; tensor var_41062_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4077_cast_fp16)[name = tensor("op_41062_cast_fp16")]; tensor var_41063_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4079_cast_fp16)[name = tensor("op_41063_cast_fp16")]; tensor var_41064_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4081_cast_fp16)[name = tensor("op_41064_cast_fp16")]; tensor var_41065_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4083_cast_fp16)[name = tensor("op_41065_cast_fp16")]; tensor var_41066_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4085_cast_fp16)[name = tensor("op_41066_cast_fp16")]; tensor var_41067_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4087_cast_fp16)[name = tensor("op_41067_cast_fp16")]; tensor var_41068_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4089_cast_fp16)[name = tensor("op_41068_cast_fp16")]; tensor var_41069_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4091_cast_fp16)[name = tensor("op_41069_cast_fp16")]; tensor var_41070_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4093_cast_fp16)[name = tensor("op_41070_cast_fp16")]; tensor var_41071_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4095_cast_fp16)[name = tensor("op_41071_cast_fp16")]; tensor var_41072_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4097_cast_fp16)[name = tensor("op_41072_cast_fp16")]; tensor var_41073_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4099_cast_fp16)[name = tensor("op_41073_cast_fp16")]; tensor var_41074_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4101_cast_fp16)[name = tensor("op_41074_cast_fp16")]; tensor var_41075_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4103_cast_fp16)[name = tensor("op_41075_cast_fp16")]; tensor var_41076_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4105_cast_fp16)[name = tensor("op_41076_cast_fp16")]; tensor var_41077_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4107_cast_fp16)[name = tensor("op_41077_cast_fp16")]; tensor var_41078_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4109_cast_fp16)[name = tensor("op_41078_cast_fp16")]; tensor var_41079_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4111_cast_fp16)[name = tensor("op_41079_cast_fp16")]; tensor var_41080_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4113_cast_fp16)[name = tensor("op_41080_cast_fp16")]; tensor var_41081_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4115_cast_fp16)[name = tensor("op_41081_cast_fp16")]; tensor var_41082_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4117_cast_fp16)[name = tensor("op_41082_cast_fp16")]; tensor var_41083_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4119_cast_fp16)[name = tensor("op_41083_cast_fp16")]; tensor var_41084_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4121_cast_fp16)[name = tensor("op_41084_cast_fp16")]; tensor var_41085_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4123_cast_fp16)[name = tensor("op_41085_cast_fp16")]; tensor var_41086_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4125_cast_fp16)[name = tensor("op_41086_cast_fp16")]; tensor var_41087_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4127_cast_fp16)[name = tensor("op_41087_cast_fp16")]; tensor var_41088_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4129_cast_fp16)[name = tensor("op_41088_cast_fp16")]; tensor var_41089_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4131_cast_fp16)[name = tensor("op_41089_cast_fp16")]; tensor var_41090_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4133_cast_fp16)[name = tensor("op_41090_cast_fp16")]; tensor var_41091_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4135_cast_fp16)[name = tensor("op_41091_cast_fp16")]; tensor var_41092_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4137_cast_fp16)[name = tensor("op_41092_cast_fp16")]; tensor var_41093_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4139_cast_fp16)[name = tensor("op_41093_cast_fp16")]; tensor var_41094_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4141_cast_fp16)[name = tensor("op_41094_cast_fp16")]; tensor var_41095_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4143_cast_fp16)[name = tensor("op_41095_cast_fp16")]; tensor var_41096_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4145_cast_fp16)[name = tensor("op_41096_cast_fp16")]; tensor var_41097_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4147_cast_fp16)[name = tensor("op_41097_cast_fp16")]; tensor var_41098_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4149_cast_fp16)[name = tensor("op_41098_cast_fp16")]; tensor var_41099_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4151_cast_fp16)[name = tensor("op_41099_cast_fp16")]; tensor var_41100_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4153_cast_fp16)[name = tensor("op_41100_cast_fp16")]; tensor var_41101_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4155_cast_fp16)[name = tensor("op_41101_cast_fp16")]; tensor var_41102_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4157_cast_fp16)[name = tensor("op_41102_cast_fp16")]; tensor var_41103_cast_fp16 = softmax(axis = var_39822, x = aw_chunk_4159_cast_fp16)[name = tensor("op_41103_cast_fp16")]; tensor var_41105_equation_0 = const()[name = tensor("op_41105_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41105_cast_fp16 = einsum(equation = var_41105_equation_0, values = (var_40625_cast_fp16, var_41024_cast_fp16))[name = tensor("op_41105_cast_fp16")]; tensor var_41107_equation_0 = const()[name = tensor("op_41107_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41107_cast_fp16 = einsum(equation = var_41107_equation_0, values = (var_40625_cast_fp16, var_41025_cast_fp16))[name = tensor("op_41107_cast_fp16")]; tensor var_41109_equation_0 = const()[name = tensor("op_41109_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41109_cast_fp16 = einsum(equation = var_41109_equation_0, values = (var_40625_cast_fp16, var_41026_cast_fp16))[name = tensor("op_41109_cast_fp16")]; tensor var_41111_equation_0 = const()[name = tensor("op_41111_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41111_cast_fp16 = einsum(equation = var_41111_equation_0, values = (var_40625_cast_fp16, var_41027_cast_fp16))[name = tensor("op_41111_cast_fp16")]; tensor var_41113_equation_0 = const()[name = tensor("op_41113_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41113_cast_fp16 = einsum(equation = var_41113_equation_0, values = (var_40629_cast_fp16, var_41028_cast_fp16))[name = tensor("op_41113_cast_fp16")]; tensor var_41115_equation_0 = const()[name = tensor("op_41115_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41115_cast_fp16 = einsum(equation = var_41115_equation_0, values = (var_40629_cast_fp16, var_41029_cast_fp16))[name = tensor("op_41115_cast_fp16")]; tensor var_41117_equation_0 = const()[name = tensor("op_41117_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41117_cast_fp16 = einsum(equation = var_41117_equation_0, values = (var_40629_cast_fp16, var_41030_cast_fp16))[name = tensor("op_41117_cast_fp16")]; tensor var_41119_equation_0 = const()[name = tensor("op_41119_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41119_cast_fp16 = einsum(equation = var_41119_equation_0, values = (var_40629_cast_fp16, var_41031_cast_fp16))[name = tensor("op_41119_cast_fp16")]; tensor var_41121_equation_0 = const()[name = tensor("op_41121_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41121_cast_fp16 = einsum(equation = var_41121_equation_0, values = (var_40633_cast_fp16, var_41032_cast_fp16))[name = tensor("op_41121_cast_fp16")]; tensor var_41123_equation_0 = const()[name = tensor("op_41123_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41123_cast_fp16 = einsum(equation = var_41123_equation_0, values = (var_40633_cast_fp16, var_41033_cast_fp16))[name = tensor("op_41123_cast_fp16")]; tensor var_41125_equation_0 = const()[name = tensor("op_41125_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41125_cast_fp16 = einsum(equation = var_41125_equation_0, values = (var_40633_cast_fp16, var_41034_cast_fp16))[name = tensor("op_41125_cast_fp16")]; tensor var_41127_equation_0 = const()[name = tensor("op_41127_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41127_cast_fp16 = einsum(equation = var_41127_equation_0, values = (var_40633_cast_fp16, var_41035_cast_fp16))[name = tensor("op_41127_cast_fp16")]; tensor var_41129_equation_0 = const()[name = tensor("op_41129_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41129_cast_fp16 = einsum(equation = var_41129_equation_0, values = (var_40637_cast_fp16, var_41036_cast_fp16))[name = tensor("op_41129_cast_fp16")]; tensor var_41131_equation_0 = const()[name = tensor("op_41131_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41131_cast_fp16 = einsum(equation = var_41131_equation_0, values = (var_40637_cast_fp16, var_41037_cast_fp16))[name = tensor("op_41131_cast_fp16")]; tensor var_41133_equation_0 = const()[name = tensor("op_41133_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41133_cast_fp16 = einsum(equation = var_41133_equation_0, values = (var_40637_cast_fp16, var_41038_cast_fp16))[name = tensor("op_41133_cast_fp16")]; tensor var_41135_equation_0 = const()[name = tensor("op_41135_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41135_cast_fp16 = einsum(equation = var_41135_equation_0, values = (var_40637_cast_fp16, var_41039_cast_fp16))[name = tensor("op_41135_cast_fp16")]; tensor var_41137_equation_0 = const()[name = tensor("op_41137_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41137_cast_fp16 = einsum(equation = var_41137_equation_0, values = (var_40641_cast_fp16, var_41040_cast_fp16))[name = tensor("op_41137_cast_fp16")]; tensor var_41139_equation_0 = const()[name = tensor("op_41139_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41139_cast_fp16 = einsum(equation = var_41139_equation_0, values = (var_40641_cast_fp16, var_41041_cast_fp16))[name = tensor("op_41139_cast_fp16")]; tensor var_41141_equation_0 = const()[name = tensor("op_41141_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41141_cast_fp16 = einsum(equation = var_41141_equation_0, values = (var_40641_cast_fp16, var_41042_cast_fp16))[name = tensor("op_41141_cast_fp16")]; tensor var_41143_equation_0 = const()[name = tensor("op_41143_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41143_cast_fp16 = einsum(equation = var_41143_equation_0, values = (var_40641_cast_fp16, var_41043_cast_fp16))[name = tensor("op_41143_cast_fp16")]; tensor var_41145_equation_0 = const()[name = tensor("op_41145_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41145_cast_fp16 = einsum(equation = var_41145_equation_0, values = (var_40645_cast_fp16, var_41044_cast_fp16))[name = tensor("op_41145_cast_fp16")]; tensor var_41147_equation_0 = const()[name = tensor("op_41147_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41147_cast_fp16 = einsum(equation = var_41147_equation_0, values = (var_40645_cast_fp16, var_41045_cast_fp16))[name = tensor("op_41147_cast_fp16")]; tensor var_41149_equation_0 = const()[name = tensor("op_41149_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41149_cast_fp16 = einsum(equation = var_41149_equation_0, values = (var_40645_cast_fp16, var_41046_cast_fp16))[name = tensor("op_41149_cast_fp16")]; tensor var_41151_equation_0 = const()[name = tensor("op_41151_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41151_cast_fp16 = einsum(equation = var_41151_equation_0, values = (var_40645_cast_fp16, var_41047_cast_fp16))[name = tensor("op_41151_cast_fp16")]; tensor var_41153_equation_0 = const()[name = tensor("op_41153_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41153_cast_fp16 = einsum(equation = var_41153_equation_0, values = (var_40649_cast_fp16, var_41048_cast_fp16))[name = tensor("op_41153_cast_fp16")]; tensor var_41155_equation_0 = const()[name = tensor("op_41155_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41155_cast_fp16 = einsum(equation = var_41155_equation_0, values = (var_40649_cast_fp16, var_41049_cast_fp16))[name = tensor("op_41155_cast_fp16")]; tensor var_41157_equation_0 = const()[name = tensor("op_41157_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41157_cast_fp16 = einsum(equation = var_41157_equation_0, values = (var_40649_cast_fp16, var_41050_cast_fp16))[name = tensor("op_41157_cast_fp16")]; tensor var_41159_equation_0 = const()[name = tensor("op_41159_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41159_cast_fp16 = einsum(equation = var_41159_equation_0, values = (var_40649_cast_fp16, var_41051_cast_fp16))[name = tensor("op_41159_cast_fp16")]; tensor var_41161_equation_0 = const()[name = tensor("op_41161_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41161_cast_fp16 = einsum(equation = var_41161_equation_0, values = (var_40653_cast_fp16, var_41052_cast_fp16))[name = tensor("op_41161_cast_fp16")]; tensor var_41163_equation_0 = const()[name = tensor("op_41163_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41163_cast_fp16 = einsum(equation = var_41163_equation_0, values = (var_40653_cast_fp16, var_41053_cast_fp16))[name = tensor("op_41163_cast_fp16")]; tensor var_41165_equation_0 = const()[name = tensor("op_41165_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41165_cast_fp16 = einsum(equation = var_41165_equation_0, values = (var_40653_cast_fp16, var_41054_cast_fp16))[name = tensor("op_41165_cast_fp16")]; tensor var_41167_equation_0 = const()[name = tensor("op_41167_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41167_cast_fp16 = einsum(equation = var_41167_equation_0, values = (var_40653_cast_fp16, var_41055_cast_fp16))[name = tensor("op_41167_cast_fp16")]; tensor var_41169_equation_0 = const()[name = tensor("op_41169_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41169_cast_fp16 = einsum(equation = var_41169_equation_0, values = (var_40657_cast_fp16, var_41056_cast_fp16))[name = tensor("op_41169_cast_fp16")]; tensor var_41171_equation_0 = const()[name = tensor("op_41171_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41171_cast_fp16 = einsum(equation = var_41171_equation_0, values = (var_40657_cast_fp16, var_41057_cast_fp16))[name = tensor("op_41171_cast_fp16")]; tensor var_41173_equation_0 = const()[name = tensor("op_41173_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41173_cast_fp16 = einsum(equation = var_41173_equation_0, values = (var_40657_cast_fp16, var_41058_cast_fp16))[name = tensor("op_41173_cast_fp16")]; tensor var_41175_equation_0 = const()[name = tensor("op_41175_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41175_cast_fp16 = einsum(equation = var_41175_equation_0, values = (var_40657_cast_fp16, var_41059_cast_fp16))[name = tensor("op_41175_cast_fp16")]; tensor var_41177_equation_0 = const()[name = tensor("op_41177_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41177_cast_fp16 = einsum(equation = var_41177_equation_0, values = (var_40661_cast_fp16, var_41060_cast_fp16))[name = tensor("op_41177_cast_fp16")]; tensor var_41179_equation_0 = const()[name = tensor("op_41179_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41179_cast_fp16 = einsum(equation = var_41179_equation_0, values = (var_40661_cast_fp16, var_41061_cast_fp16))[name = tensor("op_41179_cast_fp16")]; tensor var_41181_equation_0 = const()[name = tensor("op_41181_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41181_cast_fp16 = einsum(equation = var_41181_equation_0, values = (var_40661_cast_fp16, var_41062_cast_fp16))[name = tensor("op_41181_cast_fp16")]; tensor var_41183_equation_0 = const()[name = tensor("op_41183_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41183_cast_fp16 = einsum(equation = var_41183_equation_0, values = (var_40661_cast_fp16, var_41063_cast_fp16))[name = tensor("op_41183_cast_fp16")]; tensor var_41185_equation_0 = const()[name = tensor("op_41185_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41185_cast_fp16 = einsum(equation = var_41185_equation_0, values = (var_40665_cast_fp16, var_41064_cast_fp16))[name = tensor("op_41185_cast_fp16")]; tensor var_41187_equation_0 = const()[name = tensor("op_41187_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41187_cast_fp16 = einsum(equation = var_41187_equation_0, values = (var_40665_cast_fp16, var_41065_cast_fp16))[name = tensor("op_41187_cast_fp16")]; tensor var_41189_equation_0 = const()[name = tensor("op_41189_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41189_cast_fp16 = einsum(equation = var_41189_equation_0, values = (var_40665_cast_fp16, var_41066_cast_fp16))[name = tensor("op_41189_cast_fp16")]; tensor var_41191_equation_0 = const()[name = tensor("op_41191_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41191_cast_fp16 = einsum(equation = var_41191_equation_0, values = (var_40665_cast_fp16, var_41067_cast_fp16))[name = tensor("op_41191_cast_fp16")]; tensor var_41193_equation_0 = const()[name = tensor("op_41193_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41193_cast_fp16 = einsum(equation = var_41193_equation_0, values = (var_40669_cast_fp16, var_41068_cast_fp16))[name = tensor("op_41193_cast_fp16")]; tensor var_41195_equation_0 = const()[name = tensor("op_41195_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41195_cast_fp16 = einsum(equation = var_41195_equation_0, values = (var_40669_cast_fp16, var_41069_cast_fp16))[name = tensor("op_41195_cast_fp16")]; tensor var_41197_equation_0 = const()[name = tensor("op_41197_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41197_cast_fp16 = einsum(equation = var_41197_equation_0, values = (var_40669_cast_fp16, var_41070_cast_fp16))[name = tensor("op_41197_cast_fp16")]; tensor var_41199_equation_0 = const()[name = tensor("op_41199_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41199_cast_fp16 = einsum(equation = var_41199_equation_0, values = (var_40669_cast_fp16, var_41071_cast_fp16))[name = tensor("op_41199_cast_fp16")]; tensor var_41201_equation_0 = const()[name = tensor("op_41201_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41201_cast_fp16 = einsum(equation = var_41201_equation_0, values = (var_40673_cast_fp16, var_41072_cast_fp16))[name = tensor("op_41201_cast_fp16")]; tensor var_41203_equation_0 = const()[name = tensor("op_41203_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41203_cast_fp16 = einsum(equation = var_41203_equation_0, values = (var_40673_cast_fp16, var_41073_cast_fp16))[name = tensor("op_41203_cast_fp16")]; tensor var_41205_equation_0 = const()[name = tensor("op_41205_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41205_cast_fp16 = einsum(equation = var_41205_equation_0, values = (var_40673_cast_fp16, var_41074_cast_fp16))[name = tensor("op_41205_cast_fp16")]; tensor var_41207_equation_0 = const()[name = tensor("op_41207_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41207_cast_fp16 = einsum(equation = var_41207_equation_0, values = (var_40673_cast_fp16, var_41075_cast_fp16))[name = tensor("op_41207_cast_fp16")]; tensor var_41209_equation_0 = const()[name = tensor("op_41209_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41209_cast_fp16 = einsum(equation = var_41209_equation_0, values = (var_40677_cast_fp16, var_41076_cast_fp16))[name = tensor("op_41209_cast_fp16")]; tensor var_41211_equation_0 = const()[name = tensor("op_41211_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41211_cast_fp16 = einsum(equation = var_41211_equation_0, values = (var_40677_cast_fp16, var_41077_cast_fp16))[name = tensor("op_41211_cast_fp16")]; tensor var_41213_equation_0 = const()[name = tensor("op_41213_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41213_cast_fp16 = einsum(equation = var_41213_equation_0, values = (var_40677_cast_fp16, var_41078_cast_fp16))[name = tensor("op_41213_cast_fp16")]; tensor var_41215_equation_0 = const()[name = tensor("op_41215_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41215_cast_fp16 = einsum(equation = var_41215_equation_0, values = (var_40677_cast_fp16, var_41079_cast_fp16))[name = tensor("op_41215_cast_fp16")]; tensor var_41217_equation_0 = const()[name = tensor("op_41217_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41217_cast_fp16 = einsum(equation = var_41217_equation_0, values = (var_40681_cast_fp16, var_41080_cast_fp16))[name = tensor("op_41217_cast_fp16")]; tensor var_41219_equation_0 = const()[name = tensor("op_41219_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41219_cast_fp16 = einsum(equation = var_41219_equation_0, values = (var_40681_cast_fp16, var_41081_cast_fp16))[name = tensor("op_41219_cast_fp16")]; tensor var_41221_equation_0 = const()[name = tensor("op_41221_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41221_cast_fp16 = einsum(equation = var_41221_equation_0, values = (var_40681_cast_fp16, var_41082_cast_fp16))[name = tensor("op_41221_cast_fp16")]; tensor var_41223_equation_0 = const()[name = tensor("op_41223_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41223_cast_fp16 = einsum(equation = var_41223_equation_0, values = (var_40681_cast_fp16, var_41083_cast_fp16))[name = tensor("op_41223_cast_fp16")]; tensor var_41225_equation_0 = const()[name = tensor("op_41225_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41225_cast_fp16 = einsum(equation = var_41225_equation_0, values = (var_40685_cast_fp16, var_41084_cast_fp16))[name = tensor("op_41225_cast_fp16")]; tensor var_41227_equation_0 = const()[name = tensor("op_41227_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41227_cast_fp16 = einsum(equation = var_41227_equation_0, values = (var_40685_cast_fp16, var_41085_cast_fp16))[name = tensor("op_41227_cast_fp16")]; tensor var_41229_equation_0 = const()[name = tensor("op_41229_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41229_cast_fp16 = einsum(equation = var_41229_equation_0, values = (var_40685_cast_fp16, var_41086_cast_fp16))[name = tensor("op_41229_cast_fp16")]; tensor var_41231_equation_0 = const()[name = tensor("op_41231_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41231_cast_fp16 = einsum(equation = var_41231_equation_0, values = (var_40685_cast_fp16, var_41087_cast_fp16))[name = tensor("op_41231_cast_fp16")]; tensor var_41233_equation_0 = const()[name = tensor("op_41233_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41233_cast_fp16 = einsum(equation = var_41233_equation_0, values = (var_40689_cast_fp16, var_41088_cast_fp16))[name = tensor("op_41233_cast_fp16")]; tensor var_41235_equation_0 = const()[name = tensor("op_41235_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41235_cast_fp16 = einsum(equation = var_41235_equation_0, values = (var_40689_cast_fp16, var_41089_cast_fp16))[name = tensor("op_41235_cast_fp16")]; tensor var_41237_equation_0 = const()[name = tensor("op_41237_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41237_cast_fp16 = einsum(equation = var_41237_equation_0, values = (var_40689_cast_fp16, var_41090_cast_fp16))[name = tensor("op_41237_cast_fp16")]; tensor var_41239_equation_0 = const()[name = tensor("op_41239_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41239_cast_fp16 = einsum(equation = var_41239_equation_0, values = (var_40689_cast_fp16, var_41091_cast_fp16))[name = tensor("op_41239_cast_fp16")]; tensor var_41241_equation_0 = const()[name = tensor("op_41241_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41241_cast_fp16 = einsum(equation = var_41241_equation_0, values = (var_40693_cast_fp16, var_41092_cast_fp16))[name = tensor("op_41241_cast_fp16")]; tensor var_41243_equation_0 = const()[name = tensor("op_41243_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41243_cast_fp16 = einsum(equation = var_41243_equation_0, values = (var_40693_cast_fp16, var_41093_cast_fp16))[name = tensor("op_41243_cast_fp16")]; tensor var_41245_equation_0 = const()[name = tensor("op_41245_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41245_cast_fp16 = einsum(equation = var_41245_equation_0, values = (var_40693_cast_fp16, var_41094_cast_fp16))[name = tensor("op_41245_cast_fp16")]; tensor var_41247_equation_0 = const()[name = tensor("op_41247_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41247_cast_fp16 = einsum(equation = var_41247_equation_0, values = (var_40693_cast_fp16, var_41095_cast_fp16))[name = tensor("op_41247_cast_fp16")]; tensor var_41249_equation_0 = const()[name = tensor("op_41249_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41249_cast_fp16 = einsum(equation = var_41249_equation_0, values = (var_40697_cast_fp16, var_41096_cast_fp16))[name = tensor("op_41249_cast_fp16")]; tensor var_41251_equation_0 = const()[name = tensor("op_41251_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41251_cast_fp16 = einsum(equation = var_41251_equation_0, values = (var_40697_cast_fp16, var_41097_cast_fp16))[name = tensor("op_41251_cast_fp16")]; tensor var_41253_equation_0 = const()[name = tensor("op_41253_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41253_cast_fp16 = einsum(equation = var_41253_equation_0, values = (var_40697_cast_fp16, var_41098_cast_fp16))[name = tensor("op_41253_cast_fp16")]; tensor var_41255_equation_0 = const()[name = tensor("op_41255_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41255_cast_fp16 = einsum(equation = var_41255_equation_0, values = (var_40697_cast_fp16, var_41099_cast_fp16))[name = tensor("op_41255_cast_fp16")]; tensor var_41257_equation_0 = const()[name = tensor("op_41257_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41257_cast_fp16 = einsum(equation = var_41257_equation_0, values = (var_40701_cast_fp16, var_41100_cast_fp16))[name = tensor("op_41257_cast_fp16")]; tensor var_41259_equation_0 = const()[name = tensor("op_41259_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41259_cast_fp16 = einsum(equation = var_41259_equation_0, values = (var_40701_cast_fp16, var_41101_cast_fp16))[name = tensor("op_41259_cast_fp16")]; tensor var_41261_equation_0 = const()[name = tensor("op_41261_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41261_cast_fp16 = einsum(equation = var_41261_equation_0, values = (var_40701_cast_fp16, var_41102_cast_fp16))[name = tensor("op_41261_cast_fp16")]; tensor var_41263_equation_0 = const()[name = tensor("op_41263_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_41263_cast_fp16 = einsum(equation = var_41263_equation_0, values = (var_40701_cast_fp16, var_41103_cast_fp16))[name = tensor("op_41263_cast_fp16")]; tensor var_41265_interleave_0 = const()[name = tensor("op_41265_interleave_0"), val = tensor(false)]; tensor var_41265_cast_fp16 = concat(axis = var_39797, interleave = var_41265_interleave_0, values = (var_41105_cast_fp16, var_41107_cast_fp16, var_41109_cast_fp16, var_41111_cast_fp16))[name = tensor("op_41265_cast_fp16")]; tensor var_41267_interleave_0 = const()[name = tensor("op_41267_interleave_0"), val = tensor(false)]; tensor var_41267_cast_fp16 = concat(axis = var_39797, interleave = var_41267_interleave_0, values = (var_41113_cast_fp16, var_41115_cast_fp16, var_41117_cast_fp16, var_41119_cast_fp16))[name = tensor("op_41267_cast_fp16")]; tensor var_41269_interleave_0 = const()[name = tensor("op_41269_interleave_0"), val = tensor(false)]; tensor var_41269_cast_fp16 = concat(axis = var_39797, interleave = var_41269_interleave_0, values = (var_41121_cast_fp16, var_41123_cast_fp16, var_41125_cast_fp16, var_41127_cast_fp16))[name = tensor("op_41269_cast_fp16")]; tensor var_41271_interleave_0 = const()[name = tensor("op_41271_interleave_0"), val = tensor(false)]; tensor var_41271_cast_fp16 = concat(axis = var_39797, interleave = var_41271_interleave_0, values = (var_41129_cast_fp16, var_41131_cast_fp16, var_41133_cast_fp16, var_41135_cast_fp16))[name = tensor("op_41271_cast_fp16")]; tensor var_41273_interleave_0 = const()[name = tensor("op_41273_interleave_0"), val = tensor(false)]; tensor var_41273_cast_fp16 = concat(axis = var_39797, interleave = var_41273_interleave_0, values = (var_41137_cast_fp16, var_41139_cast_fp16, var_41141_cast_fp16, var_41143_cast_fp16))[name = tensor("op_41273_cast_fp16")]; tensor var_41275_interleave_0 = const()[name = tensor("op_41275_interleave_0"), val = tensor(false)]; tensor var_41275_cast_fp16 = concat(axis = var_39797, interleave = var_41275_interleave_0, values = (var_41145_cast_fp16, var_41147_cast_fp16, var_41149_cast_fp16, var_41151_cast_fp16))[name = tensor("op_41275_cast_fp16")]; tensor var_41277_interleave_0 = const()[name = tensor("op_41277_interleave_0"), val = tensor(false)]; tensor var_41277_cast_fp16 = concat(axis = var_39797, interleave = var_41277_interleave_0, values = (var_41153_cast_fp16, var_41155_cast_fp16, var_41157_cast_fp16, var_41159_cast_fp16))[name = tensor("op_41277_cast_fp16")]; tensor var_41279_interleave_0 = const()[name = tensor("op_41279_interleave_0"), val = tensor(false)]; tensor var_41279_cast_fp16 = concat(axis = var_39797, interleave = var_41279_interleave_0, values = (var_41161_cast_fp16, var_41163_cast_fp16, var_41165_cast_fp16, var_41167_cast_fp16))[name = tensor("op_41279_cast_fp16")]; tensor var_41281_interleave_0 = const()[name = tensor("op_41281_interleave_0"), val = tensor(false)]; tensor var_41281_cast_fp16 = concat(axis = var_39797, interleave = var_41281_interleave_0, values = (var_41169_cast_fp16, var_41171_cast_fp16, var_41173_cast_fp16, var_41175_cast_fp16))[name = tensor("op_41281_cast_fp16")]; tensor var_41283_interleave_0 = const()[name = tensor("op_41283_interleave_0"), val = tensor(false)]; tensor var_41283_cast_fp16 = concat(axis = var_39797, interleave = var_41283_interleave_0, values = (var_41177_cast_fp16, var_41179_cast_fp16, var_41181_cast_fp16, var_41183_cast_fp16))[name = tensor("op_41283_cast_fp16")]; tensor var_41285_interleave_0 = const()[name = tensor("op_41285_interleave_0"), val = tensor(false)]; tensor var_41285_cast_fp16 = concat(axis = var_39797, interleave = var_41285_interleave_0, values = (var_41185_cast_fp16, var_41187_cast_fp16, var_41189_cast_fp16, var_41191_cast_fp16))[name = tensor("op_41285_cast_fp16")]; tensor var_41287_interleave_0 = const()[name = tensor("op_41287_interleave_0"), val = tensor(false)]; tensor var_41287_cast_fp16 = concat(axis = var_39797, interleave = var_41287_interleave_0, values = (var_41193_cast_fp16, var_41195_cast_fp16, var_41197_cast_fp16, var_41199_cast_fp16))[name = tensor("op_41287_cast_fp16")]; tensor var_41289_interleave_0 = const()[name = tensor("op_41289_interleave_0"), val = tensor(false)]; tensor var_41289_cast_fp16 = concat(axis = var_39797, interleave = var_41289_interleave_0, values = (var_41201_cast_fp16, var_41203_cast_fp16, var_41205_cast_fp16, var_41207_cast_fp16))[name = tensor("op_41289_cast_fp16")]; tensor var_41291_interleave_0 = const()[name = tensor("op_41291_interleave_0"), val = tensor(false)]; tensor var_41291_cast_fp16 = concat(axis = var_39797, interleave = var_41291_interleave_0, values = (var_41209_cast_fp16, var_41211_cast_fp16, var_41213_cast_fp16, var_41215_cast_fp16))[name = tensor("op_41291_cast_fp16")]; tensor var_41293_interleave_0 = const()[name = tensor("op_41293_interleave_0"), val = tensor(false)]; tensor var_41293_cast_fp16 = concat(axis = var_39797, interleave = var_41293_interleave_0, values = (var_41217_cast_fp16, var_41219_cast_fp16, var_41221_cast_fp16, var_41223_cast_fp16))[name = tensor("op_41293_cast_fp16")]; tensor var_41295_interleave_0 = const()[name = tensor("op_41295_interleave_0"), val = tensor(false)]; tensor var_41295_cast_fp16 = concat(axis = var_39797, interleave = var_41295_interleave_0, values = (var_41225_cast_fp16, var_41227_cast_fp16, var_41229_cast_fp16, var_41231_cast_fp16))[name = tensor("op_41295_cast_fp16")]; tensor var_41297_interleave_0 = const()[name = tensor("op_41297_interleave_0"), val = tensor(false)]; tensor var_41297_cast_fp16 = concat(axis = var_39797, interleave = var_41297_interleave_0, values = (var_41233_cast_fp16, var_41235_cast_fp16, var_41237_cast_fp16, var_41239_cast_fp16))[name = tensor("op_41297_cast_fp16")]; tensor var_41299_interleave_0 = const()[name = tensor("op_41299_interleave_0"), val = tensor(false)]; tensor var_41299_cast_fp16 = concat(axis = var_39797, interleave = var_41299_interleave_0, values = (var_41241_cast_fp16, var_41243_cast_fp16, var_41245_cast_fp16, var_41247_cast_fp16))[name = tensor("op_41299_cast_fp16")]; tensor var_41301_interleave_0 = const()[name = tensor("op_41301_interleave_0"), val = tensor(false)]; tensor var_41301_cast_fp16 = concat(axis = var_39797, interleave = var_41301_interleave_0, values = (var_41249_cast_fp16, var_41251_cast_fp16, var_41253_cast_fp16, var_41255_cast_fp16))[name = tensor("op_41301_cast_fp16")]; tensor var_41303_interleave_0 = const()[name = tensor("op_41303_interleave_0"), val = tensor(false)]; tensor var_41303_cast_fp16 = concat(axis = var_39797, interleave = var_41303_interleave_0, values = (var_41257_cast_fp16, var_41259_cast_fp16, var_41261_cast_fp16, var_41263_cast_fp16))[name = tensor("op_41303_cast_fp16")]; tensor input_201_interleave_0 = const()[name = tensor("input_201_interleave_0"), val = tensor(false)]; tensor input_201_cast_fp16 = concat(axis = var_39822, interleave = input_201_interleave_0, values = (var_41265_cast_fp16, var_41267_cast_fp16, var_41269_cast_fp16, var_41271_cast_fp16, var_41273_cast_fp16, var_41275_cast_fp16, var_41277_cast_fp16, var_41279_cast_fp16, var_41281_cast_fp16, var_41283_cast_fp16, var_41285_cast_fp16, var_41287_cast_fp16, var_41289_cast_fp16, var_41291_cast_fp16, var_41293_cast_fp16, var_41295_cast_fp16, var_41297_cast_fp16, var_41299_cast_fp16, var_41301_cast_fp16, var_41303_cast_fp16))[name = tensor("input_201_cast_fp16")]; tensor var_41314_pad_type_0 = const()[name = tensor("op_41314_pad_type_0"), val = tensor("valid")]; tensor var_41314_strides_0 = const()[name = tensor("op_41314_strides_0"), val = tensor([1, 1])]; tensor var_41314_pad_0 = const()[name = tensor("op_41314_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41314_dilations_0 = const()[name = tensor("op_41314_dilations_0"), val = tensor([1, 1])]; tensor var_41314_groups_0 = const()[name = tensor("op_41314_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(337546624))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338365888))), name = tensor("layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_25_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338366016)))]; tensor var_41314_cast_fp16 = conv(bias = layers_25_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_41314_dilations_0, groups = var_41314_groups_0, pad = var_41314_pad_0, pad_type = var_41314_pad_type_0, strides = var_41314_strides_0, weight = layers_25_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_201_cast_fp16)[name = tensor("op_41314_cast_fp16")]; tensor var_41320_pad_type_0 = const()[name = tensor("op_41320_pad_type_0"), val = tensor("valid")]; tensor var_41320_strides_0 = const()[name = tensor("op_41320_strides_0"), val = tensor([1, 1])]; tensor var_41320_pad_0 = const()[name = tensor("op_41320_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41320_dilations_0 = const()[name = tensor("op_41320_dilations_0"), val = tensor([1, 1])]; tensor var_41320_groups_0 = const()[name = tensor("op_41320_groups_0"), val = tensor(1)]; tensor layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338379584))), name = tensor("layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338368640))), shape = tensor([1280, 1280, 1, 1])]; tensor var_41320_cast_fp16 = conv(dilations = var_41320_dilations_0, groups = var_41320_groups_0, pad = var_41320_pad_0, pad_type = var_41320_pad_type_0, strides = var_41320_strides_0, weight = layers_25_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_201_cast_fp16)[name = tensor("op_41320_cast_fp16")]; tensor obj_103_cast_fp16 = add(x = var_41314_cast_fp16, y = var_41320_cast_fp16)[name = tensor("obj_103_cast_fp16")]; tensor inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = obj_103_cast_fp16)[name = tensor("inputs_103_cast_fp16")]; tensor out_103_axes_0 = const()[name = tensor("out_103_axes_0"), val = tensor([1])]; tensor var_41331_to_fp16 = const()[name = tensor("op_41331_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_103_cast_fp16 = layer_norm(axes = out_103_axes_0, epsilon = var_41331_to_fp16, x = inputs_103_cast_fp16)[name = tensor("out_103_cast_fp16")]; tensor input_203_gamma_0_to_fp16 = const()[name = tensor("input_203_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338584448)))]; tensor input_203_beta_0_to_fp16 = const()[name = tensor("input_203_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338587072)))]; tensor input_203_epsilon_0_to_fp16 = const()[name = tensor("input_203_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_203_cast_fp16 = batch_norm(beta = input_203_beta_0_to_fp16, epsilon = input_203_epsilon_0_to_fp16, gamma = input_203_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_103_cast_fp16)[name = tensor("input_203_cast_fp16")]; tensor var_41349_pad_type_0 = const()[name = tensor("op_41349_pad_type_0"), val = tensor("valid")]; tensor var_41349_strides_0 = const()[name = tensor("op_41349_strides_0"), val = tensor([1, 1])]; tensor var_41349_pad_0 = const()[name = tensor("op_41349_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41349_dilations_0 = const()[name = tensor("op_41349_dilations_0"), val = tensor([1, 1])]; tensor var_41349_groups_0 = const()[name = tensor("op_41349_groups_0"), val = tensor(1)]; tensor layers_25_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338589696))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341866560))), name = tensor("layers_25_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_25_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341866688)))]; tensor var_41349_cast_fp16 = conv(bias = layers_25_fc1_inlier_module_bias_to_fp16, dilations = var_41349_dilations_0, groups = var_41349_groups_0, pad = var_41349_pad_0, pad_type = var_41349_pad_type_0, strides = var_41349_strides_0, weight = layers_25_fc1_inlier_module_weight_to_fp16_palettized, x = input_203_cast_fp16)[name = tensor("op_41349_cast_fp16")]; tensor var_41355_pad_type_0 = const()[name = tensor("op_41355_pad_type_0"), val = tensor("valid")]; tensor var_41355_strides_0 = const()[name = tensor("op_41355_strides_0"), val = tensor([1, 1])]; tensor var_41355_pad_0 = const()[name = tensor("op_41355_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41355_dilations_0 = const()[name = tensor("op_41355_dilations_0"), val = tensor([1, 1])]; tensor var_41355_groups_0 = const()[name = tensor("op_41355_groups_0"), val = tensor(1)]; tensor layers_25_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341933760))), name = tensor("layers_25_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(341876992))), shape = tensor([5120, 1280, 1, 1])]; tensor var_41355_cast_fp16 = conv(dilations = var_41355_dilations_0, groups = var_41355_groups_0, pad = var_41355_pad_0, pad_type = var_41355_pad_type_0, strides = var_41355_strides_0, weight = layers_25_fc1_outlier_module_weight_to_fp16_sparsified, x = input_203_cast_fp16)[name = tensor("op_41355_cast_fp16")]; tensor input_205_cast_fp16 = add(x = var_41349_cast_fp16, y = var_41355_cast_fp16)[name = tensor("input_205_cast_fp16")]; tensor input_207_mode_0 = const()[name = tensor("input_207_mode_0"), val = tensor("EXACT")]; tensor input_207_cast_fp16 = gelu(mode = input_207_mode_0, x = input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; tensor var_41366_pad_type_0 = const()[name = tensor("op_41366_pad_type_0"), val = tensor("valid")]; tensor var_41366_strides_0 = const()[name = tensor("op_41366_strides_0"), val = tensor([1, 1])]; tensor var_41366_pad_0 = const()[name = tensor("op_41366_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41366_dilations_0 = const()[name = tensor("op_41366_dilations_0"), val = tensor([1, 1])]; tensor var_41366_groups_0 = const()[name = tensor("op_41366_groups_0"), val = tensor(1)]; tensor layers_25_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342753024))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346029888))), name = tensor("layers_25_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_25_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_25_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346030016)))]; tensor var_41366_cast_fp16 = conv(bias = layers_25_fc2_inlier_module_bias_to_fp16, dilations = var_41366_dilations_0, groups = var_41366_groups_0, pad = var_41366_pad_0, pad_type = var_41366_pad_type_0, strides = var_41366_strides_0, weight = layers_25_fc2_inlier_module_weight_to_fp16_palettized, x = input_207_cast_fp16)[name = tensor("op_41366_cast_fp16")]; tensor var_41372_pad_type_0 = const()[name = tensor("op_41372_pad_type_0"), val = tensor("valid")]; tensor var_41372_strides_0 = const()[name = tensor("op_41372_strides_0"), val = tensor([1, 1])]; tensor var_41372_pad_0 = const()[name = tensor("op_41372_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41372_dilations_0 = const()[name = tensor("op_41372_dilations_0"), val = tensor([1, 1])]; tensor var_41372_groups_0 = const()[name = tensor("op_41372_groups_0"), val = tensor(1)]; tensor layers_25_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346081152))), name = tensor("layers_25_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346032640))), shape = tensor([1280, 5120, 1, 1])]; tensor var_41372_cast_fp16 = conv(dilations = var_41372_dilations_0, groups = var_41372_groups_0, pad = var_41372_pad_0, pad_type = var_41372_pad_type_0, strides = var_41372_strides_0, weight = layers_25_fc2_outlier_module_weight_to_fp16_sparsified, x = input_207_cast_fp16)[name = tensor("op_41372_cast_fp16")]; tensor hidden_states_55_cast_fp16 = add(x = var_41366_cast_fp16, y = var_41372_cast_fp16)[name = tensor("hidden_states_55_cast_fp16")]; tensor inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = hidden_states_55_cast_fp16)[name = tensor("inputs_105_cast_fp16")]; tensor var_41378 = const()[name = tensor("op_41378"), val = tensor(3)]; tensor var_41403 = const()[name = tensor("op_41403"), val = tensor(1)]; tensor out_105_axes_0 = const()[name = tensor("out_105_axes_0"), val = tensor([1])]; tensor var_41420_to_fp16 = const()[name = tensor("op_41420_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_105_cast_fp16 = layer_norm(axes = out_105_axes_0, epsilon = var_41420_to_fp16, x = inputs_105_cast_fp16)[name = tensor("out_105_cast_fp16")]; tensor obj_105_gamma_0_to_fp16 = const()[name = tensor("obj_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346900416)))]; tensor obj_105_beta_0_to_fp16 = const()[name = tensor("obj_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346903040)))]; tensor obj_105_epsilon_0_to_fp16 = const()[name = tensor("obj_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_105_cast_fp16)[name = tensor("obj_105_cast_fp16")]; tensor var_41442_pad_type_0 = const()[name = tensor("op_41442_pad_type_0"), val = tensor("valid")]; tensor var_41442_strides_0 = const()[name = tensor("op_41442_strides_0"), val = tensor([1, 1])]; tensor var_41442_pad_0 = const()[name = tensor("op_41442_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41442_dilations_0 = const()[name = tensor("op_41442_dilations_0"), val = tensor([1, 1])]; tensor var_41442_groups_0 = const()[name = tensor("op_41442_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(346905664))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347724928))), name = tensor("layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_26_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347725056)))]; tensor var_41442_cast_fp16 = conv(bias = layers_26_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_41442_dilations_0, groups = var_41442_groups_0, pad = var_41442_pad_0, pad_type = var_41442_pad_type_0, strides = var_41442_strides_0, weight = layers_26_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = tensor("op_41442_cast_fp16")]; tensor var_41448_pad_type_0 = const()[name = tensor("op_41448_pad_type_0"), val = tensor("valid")]; tensor var_41448_strides_0 = const()[name = tensor("op_41448_strides_0"), val = tensor([1, 1])]; tensor var_41448_pad_0 = const()[name = tensor("op_41448_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41448_dilations_0 = const()[name = tensor("op_41448_dilations_0"), val = tensor([1, 1])]; tensor var_41448_groups_0 = const()[name = tensor("op_41448_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347761152))), name = tensor("layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347727680))), shape = tensor([1280, 1280, 1, 1])]; tensor var_41448_cast_fp16 = conv(dilations = var_41448_dilations_0, groups = var_41448_groups_0, pad = var_41448_pad_0, pad_type = var_41448_pad_type_0, strides = var_41448_strides_0, weight = layers_26_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = tensor("op_41448_cast_fp16")]; tensor query_53_cast_fp16 = add(x = var_41442_cast_fp16, y = var_41448_cast_fp16)[name = tensor("query_53_cast_fp16")]; tensor var_41457_pad_type_0 = const()[name = tensor("op_41457_pad_type_0"), val = tensor("valid")]; tensor var_41457_strides_0 = const()[name = tensor("op_41457_strides_0"), val = tensor([1, 1])]; tensor var_41457_pad_0 = const()[name = tensor("op_41457_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41457_dilations_0 = const()[name = tensor("op_41457_dilations_0"), val = tensor([1, 1])]; tensor var_41457_groups_0 = const()[name = tensor("op_41457_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(347966016))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348785280))), name = tensor("layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_41457_cast_fp16 = conv(dilations = var_41457_dilations_0, groups = var_41457_groups_0, pad = var_41457_pad_0, pad_type = var_41457_pad_type_0, strides = var_41457_strides_0, weight = layers_26_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = tensor("op_41457_cast_fp16")]; tensor var_41463_pad_type_0 = const()[name = tensor("op_41463_pad_type_0"), val = tensor("valid")]; tensor var_41463_strides_0 = const()[name = tensor("op_41463_strides_0"), val = tensor([1, 1])]; tensor var_41463_pad_0 = const()[name = tensor("op_41463_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41463_dilations_0 = const()[name = tensor("op_41463_dilations_0"), val = tensor([1, 1])]; tensor var_41463_groups_0 = const()[name = tensor("op_41463_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348811904))), name = tensor("layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(348785408))), shape = tensor([1280, 1280, 1, 1])]; tensor var_41463_cast_fp16 = conv(dilations = var_41463_dilations_0, groups = var_41463_groups_0, pad = var_41463_pad_0, pad_type = var_41463_pad_type_0, strides = var_41463_strides_0, weight = layers_26_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = tensor("op_41463_cast_fp16")]; tensor key_53_cast_fp16 = add(x = var_41457_cast_fp16, y = var_41463_cast_fp16)[name = tensor("key_53_cast_fp16")]; tensor var_41473_pad_type_0 = const()[name = tensor("op_41473_pad_type_0"), val = tensor("valid")]; tensor var_41473_strides_0 = const()[name = tensor("op_41473_strides_0"), val = tensor([1, 1])]; tensor var_41473_pad_0 = const()[name = tensor("op_41473_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41473_dilations_0 = const()[name = tensor("op_41473_dilations_0"), val = tensor([1, 1])]; tensor var_41473_groups_0 = const()[name = tensor("op_41473_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349016768))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349836032))), name = tensor("layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_26_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349836160)))]; tensor var_41473_cast_fp16 = conv(bias = layers_26_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_41473_dilations_0, groups = var_41473_groups_0, pad = var_41473_pad_0, pad_type = var_41473_pad_type_0, strides = var_41473_strides_0, weight = layers_26_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_105_cast_fp16)[name = tensor("op_41473_cast_fp16")]; tensor var_41479_pad_type_0 = const()[name = tensor("op_41479_pad_type_0"), val = tensor("valid")]; tensor var_41479_strides_0 = const()[name = tensor("op_41479_strides_0"), val = tensor([1, 1])]; tensor var_41479_pad_0 = const()[name = tensor("op_41479_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_41479_dilations_0 = const()[name = tensor("op_41479_dilations_0"), val = tensor([1, 1])]; tensor var_41479_groups_0 = const()[name = tensor("op_41479_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349851264))), name = tensor("layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(349838784))), shape = tensor([1280, 1280, 1, 1])]; tensor var_41479_cast_fp16 = conv(dilations = var_41479_dilations_0, groups = var_41479_groups_0, pad = var_41479_pad_0, pad_type = var_41479_pad_type_0, strides = var_41479_strides_0, weight = layers_26_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_105_cast_fp16)[name = tensor("op_41479_cast_fp16")]; tensor value_53_cast_fp16 = add(x = var_41473_cast_fp16, y = var_41479_cast_fp16)[name = tensor("value_53_cast_fp16")]; tensor var_41485_begin_0 = const()[name = tensor("op_41485_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41485_end_0 = const()[name = tensor("op_41485_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41485_end_mask_0 = const()[name = tensor("op_41485_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41485_cast_fp16 = slice_by_index(begin = var_41485_begin_0, end = var_41485_end_0, end_mask = var_41485_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41485_cast_fp16")]; tensor var_41489_begin_0 = const()[name = tensor("op_41489_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_41489_end_0 = const()[name = tensor("op_41489_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_41489_end_mask_0 = const()[name = tensor("op_41489_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41489_cast_fp16 = slice_by_index(begin = var_41489_begin_0, end = var_41489_end_0, end_mask = var_41489_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41489_cast_fp16")]; tensor var_41493_begin_0 = const()[name = tensor("op_41493_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_41493_end_0 = const()[name = tensor("op_41493_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_41493_end_mask_0 = const()[name = tensor("op_41493_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41493_cast_fp16 = slice_by_index(begin = var_41493_begin_0, end = var_41493_end_0, end_mask = var_41493_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41493_cast_fp16")]; tensor var_41497_begin_0 = const()[name = tensor("op_41497_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_41497_end_0 = const()[name = tensor("op_41497_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_41497_end_mask_0 = const()[name = tensor("op_41497_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41497_cast_fp16 = slice_by_index(begin = var_41497_begin_0, end = var_41497_end_0, end_mask = var_41497_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41497_cast_fp16")]; tensor var_41501_begin_0 = const()[name = tensor("op_41501_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_41501_end_0 = const()[name = tensor("op_41501_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_41501_end_mask_0 = const()[name = tensor("op_41501_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41501_cast_fp16 = slice_by_index(begin = var_41501_begin_0, end = var_41501_end_0, end_mask = var_41501_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41501_cast_fp16")]; tensor var_41505_begin_0 = const()[name = tensor("op_41505_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_41505_end_0 = const()[name = tensor("op_41505_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_41505_end_mask_0 = const()[name = tensor("op_41505_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41505_cast_fp16 = slice_by_index(begin = var_41505_begin_0, end = var_41505_end_0, end_mask = var_41505_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41505_cast_fp16")]; tensor var_41509_begin_0 = const()[name = tensor("op_41509_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_41509_end_0 = const()[name = tensor("op_41509_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_41509_end_mask_0 = const()[name = tensor("op_41509_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41509_cast_fp16 = slice_by_index(begin = var_41509_begin_0, end = var_41509_end_0, end_mask = var_41509_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41509_cast_fp16")]; tensor var_41513_begin_0 = const()[name = tensor("op_41513_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_41513_end_0 = const()[name = tensor("op_41513_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_41513_end_mask_0 = const()[name = tensor("op_41513_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41513_cast_fp16 = slice_by_index(begin = var_41513_begin_0, end = var_41513_end_0, end_mask = var_41513_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41513_cast_fp16")]; tensor var_41517_begin_0 = const()[name = tensor("op_41517_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_41517_end_0 = const()[name = tensor("op_41517_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_41517_end_mask_0 = const()[name = tensor("op_41517_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41517_cast_fp16 = slice_by_index(begin = var_41517_begin_0, end = var_41517_end_0, end_mask = var_41517_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41517_cast_fp16")]; tensor var_41521_begin_0 = const()[name = tensor("op_41521_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_41521_end_0 = const()[name = tensor("op_41521_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_41521_end_mask_0 = const()[name = tensor("op_41521_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41521_cast_fp16 = slice_by_index(begin = var_41521_begin_0, end = var_41521_end_0, end_mask = var_41521_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41521_cast_fp16")]; tensor var_41525_begin_0 = const()[name = tensor("op_41525_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_41525_end_0 = const()[name = tensor("op_41525_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_41525_end_mask_0 = const()[name = tensor("op_41525_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41525_cast_fp16 = slice_by_index(begin = var_41525_begin_0, end = var_41525_end_0, end_mask = var_41525_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41525_cast_fp16")]; tensor var_41529_begin_0 = const()[name = tensor("op_41529_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_41529_end_0 = const()[name = tensor("op_41529_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_41529_end_mask_0 = const()[name = tensor("op_41529_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41529_cast_fp16 = slice_by_index(begin = var_41529_begin_0, end = var_41529_end_0, end_mask = var_41529_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41529_cast_fp16")]; tensor var_41533_begin_0 = const()[name = tensor("op_41533_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_41533_end_0 = const()[name = tensor("op_41533_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_41533_end_mask_0 = const()[name = tensor("op_41533_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41533_cast_fp16 = slice_by_index(begin = var_41533_begin_0, end = var_41533_end_0, end_mask = var_41533_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41533_cast_fp16")]; tensor var_41537_begin_0 = const()[name = tensor("op_41537_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_41537_end_0 = const()[name = tensor("op_41537_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_41537_end_mask_0 = const()[name = tensor("op_41537_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41537_cast_fp16 = slice_by_index(begin = var_41537_begin_0, end = var_41537_end_0, end_mask = var_41537_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41537_cast_fp16")]; tensor var_41541_begin_0 = const()[name = tensor("op_41541_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_41541_end_0 = const()[name = tensor("op_41541_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_41541_end_mask_0 = const()[name = tensor("op_41541_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41541_cast_fp16 = slice_by_index(begin = var_41541_begin_0, end = var_41541_end_0, end_mask = var_41541_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41541_cast_fp16")]; tensor var_41545_begin_0 = const()[name = tensor("op_41545_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_41545_end_0 = const()[name = tensor("op_41545_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_41545_end_mask_0 = const()[name = tensor("op_41545_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41545_cast_fp16 = slice_by_index(begin = var_41545_begin_0, end = var_41545_end_0, end_mask = var_41545_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41545_cast_fp16")]; tensor var_41549_begin_0 = const()[name = tensor("op_41549_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_41549_end_0 = const()[name = tensor("op_41549_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_41549_end_mask_0 = const()[name = tensor("op_41549_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41549_cast_fp16 = slice_by_index(begin = var_41549_begin_0, end = var_41549_end_0, end_mask = var_41549_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41549_cast_fp16")]; tensor var_41553_begin_0 = const()[name = tensor("op_41553_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_41553_end_0 = const()[name = tensor("op_41553_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_41553_end_mask_0 = const()[name = tensor("op_41553_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41553_cast_fp16 = slice_by_index(begin = var_41553_begin_0, end = var_41553_end_0, end_mask = var_41553_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41553_cast_fp16")]; tensor var_41557_begin_0 = const()[name = tensor("op_41557_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_41557_end_0 = const()[name = tensor("op_41557_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_41557_end_mask_0 = const()[name = tensor("op_41557_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41557_cast_fp16 = slice_by_index(begin = var_41557_begin_0, end = var_41557_end_0, end_mask = var_41557_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41557_cast_fp16")]; tensor var_41561_begin_0 = const()[name = tensor("op_41561_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_41561_end_0 = const()[name = tensor("op_41561_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_41561_end_mask_0 = const()[name = tensor("op_41561_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_41561_cast_fp16 = slice_by_index(begin = var_41561_begin_0, end = var_41561_end_0, end_mask = var_41561_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_41561_cast_fp16")]; tensor var_41570_begin_0 = const()[name = tensor("op_41570_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41570_end_0 = const()[name = tensor("op_41570_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41570_end_mask_0 = const()[name = tensor("op_41570_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41570_cast_fp16 = slice_by_index(begin = var_41570_begin_0, end = var_41570_end_0, end_mask = var_41570_end_mask_0, x = var_41485_cast_fp16)[name = tensor("op_41570_cast_fp16")]; tensor var_41577_begin_0 = const()[name = tensor("op_41577_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41577_end_0 = const()[name = tensor("op_41577_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41577_end_mask_0 = const()[name = tensor("op_41577_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41577_cast_fp16 = slice_by_index(begin = var_41577_begin_0, end = var_41577_end_0, end_mask = var_41577_end_mask_0, x = var_41485_cast_fp16)[name = tensor("op_41577_cast_fp16")]; tensor var_41584_begin_0 = const()[name = tensor("op_41584_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41584_end_0 = const()[name = tensor("op_41584_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41584_end_mask_0 = const()[name = tensor("op_41584_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41584_cast_fp16 = slice_by_index(begin = var_41584_begin_0, end = var_41584_end_0, end_mask = var_41584_end_mask_0, x = var_41485_cast_fp16)[name = tensor("op_41584_cast_fp16")]; tensor var_41591_begin_0 = const()[name = tensor("op_41591_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41591_end_0 = const()[name = tensor("op_41591_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41591_end_mask_0 = const()[name = tensor("op_41591_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41591_cast_fp16 = slice_by_index(begin = var_41591_begin_0, end = var_41591_end_0, end_mask = var_41591_end_mask_0, x = var_41485_cast_fp16)[name = tensor("op_41591_cast_fp16")]; tensor var_41598_begin_0 = const()[name = tensor("op_41598_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41598_end_0 = const()[name = tensor("op_41598_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41598_end_mask_0 = const()[name = tensor("op_41598_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41598_cast_fp16 = slice_by_index(begin = var_41598_begin_0, end = var_41598_end_0, end_mask = var_41598_end_mask_0, x = var_41489_cast_fp16)[name = tensor("op_41598_cast_fp16")]; tensor var_41605_begin_0 = const()[name = tensor("op_41605_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41605_end_0 = const()[name = tensor("op_41605_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41605_end_mask_0 = const()[name = tensor("op_41605_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41605_cast_fp16 = slice_by_index(begin = var_41605_begin_0, end = var_41605_end_0, end_mask = var_41605_end_mask_0, x = var_41489_cast_fp16)[name = tensor("op_41605_cast_fp16")]; tensor var_41612_begin_0 = const()[name = tensor("op_41612_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41612_end_0 = const()[name = tensor("op_41612_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41612_end_mask_0 = const()[name = tensor("op_41612_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41612_cast_fp16 = slice_by_index(begin = var_41612_begin_0, end = var_41612_end_0, end_mask = var_41612_end_mask_0, x = var_41489_cast_fp16)[name = tensor("op_41612_cast_fp16")]; tensor var_41619_begin_0 = const()[name = tensor("op_41619_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41619_end_0 = const()[name = tensor("op_41619_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41619_end_mask_0 = const()[name = tensor("op_41619_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41619_cast_fp16 = slice_by_index(begin = var_41619_begin_0, end = var_41619_end_0, end_mask = var_41619_end_mask_0, x = var_41489_cast_fp16)[name = tensor("op_41619_cast_fp16")]; tensor var_41626_begin_0 = const()[name = tensor("op_41626_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41626_end_0 = const()[name = tensor("op_41626_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41626_end_mask_0 = const()[name = tensor("op_41626_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41626_cast_fp16 = slice_by_index(begin = var_41626_begin_0, end = var_41626_end_0, end_mask = var_41626_end_mask_0, x = var_41493_cast_fp16)[name = tensor("op_41626_cast_fp16")]; tensor var_41633_begin_0 = const()[name = tensor("op_41633_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41633_end_0 = const()[name = tensor("op_41633_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41633_end_mask_0 = const()[name = tensor("op_41633_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41633_cast_fp16 = slice_by_index(begin = var_41633_begin_0, end = var_41633_end_0, end_mask = var_41633_end_mask_0, x = var_41493_cast_fp16)[name = tensor("op_41633_cast_fp16")]; tensor var_41640_begin_0 = const()[name = tensor("op_41640_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41640_end_0 = const()[name = tensor("op_41640_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41640_end_mask_0 = const()[name = tensor("op_41640_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41640_cast_fp16 = slice_by_index(begin = var_41640_begin_0, end = var_41640_end_0, end_mask = var_41640_end_mask_0, x = var_41493_cast_fp16)[name = tensor("op_41640_cast_fp16")]; tensor var_41647_begin_0 = const()[name = tensor("op_41647_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41647_end_0 = const()[name = tensor("op_41647_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41647_end_mask_0 = const()[name = tensor("op_41647_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41647_cast_fp16 = slice_by_index(begin = var_41647_begin_0, end = var_41647_end_0, end_mask = var_41647_end_mask_0, x = var_41493_cast_fp16)[name = tensor("op_41647_cast_fp16")]; tensor var_41654_begin_0 = const()[name = tensor("op_41654_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41654_end_0 = const()[name = tensor("op_41654_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41654_end_mask_0 = const()[name = tensor("op_41654_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41654_cast_fp16 = slice_by_index(begin = var_41654_begin_0, end = var_41654_end_0, end_mask = var_41654_end_mask_0, x = var_41497_cast_fp16)[name = tensor("op_41654_cast_fp16")]; tensor var_41661_begin_0 = const()[name = tensor("op_41661_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41661_end_0 = const()[name = tensor("op_41661_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41661_end_mask_0 = const()[name = tensor("op_41661_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41661_cast_fp16 = slice_by_index(begin = var_41661_begin_0, end = var_41661_end_0, end_mask = var_41661_end_mask_0, x = var_41497_cast_fp16)[name = tensor("op_41661_cast_fp16")]; tensor var_41668_begin_0 = const()[name = tensor("op_41668_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41668_end_0 = const()[name = tensor("op_41668_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41668_end_mask_0 = const()[name = tensor("op_41668_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41668_cast_fp16 = slice_by_index(begin = var_41668_begin_0, end = var_41668_end_0, end_mask = var_41668_end_mask_0, x = var_41497_cast_fp16)[name = tensor("op_41668_cast_fp16")]; tensor var_41675_begin_0 = const()[name = tensor("op_41675_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41675_end_0 = const()[name = tensor("op_41675_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41675_end_mask_0 = const()[name = tensor("op_41675_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41675_cast_fp16 = slice_by_index(begin = var_41675_begin_0, end = var_41675_end_0, end_mask = var_41675_end_mask_0, x = var_41497_cast_fp16)[name = tensor("op_41675_cast_fp16")]; tensor var_41682_begin_0 = const()[name = tensor("op_41682_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41682_end_0 = const()[name = tensor("op_41682_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41682_end_mask_0 = const()[name = tensor("op_41682_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41682_cast_fp16 = slice_by_index(begin = var_41682_begin_0, end = var_41682_end_0, end_mask = var_41682_end_mask_0, x = var_41501_cast_fp16)[name = tensor("op_41682_cast_fp16")]; tensor var_41689_begin_0 = const()[name = tensor("op_41689_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41689_end_0 = const()[name = tensor("op_41689_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41689_end_mask_0 = const()[name = tensor("op_41689_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41689_cast_fp16 = slice_by_index(begin = var_41689_begin_0, end = var_41689_end_0, end_mask = var_41689_end_mask_0, x = var_41501_cast_fp16)[name = tensor("op_41689_cast_fp16")]; tensor var_41696_begin_0 = const()[name = tensor("op_41696_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41696_end_0 = const()[name = tensor("op_41696_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41696_end_mask_0 = const()[name = tensor("op_41696_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41696_cast_fp16 = slice_by_index(begin = var_41696_begin_0, end = var_41696_end_0, end_mask = var_41696_end_mask_0, x = var_41501_cast_fp16)[name = tensor("op_41696_cast_fp16")]; tensor var_41703_begin_0 = const()[name = tensor("op_41703_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41703_end_0 = const()[name = tensor("op_41703_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41703_end_mask_0 = const()[name = tensor("op_41703_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41703_cast_fp16 = slice_by_index(begin = var_41703_begin_0, end = var_41703_end_0, end_mask = var_41703_end_mask_0, x = var_41501_cast_fp16)[name = tensor("op_41703_cast_fp16")]; tensor var_41710_begin_0 = const()[name = tensor("op_41710_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41710_end_0 = const()[name = tensor("op_41710_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41710_end_mask_0 = const()[name = tensor("op_41710_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41710_cast_fp16 = slice_by_index(begin = var_41710_begin_0, end = var_41710_end_0, end_mask = var_41710_end_mask_0, x = var_41505_cast_fp16)[name = tensor("op_41710_cast_fp16")]; tensor var_41717_begin_0 = const()[name = tensor("op_41717_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41717_end_0 = const()[name = tensor("op_41717_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41717_end_mask_0 = const()[name = tensor("op_41717_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41717_cast_fp16 = slice_by_index(begin = var_41717_begin_0, end = var_41717_end_0, end_mask = var_41717_end_mask_0, x = var_41505_cast_fp16)[name = tensor("op_41717_cast_fp16")]; tensor var_41724_begin_0 = const()[name = tensor("op_41724_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41724_end_0 = const()[name = tensor("op_41724_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41724_end_mask_0 = const()[name = tensor("op_41724_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41724_cast_fp16 = slice_by_index(begin = var_41724_begin_0, end = var_41724_end_0, end_mask = var_41724_end_mask_0, x = var_41505_cast_fp16)[name = tensor("op_41724_cast_fp16")]; tensor var_41731_begin_0 = const()[name = tensor("op_41731_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41731_end_0 = const()[name = tensor("op_41731_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41731_end_mask_0 = const()[name = tensor("op_41731_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41731_cast_fp16 = slice_by_index(begin = var_41731_begin_0, end = var_41731_end_0, end_mask = var_41731_end_mask_0, x = var_41505_cast_fp16)[name = tensor("op_41731_cast_fp16")]; tensor var_41738_begin_0 = const()[name = tensor("op_41738_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41738_end_0 = const()[name = tensor("op_41738_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41738_end_mask_0 = const()[name = tensor("op_41738_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41738_cast_fp16 = slice_by_index(begin = var_41738_begin_0, end = var_41738_end_0, end_mask = var_41738_end_mask_0, x = var_41509_cast_fp16)[name = tensor("op_41738_cast_fp16")]; tensor var_41745_begin_0 = const()[name = tensor("op_41745_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41745_end_0 = const()[name = tensor("op_41745_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41745_end_mask_0 = const()[name = tensor("op_41745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41745_cast_fp16 = slice_by_index(begin = var_41745_begin_0, end = var_41745_end_0, end_mask = var_41745_end_mask_0, x = var_41509_cast_fp16)[name = tensor("op_41745_cast_fp16")]; tensor var_41752_begin_0 = const()[name = tensor("op_41752_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41752_end_0 = const()[name = tensor("op_41752_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41752_end_mask_0 = const()[name = tensor("op_41752_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41752_cast_fp16 = slice_by_index(begin = var_41752_begin_0, end = var_41752_end_0, end_mask = var_41752_end_mask_0, x = var_41509_cast_fp16)[name = tensor("op_41752_cast_fp16")]; tensor var_41759_begin_0 = const()[name = tensor("op_41759_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41759_end_0 = const()[name = tensor("op_41759_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41759_end_mask_0 = const()[name = tensor("op_41759_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41759_cast_fp16 = slice_by_index(begin = var_41759_begin_0, end = var_41759_end_0, end_mask = var_41759_end_mask_0, x = var_41509_cast_fp16)[name = tensor("op_41759_cast_fp16")]; tensor var_41766_begin_0 = const()[name = tensor("op_41766_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41766_end_0 = const()[name = tensor("op_41766_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41766_end_mask_0 = const()[name = tensor("op_41766_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41766_cast_fp16 = slice_by_index(begin = var_41766_begin_0, end = var_41766_end_0, end_mask = var_41766_end_mask_0, x = var_41513_cast_fp16)[name = tensor("op_41766_cast_fp16")]; tensor var_41773_begin_0 = const()[name = tensor("op_41773_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41773_end_0 = const()[name = tensor("op_41773_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41773_end_mask_0 = const()[name = tensor("op_41773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41773_cast_fp16 = slice_by_index(begin = var_41773_begin_0, end = var_41773_end_0, end_mask = var_41773_end_mask_0, x = var_41513_cast_fp16)[name = tensor("op_41773_cast_fp16")]; tensor var_41780_begin_0 = const()[name = tensor("op_41780_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41780_end_0 = const()[name = tensor("op_41780_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41780_end_mask_0 = const()[name = tensor("op_41780_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41780_cast_fp16 = slice_by_index(begin = var_41780_begin_0, end = var_41780_end_0, end_mask = var_41780_end_mask_0, x = var_41513_cast_fp16)[name = tensor("op_41780_cast_fp16")]; tensor var_41787_begin_0 = const()[name = tensor("op_41787_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41787_end_0 = const()[name = tensor("op_41787_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41787_end_mask_0 = const()[name = tensor("op_41787_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41787_cast_fp16 = slice_by_index(begin = var_41787_begin_0, end = var_41787_end_0, end_mask = var_41787_end_mask_0, x = var_41513_cast_fp16)[name = tensor("op_41787_cast_fp16")]; tensor var_41794_begin_0 = const()[name = tensor("op_41794_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41794_end_0 = const()[name = tensor("op_41794_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41794_end_mask_0 = const()[name = tensor("op_41794_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41794_cast_fp16 = slice_by_index(begin = var_41794_begin_0, end = var_41794_end_0, end_mask = var_41794_end_mask_0, x = var_41517_cast_fp16)[name = tensor("op_41794_cast_fp16")]; tensor var_41801_begin_0 = const()[name = tensor("op_41801_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41801_end_0 = const()[name = tensor("op_41801_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41801_end_mask_0 = const()[name = tensor("op_41801_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41801_cast_fp16 = slice_by_index(begin = var_41801_begin_0, end = var_41801_end_0, end_mask = var_41801_end_mask_0, x = var_41517_cast_fp16)[name = tensor("op_41801_cast_fp16")]; tensor var_41808_begin_0 = const()[name = tensor("op_41808_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41808_end_0 = const()[name = tensor("op_41808_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41808_end_mask_0 = const()[name = tensor("op_41808_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41808_cast_fp16 = slice_by_index(begin = var_41808_begin_0, end = var_41808_end_0, end_mask = var_41808_end_mask_0, x = var_41517_cast_fp16)[name = tensor("op_41808_cast_fp16")]; tensor var_41815_begin_0 = const()[name = tensor("op_41815_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41815_end_0 = const()[name = tensor("op_41815_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41815_end_mask_0 = const()[name = tensor("op_41815_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41815_cast_fp16 = slice_by_index(begin = var_41815_begin_0, end = var_41815_end_0, end_mask = var_41815_end_mask_0, x = var_41517_cast_fp16)[name = tensor("op_41815_cast_fp16")]; tensor var_41822_begin_0 = const()[name = tensor("op_41822_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41822_end_0 = const()[name = tensor("op_41822_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41822_end_mask_0 = const()[name = tensor("op_41822_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41822_cast_fp16 = slice_by_index(begin = var_41822_begin_0, end = var_41822_end_0, end_mask = var_41822_end_mask_0, x = var_41521_cast_fp16)[name = tensor("op_41822_cast_fp16")]; tensor var_41829_begin_0 = const()[name = tensor("op_41829_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41829_end_0 = const()[name = tensor("op_41829_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41829_end_mask_0 = const()[name = tensor("op_41829_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41829_cast_fp16 = slice_by_index(begin = var_41829_begin_0, end = var_41829_end_0, end_mask = var_41829_end_mask_0, x = var_41521_cast_fp16)[name = tensor("op_41829_cast_fp16")]; tensor var_41836_begin_0 = const()[name = tensor("op_41836_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41836_end_0 = const()[name = tensor("op_41836_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41836_end_mask_0 = const()[name = tensor("op_41836_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41836_cast_fp16 = slice_by_index(begin = var_41836_begin_0, end = var_41836_end_0, end_mask = var_41836_end_mask_0, x = var_41521_cast_fp16)[name = tensor("op_41836_cast_fp16")]; tensor var_41843_begin_0 = const()[name = tensor("op_41843_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41843_end_0 = const()[name = tensor("op_41843_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41843_end_mask_0 = const()[name = tensor("op_41843_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41843_cast_fp16 = slice_by_index(begin = var_41843_begin_0, end = var_41843_end_0, end_mask = var_41843_end_mask_0, x = var_41521_cast_fp16)[name = tensor("op_41843_cast_fp16")]; tensor var_41850_begin_0 = const()[name = tensor("op_41850_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41850_end_0 = const()[name = tensor("op_41850_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41850_end_mask_0 = const()[name = tensor("op_41850_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41850_cast_fp16 = slice_by_index(begin = var_41850_begin_0, end = var_41850_end_0, end_mask = var_41850_end_mask_0, x = var_41525_cast_fp16)[name = tensor("op_41850_cast_fp16")]; tensor var_41857_begin_0 = const()[name = tensor("op_41857_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41857_end_0 = const()[name = tensor("op_41857_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41857_end_mask_0 = const()[name = tensor("op_41857_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41857_cast_fp16 = slice_by_index(begin = var_41857_begin_0, end = var_41857_end_0, end_mask = var_41857_end_mask_0, x = var_41525_cast_fp16)[name = tensor("op_41857_cast_fp16")]; tensor var_41864_begin_0 = const()[name = tensor("op_41864_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41864_end_0 = const()[name = tensor("op_41864_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41864_end_mask_0 = const()[name = tensor("op_41864_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41864_cast_fp16 = slice_by_index(begin = var_41864_begin_0, end = var_41864_end_0, end_mask = var_41864_end_mask_0, x = var_41525_cast_fp16)[name = tensor("op_41864_cast_fp16")]; tensor var_41871_begin_0 = const()[name = tensor("op_41871_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41871_end_0 = const()[name = tensor("op_41871_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41871_end_mask_0 = const()[name = tensor("op_41871_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41871_cast_fp16 = slice_by_index(begin = var_41871_begin_0, end = var_41871_end_0, end_mask = var_41871_end_mask_0, x = var_41525_cast_fp16)[name = tensor("op_41871_cast_fp16")]; tensor var_41878_begin_0 = const()[name = tensor("op_41878_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41878_end_0 = const()[name = tensor("op_41878_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41878_end_mask_0 = const()[name = tensor("op_41878_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41878_cast_fp16 = slice_by_index(begin = var_41878_begin_0, end = var_41878_end_0, end_mask = var_41878_end_mask_0, x = var_41529_cast_fp16)[name = tensor("op_41878_cast_fp16")]; tensor var_41885_begin_0 = const()[name = tensor("op_41885_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41885_end_0 = const()[name = tensor("op_41885_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41885_end_mask_0 = const()[name = tensor("op_41885_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41885_cast_fp16 = slice_by_index(begin = var_41885_begin_0, end = var_41885_end_0, end_mask = var_41885_end_mask_0, x = var_41529_cast_fp16)[name = tensor("op_41885_cast_fp16")]; tensor var_41892_begin_0 = const()[name = tensor("op_41892_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41892_end_0 = const()[name = tensor("op_41892_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41892_end_mask_0 = const()[name = tensor("op_41892_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41892_cast_fp16 = slice_by_index(begin = var_41892_begin_0, end = var_41892_end_0, end_mask = var_41892_end_mask_0, x = var_41529_cast_fp16)[name = tensor("op_41892_cast_fp16")]; tensor var_41899_begin_0 = const()[name = tensor("op_41899_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41899_end_0 = const()[name = tensor("op_41899_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41899_end_mask_0 = const()[name = tensor("op_41899_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41899_cast_fp16 = slice_by_index(begin = var_41899_begin_0, end = var_41899_end_0, end_mask = var_41899_end_mask_0, x = var_41529_cast_fp16)[name = tensor("op_41899_cast_fp16")]; tensor var_41906_begin_0 = const()[name = tensor("op_41906_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41906_end_0 = const()[name = tensor("op_41906_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41906_end_mask_0 = const()[name = tensor("op_41906_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41906_cast_fp16 = slice_by_index(begin = var_41906_begin_0, end = var_41906_end_0, end_mask = var_41906_end_mask_0, x = var_41533_cast_fp16)[name = tensor("op_41906_cast_fp16")]; tensor var_41913_begin_0 = const()[name = tensor("op_41913_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41913_end_0 = const()[name = tensor("op_41913_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41913_end_mask_0 = const()[name = tensor("op_41913_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41913_cast_fp16 = slice_by_index(begin = var_41913_begin_0, end = var_41913_end_0, end_mask = var_41913_end_mask_0, x = var_41533_cast_fp16)[name = tensor("op_41913_cast_fp16")]; tensor var_41920_begin_0 = const()[name = tensor("op_41920_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41920_end_0 = const()[name = tensor("op_41920_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41920_end_mask_0 = const()[name = tensor("op_41920_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41920_cast_fp16 = slice_by_index(begin = var_41920_begin_0, end = var_41920_end_0, end_mask = var_41920_end_mask_0, x = var_41533_cast_fp16)[name = tensor("op_41920_cast_fp16")]; tensor var_41927_begin_0 = const()[name = tensor("op_41927_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41927_end_0 = const()[name = tensor("op_41927_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41927_end_mask_0 = const()[name = tensor("op_41927_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41927_cast_fp16 = slice_by_index(begin = var_41927_begin_0, end = var_41927_end_0, end_mask = var_41927_end_mask_0, x = var_41533_cast_fp16)[name = tensor("op_41927_cast_fp16")]; tensor var_41934_begin_0 = const()[name = tensor("op_41934_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41934_end_0 = const()[name = tensor("op_41934_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41934_end_mask_0 = const()[name = tensor("op_41934_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41934_cast_fp16 = slice_by_index(begin = var_41934_begin_0, end = var_41934_end_0, end_mask = var_41934_end_mask_0, x = var_41537_cast_fp16)[name = tensor("op_41934_cast_fp16")]; tensor var_41941_begin_0 = const()[name = tensor("op_41941_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41941_end_0 = const()[name = tensor("op_41941_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41941_end_mask_0 = const()[name = tensor("op_41941_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41941_cast_fp16 = slice_by_index(begin = var_41941_begin_0, end = var_41941_end_0, end_mask = var_41941_end_mask_0, x = var_41537_cast_fp16)[name = tensor("op_41941_cast_fp16")]; tensor var_41948_begin_0 = const()[name = tensor("op_41948_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41948_end_0 = const()[name = tensor("op_41948_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41948_end_mask_0 = const()[name = tensor("op_41948_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41948_cast_fp16 = slice_by_index(begin = var_41948_begin_0, end = var_41948_end_0, end_mask = var_41948_end_mask_0, x = var_41537_cast_fp16)[name = tensor("op_41948_cast_fp16")]; tensor var_41955_begin_0 = const()[name = tensor("op_41955_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41955_end_0 = const()[name = tensor("op_41955_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41955_end_mask_0 = const()[name = tensor("op_41955_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41955_cast_fp16 = slice_by_index(begin = var_41955_begin_0, end = var_41955_end_0, end_mask = var_41955_end_mask_0, x = var_41537_cast_fp16)[name = tensor("op_41955_cast_fp16")]; tensor var_41962_begin_0 = const()[name = tensor("op_41962_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41962_end_0 = const()[name = tensor("op_41962_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41962_end_mask_0 = const()[name = tensor("op_41962_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41962_cast_fp16 = slice_by_index(begin = var_41962_begin_0, end = var_41962_end_0, end_mask = var_41962_end_mask_0, x = var_41541_cast_fp16)[name = tensor("op_41962_cast_fp16")]; tensor var_41969_begin_0 = const()[name = tensor("op_41969_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41969_end_0 = const()[name = tensor("op_41969_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41969_end_mask_0 = const()[name = tensor("op_41969_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41969_cast_fp16 = slice_by_index(begin = var_41969_begin_0, end = var_41969_end_0, end_mask = var_41969_end_mask_0, x = var_41541_cast_fp16)[name = tensor("op_41969_cast_fp16")]; tensor var_41976_begin_0 = const()[name = tensor("op_41976_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_41976_end_0 = const()[name = tensor("op_41976_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_41976_end_mask_0 = const()[name = tensor("op_41976_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41976_cast_fp16 = slice_by_index(begin = var_41976_begin_0, end = var_41976_end_0, end_mask = var_41976_end_mask_0, x = var_41541_cast_fp16)[name = tensor("op_41976_cast_fp16")]; tensor var_41983_begin_0 = const()[name = tensor("op_41983_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_41983_end_0 = const()[name = tensor("op_41983_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_41983_end_mask_0 = const()[name = tensor("op_41983_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41983_cast_fp16 = slice_by_index(begin = var_41983_begin_0, end = var_41983_end_0, end_mask = var_41983_end_mask_0, x = var_41541_cast_fp16)[name = tensor("op_41983_cast_fp16")]; tensor var_41990_begin_0 = const()[name = tensor("op_41990_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_41990_end_0 = const()[name = tensor("op_41990_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_41990_end_mask_0 = const()[name = tensor("op_41990_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41990_cast_fp16 = slice_by_index(begin = var_41990_begin_0, end = var_41990_end_0, end_mask = var_41990_end_mask_0, x = var_41545_cast_fp16)[name = tensor("op_41990_cast_fp16")]; tensor var_41997_begin_0 = const()[name = tensor("op_41997_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_41997_end_0 = const()[name = tensor("op_41997_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_41997_end_mask_0 = const()[name = tensor("op_41997_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_41997_cast_fp16 = slice_by_index(begin = var_41997_begin_0, end = var_41997_end_0, end_mask = var_41997_end_mask_0, x = var_41545_cast_fp16)[name = tensor("op_41997_cast_fp16")]; tensor var_42004_begin_0 = const()[name = tensor("op_42004_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_42004_end_0 = const()[name = tensor("op_42004_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_42004_end_mask_0 = const()[name = tensor("op_42004_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42004_cast_fp16 = slice_by_index(begin = var_42004_begin_0, end = var_42004_end_0, end_mask = var_42004_end_mask_0, x = var_41545_cast_fp16)[name = tensor("op_42004_cast_fp16")]; tensor var_42011_begin_0 = const()[name = tensor("op_42011_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_42011_end_0 = const()[name = tensor("op_42011_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_42011_end_mask_0 = const()[name = tensor("op_42011_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42011_cast_fp16 = slice_by_index(begin = var_42011_begin_0, end = var_42011_end_0, end_mask = var_42011_end_mask_0, x = var_41545_cast_fp16)[name = tensor("op_42011_cast_fp16")]; tensor var_42018_begin_0 = const()[name = tensor("op_42018_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_42018_end_0 = const()[name = tensor("op_42018_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_42018_end_mask_0 = const()[name = tensor("op_42018_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42018_cast_fp16 = slice_by_index(begin = var_42018_begin_0, end = var_42018_end_0, end_mask = var_42018_end_mask_0, x = var_41549_cast_fp16)[name = tensor("op_42018_cast_fp16")]; tensor var_42025_begin_0 = const()[name = tensor("op_42025_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_42025_end_0 = const()[name = tensor("op_42025_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_42025_end_mask_0 = const()[name = tensor("op_42025_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42025_cast_fp16 = slice_by_index(begin = var_42025_begin_0, end = var_42025_end_0, end_mask = var_42025_end_mask_0, x = var_41549_cast_fp16)[name = tensor("op_42025_cast_fp16")]; tensor var_42032_begin_0 = const()[name = tensor("op_42032_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_42032_end_0 = const()[name = tensor("op_42032_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_42032_end_mask_0 = const()[name = tensor("op_42032_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42032_cast_fp16 = slice_by_index(begin = var_42032_begin_0, end = var_42032_end_0, end_mask = var_42032_end_mask_0, x = var_41549_cast_fp16)[name = tensor("op_42032_cast_fp16")]; tensor var_42039_begin_0 = const()[name = tensor("op_42039_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_42039_end_0 = const()[name = tensor("op_42039_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_42039_end_mask_0 = const()[name = tensor("op_42039_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42039_cast_fp16 = slice_by_index(begin = var_42039_begin_0, end = var_42039_end_0, end_mask = var_42039_end_mask_0, x = var_41549_cast_fp16)[name = tensor("op_42039_cast_fp16")]; tensor var_42046_begin_0 = const()[name = tensor("op_42046_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_42046_end_0 = const()[name = tensor("op_42046_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_42046_end_mask_0 = const()[name = tensor("op_42046_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42046_cast_fp16 = slice_by_index(begin = var_42046_begin_0, end = var_42046_end_0, end_mask = var_42046_end_mask_0, x = var_41553_cast_fp16)[name = tensor("op_42046_cast_fp16")]; tensor var_42053_begin_0 = const()[name = tensor("op_42053_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_42053_end_0 = const()[name = tensor("op_42053_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_42053_end_mask_0 = const()[name = tensor("op_42053_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42053_cast_fp16 = slice_by_index(begin = var_42053_begin_0, end = var_42053_end_0, end_mask = var_42053_end_mask_0, x = var_41553_cast_fp16)[name = tensor("op_42053_cast_fp16")]; tensor var_42060_begin_0 = const()[name = tensor("op_42060_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_42060_end_0 = const()[name = tensor("op_42060_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_42060_end_mask_0 = const()[name = tensor("op_42060_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42060_cast_fp16 = slice_by_index(begin = var_42060_begin_0, end = var_42060_end_0, end_mask = var_42060_end_mask_0, x = var_41553_cast_fp16)[name = tensor("op_42060_cast_fp16")]; tensor var_42067_begin_0 = const()[name = tensor("op_42067_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_42067_end_0 = const()[name = tensor("op_42067_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_42067_end_mask_0 = const()[name = tensor("op_42067_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42067_cast_fp16 = slice_by_index(begin = var_42067_begin_0, end = var_42067_end_0, end_mask = var_42067_end_mask_0, x = var_41553_cast_fp16)[name = tensor("op_42067_cast_fp16")]; tensor var_42074_begin_0 = const()[name = tensor("op_42074_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_42074_end_0 = const()[name = tensor("op_42074_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_42074_end_mask_0 = const()[name = tensor("op_42074_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42074_cast_fp16 = slice_by_index(begin = var_42074_begin_0, end = var_42074_end_0, end_mask = var_42074_end_mask_0, x = var_41557_cast_fp16)[name = tensor("op_42074_cast_fp16")]; tensor var_42081_begin_0 = const()[name = tensor("op_42081_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_42081_end_0 = const()[name = tensor("op_42081_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_42081_end_mask_0 = const()[name = tensor("op_42081_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42081_cast_fp16 = slice_by_index(begin = var_42081_begin_0, end = var_42081_end_0, end_mask = var_42081_end_mask_0, x = var_41557_cast_fp16)[name = tensor("op_42081_cast_fp16")]; tensor var_42088_begin_0 = const()[name = tensor("op_42088_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_42088_end_0 = const()[name = tensor("op_42088_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_42088_end_mask_0 = const()[name = tensor("op_42088_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42088_cast_fp16 = slice_by_index(begin = var_42088_begin_0, end = var_42088_end_0, end_mask = var_42088_end_mask_0, x = var_41557_cast_fp16)[name = tensor("op_42088_cast_fp16")]; tensor var_42095_begin_0 = const()[name = tensor("op_42095_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_42095_end_0 = const()[name = tensor("op_42095_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_42095_end_mask_0 = const()[name = tensor("op_42095_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42095_cast_fp16 = slice_by_index(begin = var_42095_begin_0, end = var_42095_end_0, end_mask = var_42095_end_mask_0, x = var_41557_cast_fp16)[name = tensor("op_42095_cast_fp16")]; tensor var_42102_begin_0 = const()[name = tensor("op_42102_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_42102_end_0 = const()[name = tensor("op_42102_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_42102_end_mask_0 = const()[name = tensor("op_42102_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42102_cast_fp16 = slice_by_index(begin = var_42102_begin_0, end = var_42102_end_0, end_mask = var_42102_end_mask_0, x = var_41561_cast_fp16)[name = tensor("op_42102_cast_fp16")]; tensor var_42109_begin_0 = const()[name = tensor("op_42109_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_42109_end_0 = const()[name = tensor("op_42109_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_42109_end_mask_0 = const()[name = tensor("op_42109_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42109_cast_fp16 = slice_by_index(begin = var_42109_begin_0, end = var_42109_end_0, end_mask = var_42109_end_mask_0, x = var_41561_cast_fp16)[name = tensor("op_42109_cast_fp16")]; tensor var_42116_begin_0 = const()[name = tensor("op_42116_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_42116_end_0 = const()[name = tensor("op_42116_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_42116_end_mask_0 = const()[name = tensor("op_42116_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42116_cast_fp16 = slice_by_index(begin = var_42116_begin_0, end = var_42116_end_0, end_mask = var_42116_end_mask_0, x = var_41561_cast_fp16)[name = tensor("op_42116_cast_fp16")]; tensor var_42123_begin_0 = const()[name = tensor("op_42123_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_42123_end_0 = const()[name = tensor("op_42123_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_42123_end_mask_0 = const()[name = tensor("op_42123_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42123_cast_fp16 = slice_by_index(begin = var_42123_begin_0, end = var_42123_end_0, end_mask = var_42123_end_mask_0, x = var_41561_cast_fp16)[name = tensor("op_42123_cast_fp16")]; tensor k_53_perm_0 = const()[name = tensor("k_53_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_42128_begin_0 = const()[name = tensor("op_42128_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_42128_end_0 = const()[name = tensor("op_42128_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_42128_end_mask_0 = const()[name = tensor("op_42128_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_53_cast_fp16 = transpose(perm = k_53_perm_0, x = key_53_cast_fp16)[name = tensor("transpose_5")]; tensor var_42128_cast_fp16 = slice_by_index(begin = var_42128_begin_0, end = var_42128_end_0, end_mask = var_42128_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42128_cast_fp16")]; tensor var_42132_begin_0 = const()[name = tensor("op_42132_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_42132_end_0 = const()[name = tensor("op_42132_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_42132_end_mask_0 = const()[name = tensor("op_42132_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42132_cast_fp16 = slice_by_index(begin = var_42132_begin_0, end = var_42132_end_0, end_mask = var_42132_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42132_cast_fp16")]; tensor var_42136_begin_0 = const()[name = tensor("op_42136_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_42136_end_0 = const()[name = tensor("op_42136_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_42136_end_mask_0 = const()[name = tensor("op_42136_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42136_cast_fp16 = slice_by_index(begin = var_42136_begin_0, end = var_42136_end_0, end_mask = var_42136_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42136_cast_fp16")]; tensor var_42140_begin_0 = const()[name = tensor("op_42140_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_42140_end_0 = const()[name = tensor("op_42140_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_42140_end_mask_0 = const()[name = tensor("op_42140_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42140_cast_fp16 = slice_by_index(begin = var_42140_begin_0, end = var_42140_end_0, end_mask = var_42140_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42140_cast_fp16")]; tensor var_42144_begin_0 = const()[name = tensor("op_42144_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_42144_end_0 = const()[name = tensor("op_42144_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_42144_end_mask_0 = const()[name = tensor("op_42144_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42144_cast_fp16 = slice_by_index(begin = var_42144_begin_0, end = var_42144_end_0, end_mask = var_42144_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42144_cast_fp16")]; tensor var_42148_begin_0 = const()[name = tensor("op_42148_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_42148_end_0 = const()[name = tensor("op_42148_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_42148_end_mask_0 = const()[name = tensor("op_42148_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42148_cast_fp16 = slice_by_index(begin = var_42148_begin_0, end = var_42148_end_0, end_mask = var_42148_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42148_cast_fp16")]; tensor var_42152_begin_0 = const()[name = tensor("op_42152_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_42152_end_0 = const()[name = tensor("op_42152_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_42152_end_mask_0 = const()[name = tensor("op_42152_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42152_cast_fp16 = slice_by_index(begin = var_42152_begin_0, end = var_42152_end_0, end_mask = var_42152_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42152_cast_fp16")]; tensor var_42156_begin_0 = const()[name = tensor("op_42156_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_42156_end_0 = const()[name = tensor("op_42156_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_42156_end_mask_0 = const()[name = tensor("op_42156_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42156_cast_fp16 = slice_by_index(begin = var_42156_begin_0, end = var_42156_end_0, end_mask = var_42156_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42156_cast_fp16")]; tensor var_42160_begin_0 = const()[name = tensor("op_42160_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_42160_end_0 = const()[name = tensor("op_42160_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_42160_end_mask_0 = const()[name = tensor("op_42160_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42160_cast_fp16 = slice_by_index(begin = var_42160_begin_0, end = var_42160_end_0, end_mask = var_42160_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42160_cast_fp16")]; tensor var_42164_begin_0 = const()[name = tensor("op_42164_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_42164_end_0 = const()[name = tensor("op_42164_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_42164_end_mask_0 = const()[name = tensor("op_42164_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42164_cast_fp16 = slice_by_index(begin = var_42164_begin_0, end = var_42164_end_0, end_mask = var_42164_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42164_cast_fp16")]; tensor var_42168_begin_0 = const()[name = tensor("op_42168_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_42168_end_0 = const()[name = tensor("op_42168_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_42168_end_mask_0 = const()[name = tensor("op_42168_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42168_cast_fp16 = slice_by_index(begin = var_42168_begin_0, end = var_42168_end_0, end_mask = var_42168_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42168_cast_fp16")]; tensor var_42172_begin_0 = const()[name = tensor("op_42172_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_42172_end_0 = const()[name = tensor("op_42172_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_42172_end_mask_0 = const()[name = tensor("op_42172_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42172_cast_fp16 = slice_by_index(begin = var_42172_begin_0, end = var_42172_end_0, end_mask = var_42172_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42172_cast_fp16")]; tensor var_42176_begin_0 = const()[name = tensor("op_42176_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_42176_end_0 = const()[name = tensor("op_42176_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_42176_end_mask_0 = const()[name = tensor("op_42176_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42176_cast_fp16 = slice_by_index(begin = var_42176_begin_0, end = var_42176_end_0, end_mask = var_42176_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42176_cast_fp16")]; tensor var_42180_begin_0 = const()[name = tensor("op_42180_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_42180_end_0 = const()[name = tensor("op_42180_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_42180_end_mask_0 = const()[name = tensor("op_42180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42180_cast_fp16 = slice_by_index(begin = var_42180_begin_0, end = var_42180_end_0, end_mask = var_42180_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42180_cast_fp16")]; tensor var_42184_begin_0 = const()[name = tensor("op_42184_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_42184_end_0 = const()[name = tensor("op_42184_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_42184_end_mask_0 = const()[name = tensor("op_42184_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42184_cast_fp16 = slice_by_index(begin = var_42184_begin_0, end = var_42184_end_0, end_mask = var_42184_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42184_cast_fp16")]; tensor var_42188_begin_0 = const()[name = tensor("op_42188_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_42188_end_0 = const()[name = tensor("op_42188_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_42188_end_mask_0 = const()[name = tensor("op_42188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42188_cast_fp16 = slice_by_index(begin = var_42188_begin_0, end = var_42188_end_0, end_mask = var_42188_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42188_cast_fp16")]; tensor var_42192_begin_0 = const()[name = tensor("op_42192_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_42192_end_0 = const()[name = tensor("op_42192_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_42192_end_mask_0 = const()[name = tensor("op_42192_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42192_cast_fp16 = slice_by_index(begin = var_42192_begin_0, end = var_42192_end_0, end_mask = var_42192_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42192_cast_fp16")]; tensor var_42196_begin_0 = const()[name = tensor("op_42196_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_42196_end_0 = const()[name = tensor("op_42196_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_42196_end_mask_0 = const()[name = tensor("op_42196_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42196_cast_fp16 = slice_by_index(begin = var_42196_begin_0, end = var_42196_end_0, end_mask = var_42196_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42196_cast_fp16")]; tensor var_42200_begin_0 = const()[name = tensor("op_42200_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_42200_end_0 = const()[name = tensor("op_42200_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_42200_end_mask_0 = const()[name = tensor("op_42200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42200_cast_fp16 = slice_by_index(begin = var_42200_begin_0, end = var_42200_end_0, end_mask = var_42200_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42200_cast_fp16")]; tensor var_42204_begin_0 = const()[name = tensor("op_42204_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_42204_end_0 = const()[name = tensor("op_42204_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_42204_end_mask_0 = const()[name = tensor("op_42204_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_42204_cast_fp16 = slice_by_index(begin = var_42204_begin_0, end = var_42204_end_0, end_mask = var_42204_end_mask_0, x = k_53_cast_fp16)[name = tensor("op_42204_cast_fp16")]; tensor var_42206_begin_0 = const()[name = tensor("op_42206_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_42206_end_0 = const()[name = tensor("op_42206_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_42206_end_mask_0 = const()[name = tensor("op_42206_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42206_cast_fp16 = slice_by_index(begin = var_42206_begin_0, end = var_42206_end_0, end_mask = var_42206_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42206_cast_fp16")]; tensor var_42210_begin_0 = const()[name = tensor("op_42210_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_42210_end_0 = const()[name = tensor("op_42210_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_42210_end_mask_0 = const()[name = tensor("op_42210_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42210_cast_fp16 = slice_by_index(begin = var_42210_begin_0, end = var_42210_end_0, end_mask = var_42210_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42210_cast_fp16")]; tensor var_42214_begin_0 = const()[name = tensor("op_42214_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_42214_end_0 = const()[name = tensor("op_42214_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_42214_end_mask_0 = const()[name = tensor("op_42214_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42214_cast_fp16 = slice_by_index(begin = var_42214_begin_0, end = var_42214_end_0, end_mask = var_42214_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42214_cast_fp16")]; tensor var_42218_begin_0 = const()[name = tensor("op_42218_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_42218_end_0 = const()[name = tensor("op_42218_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_42218_end_mask_0 = const()[name = tensor("op_42218_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42218_cast_fp16 = slice_by_index(begin = var_42218_begin_0, end = var_42218_end_0, end_mask = var_42218_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42218_cast_fp16")]; tensor var_42222_begin_0 = const()[name = tensor("op_42222_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_42222_end_0 = const()[name = tensor("op_42222_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_42222_end_mask_0 = const()[name = tensor("op_42222_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42222_cast_fp16 = slice_by_index(begin = var_42222_begin_0, end = var_42222_end_0, end_mask = var_42222_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42222_cast_fp16")]; tensor var_42226_begin_0 = const()[name = tensor("op_42226_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_42226_end_0 = const()[name = tensor("op_42226_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_42226_end_mask_0 = const()[name = tensor("op_42226_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42226_cast_fp16 = slice_by_index(begin = var_42226_begin_0, end = var_42226_end_0, end_mask = var_42226_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42226_cast_fp16")]; tensor var_42230_begin_0 = const()[name = tensor("op_42230_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_42230_end_0 = const()[name = tensor("op_42230_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_42230_end_mask_0 = const()[name = tensor("op_42230_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42230_cast_fp16 = slice_by_index(begin = var_42230_begin_0, end = var_42230_end_0, end_mask = var_42230_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42230_cast_fp16")]; tensor var_42234_begin_0 = const()[name = tensor("op_42234_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_42234_end_0 = const()[name = tensor("op_42234_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_42234_end_mask_0 = const()[name = tensor("op_42234_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42234_cast_fp16 = slice_by_index(begin = var_42234_begin_0, end = var_42234_end_0, end_mask = var_42234_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42234_cast_fp16")]; tensor var_42238_begin_0 = const()[name = tensor("op_42238_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_42238_end_0 = const()[name = tensor("op_42238_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_42238_end_mask_0 = const()[name = tensor("op_42238_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42238_cast_fp16 = slice_by_index(begin = var_42238_begin_0, end = var_42238_end_0, end_mask = var_42238_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42238_cast_fp16")]; tensor var_42242_begin_0 = const()[name = tensor("op_42242_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_42242_end_0 = const()[name = tensor("op_42242_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_42242_end_mask_0 = const()[name = tensor("op_42242_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42242_cast_fp16 = slice_by_index(begin = var_42242_begin_0, end = var_42242_end_0, end_mask = var_42242_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42242_cast_fp16")]; tensor var_42246_begin_0 = const()[name = tensor("op_42246_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_42246_end_0 = const()[name = tensor("op_42246_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_42246_end_mask_0 = const()[name = tensor("op_42246_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42246_cast_fp16 = slice_by_index(begin = var_42246_begin_0, end = var_42246_end_0, end_mask = var_42246_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42246_cast_fp16")]; tensor var_42250_begin_0 = const()[name = tensor("op_42250_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_42250_end_0 = const()[name = tensor("op_42250_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_42250_end_mask_0 = const()[name = tensor("op_42250_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42250_cast_fp16 = slice_by_index(begin = var_42250_begin_0, end = var_42250_end_0, end_mask = var_42250_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42250_cast_fp16")]; tensor var_42254_begin_0 = const()[name = tensor("op_42254_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_42254_end_0 = const()[name = tensor("op_42254_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_42254_end_mask_0 = const()[name = tensor("op_42254_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42254_cast_fp16 = slice_by_index(begin = var_42254_begin_0, end = var_42254_end_0, end_mask = var_42254_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42254_cast_fp16")]; tensor var_42258_begin_0 = const()[name = tensor("op_42258_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_42258_end_0 = const()[name = tensor("op_42258_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_42258_end_mask_0 = const()[name = tensor("op_42258_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42258_cast_fp16 = slice_by_index(begin = var_42258_begin_0, end = var_42258_end_0, end_mask = var_42258_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42258_cast_fp16")]; tensor var_42262_begin_0 = const()[name = tensor("op_42262_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_42262_end_0 = const()[name = tensor("op_42262_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_42262_end_mask_0 = const()[name = tensor("op_42262_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42262_cast_fp16 = slice_by_index(begin = var_42262_begin_0, end = var_42262_end_0, end_mask = var_42262_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42262_cast_fp16")]; tensor var_42266_begin_0 = const()[name = tensor("op_42266_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_42266_end_0 = const()[name = tensor("op_42266_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_42266_end_mask_0 = const()[name = tensor("op_42266_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42266_cast_fp16 = slice_by_index(begin = var_42266_begin_0, end = var_42266_end_0, end_mask = var_42266_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42266_cast_fp16")]; tensor var_42270_begin_0 = const()[name = tensor("op_42270_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_42270_end_0 = const()[name = tensor("op_42270_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_42270_end_mask_0 = const()[name = tensor("op_42270_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42270_cast_fp16 = slice_by_index(begin = var_42270_begin_0, end = var_42270_end_0, end_mask = var_42270_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42270_cast_fp16")]; tensor var_42274_begin_0 = const()[name = tensor("op_42274_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_42274_end_0 = const()[name = tensor("op_42274_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_42274_end_mask_0 = const()[name = tensor("op_42274_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42274_cast_fp16 = slice_by_index(begin = var_42274_begin_0, end = var_42274_end_0, end_mask = var_42274_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42274_cast_fp16")]; tensor var_42278_begin_0 = const()[name = tensor("op_42278_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_42278_end_0 = const()[name = tensor("op_42278_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_42278_end_mask_0 = const()[name = tensor("op_42278_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42278_cast_fp16 = slice_by_index(begin = var_42278_begin_0, end = var_42278_end_0, end_mask = var_42278_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42278_cast_fp16")]; tensor var_42282_begin_0 = const()[name = tensor("op_42282_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_42282_end_0 = const()[name = tensor("op_42282_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_42282_end_mask_0 = const()[name = tensor("op_42282_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_42282_cast_fp16 = slice_by_index(begin = var_42282_begin_0, end = var_42282_end_0, end_mask = var_42282_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_42282_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4161_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4161_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4161_equation_0, values = (var_42128_cast_fp16, var_41570_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4161_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4163_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4163_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4163_equation_0, values = (var_42128_cast_fp16, var_41577_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4163_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4165_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4165_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4165_equation_0, values = (var_42128_cast_fp16, var_41584_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4165_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4167_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4167_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4167_equation_0, values = (var_42128_cast_fp16, var_41591_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4167_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4169_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4169_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4169_equation_0, values = (var_42132_cast_fp16, var_41598_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4169_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4171_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4171_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4171_equation_0, values = (var_42132_cast_fp16, var_41605_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4171_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4173_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4173_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4173_equation_0, values = (var_42132_cast_fp16, var_41612_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4173_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4175_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4175_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4175_equation_0, values = (var_42132_cast_fp16, var_41619_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4175_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4177_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4177_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4177_equation_0, values = (var_42136_cast_fp16, var_41626_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4177_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4179_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4179_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4179_equation_0, values = (var_42136_cast_fp16, var_41633_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4179_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4181_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4181_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4181_equation_0, values = (var_42136_cast_fp16, var_41640_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4181_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4183_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4183_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4183_equation_0, values = (var_42136_cast_fp16, var_41647_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4183_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4185_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4185_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4185_equation_0, values = (var_42140_cast_fp16, var_41654_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4185_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4187_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4187_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4187_equation_0, values = (var_42140_cast_fp16, var_41661_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4189_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4189_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4189_equation_0, values = (var_42140_cast_fp16, var_41668_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4189_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4191_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4191_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4191_equation_0, values = (var_42140_cast_fp16, var_41675_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4191_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4193_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4193_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4193_equation_0, values = (var_42144_cast_fp16, var_41682_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4193_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4195_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4195_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4195_equation_0, values = (var_42144_cast_fp16, var_41689_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4195_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4197_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4197_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4197_equation_0, values = (var_42144_cast_fp16, var_41696_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4197_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4199_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4199_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4199_equation_0, values = (var_42144_cast_fp16, var_41703_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4199_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4201_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4201_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4201_equation_0, values = (var_42148_cast_fp16, var_41710_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4201_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4203_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4203_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4203_equation_0, values = (var_42148_cast_fp16, var_41717_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4203_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4205_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4205_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4205_equation_0, values = (var_42148_cast_fp16, var_41724_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4205_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4207_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4207_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4207_equation_0, values = (var_42148_cast_fp16, var_41731_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4207_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4209_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4209_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4209_equation_0, values = (var_42152_cast_fp16, var_41738_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4209_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4211_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4211_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4211_equation_0, values = (var_42152_cast_fp16, var_41745_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4211_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4213_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4213_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4213_equation_0, values = (var_42152_cast_fp16, var_41752_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4213_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4215_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4215_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4215_equation_0, values = (var_42152_cast_fp16, var_41759_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4215_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4217_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4217_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4217_equation_0, values = (var_42156_cast_fp16, var_41766_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4217_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4219_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4219_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4219_equation_0, values = (var_42156_cast_fp16, var_41773_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4219_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4221_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4221_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4221_equation_0, values = (var_42156_cast_fp16, var_41780_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4221_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4223_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4223_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4223_equation_0, values = (var_42156_cast_fp16, var_41787_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4223_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4225_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4225_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4225_equation_0, values = (var_42160_cast_fp16, var_41794_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4225_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4227_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4227_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4227_equation_0, values = (var_42160_cast_fp16, var_41801_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4227_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4229_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4229_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4229_equation_0, values = (var_42160_cast_fp16, var_41808_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4229_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4231_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4231_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4231_equation_0, values = (var_42160_cast_fp16, var_41815_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4231_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4233_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4233_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4233_equation_0, values = (var_42164_cast_fp16, var_41822_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4233_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4235_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4235_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4235_equation_0, values = (var_42164_cast_fp16, var_41829_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4235_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4237_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4237_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4237_equation_0, values = (var_42164_cast_fp16, var_41836_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4237_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4239_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4239_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4239_equation_0, values = (var_42164_cast_fp16, var_41843_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4239_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4241_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4241_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4241_equation_0, values = (var_42168_cast_fp16, var_41850_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4241_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4243_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4243_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4243_equation_0, values = (var_42168_cast_fp16, var_41857_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4243_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4245_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4245_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4245_equation_0, values = (var_42168_cast_fp16, var_41864_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4245_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4247_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4247_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4247_equation_0, values = (var_42168_cast_fp16, var_41871_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4247_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4249_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4249_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4249_equation_0, values = (var_42172_cast_fp16, var_41878_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4249_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4251_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4251_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4251_equation_0, values = (var_42172_cast_fp16, var_41885_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4251_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4253_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4253_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4253_equation_0, values = (var_42172_cast_fp16, var_41892_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4253_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4255_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4255_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4255_equation_0, values = (var_42172_cast_fp16, var_41899_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4255_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4257_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4257_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4257_equation_0, values = (var_42176_cast_fp16, var_41906_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4257_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4259_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4259_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4259_equation_0, values = (var_42176_cast_fp16, var_41913_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4259_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4261_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4261_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4261_equation_0, values = (var_42176_cast_fp16, var_41920_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4261_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4263_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4263_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4263_equation_0, values = (var_42176_cast_fp16, var_41927_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4263_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4265_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4265_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4265_equation_0, values = (var_42180_cast_fp16, var_41934_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4265_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4267_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4267_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4267_equation_0, values = (var_42180_cast_fp16, var_41941_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4267_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4269_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4269_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4269_equation_0, values = (var_42180_cast_fp16, var_41948_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4269_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4271_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4271_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4271_equation_0, values = (var_42180_cast_fp16, var_41955_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4271_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4273_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4273_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4273_equation_0, values = (var_42184_cast_fp16, var_41962_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4273_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4275_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4275_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4275_equation_0, values = (var_42184_cast_fp16, var_41969_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4275_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4277_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4277_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4277_equation_0, values = (var_42184_cast_fp16, var_41976_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4277_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4279_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4279_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4279_equation_0, values = (var_42184_cast_fp16, var_41983_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4279_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4281_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4281_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4281_equation_0, values = (var_42188_cast_fp16, var_41990_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4281_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4283_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4283_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4283_equation_0, values = (var_42188_cast_fp16, var_41997_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4283_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4285_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4285_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4285_equation_0, values = (var_42188_cast_fp16, var_42004_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4285_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4287_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4287_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4287_equation_0, values = (var_42188_cast_fp16, var_42011_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4287_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4289_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4289_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4289_equation_0, values = (var_42192_cast_fp16, var_42018_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4289_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4291_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4291_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4291_equation_0, values = (var_42192_cast_fp16, var_42025_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4291_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4293_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4293_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4293_equation_0, values = (var_42192_cast_fp16, var_42032_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4293_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4295_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4295_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4295_equation_0, values = (var_42192_cast_fp16, var_42039_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4295_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4297_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4297_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4297_equation_0, values = (var_42196_cast_fp16, var_42046_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4297_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4299_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4299_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4299_equation_0, values = (var_42196_cast_fp16, var_42053_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4299_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4301_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4301_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4301_equation_0, values = (var_42196_cast_fp16, var_42060_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4301_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4303_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4303_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4303_equation_0, values = (var_42196_cast_fp16, var_42067_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4303_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4305_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4305_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4305_equation_0, values = (var_42200_cast_fp16, var_42074_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4305_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4307_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4307_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4307_equation_0, values = (var_42200_cast_fp16, var_42081_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4307_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4309_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4309_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4309_equation_0, values = (var_42200_cast_fp16, var_42088_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4309_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4311_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4311_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4311_equation_0, values = (var_42200_cast_fp16, var_42095_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4311_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4313_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4313_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4313_equation_0, values = (var_42204_cast_fp16, var_42102_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4313_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4315_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4315_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4315_equation_0, values = (var_42204_cast_fp16, var_42109_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4315_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4317_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4317_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4317_equation_0, values = (var_42204_cast_fp16, var_42116_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4317_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4319_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4319_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4319_equation_0, values = (var_42204_cast_fp16, var_42123_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4319_cast_fp16")]; tensor var_42445_to_fp16 = const()[name = tensor("op_42445_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4161_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4161_cast_fp16, y = var_42445_to_fp16)[name = tensor("aw_chunk_4161_cast_fp16")]; tensor var_42447_to_fp16 = const()[name = tensor("op_42447_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4163_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4163_cast_fp16, y = var_42447_to_fp16)[name = tensor("aw_chunk_4163_cast_fp16")]; tensor var_42449_to_fp16 = const()[name = tensor("op_42449_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4165_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4165_cast_fp16, y = var_42449_to_fp16)[name = tensor("aw_chunk_4165_cast_fp16")]; tensor var_42451_to_fp16 = const()[name = tensor("op_42451_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4167_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4167_cast_fp16, y = var_42451_to_fp16)[name = tensor("aw_chunk_4167_cast_fp16")]; tensor var_42453_to_fp16 = const()[name = tensor("op_42453_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4169_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4169_cast_fp16, y = var_42453_to_fp16)[name = tensor("aw_chunk_4169_cast_fp16")]; tensor var_42455_to_fp16 = const()[name = tensor("op_42455_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4171_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4171_cast_fp16, y = var_42455_to_fp16)[name = tensor("aw_chunk_4171_cast_fp16")]; tensor var_42457_to_fp16 = const()[name = tensor("op_42457_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4173_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4173_cast_fp16, y = var_42457_to_fp16)[name = tensor("aw_chunk_4173_cast_fp16")]; tensor var_42459_to_fp16 = const()[name = tensor("op_42459_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4175_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4175_cast_fp16, y = var_42459_to_fp16)[name = tensor("aw_chunk_4175_cast_fp16")]; tensor var_42461_to_fp16 = const()[name = tensor("op_42461_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4177_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4177_cast_fp16, y = var_42461_to_fp16)[name = tensor("aw_chunk_4177_cast_fp16")]; tensor var_42463_to_fp16 = const()[name = tensor("op_42463_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4179_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4179_cast_fp16, y = var_42463_to_fp16)[name = tensor("aw_chunk_4179_cast_fp16")]; tensor var_42465_to_fp16 = const()[name = tensor("op_42465_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4181_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4181_cast_fp16, y = var_42465_to_fp16)[name = tensor("aw_chunk_4181_cast_fp16")]; tensor var_42467_to_fp16 = const()[name = tensor("op_42467_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4183_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4183_cast_fp16, y = var_42467_to_fp16)[name = tensor("aw_chunk_4183_cast_fp16")]; tensor var_42469_to_fp16 = const()[name = tensor("op_42469_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4185_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4185_cast_fp16, y = var_42469_to_fp16)[name = tensor("aw_chunk_4185_cast_fp16")]; tensor var_42471_to_fp16 = const()[name = tensor("op_42471_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4187_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4187_cast_fp16, y = var_42471_to_fp16)[name = tensor("aw_chunk_4187_cast_fp16")]; tensor var_42473_to_fp16 = const()[name = tensor("op_42473_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4189_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4189_cast_fp16, y = var_42473_to_fp16)[name = tensor("aw_chunk_4189_cast_fp16")]; tensor var_42475_to_fp16 = const()[name = tensor("op_42475_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4191_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4191_cast_fp16, y = var_42475_to_fp16)[name = tensor("aw_chunk_4191_cast_fp16")]; tensor var_42477_to_fp16 = const()[name = tensor("op_42477_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4193_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4193_cast_fp16, y = var_42477_to_fp16)[name = tensor("aw_chunk_4193_cast_fp16")]; tensor var_42479_to_fp16 = const()[name = tensor("op_42479_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4195_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4195_cast_fp16, y = var_42479_to_fp16)[name = tensor("aw_chunk_4195_cast_fp16")]; tensor var_42481_to_fp16 = const()[name = tensor("op_42481_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4197_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4197_cast_fp16, y = var_42481_to_fp16)[name = tensor("aw_chunk_4197_cast_fp16")]; tensor var_42483_to_fp16 = const()[name = tensor("op_42483_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4199_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4199_cast_fp16, y = var_42483_to_fp16)[name = tensor("aw_chunk_4199_cast_fp16")]; tensor var_42485_to_fp16 = const()[name = tensor("op_42485_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4201_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4201_cast_fp16, y = var_42485_to_fp16)[name = tensor("aw_chunk_4201_cast_fp16")]; tensor var_42487_to_fp16 = const()[name = tensor("op_42487_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4203_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4203_cast_fp16, y = var_42487_to_fp16)[name = tensor("aw_chunk_4203_cast_fp16")]; tensor var_42489_to_fp16 = const()[name = tensor("op_42489_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4205_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4205_cast_fp16, y = var_42489_to_fp16)[name = tensor("aw_chunk_4205_cast_fp16")]; tensor var_42491_to_fp16 = const()[name = tensor("op_42491_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4207_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4207_cast_fp16, y = var_42491_to_fp16)[name = tensor("aw_chunk_4207_cast_fp16")]; tensor var_42493_to_fp16 = const()[name = tensor("op_42493_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4209_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4209_cast_fp16, y = var_42493_to_fp16)[name = tensor("aw_chunk_4209_cast_fp16")]; tensor var_42495_to_fp16 = const()[name = tensor("op_42495_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4211_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4211_cast_fp16, y = var_42495_to_fp16)[name = tensor("aw_chunk_4211_cast_fp16")]; tensor var_42497_to_fp16 = const()[name = tensor("op_42497_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4213_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4213_cast_fp16, y = var_42497_to_fp16)[name = tensor("aw_chunk_4213_cast_fp16")]; tensor var_42499_to_fp16 = const()[name = tensor("op_42499_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4215_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4215_cast_fp16, y = var_42499_to_fp16)[name = tensor("aw_chunk_4215_cast_fp16")]; tensor var_42501_to_fp16 = const()[name = tensor("op_42501_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4217_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4217_cast_fp16, y = var_42501_to_fp16)[name = tensor("aw_chunk_4217_cast_fp16")]; tensor var_42503_to_fp16 = const()[name = tensor("op_42503_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4219_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4219_cast_fp16, y = var_42503_to_fp16)[name = tensor("aw_chunk_4219_cast_fp16")]; tensor var_42505_to_fp16 = const()[name = tensor("op_42505_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4221_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4221_cast_fp16, y = var_42505_to_fp16)[name = tensor("aw_chunk_4221_cast_fp16")]; tensor var_42507_to_fp16 = const()[name = tensor("op_42507_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4223_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4223_cast_fp16, y = var_42507_to_fp16)[name = tensor("aw_chunk_4223_cast_fp16")]; tensor var_42509_to_fp16 = const()[name = tensor("op_42509_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4225_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4225_cast_fp16, y = var_42509_to_fp16)[name = tensor("aw_chunk_4225_cast_fp16")]; tensor var_42511_to_fp16 = const()[name = tensor("op_42511_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4227_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4227_cast_fp16, y = var_42511_to_fp16)[name = tensor("aw_chunk_4227_cast_fp16")]; tensor var_42513_to_fp16 = const()[name = tensor("op_42513_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4229_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4229_cast_fp16, y = var_42513_to_fp16)[name = tensor("aw_chunk_4229_cast_fp16")]; tensor var_42515_to_fp16 = const()[name = tensor("op_42515_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4231_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4231_cast_fp16, y = var_42515_to_fp16)[name = tensor("aw_chunk_4231_cast_fp16")]; tensor var_42517_to_fp16 = const()[name = tensor("op_42517_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4233_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4233_cast_fp16, y = var_42517_to_fp16)[name = tensor("aw_chunk_4233_cast_fp16")]; tensor var_42519_to_fp16 = const()[name = tensor("op_42519_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4235_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4235_cast_fp16, y = var_42519_to_fp16)[name = tensor("aw_chunk_4235_cast_fp16")]; tensor var_42521_to_fp16 = const()[name = tensor("op_42521_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4237_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4237_cast_fp16, y = var_42521_to_fp16)[name = tensor("aw_chunk_4237_cast_fp16")]; tensor var_42523_to_fp16 = const()[name = tensor("op_42523_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4239_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4239_cast_fp16, y = var_42523_to_fp16)[name = tensor("aw_chunk_4239_cast_fp16")]; tensor var_42525_to_fp16 = const()[name = tensor("op_42525_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4241_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4241_cast_fp16, y = var_42525_to_fp16)[name = tensor("aw_chunk_4241_cast_fp16")]; tensor var_42527_to_fp16 = const()[name = tensor("op_42527_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4243_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4243_cast_fp16, y = var_42527_to_fp16)[name = tensor("aw_chunk_4243_cast_fp16")]; tensor var_42529_to_fp16 = const()[name = tensor("op_42529_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4245_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4245_cast_fp16, y = var_42529_to_fp16)[name = tensor("aw_chunk_4245_cast_fp16")]; tensor var_42531_to_fp16 = const()[name = tensor("op_42531_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4247_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4247_cast_fp16, y = var_42531_to_fp16)[name = tensor("aw_chunk_4247_cast_fp16")]; tensor var_42533_to_fp16 = const()[name = tensor("op_42533_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4249_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4249_cast_fp16, y = var_42533_to_fp16)[name = tensor("aw_chunk_4249_cast_fp16")]; tensor var_42535_to_fp16 = const()[name = tensor("op_42535_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4251_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4251_cast_fp16, y = var_42535_to_fp16)[name = tensor("aw_chunk_4251_cast_fp16")]; tensor var_42537_to_fp16 = const()[name = tensor("op_42537_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4253_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4253_cast_fp16, y = var_42537_to_fp16)[name = tensor("aw_chunk_4253_cast_fp16")]; tensor var_42539_to_fp16 = const()[name = tensor("op_42539_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4255_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4255_cast_fp16, y = var_42539_to_fp16)[name = tensor("aw_chunk_4255_cast_fp16")]; tensor var_42541_to_fp16 = const()[name = tensor("op_42541_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4257_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4257_cast_fp16, y = var_42541_to_fp16)[name = tensor("aw_chunk_4257_cast_fp16")]; tensor var_42543_to_fp16 = const()[name = tensor("op_42543_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4259_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4259_cast_fp16, y = var_42543_to_fp16)[name = tensor("aw_chunk_4259_cast_fp16")]; tensor var_42545_to_fp16 = const()[name = tensor("op_42545_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4261_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4261_cast_fp16, y = var_42545_to_fp16)[name = tensor("aw_chunk_4261_cast_fp16")]; tensor var_42547_to_fp16 = const()[name = tensor("op_42547_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4263_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4263_cast_fp16, y = var_42547_to_fp16)[name = tensor("aw_chunk_4263_cast_fp16")]; tensor var_42549_to_fp16 = const()[name = tensor("op_42549_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4265_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4265_cast_fp16, y = var_42549_to_fp16)[name = tensor("aw_chunk_4265_cast_fp16")]; tensor var_42551_to_fp16 = const()[name = tensor("op_42551_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4267_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4267_cast_fp16, y = var_42551_to_fp16)[name = tensor("aw_chunk_4267_cast_fp16")]; tensor var_42553_to_fp16 = const()[name = tensor("op_42553_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4269_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4269_cast_fp16, y = var_42553_to_fp16)[name = tensor("aw_chunk_4269_cast_fp16")]; tensor var_42555_to_fp16 = const()[name = tensor("op_42555_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4271_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4271_cast_fp16, y = var_42555_to_fp16)[name = tensor("aw_chunk_4271_cast_fp16")]; tensor var_42557_to_fp16 = const()[name = tensor("op_42557_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4273_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4273_cast_fp16, y = var_42557_to_fp16)[name = tensor("aw_chunk_4273_cast_fp16")]; tensor var_42559_to_fp16 = const()[name = tensor("op_42559_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4275_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4275_cast_fp16, y = var_42559_to_fp16)[name = tensor("aw_chunk_4275_cast_fp16")]; tensor var_42561_to_fp16 = const()[name = tensor("op_42561_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4277_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4277_cast_fp16, y = var_42561_to_fp16)[name = tensor("aw_chunk_4277_cast_fp16")]; tensor var_42563_to_fp16 = const()[name = tensor("op_42563_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4279_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4279_cast_fp16, y = var_42563_to_fp16)[name = tensor("aw_chunk_4279_cast_fp16")]; tensor var_42565_to_fp16 = const()[name = tensor("op_42565_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4281_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4281_cast_fp16, y = var_42565_to_fp16)[name = tensor("aw_chunk_4281_cast_fp16")]; tensor var_42567_to_fp16 = const()[name = tensor("op_42567_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4283_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4283_cast_fp16, y = var_42567_to_fp16)[name = tensor("aw_chunk_4283_cast_fp16")]; tensor var_42569_to_fp16 = const()[name = tensor("op_42569_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4285_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4285_cast_fp16, y = var_42569_to_fp16)[name = tensor("aw_chunk_4285_cast_fp16")]; tensor var_42571_to_fp16 = const()[name = tensor("op_42571_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4287_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4287_cast_fp16, y = var_42571_to_fp16)[name = tensor("aw_chunk_4287_cast_fp16")]; tensor var_42573_to_fp16 = const()[name = tensor("op_42573_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4289_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4289_cast_fp16, y = var_42573_to_fp16)[name = tensor("aw_chunk_4289_cast_fp16")]; tensor var_42575_to_fp16 = const()[name = tensor("op_42575_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4291_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4291_cast_fp16, y = var_42575_to_fp16)[name = tensor("aw_chunk_4291_cast_fp16")]; tensor var_42577_to_fp16 = const()[name = tensor("op_42577_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4293_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4293_cast_fp16, y = var_42577_to_fp16)[name = tensor("aw_chunk_4293_cast_fp16")]; tensor var_42579_to_fp16 = const()[name = tensor("op_42579_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4295_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4295_cast_fp16, y = var_42579_to_fp16)[name = tensor("aw_chunk_4295_cast_fp16")]; tensor var_42581_to_fp16 = const()[name = tensor("op_42581_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4297_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4297_cast_fp16, y = var_42581_to_fp16)[name = tensor("aw_chunk_4297_cast_fp16")]; tensor var_42583_to_fp16 = const()[name = tensor("op_42583_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4299_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4299_cast_fp16, y = var_42583_to_fp16)[name = tensor("aw_chunk_4299_cast_fp16")]; tensor var_42585_to_fp16 = const()[name = tensor("op_42585_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4301_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4301_cast_fp16, y = var_42585_to_fp16)[name = tensor("aw_chunk_4301_cast_fp16")]; tensor var_42587_to_fp16 = const()[name = tensor("op_42587_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4303_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4303_cast_fp16, y = var_42587_to_fp16)[name = tensor("aw_chunk_4303_cast_fp16")]; tensor var_42589_to_fp16 = const()[name = tensor("op_42589_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4305_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4305_cast_fp16, y = var_42589_to_fp16)[name = tensor("aw_chunk_4305_cast_fp16")]; tensor var_42591_to_fp16 = const()[name = tensor("op_42591_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4307_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4307_cast_fp16, y = var_42591_to_fp16)[name = tensor("aw_chunk_4307_cast_fp16")]; tensor var_42593_to_fp16 = const()[name = tensor("op_42593_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4309_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4309_cast_fp16, y = var_42593_to_fp16)[name = tensor("aw_chunk_4309_cast_fp16")]; tensor var_42595_to_fp16 = const()[name = tensor("op_42595_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4311_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4311_cast_fp16, y = var_42595_to_fp16)[name = tensor("aw_chunk_4311_cast_fp16")]; tensor var_42597_to_fp16 = const()[name = tensor("op_42597_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4313_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4313_cast_fp16, y = var_42597_to_fp16)[name = tensor("aw_chunk_4313_cast_fp16")]; tensor var_42599_to_fp16 = const()[name = tensor("op_42599_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4315_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4315_cast_fp16, y = var_42599_to_fp16)[name = tensor("aw_chunk_4315_cast_fp16")]; tensor var_42601_to_fp16 = const()[name = tensor("op_42601_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4317_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4317_cast_fp16, y = var_42601_to_fp16)[name = tensor("aw_chunk_4317_cast_fp16")]; tensor var_42603_to_fp16 = const()[name = tensor("op_42603_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4319_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4319_cast_fp16, y = var_42603_to_fp16)[name = tensor("aw_chunk_4319_cast_fp16")]; tensor var_42605_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4161_cast_fp16)[name = tensor("op_42605_cast_fp16")]; tensor var_42606_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4163_cast_fp16)[name = tensor("op_42606_cast_fp16")]; tensor var_42607_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4165_cast_fp16)[name = tensor("op_42607_cast_fp16")]; tensor var_42608_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4167_cast_fp16)[name = tensor("op_42608_cast_fp16")]; tensor var_42609_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4169_cast_fp16)[name = tensor("op_42609_cast_fp16")]; tensor var_42610_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4171_cast_fp16)[name = tensor("op_42610_cast_fp16")]; tensor var_42611_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4173_cast_fp16)[name = tensor("op_42611_cast_fp16")]; tensor var_42612_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4175_cast_fp16)[name = tensor("op_42612_cast_fp16")]; tensor var_42613_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4177_cast_fp16)[name = tensor("op_42613_cast_fp16")]; tensor var_42614_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4179_cast_fp16)[name = tensor("op_42614_cast_fp16")]; tensor var_42615_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4181_cast_fp16)[name = tensor("op_42615_cast_fp16")]; tensor var_42616_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4183_cast_fp16)[name = tensor("op_42616_cast_fp16")]; tensor var_42617_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4185_cast_fp16)[name = tensor("op_42617_cast_fp16")]; tensor var_42618_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4187_cast_fp16)[name = tensor("op_42618_cast_fp16")]; tensor var_42619_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4189_cast_fp16)[name = tensor("op_42619_cast_fp16")]; tensor var_42620_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4191_cast_fp16)[name = tensor("op_42620_cast_fp16")]; tensor var_42621_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4193_cast_fp16)[name = tensor("op_42621_cast_fp16")]; tensor var_42622_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4195_cast_fp16)[name = tensor("op_42622_cast_fp16")]; tensor var_42623_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4197_cast_fp16)[name = tensor("op_42623_cast_fp16")]; tensor var_42624_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4199_cast_fp16)[name = tensor("op_42624_cast_fp16")]; tensor var_42625_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4201_cast_fp16)[name = tensor("op_42625_cast_fp16")]; tensor var_42626_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4203_cast_fp16)[name = tensor("op_42626_cast_fp16")]; tensor var_42627_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4205_cast_fp16)[name = tensor("op_42627_cast_fp16")]; tensor var_42628_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4207_cast_fp16)[name = tensor("op_42628_cast_fp16")]; tensor var_42629_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4209_cast_fp16)[name = tensor("op_42629_cast_fp16")]; tensor var_42630_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4211_cast_fp16)[name = tensor("op_42630_cast_fp16")]; tensor var_42631_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4213_cast_fp16)[name = tensor("op_42631_cast_fp16")]; tensor var_42632_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4215_cast_fp16)[name = tensor("op_42632_cast_fp16")]; tensor var_42633_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4217_cast_fp16)[name = tensor("op_42633_cast_fp16")]; tensor var_42634_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4219_cast_fp16)[name = tensor("op_42634_cast_fp16")]; tensor var_42635_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4221_cast_fp16)[name = tensor("op_42635_cast_fp16")]; tensor var_42636_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4223_cast_fp16)[name = tensor("op_42636_cast_fp16")]; tensor var_42637_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4225_cast_fp16)[name = tensor("op_42637_cast_fp16")]; tensor var_42638_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4227_cast_fp16)[name = tensor("op_42638_cast_fp16")]; tensor var_42639_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4229_cast_fp16)[name = tensor("op_42639_cast_fp16")]; tensor var_42640_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4231_cast_fp16)[name = tensor("op_42640_cast_fp16")]; tensor var_42641_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4233_cast_fp16)[name = tensor("op_42641_cast_fp16")]; tensor var_42642_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4235_cast_fp16)[name = tensor("op_42642_cast_fp16")]; tensor var_42643_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4237_cast_fp16)[name = tensor("op_42643_cast_fp16")]; tensor var_42644_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4239_cast_fp16)[name = tensor("op_42644_cast_fp16")]; tensor var_42645_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4241_cast_fp16)[name = tensor("op_42645_cast_fp16")]; tensor var_42646_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4243_cast_fp16)[name = tensor("op_42646_cast_fp16")]; tensor var_42647_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4245_cast_fp16)[name = tensor("op_42647_cast_fp16")]; tensor var_42648_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4247_cast_fp16)[name = tensor("op_42648_cast_fp16")]; tensor var_42649_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4249_cast_fp16)[name = tensor("op_42649_cast_fp16")]; tensor var_42650_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4251_cast_fp16)[name = tensor("op_42650_cast_fp16")]; tensor var_42651_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4253_cast_fp16)[name = tensor("op_42651_cast_fp16")]; tensor var_42652_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4255_cast_fp16)[name = tensor("op_42652_cast_fp16")]; tensor var_42653_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4257_cast_fp16)[name = tensor("op_42653_cast_fp16")]; tensor var_42654_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4259_cast_fp16)[name = tensor("op_42654_cast_fp16")]; tensor var_42655_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4261_cast_fp16)[name = tensor("op_42655_cast_fp16")]; tensor var_42656_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4263_cast_fp16)[name = tensor("op_42656_cast_fp16")]; tensor var_42657_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4265_cast_fp16)[name = tensor("op_42657_cast_fp16")]; tensor var_42658_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4267_cast_fp16)[name = tensor("op_42658_cast_fp16")]; tensor var_42659_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4269_cast_fp16)[name = tensor("op_42659_cast_fp16")]; tensor var_42660_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4271_cast_fp16)[name = tensor("op_42660_cast_fp16")]; tensor var_42661_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4273_cast_fp16)[name = tensor("op_42661_cast_fp16")]; tensor var_42662_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4275_cast_fp16)[name = tensor("op_42662_cast_fp16")]; tensor var_42663_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4277_cast_fp16)[name = tensor("op_42663_cast_fp16")]; tensor var_42664_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4279_cast_fp16)[name = tensor("op_42664_cast_fp16")]; tensor var_42665_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4281_cast_fp16)[name = tensor("op_42665_cast_fp16")]; tensor var_42666_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4283_cast_fp16)[name = tensor("op_42666_cast_fp16")]; tensor var_42667_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4285_cast_fp16)[name = tensor("op_42667_cast_fp16")]; tensor var_42668_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4287_cast_fp16)[name = tensor("op_42668_cast_fp16")]; tensor var_42669_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4289_cast_fp16)[name = tensor("op_42669_cast_fp16")]; tensor var_42670_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4291_cast_fp16)[name = tensor("op_42670_cast_fp16")]; tensor var_42671_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4293_cast_fp16)[name = tensor("op_42671_cast_fp16")]; tensor var_42672_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4295_cast_fp16)[name = tensor("op_42672_cast_fp16")]; tensor var_42673_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4297_cast_fp16)[name = tensor("op_42673_cast_fp16")]; tensor var_42674_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4299_cast_fp16)[name = tensor("op_42674_cast_fp16")]; tensor var_42675_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4301_cast_fp16)[name = tensor("op_42675_cast_fp16")]; tensor var_42676_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4303_cast_fp16)[name = tensor("op_42676_cast_fp16")]; tensor var_42677_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4305_cast_fp16)[name = tensor("op_42677_cast_fp16")]; tensor var_42678_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4307_cast_fp16)[name = tensor("op_42678_cast_fp16")]; tensor var_42679_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4309_cast_fp16)[name = tensor("op_42679_cast_fp16")]; tensor var_42680_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4311_cast_fp16)[name = tensor("op_42680_cast_fp16")]; tensor var_42681_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4313_cast_fp16)[name = tensor("op_42681_cast_fp16")]; tensor var_42682_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4315_cast_fp16)[name = tensor("op_42682_cast_fp16")]; tensor var_42683_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4317_cast_fp16)[name = tensor("op_42683_cast_fp16")]; tensor var_42684_cast_fp16 = softmax(axis = var_41403, x = aw_chunk_4319_cast_fp16)[name = tensor("op_42684_cast_fp16")]; tensor var_42686_equation_0 = const()[name = tensor("op_42686_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42686_cast_fp16 = einsum(equation = var_42686_equation_0, values = (var_42206_cast_fp16, var_42605_cast_fp16))[name = tensor("op_42686_cast_fp16")]; tensor var_42688_equation_0 = const()[name = tensor("op_42688_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42688_cast_fp16 = einsum(equation = var_42688_equation_0, values = (var_42206_cast_fp16, var_42606_cast_fp16))[name = tensor("op_42688_cast_fp16")]; tensor var_42690_equation_0 = const()[name = tensor("op_42690_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42690_cast_fp16 = einsum(equation = var_42690_equation_0, values = (var_42206_cast_fp16, var_42607_cast_fp16))[name = tensor("op_42690_cast_fp16")]; tensor var_42692_equation_0 = const()[name = tensor("op_42692_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42692_cast_fp16 = einsum(equation = var_42692_equation_0, values = (var_42206_cast_fp16, var_42608_cast_fp16))[name = tensor("op_42692_cast_fp16")]; tensor var_42694_equation_0 = const()[name = tensor("op_42694_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42694_cast_fp16 = einsum(equation = var_42694_equation_0, values = (var_42210_cast_fp16, var_42609_cast_fp16))[name = tensor("op_42694_cast_fp16")]; tensor var_42696_equation_0 = const()[name = tensor("op_42696_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42696_cast_fp16 = einsum(equation = var_42696_equation_0, values = (var_42210_cast_fp16, var_42610_cast_fp16))[name = tensor("op_42696_cast_fp16")]; tensor var_42698_equation_0 = const()[name = tensor("op_42698_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42698_cast_fp16 = einsum(equation = var_42698_equation_0, values = (var_42210_cast_fp16, var_42611_cast_fp16))[name = tensor("op_42698_cast_fp16")]; tensor var_42700_equation_0 = const()[name = tensor("op_42700_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42700_cast_fp16 = einsum(equation = var_42700_equation_0, values = (var_42210_cast_fp16, var_42612_cast_fp16))[name = tensor("op_42700_cast_fp16")]; tensor var_42702_equation_0 = const()[name = tensor("op_42702_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42702_cast_fp16 = einsum(equation = var_42702_equation_0, values = (var_42214_cast_fp16, var_42613_cast_fp16))[name = tensor("op_42702_cast_fp16")]; tensor var_42704_equation_0 = const()[name = tensor("op_42704_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42704_cast_fp16 = einsum(equation = var_42704_equation_0, values = (var_42214_cast_fp16, var_42614_cast_fp16))[name = tensor("op_42704_cast_fp16")]; tensor var_42706_equation_0 = const()[name = tensor("op_42706_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42706_cast_fp16 = einsum(equation = var_42706_equation_0, values = (var_42214_cast_fp16, var_42615_cast_fp16))[name = tensor("op_42706_cast_fp16")]; tensor var_42708_equation_0 = const()[name = tensor("op_42708_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42708_cast_fp16 = einsum(equation = var_42708_equation_0, values = (var_42214_cast_fp16, var_42616_cast_fp16))[name = tensor("op_42708_cast_fp16")]; tensor var_42710_equation_0 = const()[name = tensor("op_42710_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42710_cast_fp16 = einsum(equation = var_42710_equation_0, values = (var_42218_cast_fp16, var_42617_cast_fp16))[name = tensor("op_42710_cast_fp16")]; tensor var_42712_equation_0 = const()[name = tensor("op_42712_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42712_cast_fp16 = einsum(equation = var_42712_equation_0, values = (var_42218_cast_fp16, var_42618_cast_fp16))[name = tensor("op_42712_cast_fp16")]; tensor var_42714_equation_0 = const()[name = tensor("op_42714_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42714_cast_fp16 = einsum(equation = var_42714_equation_0, values = (var_42218_cast_fp16, var_42619_cast_fp16))[name = tensor("op_42714_cast_fp16")]; tensor var_42716_equation_0 = const()[name = tensor("op_42716_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42716_cast_fp16 = einsum(equation = var_42716_equation_0, values = (var_42218_cast_fp16, var_42620_cast_fp16))[name = tensor("op_42716_cast_fp16")]; tensor var_42718_equation_0 = const()[name = tensor("op_42718_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42718_cast_fp16 = einsum(equation = var_42718_equation_0, values = (var_42222_cast_fp16, var_42621_cast_fp16))[name = tensor("op_42718_cast_fp16")]; tensor var_42720_equation_0 = const()[name = tensor("op_42720_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42720_cast_fp16 = einsum(equation = var_42720_equation_0, values = (var_42222_cast_fp16, var_42622_cast_fp16))[name = tensor("op_42720_cast_fp16")]; tensor var_42722_equation_0 = const()[name = tensor("op_42722_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42722_cast_fp16 = einsum(equation = var_42722_equation_0, values = (var_42222_cast_fp16, var_42623_cast_fp16))[name = tensor("op_42722_cast_fp16")]; tensor var_42724_equation_0 = const()[name = tensor("op_42724_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42724_cast_fp16 = einsum(equation = var_42724_equation_0, values = (var_42222_cast_fp16, var_42624_cast_fp16))[name = tensor("op_42724_cast_fp16")]; tensor var_42726_equation_0 = const()[name = tensor("op_42726_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42726_cast_fp16 = einsum(equation = var_42726_equation_0, values = (var_42226_cast_fp16, var_42625_cast_fp16))[name = tensor("op_42726_cast_fp16")]; tensor var_42728_equation_0 = const()[name = tensor("op_42728_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42728_cast_fp16 = einsum(equation = var_42728_equation_0, values = (var_42226_cast_fp16, var_42626_cast_fp16))[name = tensor("op_42728_cast_fp16")]; tensor var_42730_equation_0 = const()[name = tensor("op_42730_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42730_cast_fp16 = einsum(equation = var_42730_equation_0, values = (var_42226_cast_fp16, var_42627_cast_fp16))[name = tensor("op_42730_cast_fp16")]; tensor var_42732_equation_0 = const()[name = tensor("op_42732_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42732_cast_fp16 = einsum(equation = var_42732_equation_0, values = (var_42226_cast_fp16, var_42628_cast_fp16))[name = tensor("op_42732_cast_fp16")]; tensor var_42734_equation_0 = const()[name = tensor("op_42734_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42734_cast_fp16 = einsum(equation = var_42734_equation_0, values = (var_42230_cast_fp16, var_42629_cast_fp16))[name = tensor("op_42734_cast_fp16")]; tensor var_42736_equation_0 = const()[name = tensor("op_42736_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42736_cast_fp16 = einsum(equation = var_42736_equation_0, values = (var_42230_cast_fp16, var_42630_cast_fp16))[name = tensor("op_42736_cast_fp16")]; tensor var_42738_equation_0 = const()[name = tensor("op_42738_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42738_cast_fp16 = einsum(equation = var_42738_equation_0, values = (var_42230_cast_fp16, var_42631_cast_fp16))[name = tensor("op_42738_cast_fp16")]; tensor var_42740_equation_0 = const()[name = tensor("op_42740_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42740_cast_fp16 = einsum(equation = var_42740_equation_0, values = (var_42230_cast_fp16, var_42632_cast_fp16))[name = tensor("op_42740_cast_fp16")]; tensor var_42742_equation_0 = const()[name = tensor("op_42742_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42742_cast_fp16 = einsum(equation = var_42742_equation_0, values = (var_42234_cast_fp16, var_42633_cast_fp16))[name = tensor("op_42742_cast_fp16")]; tensor var_42744_equation_0 = const()[name = tensor("op_42744_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42744_cast_fp16 = einsum(equation = var_42744_equation_0, values = (var_42234_cast_fp16, var_42634_cast_fp16))[name = tensor("op_42744_cast_fp16")]; tensor var_42746_equation_0 = const()[name = tensor("op_42746_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42746_cast_fp16 = einsum(equation = var_42746_equation_0, values = (var_42234_cast_fp16, var_42635_cast_fp16))[name = tensor("op_42746_cast_fp16")]; tensor var_42748_equation_0 = const()[name = tensor("op_42748_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42748_cast_fp16 = einsum(equation = var_42748_equation_0, values = (var_42234_cast_fp16, var_42636_cast_fp16))[name = tensor("op_42748_cast_fp16")]; tensor var_42750_equation_0 = const()[name = tensor("op_42750_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42750_cast_fp16 = einsum(equation = var_42750_equation_0, values = (var_42238_cast_fp16, var_42637_cast_fp16))[name = tensor("op_42750_cast_fp16")]; tensor var_42752_equation_0 = const()[name = tensor("op_42752_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42752_cast_fp16 = einsum(equation = var_42752_equation_0, values = (var_42238_cast_fp16, var_42638_cast_fp16))[name = tensor("op_42752_cast_fp16")]; tensor var_42754_equation_0 = const()[name = tensor("op_42754_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42754_cast_fp16 = einsum(equation = var_42754_equation_0, values = (var_42238_cast_fp16, var_42639_cast_fp16))[name = tensor("op_42754_cast_fp16")]; tensor var_42756_equation_0 = const()[name = tensor("op_42756_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42756_cast_fp16 = einsum(equation = var_42756_equation_0, values = (var_42238_cast_fp16, var_42640_cast_fp16))[name = tensor("op_42756_cast_fp16")]; tensor var_42758_equation_0 = const()[name = tensor("op_42758_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42758_cast_fp16 = einsum(equation = var_42758_equation_0, values = (var_42242_cast_fp16, var_42641_cast_fp16))[name = tensor("op_42758_cast_fp16")]; tensor var_42760_equation_0 = const()[name = tensor("op_42760_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42760_cast_fp16 = einsum(equation = var_42760_equation_0, values = (var_42242_cast_fp16, var_42642_cast_fp16))[name = tensor("op_42760_cast_fp16")]; tensor var_42762_equation_0 = const()[name = tensor("op_42762_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42762_cast_fp16 = einsum(equation = var_42762_equation_0, values = (var_42242_cast_fp16, var_42643_cast_fp16))[name = tensor("op_42762_cast_fp16")]; tensor var_42764_equation_0 = const()[name = tensor("op_42764_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42764_cast_fp16 = einsum(equation = var_42764_equation_0, values = (var_42242_cast_fp16, var_42644_cast_fp16))[name = tensor("op_42764_cast_fp16")]; tensor var_42766_equation_0 = const()[name = tensor("op_42766_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42766_cast_fp16 = einsum(equation = var_42766_equation_0, values = (var_42246_cast_fp16, var_42645_cast_fp16))[name = tensor("op_42766_cast_fp16")]; tensor var_42768_equation_0 = const()[name = tensor("op_42768_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42768_cast_fp16 = einsum(equation = var_42768_equation_0, values = (var_42246_cast_fp16, var_42646_cast_fp16))[name = tensor("op_42768_cast_fp16")]; tensor var_42770_equation_0 = const()[name = tensor("op_42770_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42770_cast_fp16 = einsum(equation = var_42770_equation_0, values = (var_42246_cast_fp16, var_42647_cast_fp16))[name = tensor("op_42770_cast_fp16")]; tensor var_42772_equation_0 = const()[name = tensor("op_42772_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42772_cast_fp16 = einsum(equation = var_42772_equation_0, values = (var_42246_cast_fp16, var_42648_cast_fp16))[name = tensor("op_42772_cast_fp16")]; tensor var_42774_equation_0 = const()[name = tensor("op_42774_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42774_cast_fp16 = einsum(equation = var_42774_equation_0, values = (var_42250_cast_fp16, var_42649_cast_fp16))[name = tensor("op_42774_cast_fp16")]; tensor var_42776_equation_0 = const()[name = tensor("op_42776_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42776_cast_fp16 = einsum(equation = var_42776_equation_0, values = (var_42250_cast_fp16, var_42650_cast_fp16))[name = tensor("op_42776_cast_fp16")]; tensor var_42778_equation_0 = const()[name = tensor("op_42778_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42778_cast_fp16 = einsum(equation = var_42778_equation_0, values = (var_42250_cast_fp16, var_42651_cast_fp16))[name = tensor("op_42778_cast_fp16")]; tensor var_42780_equation_0 = const()[name = tensor("op_42780_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42780_cast_fp16 = einsum(equation = var_42780_equation_0, values = (var_42250_cast_fp16, var_42652_cast_fp16))[name = tensor("op_42780_cast_fp16")]; tensor var_42782_equation_0 = const()[name = tensor("op_42782_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42782_cast_fp16 = einsum(equation = var_42782_equation_0, values = (var_42254_cast_fp16, var_42653_cast_fp16))[name = tensor("op_42782_cast_fp16")]; tensor var_42784_equation_0 = const()[name = tensor("op_42784_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42784_cast_fp16 = einsum(equation = var_42784_equation_0, values = (var_42254_cast_fp16, var_42654_cast_fp16))[name = tensor("op_42784_cast_fp16")]; tensor var_42786_equation_0 = const()[name = tensor("op_42786_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42786_cast_fp16 = einsum(equation = var_42786_equation_0, values = (var_42254_cast_fp16, var_42655_cast_fp16))[name = tensor("op_42786_cast_fp16")]; tensor var_42788_equation_0 = const()[name = tensor("op_42788_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42788_cast_fp16 = einsum(equation = var_42788_equation_0, values = (var_42254_cast_fp16, var_42656_cast_fp16))[name = tensor("op_42788_cast_fp16")]; tensor var_42790_equation_0 = const()[name = tensor("op_42790_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42790_cast_fp16 = einsum(equation = var_42790_equation_0, values = (var_42258_cast_fp16, var_42657_cast_fp16))[name = tensor("op_42790_cast_fp16")]; tensor var_42792_equation_0 = const()[name = tensor("op_42792_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42792_cast_fp16 = einsum(equation = var_42792_equation_0, values = (var_42258_cast_fp16, var_42658_cast_fp16))[name = tensor("op_42792_cast_fp16")]; tensor var_42794_equation_0 = const()[name = tensor("op_42794_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42794_cast_fp16 = einsum(equation = var_42794_equation_0, values = (var_42258_cast_fp16, var_42659_cast_fp16))[name = tensor("op_42794_cast_fp16")]; tensor var_42796_equation_0 = const()[name = tensor("op_42796_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42796_cast_fp16 = einsum(equation = var_42796_equation_0, values = (var_42258_cast_fp16, var_42660_cast_fp16))[name = tensor("op_42796_cast_fp16")]; tensor var_42798_equation_0 = const()[name = tensor("op_42798_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42798_cast_fp16 = einsum(equation = var_42798_equation_0, values = (var_42262_cast_fp16, var_42661_cast_fp16))[name = tensor("op_42798_cast_fp16")]; tensor var_42800_equation_0 = const()[name = tensor("op_42800_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42800_cast_fp16 = einsum(equation = var_42800_equation_0, values = (var_42262_cast_fp16, var_42662_cast_fp16))[name = tensor("op_42800_cast_fp16")]; tensor var_42802_equation_0 = const()[name = tensor("op_42802_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42802_cast_fp16 = einsum(equation = var_42802_equation_0, values = (var_42262_cast_fp16, var_42663_cast_fp16))[name = tensor("op_42802_cast_fp16")]; tensor var_42804_equation_0 = const()[name = tensor("op_42804_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42804_cast_fp16 = einsum(equation = var_42804_equation_0, values = (var_42262_cast_fp16, var_42664_cast_fp16))[name = tensor("op_42804_cast_fp16")]; tensor var_42806_equation_0 = const()[name = tensor("op_42806_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42806_cast_fp16 = einsum(equation = var_42806_equation_0, values = (var_42266_cast_fp16, var_42665_cast_fp16))[name = tensor("op_42806_cast_fp16")]; tensor var_42808_equation_0 = const()[name = tensor("op_42808_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42808_cast_fp16 = einsum(equation = var_42808_equation_0, values = (var_42266_cast_fp16, var_42666_cast_fp16))[name = tensor("op_42808_cast_fp16")]; tensor var_42810_equation_0 = const()[name = tensor("op_42810_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42810_cast_fp16 = einsum(equation = var_42810_equation_0, values = (var_42266_cast_fp16, var_42667_cast_fp16))[name = tensor("op_42810_cast_fp16")]; tensor var_42812_equation_0 = const()[name = tensor("op_42812_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42812_cast_fp16 = einsum(equation = var_42812_equation_0, values = (var_42266_cast_fp16, var_42668_cast_fp16))[name = tensor("op_42812_cast_fp16")]; tensor var_42814_equation_0 = const()[name = tensor("op_42814_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42814_cast_fp16 = einsum(equation = var_42814_equation_0, values = (var_42270_cast_fp16, var_42669_cast_fp16))[name = tensor("op_42814_cast_fp16")]; tensor var_42816_equation_0 = const()[name = tensor("op_42816_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42816_cast_fp16 = einsum(equation = var_42816_equation_0, values = (var_42270_cast_fp16, var_42670_cast_fp16))[name = tensor("op_42816_cast_fp16")]; tensor var_42818_equation_0 = const()[name = tensor("op_42818_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42818_cast_fp16 = einsum(equation = var_42818_equation_0, values = (var_42270_cast_fp16, var_42671_cast_fp16))[name = tensor("op_42818_cast_fp16")]; tensor var_42820_equation_0 = const()[name = tensor("op_42820_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42820_cast_fp16 = einsum(equation = var_42820_equation_0, values = (var_42270_cast_fp16, var_42672_cast_fp16))[name = tensor("op_42820_cast_fp16")]; tensor var_42822_equation_0 = const()[name = tensor("op_42822_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42822_cast_fp16 = einsum(equation = var_42822_equation_0, values = (var_42274_cast_fp16, var_42673_cast_fp16))[name = tensor("op_42822_cast_fp16")]; tensor var_42824_equation_0 = const()[name = tensor("op_42824_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42824_cast_fp16 = einsum(equation = var_42824_equation_0, values = (var_42274_cast_fp16, var_42674_cast_fp16))[name = tensor("op_42824_cast_fp16")]; tensor var_42826_equation_0 = const()[name = tensor("op_42826_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42826_cast_fp16 = einsum(equation = var_42826_equation_0, values = (var_42274_cast_fp16, var_42675_cast_fp16))[name = tensor("op_42826_cast_fp16")]; tensor var_42828_equation_0 = const()[name = tensor("op_42828_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42828_cast_fp16 = einsum(equation = var_42828_equation_0, values = (var_42274_cast_fp16, var_42676_cast_fp16))[name = tensor("op_42828_cast_fp16")]; tensor var_42830_equation_0 = const()[name = tensor("op_42830_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42830_cast_fp16 = einsum(equation = var_42830_equation_0, values = (var_42278_cast_fp16, var_42677_cast_fp16))[name = tensor("op_42830_cast_fp16")]; tensor var_42832_equation_0 = const()[name = tensor("op_42832_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42832_cast_fp16 = einsum(equation = var_42832_equation_0, values = (var_42278_cast_fp16, var_42678_cast_fp16))[name = tensor("op_42832_cast_fp16")]; tensor var_42834_equation_0 = const()[name = tensor("op_42834_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42834_cast_fp16 = einsum(equation = var_42834_equation_0, values = (var_42278_cast_fp16, var_42679_cast_fp16))[name = tensor("op_42834_cast_fp16")]; tensor var_42836_equation_0 = const()[name = tensor("op_42836_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42836_cast_fp16 = einsum(equation = var_42836_equation_0, values = (var_42278_cast_fp16, var_42680_cast_fp16))[name = tensor("op_42836_cast_fp16")]; tensor var_42838_equation_0 = const()[name = tensor("op_42838_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42838_cast_fp16 = einsum(equation = var_42838_equation_0, values = (var_42282_cast_fp16, var_42681_cast_fp16))[name = tensor("op_42838_cast_fp16")]; tensor var_42840_equation_0 = const()[name = tensor("op_42840_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42840_cast_fp16 = einsum(equation = var_42840_equation_0, values = (var_42282_cast_fp16, var_42682_cast_fp16))[name = tensor("op_42840_cast_fp16")]; tensor var_42842_equation_0 = const()[name = tensor("op_42842_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42842_cast_fp16 = einsum(equation = var_42842_equation_0, values = (var_42282_cast_fp16, var_42683_cast_fp16))[name = tensor("op_42842_cast_fp16")]; tensor var_42844_equation_0 = const()[name = tensor("op_42844_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_42844_cast_fp16 = einsum(equation = var_42844_equation_0, values = (var_42282_cast_fp16, var_42684_cast_fp16))[name = tensor("op_42844_cast_fp16")]; tensor var_42846_interleave_0 = const()[name = tensor("op_42846_interleave_0"), val = tensor(false)]; tensor var_42846_cast_fp16 = concat(axis = var_41378, interleave = var_42846_interleave_0, values = (var_42686_cast_fp16, var_42688_cast_fp16, var_42690_cast_fp16, var_42692_cast_fp16))[name = tensor("op_42846_cast_fp16")]; tensor var_42848_interleave_0 = const()[name = tensor("op_42848_interleave_0"), val = tensor(false)]; tensor var_42848_cast_fp16 = concat(axis = var_41378, interleave = var_42848_interleave_0, values = (var_42694_cast_fp16, var_42696_cast_fp16, var_42698_cast_fp16, var_42700_cast_fp16))[name = tensor("op_42848_cast_fp16")]; tensor var_42850_interleave_0 = const()[name = tensor("op_42850_interleave_0"), val = tensor(false)]; tensor var_42850_cast_fp16 = concat(axis = var_41378, interleave = var_42850_interleave_0, values = (var_42702_cast_fp16, var_42704_cast_fp16, var_42706_cast_fp16, var_42708_cast_fp16))[name = tensor("op_42850_cast_fp16")]; tensor var_42852_interleave_0 = const()[name = tensor("op_42852_interleave_0"), val = tensor(false)]; tensor var_42852_cast_fp16 = concat(axis = var_41378, interleave = var_42852_interleave_0, values = (var_42710_cast_fp16, var_42712_cast_fp16, var_42714_cast_fp16, var_42716_cast_fp16))[name = tensor("op_42852_cast_fp16")]; tensor var_42854_interleave_0 = const()[name = tensor("op_42854_interleave_0"), val = tensor(false)]; tensor var_42854_cast_fp16 = concat(axis = var_41378, interleave = var_42854_interleave_0, values = (var_42718_cast_fp16, var_42720_cast_fp16, var_42722_cast_fp16, var_42724_cast_fp16))[name = tensor("op_42854_cast_fp16")]; tensor var_42856_interleave_0 = const()[name = tensor("op_42856_interleave_0"), val = tensor(false)]; tensor var_42856_cast_fp16 = concat(axis = var_41378, interleave = var_42856_interleave_0, values = (var_42726_cast_fp16, var_42728_cast_fp16, var_42730_cast_fp16, var_42732_cast_fp16))[name = tensor("op_42856_cast_fp16")]; tensor var_42858_interleave_0 = const()[name = tensor("op_42858_interleave_0"), val = tensor(false)]; tensor var_42858_cast_fp16 = concat(axis = var_41378, interleave = var_42858_interleave_0, values = (var_42734_cast_fp16, var_42736_cast_fp16, var_42738_cast_fp16, var_42740_cast_fp16))[name = tensor("op_42858_cast_fp16")]; tensor var_42860_interleave_0 = const()[name = tensor("op_42860_interleave_0"), val = tensor(false)]; tensor var_42860_cast_fp16 = concat(axis = var_41378, interleave = var_42860_interleave_0, values = (var_42742_cast_fp16, var_42744_cast_fp16, var_42746_cast_fp16, var_42748_cast_fp16))[name = tensor("op_42860_cast_fp16")]; tensor var_42862_interleave_0 = const()[name = tensor("op_42862_interleave_0"), val = tensor(false)]; tensor var_42862_cast_fp16 = concat(axis = var_41378, interleave = var_42862_interleave_0, values = (var_42750_cast_fp16, var_42752_cast_fp16, var_42754_cast_fp16, var_42756_cast_fp16))[name = tensor("op_42862_cast_fp16")]; tensor var_42864_interleave_0 = const()[name = tensor("op_42864_interleave_0"), val = tensor(false)]; tensor var_42864_cast_fp16 = concat(axis = var_41378, interleave = var_42864_interleave_0, values = (var_42758_cast_fp16, var_42760_cast_fp16, var_42762_cast_fp16, var_42764_cast_fp16))[name = tensor("op_42864_cast_fp16")]; tensor var_42866_interleave_0 = const()[name = tensor("op_42866_interleave_0"), val = tensor(false)]; tensor var_42866_cast_fp16 = concat(axis = var_41378, interleave = var_42866_interleave_0, values = (var_42766_cast_fp16, var_42768_cast_fp16, var_42770_cast_fp16, var_42772_cast_fp16))[name = tensor("op_42866_cast_fp16")]; tensor var_42868_interleave_0 = const()[name = tensor("op_42868_interleave_0"), val = tensor(false)]; tensor var_42868_cast_fp16 = concat(axis = var_41378, interleave = var_42868_interleave_0, values = (var_42774_cast_fp16, var_42776_cast_fp16, var_42778_cast_fp16, var_42780_cast_fp16))[name = tensor("op_42868_cast_fp16")]; tensor var_42870_interleave_0 = const()[name = tensor("op_42870_interleave_0"), val = tensor(false)]; tensor var_42870_cast_fp16 = concat(axis = var_41378, interleave = var_42870_interleave_0, values = (var_42782_cast_fp16, var_42784_cast_fp16, var_42786_cast_fp16, var_42788_cast_fp16))[name = tensor("op_42870_cast_fp16")]; tensor var_42872_interleave_0 = const()[name = tensor("op_42872_interleave_0"), val = tensor(false)]; tensor var_42872_cast_fp16 = concat(axis = var_41378, interleave = var_42872_interleave_0, values = (var_42790_cast_fp16, var_42792_cast_fp16, var_42794_cast_fp16, var_42796_cast_fp16))[name = tensor("op_42872_cast_fp16")]; tensor var_42874_interleave_0 = const()[name = tensor("op_42874_interleave_0"), val = tensor(false)]; tensor var_42874_cast_fp16 = concat(axis = var_41378, interleave = var_42874_interleave_0, values = (var_42798_cast_fp16, var_42800_cast_fp16, var_42802_cast_fp16, var_42804_cast_fp16))[name = tensor("op_42874_cast_fp16")]; tensor var_42876_interleave_0 = const()[name = tensor("op_42876_interleave_0"), val = tensor(false)]; tensor var_42876_cast_fp16 = concat(axis = var_41378, interleave = var_42876_interleave_0, values = (var_42806_cast_fp16, var_42808_cast_fp16, var_42810_cast_fp16, var_42812_cast_fp16))[name = tensor("op_42876_cast_fp16")]; tensor var_42878_interleave_0 = const()[name = tensor("op_42878_interleave_0"), val = tensor(false)]; tensor var_42878_cast_fp16 = concat(axis = var_41378, interleave = var_42878_interleave_0, values = (var_42814_cast_fp16, var_42816_cast_fp16, var_42818_cast_fp16, var_42820_cast_fp16))[name = tensor("op_42878_cast_fp16")]; tensor var_42880_interleave_0 = const()[name = tensor("op_42880_interleave_0"), val = tensor(false)]; tensor var_42880_cast_fp16 = concat(axis = var_41378, interleave = var_42880_interleave_0, values = (var_42822_cast_fp16, var_42824_cast_fp16, var_42826_cast_fp16, var_42828_cast_fp16))[name = tensor("op_42880_cast_fp16")]; tensor var_42882_interleave_0 = const()[name = tensor("op_42882_interleave_0"), val = tensor(false)]; tensor var_42882_cast_fp16 = concat(axis = var_41378, interleave = var_42882_interleave_0, values = (var_42830_cast_fp16, var_42832_cast_fp16, var_42834_cast_fp16, var_42836_cast_fp16))[name = tensor("op_42882_cast_fp16")]; tensor var_42884_interleave_0 = const()[name = tensor("op_42884_interleave_0"), val = tensor(false)]; tensor var_42884_cast_fp16 = concat(axis = var_41378, interleave = var_42884_interleave_0, values = (var_42838_cast_fp16, var_42840_cast_fp16, var_42842_cast_fp16, var_42844_cast_fp16))[name = tensor("op_42884_cast_fp16")]; tensor input_209_interleave_0 = const()[name = tensor("input_209_interleave_0"), val = tensor(false)]; tensor input_209_cast_fp16 = concat(axis = var_41403, interleave = input_209_interleave_0, values = (var_42846_cast_fp16, var_42848_cast_fp16, var_42850_cast_fp16, var_42852_cast_fp16, var_42854_cast_fp16, var_42856_cast_fp16, var_42858_cast_fp16, var_42860_cast_fp16, var_42862_cast_fp16, var_42864_cast_fp16, var_42866_cast_fp16, var_42868_cast_fp16, var_42870_cast_fp16, var_42872_cast_fp16, var_42874_cast_fp16, var_42876_cast_fp16, var_42878_cast_fp16, var_42880_cast_fp16, var_42882_cast_fp16, var_42884_cast_fp16))[name = tensor("input_209_cast_fp16")]; tensor var_42895_pad_type_0 = const()[name = tensor("op_42895_pad_type_0"), val = tensor("valid")]; tensor var_42895_strides_0 = const()[name = tensor("op_42895_strides_0"), val = tensor([1, 1])]; tensor var_42895_pad_0 = const()[name = tensor("op_42895_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_42895_dilations_0 = const()[name = tensor("op_42895_dilations_0"), val = tensor([1, 1])]; tensor var_42895_groups_0 = const()[name = tensor("op_42895_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350056128))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350875392))), name = tensor("layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_26_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350875520)))]; tensor var_42895_cast_fp16 = conv(bias = layers_26_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_42895_dilations_0, groups = var_42895_groups_0, pad = var_42895_pad_0, pad_type = var_42895_pad_type_0, strides = var_42895_strides_0, weight = layers_26_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_209_cast_fp16)[name = tensor("op_42895_cast_fp16")]; tensor var_42901_pad_type_0 = const()[name = tensor("op_42901_pad_type_0"), val = tensor("valid")]; tensor var_42901_strides_0 = const()[name = tensor("op_42901_strides_0"), val = tensor([1, 1])]; tensor var_42901_pad_0 = const()[name = tensor("op_42901_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_42901_dilations_0 = const()[name = tensor("op_42901_dilations_0"), val = tensor([1, 1])]; tensor var_42901_groups_0 = const()[name = tensor("op_42901_groups_0"), val = tensor(1)]; tensor layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350889984))), name = tensor("layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(350878144))), shape = tensor([1280, 1280, 1, 1])]; tensor var_42901_cast_fp16 = conv(dilations = var_42901_dilations_0, groups = var_42901_groups_0, pad = var_42901_pad_0, pad_type = var_42901_pad_type_0, strides = var_42901_strides_0, weight = layers_26_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_209_cast_fp16)[name = tensor("op_42901_cast_fp16")]; tensor obj_107_cast_fp16 = add(x = var_42895_cast_fp16, y = var_42901_cast_fp16)[name = tensor("obj_107_cast_fp16")]; tensor inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = obj_107_cast_fp16)[name = tensor("inputs_107_cast_fp16")]; tensor out_107_axes_0 = const()[name = tensor("out_107_axes_0"), val = tensor([1])]; tensor var_42912_to_fp16 = const()[name = tensor("op_42912_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_107_cast_fp16 = layer_norm(axes = out_107_axes_0, epsilon = var_42912_to_fp16, x = inputs_107_cast_fp16)[name = tensor("out_107_cast_fp16")]; tensor input_211_gamma_0_to_fp16 = const()[name = tensor("input_211_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351094848)))]; tensor input_211_beta_0_to_fp16 = const()[name = tensor("input_211_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351097472)))]; tensor input_211_epsilon_0_to_fp16 = const()[name = tensor("input_211_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_211_cast_fp16 = batch_norm(beta = input_211_beta_0_to_fp16, epsilon = input_211_epsilon_0_to_fp16, gamma = input_211_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_107_cast_fp16)[name = tensor("input_211_cast_fp16")]; tensor var_42930_pad_type_0 = const()[name = tensor("op_42930_pad_type_0"), val = tensor("valid")]; tensor var_42930_strides_0 = const()[name = tensor("op_42930_strides_0"), val = tensor([1, 1])]; tensor var_42930_pad_0 = const()[name = tensor("op_42930_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_42930_dilations_0 = const()[name = tensor("op_42930_dilations_0"), val = tensor([1, 1])]; tensor var_42930_groups_0 = const()[name = tensor("op_42930_groups_0"), val = tensor(1)]; tensor layers_26_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(351100096))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354376960))), name = tensor("layers_26_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_26_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354377088)))]; tensor var_42930_cast_fp16 = conv(bias = layers_26_fc1_inlier_module_bias_to_fp16, dilations = var_42930_dilations_0, groups = var_42930_groups_0, pad = var_42930_pad_0, pad_type = var_42930_pad_type_0, strides = var_42930_strides_0, weight = layers_26_fc1_inlier_module_weight_to_fp16_palettized, x = input_211_cast_fp16)[name = tensor("op_42930_cast_fp16")]; tensor var_42936_pad_type_0 = const()[name = tensor("op_42936_pad_type_0"), val = tensor("valid")]; tensor var_42936_strides_0 = const()[name = tensor("op_42936_strides_0"), val = tensor([1, 1])]; tensor var_42936_pad_0 = const()[name = tensor("op_42936_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_42936_dilations_0 = const()[name = tensor("op_42936_dilations_0"), val = tensor([1, 1])]; tensor var_42936_groups_0 = const()[name = tensor("op_42936_groups_0"), val = tensor(1)]; tensor layers_26_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354440128))), name = tensor("layers_26_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(354387392))), shape = tensor([5120, 1280, 1, 1])]; tensor var_42936_cast_fp16 = conv(dilations = var_42936_dilations_0, groups = var_42936_groups_0, pad = var_42936_pad_0, pad_type = var_42936_pad_type_0, strides = var_42936_strides_0, weight = layers_26_fc1_outlier_module_weight_to_fp16_sparsified, x = input_211_cast_fp16)[name = tensor("op_42936_cast_fp16")]; tensor input_213_cast_fp16 = add(x = var_42930_cast_fp16, y = var_42936_cast_fp16)[name = tensor("input_213_cast_fp16")]; tensor input_215_mode_0 = const()[name = tensor("input_215_mode_0"), val = tensor("EXACT")]; tensor input_215_cast_fp16 = gelu(mode = input_215_mode_0, x = input_213_cast_fp16)[name = tensor("input_215_cast_fp16")]; tensor var_42947_pad_type_0 = const()[name = tensor("op_42947_pad_type_0"), val = tensor("valid")]; tensor var_42947_strides_0 = const()[name = tensor("op_42947_strides_0"), val = tensor([1, 1])]; tensor var_42947_pad_0 = const()[name = tensor("op_42947_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_42947_dilations_0 = const()[name = tensor("op_42947_dilations_0"), val = tensor([1, 1])]; tensor var_42947_groups_0 = const()[name = tensor("op_42947_groups_0"), val = tensor(1)]; tensor layers_26_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355259392))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358536256))), name = tensor("layers_26_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_26_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_26_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358536384)))]; tensor var_42947_cast_fp16 = conv(bias = layers_26_fc2_inlier_module_bias_to_fp16, dilations = var_42947_dilations_0, groups = var_42947_groups_0, pad = var_42947_pad_0, pad_type = var_42947_pad_type_0, strides = var_42947_strides_0, weight = layers_26_fc2_inlier_module_weight_to_fp16_palettized, x = input_215_cast_fp16)[name = tensor("op_42947_cast_fp16")]; tensor var_42953_pad_type_0 = const()[name = tensor("op_42953_pad_type_0"), val = tensor("valid")]; tensor var_42953_strides_0 = const()[name = tensor("op_42953_strides_0"), val = tensor([1, 1])]; tensor var_42953_pad_0 = const()[name = tensor("op_42953_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_42953_dilations_0 = const()[name = tensor("op_42953_dilations_0"), val = tensor([1, 1])]; tensor var_42953_groups_0 = const()[name = tensor("op_42953_groups_0"), val = tensor(1)]; tensor layers_26_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358588352))), name = tensor("layers_26_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(358539008))), shape = tensor([1280, 5120, 1, 1])]; tensor var_42953_cast_fp16 = conv(dilations = var_42953_dilations_0, groups = var_42953_groups_0, pad = var_42953_pad_0, pad_type = var_42953_pad_type_0, strides = var_42953_strides_0, weight = layers_26_fc2_outlier_module_weight_to_fp16_sparsified, x = input_215_cast_fp16)[name = tensor("op_42953_cast_fp16")]; tensor hidden_states_57_cast_fp16 = add(x = var_42947_cast_fp16, y = var_42953_cast_fp16)[name = tensor("hidden_states_57_cast_fp16")]; tensor inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = hidden_states_57_cast_fp16)[name = tensor("inputs_109_cast_fp16")]; tensor var_42959 = const()[name = tensor("op_42959"), val = tensor(3)]; tensor var_42984 = const()[name = tensor("op_42984"), val = tensor(1)]; tensor out_109_axes_0 = const()[name = tensor("out_109_axes_0"), val = tensor([1])]; tensor var_43001_to_fp16 = const()[name = tensor("op_43001_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_109_cast_fp16 = layer_norm(axes = out_109_axes_0, epsilon = var_43001_to_fp16, x = inputs_109_cast_fp16)[name = tensor("out_109_cast_fp16")]; tensor obj_109_gamma_0_to_fp16 = const()[name = tensor("obj_109_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359407616)))]; tensor obj_109_beta_0_to_fp16 = const()[name = tensor("obj_109_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359410240)))]; tensor obj_109_epsilon_0_to_fp16 = const()[name = tensor("obj_109_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_109_cast_fp16 = batch_norm(beta = obj_109_beta_0_to_fp16, epsilon = obj_109_epsilon_0_to_fp16, gamma = obj_109_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_109_cast_fp16)[name = tensor("obj_109_cast_fp16")]; tensor var_43023_pad_type_0 = const()[name = tensor("op_43023_pad_type_0"), val = tensor("valid")]; tensor var_43023_strides_0 = const()[name = tensor("op_43023_strides_0"), val = tensor([1, 1])]; tensor var_43023_pad_0 = const()[name = tensor("op_43023_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_43023_dilations_0 = const()[name = tensor("op_43023_dilations_0"), val = tensor([1, 1])]; tensor var_43023_groups_0 = const()[name = tensor("op_43023_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(359412864))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360232128))), name = tensor("layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_27_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360232256)))]; tensor var_43023_cast_fp16 = conv(bias = layers_27_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_43023_dilations_0, groups = var_43023_groups_0, pad = var_43023_pad_0, pad_type = var_43023_pad_type_0, strides = var_43023_strides_0, weight = layers_27_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = tensor("op_43023_cast_fp16")]; tensor var_43029_pad_type_0 = const()[name = tensor("op_43029_pad_type_0"), val = tensor("valid")]; tensor var_43029_strides_0 = const()[name = tensor("op_43029_strides_0"), val = tensor([1, 1])]; tensor var_43029_pad_0 = const()[name = tensor("op_43029_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_43029_dilations_0 = const()[name = tensor("op_43029_dilations_0"), val = tensor([1, 1])]; tensor var_43029_groups_0 = const()[name = tensor("op_43029_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360271616))), name = tensor("layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360234880))), shape = tensor([1280, 1280, 1, 1])]; tensor var_43029_cast_fp16 = conv(dilations = var_43029_dilations_0, groups = var_43029_groups_0, pad = var_43029_pad_0, pad_type = var_43029_pad_type_0, strides = var_43029_strides_0, weight = layers_27_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = tensor("op_43029_cast_fp16")]; tensor query_55_cast_fp16 = add(x = var_43023_cast_fp16, y = var_43029_cast_fp16)[name = tensor("query_55_cast_fp16")]; tensor var_43038_pad_type_0 = const()[name = tensor("op_43038_pad_type_0"), val = tensor("valid")]; tensor var_43038_strides_0 = const()[name = tensor("op_43038_strides_0"), val = tensor([1, 1])]; tensor var_43038_pad_0 = const()[name = tensor("op_43038_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_43038_dilations_0 = const()[name = tensor("op_43038_dilations_0"), val = tensor([1, 1])]; tensor var_43038_groups_0 = const()[name = tensor("op_43038_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(360476480))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361295744))), name = tensor("layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_43038_cast_fp16 = conv(dilations = var_43038_dilations_0, groups = var_43038_groups_0, pad = var_43038_pad_0, pad_type = var_43038_pad_type_0, strides = var_43038_strides_0, weight = layers_27_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = tensor("op_43038_cast_fp16")]; tensor var_43044_pad_type_0 = const()[name = tensor("op_43044_pad_type_0"), val = tensor("valid")]; tensor var_43044_strides_0 = const()[name = tensor("op_43044_strides_0"), val = tensor([1, 1])]; tensor var_43044_pad_0 = const()[name = tensor("op_43044_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_43044_dilations_0 = const()[name = tensor("op_43044_dilations_0"), val = tensor([1, 1])]; tensor var_43044_groups_0 = const()[name = tensor("op_43044_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361321984))), name = tensor("layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361295872))), shape = tensor([1280, 1280, 1, 1])]; tensor var_43044_cast_fp16 = conv(dilations = var_43044_dilations_0, groups = var_43044_groups_0, pad = var_43044_pad_0, pad_type = var_43044_pad_type_0, strides = var_43044_strides_0, weight = layers_27_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = tensor("op_43044_cast_fp16")]; tensor key_55_cast_fp16 = add(x = var_43038_cast_fp16, y = var_43044_cast_fp16)[name = tensor("key_55_cast_fp16")]; tensor var_43054_pad_type_0 = const()[name = tensor("op_43054_pad_type_0"), val = tensor("valid")]; tensor var_43054_strides_0 = const()[name = tensor("op_43054_strides_0"), val = tensor([1, 1])]; tensor var_43054_pad_0 = const()[name = tensor("op_43054_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_43054_dilations_0 = const()[name = tensor("op_43054_dilations_0"), val = tensor([1, 1])]; tensor var_43054_groups_0 = const()[name = tensor("op_43054_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(361526848))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362346112))), name = tensor("layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_27_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362346240)))]; tensor var_43054_cast_fp16 = conv(bias = layers_27_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_43054_dilations_0, groups = var_43054_groups_0, pad = var_43054_pad_0, pad_type = var_43054_pad_type_0, strides = var_43054_strides_0, weight = layers_27_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_109_cast_fp16)[name = tensor("op_43054_cast_fp16")]; tensor var_43060_pad_type_0 = const()[name = tensor("op_43060_pad_type_0"), val = tensor("valid")]; tensor var_43060_strides_0 = const()[name = tensor("op_43060_strides_0"), val = tensor([1, 1])]; tensor var_43060_pad_0 = const()[name = tensor("op_43060_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_43060_dilations_0 = const()[name = tensor("op_43060_dilations_0"), val = tensor([1, 1])]; tensor var_43060_groups_0 = const()[name = tensor("op_43060_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362359552))), name = tensor("layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362348864))), shape = tensor([1280, 1280, 1, 1])]; tensor var_43060_cast_fp16 = conv(dilations = var_43060_dilations_0, groups = var_43060_groups_0, pad = var_43060_pad_0, pad_type = var_43060_pad_type_0, strides = var_43060_strides_0, weight = layers_27_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_109_cast_fp16)[name = tensor("op_43060_cast_fp16")]; tensor value_55_cast_fp16 = add(x = var_43054_cast_fp16, y = var_43060_cast_fp16)[name = tensor("value_55_cast_fp16")]; tensor var_43066_begin_0 = const()[name = tensor("op_43066_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43066_end_0 = const()[name = tensor("op_43066_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43066_end_mask_0 = const()[name = tensor("op_43066_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43066_cast_fp16 = slice_by_index(begin = var_43066_begin_0, end = var_43066_end_0, end_mask = var_43066_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43066_cast_fp16")]; tensor var_43070_begin_0 = const()[name = tensor("op_43070_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_43070_end_0 = const()[name = tensor("op_43070_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_43070_end_mask_0 = const()[name = tensor("op_43070_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43070_cast_fp16 = slice_by_index(begin = var_43070_begin_0, end = var_43070_end_0, end_mask = var_43070_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43070_cast_fp16")]; tensor var_43074_begin_0 = const()[name = tensor("op_43074_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_43074_end_0 = const()[name = tensor("op_43074_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_43074_end_mask_0 = const()[name = tensor("op_43074_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43074_cast_fp16 = slice_by_index(begin = var_43074_begin_0, end = var_43074_end_0, end_mask = var_43074_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43074_cast_fp16")]; tensor var_43078_begin_0 = const()[name = tensor("op_43078_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_43078_end_0 = const()[name = tensor("op_43078_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_43078_end_mask_0 = const()[name = tensor("op_43078_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43078_cast_fp16 = slice_by_index(begin = var_43078_begin_0, end = var_43078_end_0, end_mask = var_43078_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43078_cast_fp16")]; tensor var_43082_begin_0 = const()[name = tensor("op_43082_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_43082_end_0 = const()[name = tensor("op_43082_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_43082_end_mask_0 = const()[name = tensor("op_43082_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43082_cast_fp16 = slice_by_index(begin = var_43082_begin_0, end = var_43082_end_0, end_mask = var_43082_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43082_cast_fp16")]; tensor var_43086_begin_0 = const()[name = tensor("op_43086_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_43086_end_0 = const()[name = tensor("op_43086_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_43086_end_mask_0 = const()[name = tensor("op_43086_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43086_cast_fp16 = slice_by_index(begin = var_43086_begin_0, end = var_43086_end_0, end_mask = var_43086_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43086_cast_fp16")]; tensor var_43090_begin_0 = const()[name = tensor("op_43090_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_43090_end_0 = const()[name = tensor("op_43090_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_43090_end_mask_0 = const()[name = tensor("op_43090_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43090_cast_fp16 = slice_by_index(begin = var_43090_begin_0, end = var_43090_end_0, end_mask = var_43090_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43090_cast_fp16")]; tensor var_43094_begin_0 = const()[name = tensor("op_43094_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_43094_end_0 = const()[name = tensor("op_43094_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_43094_end_mask_0 = const()[name = tensor("op_43094_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43094_cast_fp16 = slice_by_index(begin = var_43094_begin_0, end = var_43094_end_0, end_mask = var_43094_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43094_cast_fp16")]; tensor var_43098_begin_0 = const()[name = tensor("op_43098_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_43098_end_0 = const()[name = tensor("op_43098_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_43098_end_mask_0 = const()[name = tensor("op_43098_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43098_cast_fp16 = slice_by_index(begin = var_43098_begin_0, end = var_43098_end_0, end_mask = var_43098_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43098_cast_fp16")]; tensor var_43102_begin_0 = const()[name = tensor("op_43102_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_43102_end_0 = const()[name = tensor("op_43102_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_43102_end_mask_0 = const()[name = tensor("op_43102_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43102_cast_fp16 = slice_by_index(begin = var_43102_begin_0, end = var_43102_end_0, end_mask = var_43102_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43102_cast_fp16")]; tensor var_43106_begin_0 = const()[name = tensor("op_43106_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_43106_end_0 = const()[name = tensor("op_43106_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_43106_end_mask_0 = const()[name = tensor("op_43106_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43106_cast_fp16 = slice_by_index(begin = var_43106_begin_0, end = var_43106_end_0, end_mask = var_43106_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43106_cast_fp16")]; tensor var_43110_begin_0 = const()[name = tensor("op_43110_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_43110_end_0 = const()[name = tensor("op_43110_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_43110_end_mask_0 = const()[name = tensor("op_43110_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43110_cast_fp16 = slice_by_index(begin = var_43110_begin_0, end = var_43110_end_0, end_mask = var_43110_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43110_cast_fp16")]; tensor var_43114_begin_0 = const()[name = tensor("op_43114_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_43114_end_0 = const()[name = tensor("op_43114_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_43114_end_mask_0 = const()[name = tensor("op_43114_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43114_cast_fp16 = slice_by_index(begin = var_43114_begin_0, end = var_43114_end_0, end_mask = var_43114_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43114_cast_fp16")]; tensor var_43118_begin_0 = const()[name = tensor("op_43118_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_43118_end_0 = const()[name = tensor("op_43118_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_43118_end_mask_0 = const()[name = tensor("op_43118_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43118_cast_fp16 = slice_by_index(begin = var_43118_begin_0, end = var_43118_end_0, end_mask = var_43118_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43118_cast_fp16")]; tensor var_43122_begin_0 = const()[name = tensor("op_43122_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_43122_end_0 = const()[name = tensor("op_43122_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_43122_end_mask_0 = const()[name = tensor("op_43122_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43122_cast_fp16 = slice_by_index(begin = var_43122_begin_0, end = var_43122_end_0, end_mask = var_43122_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43122_cast_fp16")]; tensor var_43126_begin_0 = const()[name = tensor("op_43126_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_43126_end_0 = const()[name = tensor("op_43126_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_43126_end_mask_0 = const()[name = tensor("op_43126_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43126_cast_fp16 = slice_by_index(begin = var_43126_begin_0, end = var_43126_end_0, end_mask = var_43126_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43126_cast_fp16")]; tensor var_43130_begin_0 = const()[name = tensor("op_43130_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_43130_end_0 = const()[name = tensor("op_43130_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_43130_end_mask_0 = const()[name = tensor("op_43130_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43130_cast_fp16 = slice_by_index(begin = var_43130_begin_0, end = var_43130_end_0, end_mask = var_43130_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43130_cast_fp16")]; tensor var_43134_begin_0 = const()[name = tensor("op_43134_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_43134_end_0 = const()[name = tensor("op_43134_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_43134_end_mask_0 = const()[name = tensor("op_43134_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43134_cast_fp16 = slice_by_index(begin = var_43134_begin_0, end = var_43134_end_0, end_mask = var_43134_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43134_cast_fp16")]; tensor var_43138_begin_0 = const()[name = tensor("op_43138_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_43138_end_0 = const()[name = tensor("op_43138_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_43138_end_mask_0 = const()[name = tensor("op_43138_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43138_cast_fp16 = slice_by_index(begin = var_43138_begin_0, end = var_43138_end_0, end_mask = var_43138_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43138_cast_fp16")]; tensor var_43142_begin_0 = const()[name = tensor("op_43142_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_43142_end_0 = const()[name = tensor("op_43142_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_43142_end_mask_0 = const()[name = tensor("op_43142_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43142_cast_fp16 = slice_by_index(begin = var_43142_begin_0, end = var_43142_end_0, end_mask = var_43142_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_43142_cast_fp16")]; tensor var_43151_begin_0 = const()[name = tensor("op_43151_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43151_end_0 = const()[name = tensor("op_43151_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43151_end_mask_0 = const()[name = tensor("op_43151_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43151_cast_fp16 = slice_by_index(begin = var_43151_begin_0, end = var_43151_end_0, end_mask = var_43151_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43151_cast_fp16")]; tensor var_43158_begin_0 = const()[name = tensor("op_43158_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43158_end_0 = const()[name = tensor("op_43158_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43158_end_mask_0 = const()[name = tensor("op_43158_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43158_cast_fp16 = slice_by_index(begin = var_43158_begin_0, end = var_43158_end_0, end_mask = var_43158_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43158_cast_fp16")]; tensor var_43165_begin_0 = const()[name = tensor("op_43165_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43165_end_0 = const()[name = tensor("op_43165_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43165_end_mask_0 = const()[name = tensor("op_43165_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43165_cast_fp16 = slice_by_index(begin = var_43165_begin_0, end = var_43165_end_0, end_mask = var_43165_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43165_cast_fp16")]; tensor var_43172_begin_0 = const()[name = tensor("op_43172_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43172_end_0 = const()[name = tensor("op_43172_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43172_end_mask_0 = const()[name = tensor("op_43172_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43172_cast_fp16 = slice_by_index(begin = var_43172_begin_0, end = var_43172_end_0, end_mask = var_43172_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43172_cast_fp16")]; tensor var_43179_begin_0 = const()[name = tensor("op_43179_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43179_end_0 = const()[name = tensor("op_43179_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43179_end_mask_0 = const()[name = tensor("op_43179_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43179_cast_fp16 = slice_by_index(begin = var_43179_begin_0, end = var_43179_end_0, end_mask = var_43179_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43179_cast_fp16")]; tensor var_43186_begin_0 = const()[name = tensor("op_43186_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43186_end_0 = const()[name = tensor("op_43186_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43186_end_mask_0 = const()[name = tensor("op_43186_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43186_cast_fp16 = slice_by_index(begin = var_43186_begin_0, end = var_43186_end_0, end_mask = var_43186_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43186_cast_fp16")]; tensor var_43193_begin_0 = const()[name = tensor("op_43193_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43193_end_0 = const()[name = tensor("op_43193_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43193_end_mask_0 = const()[name = tensor("op_43193_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43193_cast_fp16 = slice_by_index(begin = var_43193_begin_0, end = var_43193_end_0, end_mask = var_43193_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43193_cast_fp16")]; tensor var_43200_begin_0 = const()[name = tensor("op_43200_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43200_end_0 = const()[name = tensor("op_43200_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43200_end_mask_0 = const()[name = tensor("op_43200_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43200_cast_fp16 = slice_by_index(begin = var_43200_begin_0, end = var_43200_end_0, end_mask = var_43200_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43200_cast_fp16")]; tensor var_43207_begin_0 = const()[name = tensor("op_43207_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43207_end_0 = const()[name = tensor("op_43207_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43207_end_mask_0 = const()[name = tensor("op_43207_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43207_cast_fp16 = slice_by_index(begin = var_43207_begin_0, end = var_43207_end_0, end_mask = var_43207_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43207_cast_fp16")]; tensor var_43214_begin_0 = const()[name = tensor("op_43214_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43214_end_0 = const()[name = tensor("op_43214_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43214_end_mask_0 = const()[name = tensor("op_43214_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43214_cast_fp16 = slice_by_index(begin = var_43214_begin_0, end = var_43214_end_0, end_mask = var_43214_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43214_cast_fp16")]; tensor var_43221_begin_0 = const()[name = tensor("op_43221_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43221_end_0 = const()[name = tensor("op_43221_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43221_end_mask_0 = const()[name = tensor("op_43221_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43221_cast_fp16 = slice_by_index(begin = var_43221_begin_0, end = var_43221_end_0, end_mask = var_43221_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43221_cast_fp16")]; tensor var_43228_begin_0 = const()[name = tensor("op_43228_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43228_end_0 = const()[name = tensor("op_43228_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43228_end_mask_0 = const()[name = tensor("op_43228_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43228_cast_fp16 = slice_by_index(begin = var_43228_begin_0, end = var_43228_end_0, end_mask = var_43228_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43228_cast_fp16")]; tensor var_43235_begin_0 = const()[name = tensor("op_43235_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43235_end_0 = const()[name = tensor("op_43235_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43235_end_mask_0 = const()[name = tensor("op_43235_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43235_cast_fp16 = slice_by_index(begin = var_43235_begin_0, end = var_43235_end_0, end_mask = var_43235_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43235_cast_fp16")]; tensor var_43242_begin_0 = const()[name = tensor("op_43242_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43242_end_0 = const()[name = tensor("op_43242_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43242_end_mask_0 = const()[name = tensor("op_43242_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43242_cast_fp16 = slice_by_index(begin = var_43242_begin_0, end = var_43242_end_0, end_mask = var_43242_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43242_cast_fp16")]; tensor var_43249_begin_0 = const()[name = tensor("op_43249_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43249_end_0 = const()[name = tensor("op_43249_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43249_end_mask_0 = const()[name = tensor("op_43249_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43249_cast_fp16 = slice_by_index(begin = var_43249_begin_0, end = var_43249_end_0, end_mask = var_43249_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43249_cast_fp16")]; tensor var_43256_begin_0 = const()[name = tensor("op_43256_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43256_end_0 = const()[name = tensor("op_43256_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43256_end_mask_0 = const()[name = tensor("op_43256_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43256_cast_fp16 = slice_by_index(begin = var_43256_begin_0, end = var_43256_end_0, end_mask = var_43256_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43256_cast_fp16")]; tensor var_43263_begin_0 = const()[name = tensor("op_43263_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43263_end_0 = const()[name = tensor("op_43263_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43263_end_mask_0 = const()[name = tensor("op_43263_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43263_cast_fp16 = slice_by_index(begin = var_43263_begin_0, end = var_43263_end_0, end_mask = var_43263_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43263_cast_fp16")]; tensor var_43270_begin_0 = const()[name = tensor("op_43270_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43270_end_0 = const()[name = tensor("op_43270_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43270_end_mask_0 = const()[name = tensor("op_43270_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43270_cast_fp16 = slice_by_index(begin = var_43270_begin_0, end = var_43270_end_0, end_mask = var_43270_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43270_cast_fp16")]; tensor var_43277_begin_0 = const()[name = tensor("op_43277_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43277_end_0 = const()[name = tensor("op_43277_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43277_end_mask_0 = const()[name = tensor("op_43277_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43277_cast_fp16 = slice_by_index(begin = var_43277_begin_0, end = var_43277_end_0, end_mask = var_43277_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43277_cast_fp16")]; tensor var_43284_begin_0 = const()[name = tensor("op_43284_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43284_end_0 = const()[name = tensor("op_43284_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43284_end_mask_0 = const()[name = tensor("op_43284_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43284_cast_fp16 = slice_by_index(begin = var_43284_begin_0, end = var_43284_end_0, end_mask = var_43284_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43284_cast_fp16")]; tensor var_43291_begin_0 = const()[name = tensor("op_43291_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43291_end_0 = const()[name = tensor("op_43291_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43291_end_mask_0 = const()[name = tensor("op_43291_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43291_cast_fp16 = slice_by_index(begin = var_43291_begin_0, end = var_43291_end_0, end_mask = var_43291_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43291_cast_fp16")]; tensor var_43298_begin_0 = const()[name = tensor("op_43298_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43298_end_0 = const()[name = tensor("op_43298_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43298_end_mask_0 = const()[name = tensor("op_43298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43298_cast_fp16 = slice_by_index(begin = var_43298_begin_0, end = var_43298_end_0, end_mask = var_43298_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43298_cast_fp16")]; tensor var_43305_begin_0 = const()[name = tensor("op_43305_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43305_end_0 = const()[name = tensor("op_43305_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43305_end_mask_0 = const()[name = tensor("op_43305_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43305_cast_fp16 = slice_by_index(begin = var_43305_begin_0, end = var_43305_end_0, end_mask = var_43305_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43305_cast_fp16")]; tensor var_43312_begin_0 = const()[name = tensor("op_43312_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43312_end_0 = const()[name = tensor("op_43312_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43312_end_mask_0 = const()[name = tensor("op_43312_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43312_cast_fp16 = slice_by_index(begin = var_43312_begin_0, end = var_43312_end_0, end_mask = var_43312_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43312_cast_fp16")]; tensor var_43319_begin_0 = const()[name = tensor("op_43319_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43319_end_0 = const()[name = tensor("op_43319_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43319_end_mask_0 = const()[name = tensor("op_43319_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43319_cast_fp16 = slice_by_index(begin = var_43319_begin_0, end = var_43319_end_0, end_mask = var_43319_end_mask_0, x = var_43090_cast_fp16)[name = tensor("op_43319_cast_fp16")]; tensor var_43326_begin_0 = const()[name = tensor("op_43326_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43326_end_0 = const()[name = tensor("op_43326_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43326_end_mask_0 = const()[name = tensor("op_43326_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43326_cast_fp16 = slice_by_index(begin = var_43326_begin_0, end = var_43326_end_0, end_mask = var_43326_end_mask_0, x = var_43090_cast_fp16)[name = tensor("op_43326_cast_fp16")]; tensor var_43333_begin_0 = const()[name = tensor("op_43333_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43333_end_0 = const()[name = tensor("op_43333_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43333_end_mask_0 = const()[name = tensor("op_43333_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43333_cast_fp16 = slice_by_index(begin = var_43333_begin_0, end = var_43333_end_0, end_mask = var_43333_end_mask_0, x = var_43090_cast_fp16)[name = tensor("op_43333_cast_fp16")]; tensor var_43340_begin_0 = const()[name = tensor("op_43340_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43340_end_0 = const()[name = tensor("op_43340_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43340_end_mask_0 = const()[name = tensor("op_43340_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43340_cast_fp16 = slice_by_index(begin = var_43340_begin_0, end = var_43340_end_0, end_mask = var_43340_end_mask_0, x = var_43090_cast_fp16)[name = tensor("op_43340_cast_fp16")]; tensor var_43347_begin_0 = const()[name = tensor("op_43347_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43347_end_0 = const()[name = tensor("op_43347_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43347_end_mask_0 = const()[name = tensor("op_43347_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43347_cast_fp16 = slice_by_index(begin = var_43347_begin_0, end = var_43347_end_0, end_mask = var_43347_end_mask_0, x = var_43094_cast_fp16)[name = tensor("op_43347_cast_fp16")]; tensor var_43354_begin_0 = const()[name = tensor("op_43354_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43354_end_0 = const()[name = tensor("op_43354_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43354_end_mask_0 = const()[name = tensor("op_43354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43354_cast_fp16 = slice_by_index(begin = var_43354_begin_0, end = var_43354_end_0, end_mask = var_43354_end_mask_0, x = var_43094_cast_fp16)[name = tensor("op_43354_cast_fp16")]; tensor var_43361_begin_0 = const()[name = tensor("op_43361_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43361_end_0 = const()[name = tensor("op_43361_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43361_end_mask_0 = const()[name = tensor("op_43361_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43361_cast_fp16 = slice_by_index(begin = var_43361_begin_0, end = var_43361_end_0, end_mask = var_43361_end_mask_0, x = var_43094_cast_fp16)[name = tensor("op_43361_cast_fp16")]; tensor var_43368_begin_0 = const()[name = tensor("op_43368_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43368_end_0 = const()[name = tensor("op_43368_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43368_end_mask_0 = const()[name = tensor("op_43368_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43368_cast_fp16 = slice_by_index(begin = var_43368_begin_0, end = var_43368_end_0, end_mask = var_43368_end_mask_0, x = var_43094_cast_fp16)[name = tensor("op_43368_cast_fp16")]; tensor var_43375_begin_0 = const()[name = tensor("op_43375_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43375_end_0 = const()[name = tensor("op_43375_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43375_end_mask_0 = const()[name = tensor("op_43375_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43375_cast_fp16 = slice_by_index(begin = var_43375_begin_0, end = var_43375_end_0, end_mask = var_43375_end_mask_0, x = var_43098_cast_fp16)[name = tensor("op_43375_cast_fp16")]; tensor var_43382_begin_0 = const()[name = tensor("op_43382_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43382_end_0 = const()[name = tensor("op_43382_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43382_end_mask_0 = const()[name = tensor("op_43382_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43382_cast_fp16 = slice_by_index(begin = var_43382_begin_0, end = var_43382_end_0, end_mask = var_43382_end_mask_0, x = var_43098_cast_fp16)[name = tensor("op_43382_cast_fp16")]; tensor var_43389_begin_0 = const()[name = tensor("op_43389_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43389_end_0 = const()[name = tensor("op_43389_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43389_end_mask_0 = const()[name = tensor("op_43389_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43389_cast_fp16 = slice_by_index(begin = var_43389_begin_0, end = var_43389_end_0, end_mask = var_43389_end_mask_0, x = var_43098_cast_fp16)[name = tensor("op_43389_cast_fp16")]; tensor var_43396_begin_0 = const()[name = tensor("op_43396_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43396_end_0 = const()[name = tensor("op_43396_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43396_end_mask_0 = const()[name = tensor("op_43396_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43396_cast_fp16 = slice_by_index(begin = var_43396_begin_0, end = var_43396_end_0, end_mask = var_43396_end_mask_0, x = var_43098_cast_fp16)[name = tensor("op_43396_cast_fp16")]; tensor var_43403_begin_0 = const()[name = tensor("op_43403_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43403_end_0 = const()[name = tensor("op_43403_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43403_end_mask_0 = const()[name = tensor("op_43403_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43403_cast_fp16 = slice_by_index(begin = var_43403_begin_0, end = var_43403_end_0, end_mask = var_43403_end_mask_0, x = var_43102_cast_fp16)[name = tensor("op_43403_cast_fp16")]; tensor var_43410_begin_0 = const()[name = tensor("op_43410_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43410_end_0 = const()[name = tensor("op_43410_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43410_end_mask_0 = const()[name = tensor("op_43410_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43410_cast_fp16 = slice_by_index(begin = var_43410_begin_0, end = var_43410_end_0, end_mask = var_43410_end_mask_0, x = var_43102_cast_fp16)[name = tensor("op_43410_cast_fp16")]; tensor var_43417_begin_0 = const()[name = tensor("op_43417_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43417_end_0 = const()[name = tensor("op_43417_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43417_end_mask_0 = const()[name = tensor("op_43417_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43417_cast_fp16 = slice_by_index(begin = var_43417_begin_0, end = var_43417_end_0, end_mask = var_43417_end_mask_0, x = var_43102_cast_fp16)[name = tensor("op_43417_cast_fp16")]; tensor var_43424_begin_0 = const()[name = tensor("op_43424_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43424_end_0 = const()[name = tensor("op_43424_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43424_end_mask_0 = const()[name = tensor("op_43424_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43424_cast_fp16 = slice_by_index(begin = var_43424_begin_0, end = var_43424_end_0, end_mask = var_43424_end_mask_0, x = var_43102_cast_fp16)[name = tensor("op_43424_cast_fp16")]; tensor var_43431_begin_0 = const()[name = tensor("op_43431_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43431_end_0 = const()[name = tensor("op_43431_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43431_end_mask_0 = const()[name = tensor("op_43431_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43431_cast_fp16 = slice_by_index(begin = var_43431_begin_0, end = var_43431_end_0, end_mask = var_43431_end_mask_0, x = var_43106_cast_fp16)[name = tensor("op_43431_cast_fp16")]; tensor var_43438_begin_0 = const()[name = tensor("op_43438_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43438_end_0 = const()[name = tensor("op_43438_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43438_end_mask_0 = const()[name = tensor("op_43438_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43438_cast_fp16 = slice_by_index(begin = var_43438_begin_0, end = var_43438_end_0, end_mask = var_43438_end_mask_0, x = var_43106_cast_fp16)[name = tensor("op_43438_cast_fp16")]; tensor var_43445_begin_0 = const()[name = tensor("op_43445_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43445_end_0 = const()[name = tensor("op_43445_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43445_end_mask_0 = const()[name = tensor("op_43445_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43445_cast_fp16 = slice_by_index(begin = var_43445_begin_0, end = var_43445_end_0, end_mask = var_43445_end_mask_0, x = var_43106_cast_fp16)[name = tensor("op_43445_cast_fp16")]; tensor var_43452_begin_0 = const()[name = tensor("op_43452_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43452_end_0 = const()[name = tensor("op_43452_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43452_end_mask_0 = const()[name = tensor("op_43452_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43452_cast_fp16 = slice_by_index(begin = var_43452_begin_0, end = var_43452_end_0, end_mask = var_43452_end_mask_0, x = var_43106_cast_fp16)[name = tensor("op_43452_cast_fp16")]; tensor var_43459_begin_0 = const()[name = tensor("op_43459_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43459_end_0 = const()[name = tensor("op_43459_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43459_end_mask_0 = const()[name = tensor("op_43459_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43459_cast_fp16 = slice_by_index(begin = var_43459_begin_0, end = var_43459_end_0, end_mask = var_43459_end_mask_0, x = var_43110_cast_fp16)[name = tensor("op_43459_cast_fp16")]; tensor var_43466_begin_0 = const()[name = tensor("op_43466_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43466_end_0 = const()[name = tensor("op_43466_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43466_end_mask_0 = const()[name = tensor("op_43466_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43466_cast_fp16 = slice_by_index(begin = var_43466_begin_0, end = var_43466_end_0, end_mask = var_43466_end_mask_0, x = var_43110_cast_fp16)[name = tensor("op_43466_cast_fp16")]; tensor var_43473_begin_0 = const()[name = tensor("op_43473_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43473_end_0 = const()[name = tensor("op_43473_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43473_end_mask_0 = const()[name = tensor("op_43473_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43473_cast_fp16 = slice_by_index(begin = var_43473_begin_0, end = var_43473_end_0, end_mask = var_43473_end_mask_0, x = var_43110_cast_fp16)[name = tensor("op_43473_cast_fp16")]; tensor var_43480_begin_0 = const()[name = tensor("op_43480_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43480_end_0 = const()[name = tensor("op_43480_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43480_end_mask_0 = const()[name = tensor("op_43480_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43480_cast_fp16 = slice_by_index(begin = var_43480_begin_0, end = var_43480_end_0, end_mask = var_43480_end_mask_0, x = var_43110_cast_fp16)[name = tensor("op_43480_cast_fp16")]; tensor var_43487_begin_0 = const()[name = tensor("op_43487_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43487_end_0 = const()[name = tensor("op_43487_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43487_end_mask_0 = const()[name = tensor("op_43487_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43487_cast_fp16 = slice_by_index(begin = var_43487_begin_0, end = var_43487_end_0, end_mask = var_43487_end_mask_0, x = var_43114_cast_fp16)[name = tensor("op_43487_cast_fp16")]; tensor var_43494_begin_0 = const()[name = tensor("op_43494_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43494_end_0 = const()[name = tensor("op_43494_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43494_end_mask_0 = const()[name = tensor("op_43494_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43494_cast_fp16 = slice_by_index(begin = var_43494_begin_0, end = var_43494_end_0, end_mask = var_43494_end_mask_0, x = var_43114_cast_fp16)[name = tensor("op_43494_cast_fp16")]; tensor var_43501_begin_0 = const()[name = tensor("op_43501_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43501_end_0 = const()[name = tensor("op_43501_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43501_end_mask_0 = const()[name = tensor("op_43501_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43501_cast_fp16 = slice_by_index(begin = var_43501_begin_0, end = var_43501_end_0, end_mask = var_43501_end_mask_0, x = var_43114_cast_fp16)[name = tensor("op_43501_cast_fp16")]; tensor var_43508_begin_0 = const()[name = tensor("op_43508_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43508_end_0 = const()[name = tensor("op_43508_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43508_end_mask_0 = const()[name = tensor("op_43508_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43508_cast_fp16 = slice_by_index(begin = var_43508_begin_0, end = var_43508_end_0, end_mask = var_43508_end_mask_0, x = var_43114_cast_fp16)[name = tensor("op_43508_cast_fp16")]; tensor var_43515_begin_0 = const()[name = tensor("op_43515_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43515_end_0 = const()[name = tensor("op_43515_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43515_end_mask_0 = const()[name = tensor("op_43515_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43515_cast_fp16 = slice_by_index(begin = var_43515_begin_0, end = var_43515_end_0, end_mask = var_43515_end_mask_0, x = var_43118_cast_fp16)[name = tensor("op_43515_cast_fp16")]; tensor var_43522_begin_0 = const()[name = tensor("op_43522_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43522_end_0 = const()[name = tensor("op_43522_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43522_end_mask_0 = const()[name = tensor("op_43522_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43522_cast_fp16 = slice_by_index(begin = var_43522_begin_0, end = var_43522_end_0, end_mask = var_43522_end_mask_0, x = var_43118_cast_fp16)[name = tensor("op_43522_cast_fp16")]; tensor var_43529_begin_0 = const()[name = tensor("op_43529_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43529_end_0 = const()[name = tensor("op_43529_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43529_end_mask_0 = const()[name = tensor("op_43529_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43529_cast_fp16 = slice_by_index(begin = var_43529_begin_0, end = var_43529_end_0, end_mask = var_43529_end_mask_0, x = var_43118_cast_fp16)[name = tensor("op_43529_cast_fp16")]; tensor var_43536_begin_0 = const()[name = tensor("op_43536_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43536_end_0 = const()[name = tensor("op_43536_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43536_end_mask_0 = const()[name = tensor("op_43536_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43536_cast_fp16 = slice_by_index(begin = var_43536_begin_0, end = var_43536_end_0, end_mask = var_43536_end_mask_0, x = var_43118_cast_fp16)[name = tensor("op_43536_cast_fp16")]; tensor var_43543_begin_0 = const()[name = tensor("op_43543_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43543_end_0 = const()[name = tensor("op_43543_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43543_end_mask_0 = const()[name = tensor("op_43543_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43543_cast_fp16 = slice_by_index(begin = var_43543_begin_0, end = var_43543_end_0, end_mask = var_43543_end_mask_0, x = var_43122_cast_fp16)[name = tensor("op_43543_cast_fp16")]; tensor var_43550_begin_0 = const()[name = tensor("op_43550_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43550_end_0 = const()[name = tensor("op_43550_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43550_end_mask_0 = const()[name = tensor("op_43550_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43550_cast_fp16 = slice_by_index(begin = var_43550_begin_0, end = var_43550_end_0, end_mask = var_43550_end_mask_0, x = var_43122_cast_fp16)[name = tensor("op_43550_cast_fp16")]; tensor var_43557_begin_0 = const()[name = tensor("op_43557_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43557_end_0 = const()[name = tensor("op_43557_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43557_end_mask_0 = const()[name = tensor("op_43557_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43557_cast_fp16 = slice_by_index(begin = var_43557_begin_0, end = var_43557_end_0, end_mask = var_43557_end_mask_0, x = var_43122_cast_fp16)[name = tensor("op_43557_cast_fp16")]; tensor var_43564_begin_0 = const()[name = tensor("op_43564_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43564_end_0 = const()[name = tensor("op_43564_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43564_end_mask_0 = const()[name = tensor("op_43564_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43564_cast_fp16 = slice_by_index(begin = var_43564_begin_0, end = var_43564_end_0, end_mask = var_43564_end_mask_0, x = var_43122_cast_fp16)[name = tensor("op_43564_cast_fp16")]; tensor var_43571_begin_0 = const()[name = tensor("op_43571_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43571_end_0 = const()[name = tensor("op_43571_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43571_end_mask_0 = const()[name = tensor("op_43571_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43571_cast_fp16 = slice_by_index(begin = var_43571_begin_0, end = var_43571_end_0, end_mask = var_43571_end_mask_0, x = var_43126_cast_fp16)[name = tensor("op_43571_cast_fp16")]; tensor var_43578_begin_0 = const()[name = tensor("op_43578_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43578_end_0 = const()[name = tensor("op_43578_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43578_end_mask_0 = const()[name = tensor("op_43578_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43578_cast_fp16 = slice_by_index(begin = var_43578_begin_0, end = var_43578_end_0, end_mask = var_43578_end_mask_0, x = var_43126_cast_fp16)[name = tensor("op_43578_cast_fp16")]; tensor var_43585_begin_0 = const()[name = tensor("op_43585_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43585_end_0 = const()[name = tensor("op_43585_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43585_end_mask_0 = const()[name = tensor("op_43585_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43585_cast_fp16 = slice_by_index(begin = var_43585_begin_0, end = var_43585_end_0, end_mask = var_43585_end_mask_0, x = var_43126_cast_fp16)[name = tensor("op_43585_cast_fp16")]; tensor var_43592_begin_0 = const()[name = tensor("op_43592_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43592_end_0 = const()[name = tensor("op_43592_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43592_end_mask_0 = const()[name = tensor("op_43592_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43592_cast_fp16 = slice_by_index(begin = var_43592_begin_0, end = var_43592_end_0, end_mask = var_43592_end_mask_0, x = var_43126_cast_fp16)[name = tensor("op_43592_cast_fp16")]; tensor var_43599_begin_0 = const()[name = tensor("op_43599_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43599_end_0 = const()[name = tensor("op_43599_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43599_end_mask_0 = const()[name = tensor("op_43599_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43599_cast_fp16 = slice_by_index(begin = var_43599_begin_0, end = var_43599_end_0, end_mask = var_43599_end_mask_0, x = var_43130_cast_fp16)[name = tensor("op_43599_cast_fp16")]; tensor var_43606_begin_0 = const()[name = tensor("op_43606_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43606_end_0 = const()[name = tensor("op_43606_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43606_end_mask_0 = const()[name = tensor("op_43606_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43606_cast_fp16 = slice_by_index(begin = var_43606_begin_0, end = var_43606_end_0, end_mask = var_43606_end_mask_0, x = var_43130_cast_fp16)[name = tensor("op_43606_cast_fp16")]; tensor var_43613_begin_0 = const()[name = tensor("op_43613_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43613_end_0 = const()[name = tensor("op_43613_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43613_end_mask_0 = const()[name = tensor("op_43613_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43613_cast_fp16 = slice_by_index(begin = var_43613_begin_0, end = var_43613_end_0, end_mask = var_43613_end_mask_0, x = var_43130_cast_fp16)[name = tensor("op_43613_cast_fp16")]; tensor var_43620_begin_0 = const()[name = tensor("op_43620_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43620_end_0 = const()[name = tensor("op_43620_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43620_end_mask_0 = const()[name = tensor("op_43620_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43620_cast_fp16 = slice_by_index(begin = var_43620_begin_0, end = var_43620_end_0, end_mask = var_43620_end_mask_0, x = var_43130_cast_fp16)[name = tensor("op_43620_cast_fp16")]; tensor var_43627_begin_0 = const()[name = tensor("op_43627_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43627_end_0 = const()[name = tensor("op_43627_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43627_end_mask_0 = const()[name = tensor("op_43627_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43627_cast_fp16 = slice_by_index(begin = var_43627_begin_0, end = var_43627_end_0, end_mask = var_43627_end_mask_0, x = var_43134_cast_fp16)[name = tensor("op_43627_cast_fp16")]; tensor var_43634_begin_0 = const()[name = tensor("op_43634_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43634_end_0 = const()[name = tensor("op_43634_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43634_end_mask_0 = const()[name = tensor("op_43634_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43634_cast_fp16 = slice_by_index(begin = var_43634_begin_0, end = var_43634_end_0, end_mask = var_43634_end_mask_0, x = var_43134_cast_fp16)[name = tensor("op_43634_cast_fp16")]; tensor var_43641_begin_0 = const()[name = tensor("op_43641_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43641_end_0 = const()[name = tensor("op_43641_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43641_end_mask_0 = const()[name = tensor("op_43641_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43641_cast_fp16 = slice_by_index(begin = var_43641_begin_0, end = var_43641_end_0, end_mask = var_43641_end_mask_0, x = var_43134_cast_fp16)[name = tensor("op_43641_cast_fp16")]; tensor var_43648_begin_0 = const()[name = tensor("op_43648_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43648_end_0 = const()[name = tensor("op_43648_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43648_end_mask_0 = const()[name = tensor("op_43648_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43648_cast_fp16 = slice_by_index(begin = var_43648_begin_0, end = var_43648_end_0, end_mask = var_43648_end_mask_0, x = var_43134_cast_fp16)[name = tensor("op_43648_cast_fp16")]; tensor var_43655_begin_0 = const()[name = tensor("op_43655_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43655_end_0 = const()[name = tensor("op_43655_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43655_end_mask_0 = const()[name = tensor("op_43655_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43655_cast_fp16 = slice_by_index(begin = var_43655_begin_0, end = var_43655_end_0, end_mask = var_43655_end_mask_0, x = var_43138_cast_fp16)[name = tensor("op_43655_cast_fp16")]; tensor var_43662_begin_0 = const()[name = tensor("op_43662_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43662_end_0 = const()[name = tensor("op_43662_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43662_end_mask_0 = const()[name = tensor("op_43662_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43662_cast_fp16 = slice_by_index(begin = var_43662_begin_0, end = var_43662_end_0, end_mask = var_43662_end_mask_0, x = var_43138_cast_fp16)[name = tensor("op_43662_cast_fp16")]; tensor var_43669_begin_0 = const()[name = tensor("op_43669_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43669_end_0 = const()[name = tensor("op_43669_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43669_end_mask_0 = const()[name = tensor("op_43669_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43669_cast_fp16 = slice_by_index(begin = var_43669_begin_0, end = var_43669_end_0, end_mask = var_43669_end_mask_0, x = var_43138_cast_fp16)[name = tensor("op_43669_cast_fp16")]; tensor var_43676_begin_0 = const()[name = tensor("op_43676_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43676_end_0 = const()[name = tensor("op_43676_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43676_end_mask_0 = const()[name = tensor("op_43676_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43676_cast_fp16 = slice_by_index(begin = var_43676_begin_0, end = var_43676_end_0, end_mask = var_43676_end_mask_0, x = var_43138_cast_fp16)[name = tensor("op_43676_cast_fp16")]; tensor var_43683_begin_0 = const()[name = tensor("op_43683_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43683_end_0 = const()[name = tensor("op_43683_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_43683_end_mask_0 = const()[name = tensor("op_43683_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43683_cast_fp16 = slice_by_index(begin = var_43683_begin_0, end = var_43683_end_0, end_mask = var_43683_end_mask_0, x = var_43142_cast_fp16)[name = tensor("op_43683_cast_fp16")]; tensor var_43690_begin_0 = const()[name = tensor("op_43690_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_43690_end_0 = const()[name = tensor("op_43690_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_43690_end_mask_0 = const()[name = tensor("op_43690_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43690_cast_fp16 = slice_by_index(begin = var_43690_begin_0, end = var_43690_end_0, end_mask = var_43690_end_mask_0, x = var_43142_cast_fp16)[name = tensor("op_43690_cast_fp16")]; tensor var_43697_begin_0 = const()[name = tensor("op_43697_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_43697_end_0 = const()[name = tensor("op_43697_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_43697_end_mask_0 = const()[name = tensor("op_43697_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43697_cast_fp16 = slice_by_index(begin = var_43697_begin_0, end = var_43697_end_0, end_mask = var_43697_end_mask_0, x = var_43142_cast_fp16)[name = tensor("op_43697_cast_fp16")]; tensor var_43704_begin_0 = const()[name = tensor("op_43704_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_43704_end_0 = const()[name = tensor("op_43704_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43704_end_mask_0 = const()[name = tensor("op_43704_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43704_cast_fp16 = slice_by_index(begin = var_43704_begin_0, end = var_43704_end_0, end_mask = var_43704_end_mask_0, x = var_43142_cast_fp16)[name = tensor("op_43704_cast_fp16")]; tensor k_55_perm_0 = const()[name = tensor("k_55_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_43709_begin_0 = const()[name = tensor("op_43709_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43709_end_0 = const()[name = tensor("op_43709_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_43709_end_mask_0 = const()[name = tensor("op_43709_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_55_cast_fp16 = transpose(perm = k_55_perm_0, x = key_55_cast_fp16)[name = tensor("transpose_4")]; tensor var_43709_cast_fp16 = slice_by_index(begin = var_43709_begin_0, end = var_43709_end_0, end_mask = var_43709_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43709_cast_fp16")]; tensor var_43713_begin_0 = const()[name = tensor("op_43713_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_43713_end_0 = const()[name = tensor("op_43713_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_43713_end_mask_0 = const()[name = tensor("op_43713_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43713_cast_fp16 = slice_by_index(begin = var_43713_begin_0, end = var_43713_end_0, end_mask = var_43713_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43713_cast_fp16")]; tensor var_43717_begin_0 = const()[name = tensor("op_43717_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_43717_end_0 = const()[name = tensor("op_43717_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_43717_end_mask_0 = const()[name = tensor("op_43717_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43717_cast_fp16 = slice_by_index(begin = var_43717_begin_0, end = var_43717_end_0, end_mask = var_43717_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43717_cast_fp16")]; tensor var_43721_begin_0 = const()[name = tensor("op_43721_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_43721_end_0 = const()[name = tensor("op_43721_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_43721_end_mask_0 = const()[name = tensor("op_43721_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43721_cast_fp16 = slice_by_index(begin = var_43721_begin_0, end = var_43721_end_0, end_mask = var_43721_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43721_cast_fp16")]; tensor var_43725_begin_0 = const()[name = tensor("op_43725_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_43725_end_0 = const()[name = tensor("op_43725_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_43725_end_mask_0 = const()[name = tensor("op_43725_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43725_cast_fp16 = slice_by_index(begin = var_43725_begin_0, end = var_43725_end_0, end_mask = var_43725_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43725_cast_fp16")]; tensor var_43729_begin_0 = const()[name = tensor("op_43729_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_43729_end_0 = const()[name = tensor("op_43729_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_43729_end_mask_0 = const()[name = tensor("op_43729_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43729_cast_fp16 = slice_by_index(begin = var_43729_begin_0, end = var_43729_end_0, end_mask = var_43729_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43729_cast_fp16")]; tensor var_43733_begin_0 = const()[name = tensor("op_43733_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_43733_end_0 = const()[name = tensor("op_43733_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_43733_end_mask_0 = const()[name = tensor("op_43733_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43733_cast_fp16 = slice_by_index(begin = var_43733_begin_0, end = var_43733_end_0, end_mask = var_43733_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43733_cast_fp16")]; tensor var_43737_begin_0 = const()[name = tensor("op_43737_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_43737_end_0 = const()[name = tensor("op_43737_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_43737_end_mask_0 = const()[name = tensor("op_43737_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43737_cast_fp16 = slice_by_index(begin = var_43737_begin_0, end = var_43737_end_0, end_mask = var_43737_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43737_cast_fp16")]; tensor var_43741_begin_0 = const()[name = tensor("op_43741_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_43741_end_0 = const()[name = tensor("op_43741_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_43741_end_mask_0 = const()[name = tensor("op_43741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43741_cast_fp16 = slice_by_index(begin = var_43741_begin_0, end = var_43741_end_0, end_mask = var_43741_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43741_cast_fp16")]; tensor var_43745_begin_0 = const()[name = tensor("op_43745_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_43745_end_0 = const()[name = tensor("op_43745_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_43745_end_mask_0 = const()[name = tensor("op_43745_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43745_cast_fp16 = slice_by_index(begin = var_43745_begin_0, end = var_43745_end_0, end_mask = var_43745_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43745_cast_fp16")]; tensor var_43749_begin_0 = const()[name = tensor("op_43749_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_43749_end_0 = const()[name = tensor("op_43749_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_43749_end_mask_0 = const()[name = tensor("op_43749_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43749_cast_fp16 = slice_by_index(begin = var_43749_begin_0, end = var_43749_end_0, end_mask = var_43749_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43749_cast_fp16")]; tensor var_43753_begin_0 = const()[name = tensor("op_43753_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_43753_end_0 = const()[name = tensor("op_43753_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_43753_end_mask_0 = const()[name = tensor("op_43753_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43753_cast_fp16 = slice_by_index(begin = var_43753_begin_0, end = var_43753_end_0, end_mask = var_43753_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43753_cast_fp16")]; tensor var_43757_begin_0 = const()[name = tensor("op_43757_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_43757_end_0 = const()[name = tensor("op_43757_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_43757_end_mask_0 = const()[name = tensor("op_43757_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43757_cast_fp16 = slice_by_index(begin = var_43757_begin_0, end = var_43757_end_0, end_mask = var_43757_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43757_cast_fp16")]; tensor var_43761_begin_0 = const()[name = tensor("op_43761_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_43761_end_0 = const()[name = tensor("op_43761_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_43761_end_mask_0 = const()[name = tensor("op_43761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43761_cast_fp16 = slice_by_index(begin = var_43761_begin_0, end = var_43761_end_0, end_mask = var_43761_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43761_cast_fp16")]; tensor var_43765_begin_0 = const()[name = tensor("op_43765_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_43765_end_0 = const()[name = tensor("op_43765_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_43765_end_mask_0 = const()[name = tensor("op_43765_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43765_cast_fp16 = slice_by_index(begin = var_43765_begin_0, end = var_43765_end_0, end_mask = var_43765_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43765_cast_fp16")]; tensor var_43769_begin_0 = const()[name = tensor("op_43769_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_43769_end_0 = const()[name = tensor("op_43769_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_43769_end_mask_0 = const()[name = tensor("op_43769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43769_cast_fp16 = slice_by_index(begin = var_43769_begin_0, end = var_43769_end_0, end_mask = var_43769_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43769_cast_fp16")]; tensor var_43773_begin_0 = const()[name = tensor("op_43773_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_43773_end_0 = const()[name = tensor("op_43773_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_43773_end_mask_0 = const()[name = tensor("op_43773_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43773_cast_fp16 = slice_by_index(begin = var_43773_begin_0, end = var_43773_end_0, end_mask = var_43773_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43773_cast_fp16")]; tensor var_43777_begin_0 = const()[name = tensor("op_43777_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_43777_end_0 = const()[name = tensor("op_43777_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_43777_end_mask_0 = const()[name = tensor("op_43777_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43777_cast_fp16 = slice_by_index(begin = var_43777_begin_0, end = var_43777_end_0, end_mask = var_43777_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43777_cast_fp16")]; tensor var_43781_begin_0 = const()[name = tensor("op_43781_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_43781_end_0 = const()[name = tensor("op_43781_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_43781_end_mask_0 = const()[name = tensor("op_43781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43781_cast_fp16 = slice_by_index(begin = var_43781_begin_0, end = var_43781_end_0, end_mask = var_43781_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43781_cast_fp16")]; tensor var_43785_begin_0 = const()[name = tensor("op_43785_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_43785_end_0 = const()[name = tensor("op_43785_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_43785_end_mask_0 = const()[name = tensor("op_43785_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_43785_cast_fp16 = slice_by_index(begin = var_43785_begin_0, end = var_43785_end_0, end_mask = var_43785_end_mask_0, x = k_55_cast_fp16)[name = tensor("op_43785_cast_fp16")]; tensor var_43787_begin_0 = const()[name = tensor("op_43787_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_43787_end_0 = const()[name = tensor("op_43787_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_43787_end_mask_0 = const()[name = tensor("op_43787_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43787_cast_fp16 = slice_by_index(begin = var_43787_begin_0, end = var_43787_end_0, end_mask = var_43787_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43787_cast_fp16")]; tensor var_43791_begin_0 = const()[name = tensor("op_43791_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_43791_end_0 = const()[name = tensor("op_43791_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_43791_end_mask_0 = const()[name = tensor("op_43791_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43791_cast_fp16 = slice_by_index(begin = var_43791_begin_0, end = var_43791_end_0, end_mask = var_43791_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43791_cast_fp16")]; tensor var_43795_begin_0 = const()[name = tensor("op_43795_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_43795_end_0 = const()[name = tensor("op_43795_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_43795_end_mask_0 = const()[name = tensor("op_43795_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43795_cast_fp16 = slice_by_index(begin = var_43795_begin_0, end = var_43795_end_0, end_mask = var_43795_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43795_cast_fp16")]; tensor var_43799_begin_0 = const()[name = tensor("op_43799_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_43799_end_0 = const()[name = tensor("op_43799_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_43799_end_mask_0 = const()[name = tensor("op_43799_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43799_cast_fp16 = slice_by_index(begin = var_43799_begin_0, end = var_43799_end_0, end_mask = var_43799_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43799_cast_fp16")]; tensor var_43803_begin_0 = const()[name = tensor("op_43803_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_43803_end_0 = const()[name = tensor("op_43803_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_43803_end_mask_0 = const()[name = tensor("op_43803_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43803_cast_fp16 = slice_by_index(begin = var_43803_begin_0, end = var_43803_end_0, end_mask = var_43803_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43803_cast_fp16")]; tensor var_43807_begin_0 = const()[name = tensor("op_43807_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_43807_end_0 = const()[name = tensor("op_43807_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_43807_end_mask_0 = const()[name = tensor("op_43807_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43807_cast_fp16 = slice_by_index(begin = var_43807_begin_0, end = var_43807_end_0, end_mask = var_43807_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43807_cast_fp16")]; tensor var_43811_begin_0 = const()[name = tensor("op_43811_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_43811_end_0 = const()[name = tensor("op_43811_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_43811_end_mask_0 = const()[name = tensor("op_43811_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43811_cast_fp16 = slice_by_index(begin = var_43811_begin_0, end = var_43811_end_0, end_mask = var_43811_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43811_cast_fp16")]; tensor var_43815_begin_0 = const()[name = tensor("op_43815_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_43815_end_0 = const()[name = tensor("op_43815_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_43815_end_mask_0 = const()[name = tensor("op_43815_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43815_cast_fp16 = slice_by_index(begin = var_43815_begin_0, end = var_43815_end_0, end_mask = var_43815_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43815_cast_fp16")]; tensor var_43819_begin_0 = const()[name = tensor("op_43819_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_43819_end_0 = const()[name = tensor("op_43819_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_43819_end_mask_0 = const()[name = tensor("op_43819_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43819_cast_fp16 = slice_by_index(begin = var_43819_begin_0, end = var_43819_end_0, end_mask = var_43819_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43819_cast_fp16")]; tensor var_43823_begin_0 = const()[name = tensor("op_43823_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_43823_end_0 = const()[name = tensor("op_43823_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_43823_end_mask_0 = const()[name = tensor("op_43823_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43823_cast_fp16 = slice_by_index(begin = var_43823_begin_0, end = var_43823_end_0, end_mask = var_43823_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43823_cast_fp16")]; tensor var_43827_begin_0 = const()[name = tensor("op_43827_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_43827_end_0 = const()[name = tensor("op_43827_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_43827_end_mask_0 = const()[name = tensor("op_43827_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43827_cast_fp16 = slice_by_index(begin = var_43827_begin_0, end = var_43827_end_0, end_mask = var_43827_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43827_cast_fp16")]; tensor var_43831_begin_0 = const()[name = tensor("op_43831_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_43831_end_0 = const()[name = tensor("op_43831_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_43831_end_mask_0 = const()[name = tensor("op_43831_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43831_cast_fp16 = slice_by_index(begin = var_43831_begin_0, end = var_43831_end_0, end_mask = var_43831_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43831_cast_fp16")]; tensor var_43835_begin_0 = const()[name = tensor("op_43835_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_43835_end_0 = const()[name = tensor("op_43835_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_43835_end_mask_0 = const()[name = tensor("op_43835_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43835_cast_fp16 = slice_by_index(begin = var_43835_begin_0, end = var_43835_end_0, end_mask = var_43835_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43835_cast_fp16")]; tensor var_43839_begin_0 = const()[name = tensor("op_43839_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_43839_end_0 = const()[name = tensor("op_43839_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_43839_end_mask_0 = const()[name = tensor("op_43839_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43839_cast_fp16 = slice_by_index(begin = var_43839_begin_0, end = var_43839_end_0, end_mask = var_43839_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43839_cast_fp16")]; tensor var_43843_begin_0 = const()[name = tensor("op_43843_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_43843_end_0 = const()[name = tensor("op_43843_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_43843_end_mask_0 = const()[name = tensor("op_43843_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43843_cast_fp16 = slice_by_index(begin = var_43843_begin_0, end = var_43843_end_0, end_mask = var_43843_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43843_cast_fp16")]; tensor var_43847_begin_0 = const()[name = tensor("op_43847_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_43847_end_0 = const()[name = tensor("op_43847_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_43847_end_mask_0 = const()[name = tensor("op_43847_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43847_cast_fp16 = slice_by_index(begin = var_43847_begin_0, end = var_43847_end_0, end_mask = var_43847_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43847_cast_fp16")]; tensor var_43851_begin_0 = const()[name = tensor("op_43851_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_43851_end_0 = const()[name = tensor("op_43851_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_43851_end_mask_0 = const()[name = tensor("op_43851_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43851_cast_fp16 = slice_by_index(begin = var_43851_begin_0, end = var_43851_end_0, end_mask = var_43851_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43851_cast_fp16")]; tensor var_43855_begin_0 = const()[name = tensor("op_43855_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_43855_end_0 = const()[name = tensor("op_43855_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_43855_end_mask_0 = const()[name = tensor("op_43855_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43855_cast_fp16 = slice_by_index(begin = var_43855_begin_0, end = var_43855_end_0, end_mask = var_43855_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43855_cast_fp16")]; tensor var_43859_begin_0 = const()[name = tensor("op_43859_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_43859_end_0 = const()[name = tensor("op_43859_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_43859_end_mask_0 = const()[name = tensor("op_43859_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43859_cast_fp16 = slice_by_index(begin = var_43859_begin_0, end = var_43859_end_0, end_mask = var_43859_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43859_cast_fp16")]; tensor var_43863_begin_0 = const()[name = tensor("op_43863_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_43863_end_0 = const()[name = tensor("op_43863_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_43863_end_mask_0 = const()[name = tensor("op_43863_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_43863_cast_fp16 = slice_by_index(begin = var_43863_begin_0, end = var_43863_end_0, end_mask = var_43863_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_43863_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4321_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4321_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4321_equation_0, values = (var_43709_cast_fp16, var_43151_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4321_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4323_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4323_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4323_equation_0, values = (var_43709_cast_fp16, var_43158_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4323_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4325_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4325_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4325_equation_0, values = (var_43709_cast_fp16, var_43165_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4325_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4327_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4327_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4327_equation_0, values = (var_43709_cast_fp16, var_43172_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4327_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4329_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4329_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4329_equation_0, values = (var_43713_cast_fp16, var_43179_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4329_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4331_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4331_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4331_equation_0, values = (var_43713_cast_fp16, var_43186_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4331_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4333_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4333_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4333_equation_0, values = (var_43713_cast_fp16, var_43193_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4333_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4335_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4335_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4335_equation_0, values = (var_43713_cast_fp16, var_43200_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4335_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4337_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4337_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4337_equation_0, values = (var_43717_cast_fp16, var_43207_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4337_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4339_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4339_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4339_equation_0, values = (var_43717_cast_fp16, var_43214_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4339_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4341_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4341_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4341_equation_0, values = (var_43717_cast_fp16, var_43221_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4341_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4343_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4343_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4343_equation_0, values = (var_43717_cast_fp16, var_43228_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4343_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4345_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4345_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4345_equation_0, values = (var_43721_cast_fp16, var_43235_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4345_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4347_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4347_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4347_equation_0, values = (var_43721_cast_fp16, var_43242_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4347_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4349_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4349_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4349_equation_0, values = (var_43721_cast_fp16, var_43249_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4349_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4351_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4351_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4351_equation_0, values = (var_43721_cast_fp16, var_43256_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4351_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4353_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4353_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4353_equation_0, values = (var_43725_cast_fp16, var_43263_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4353_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4355_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4355_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4355_equation_0, values = (var_43725_cast_fp16, var_43270_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4355_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4357_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4357_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4357_equation_0, values = (var_43725_cast_fp16, var_43277_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4357_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4359_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4359_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4359_equation_0, values = (var_43725_cast_fp16, var_43284_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4359_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4361_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4361_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4361_equation_0, values = (var_43729_cast_fp16, var_43291_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4361_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4363_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4363_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4363_equation_0, values = (var_43729_cast_fp16, var_43298_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4363_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4365_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4365_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4365_equation_0, values = (var_43729_cast_fp16, var_43305_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4365_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4367_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4367_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4367_equation_0, values = (var_43729_cast_fp16, var_43312_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4367_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4369_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4369_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4369_equation_0, values = (var_43733_cast_fp16, var_43319_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4369_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4371_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4371_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4371_equation_0, values = (var_43733_cast_fp16, var_43326_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4371_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4373_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4373_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4373_equation_0, values = (var_43733_cast_fp16, var_43333_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4373_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4375_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4375_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4375_equation_0, values = (var_43733_cast_fp16, var_43340_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4375_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4377_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4377_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4377_equation_0, values = (var_43737_cast_fp16, var_43347_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4377_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4379_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4379_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4379_equation_0, values = (var_43737_cast_fp16, var_43354_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4379_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4381_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4381_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4381_equation_0, values = (var_43737_cast_fp16, var_43361_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4381_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4383_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4383_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4383_equation_0, values = (var_43737_cast_fp16, var_43368_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4383_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4385_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4385_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4385_equation_0, values = (var_43741_cast_fp16, var_43375_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4385_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4387_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4387_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4387_equation_0, values = (var_43741_cast_fp16, var_43382_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4387_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4389_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4389_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4389_equation_0, values = (var_43741_cast_fp16, var_43389_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4389_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4391_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4391_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4391_equation_0, values = (var_43741_cast_fp16, var_43396_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4391_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4393_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4393_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4393_equation_0, values = (var_43745_cast_fp16, var_43403_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4393_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4395_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4395_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4395_equation_0, values = (var_43745_cast_fp16, var_43410_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4395_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4397_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4397_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4397_equation_0, values = (var_43745_cast_fp16, var_43417_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4397_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4399_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4399_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4399_equation_0, values = (var_43745_cast_fp16, var_43424_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4399_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4401_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4401_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4401_equation_0, values = (var_43749_cast_fp16, var_43431_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4401_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4403_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4403_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4403_equation_0, values = (var_43749_cast_fp16, var_43438_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4403_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4405_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4405_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4405_equation_0, values = (var_43749_cast_fp16, var_43445_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4405_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4407_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4407_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4407_equation_0, values = (var_43749_cast_fp16, var_43452_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4407_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4409_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4409_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4409_equation_0, values = (var_43753_cast_fp16, var_43459_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4409_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4411_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4411_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4411_equation_0, values = (var_43753_cast_fp16, var_43466_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4411_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4413_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4413_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4413_equation_0, values = (var_43753_cast_fp16, var_43473_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4413_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4415_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4415_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4415_equation_0, values = (var_43753_cast_fp16, var_43480_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4415_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4417_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4417_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4417_equation_0, values = (var_43757_cast_fp16, var_43487_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4417_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4419_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4419_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4419_equation_0, values = (var_43757_cast_fp16, var_43494_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4419_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4421_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4421_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4421_equation_0, values = (var_43757_cast_fp16, var_43501_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4421_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4423_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4423_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4423_equation_0, values = (var_43757_cast_fp16, var_43508_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4423_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4425_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4425_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4425_equation_0, values = (var_43761_cast_fp16, var_43515_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4425_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4427_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4427_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4427_equation_0, values = (var_43761_cast_fp16, var_43522_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4427_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4429_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4429_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4429_equation_0, values = (var_43761_cast_fp16, var_43529_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4429_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4431_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4431_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4431_equation_0, values = (var_43761_cast_fp16, var_43536_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4431_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4433_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4433_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4433_equation_0, values = (var_43765_cast_fp16, var_43543_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4433_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4435_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4435_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4435_equation_0, values = (var_43765_cast_fp16, var_43550_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4435_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4437_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4437_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4437_equation_0, values = (var_43765_cast_fp16, var_43557_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4437_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4439_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4439_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4439_equation_0, values = (var_43765_cast_fp16, var_43564_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4439_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4441_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4441_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4441_equation_0, values = (var_43769_cast_fp16, var_43571_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4441_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4443_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4443_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4443_equation_0, values = (var_43769_cast_fp16, var_43578_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4443_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4445_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4445_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4445_equation_0, values = (var_43769_cast_fp16, var_43585_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4445_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4447_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4447_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4447_equation_0, values = (var_43769_cast_fp16, var_43592_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4447_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4449_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4449_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4449_equation_0, values = (var_43773_cast_fp16, var_43599_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4449_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4451_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4451_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4451_equation_0, values = (var_43773_cast_fp16, var_43606_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4451_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4453_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4453_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4453_equation_0, values = (var_43773_cast_fp16, var_43613_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4453_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4455_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4455_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4455_equation_0, values = (var_43773_cast_fp16, var_43620_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4455_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4457_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4457_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4457_equation_0, values = (var_43777_cast_fp16, var_43627_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4457_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4459_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4459_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4459_equation_0, values = (var_43777_cast_fp16, var_43634_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4459_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4461_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4461_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4461_equation_0, values = (var_43777_cast_fp16, var_43641_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4461_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4463_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4463_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4463_equation_0, values = (var_43777_cast_fp16, var_43648_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4463_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4465_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4465_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4465_equation_0, values = (var_43781_cast_fp16, var_43655_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4465_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4467_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4467_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4467_equation_0, values = (var_43781_cast_fp16, var_43662_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4467_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4469_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4469_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4469_equation_0, values = (var_43781_cast_fp16, var_43669_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4469_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4471_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4471_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4471_equation_0, values = (var_43781_cast_fp16, var_43676_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4471_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4473_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4473_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4473_equation_0, values = (var_43785_cast_fp16, var_43683_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4473_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4475_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4475_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4475_equation_0, values = (var_43785_cast_fp16, var_43690_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4475_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4477_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4477_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4477_equation_0, values = (var_43785_cast_fp16, var_43697_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4477_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4479_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4479_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4479_equation_0, values = (var_43785_cast_fp16, var_43704_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4479_cast_fp16")]; tensor var_44026_to_fp16 = const()[name = tensor("op_44026_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4321_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4321_cast_fp16, y = var_44026_to_fp16)[name = tensor("aw_chunk_4321_cast_fp16")]; tensor var_44028_to_fp16 = const()[name = tensor("op_44028_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4323_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4323_cast_fp16, y = var_44028_to_fp16)[name = tensor("aw_chunk_4323_cast_fp16")]; tensor var_44030_to_fp16 = const()[name = tensor("op_44030_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4325_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4325_cast_fp16, y = var_44030_to_fp16)[name = tensor("aw_chunk_4325_cast_fp16")]; tensor var_44032_to_fp16 = const()[name = tensor("op_44032_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4327_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4327_cast_fp16, y = var_44032_to_fp16)[name = tensor("aw_chunk_4327_cast_fp16")]; tensor var_44034_to_fp16 = const()[name = tensor("op_44034_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4329_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4329_cast_fp16, y = var_44034_to_fp16)[name = tensor("aw_chunk_4329_cast_fp16")]; tensor var_44036_to_fp16 = const()[name = tensor("op_44036_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4331_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4331_cast_fp16, y = var_44036_to_fp16)[name = tensor("aw_chunk_4331_cast_fp16")]; tensor var_44038_to_fp16 = const()[name = tensor("op_44038_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4333_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4333_cast_fp16, y = var_44038_to_fp16)[name = tensor("aw_chunk_4333_cast_fp16")]; tensor var_44040_to_fp16 = const()[name = tensor("op_44040_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4335_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4335_cast_fp16, y = var_44040_to_fp16)[name = tensor("aw_chunk_4335_cast_fp16")]; tensor var_44042_to_fp16 = const()[name = tensor("op_44042_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4337_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4337_cast_fp16, y = var_44042_to_fp16)[name = tensor("aw_chunk_4337_cast_fp16")]; tensor var_44044_to_fp16 = const()[name = tensor("op_44044_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4339_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4339_cast_fp16, y = var_44044_to_fp16)[name = tensor("aw_chunk_4339_cast_fp16")]; tensor var_44046_to_fp16 = const()[name = tensor("op_44046_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4341_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4341_cast_fp16, y = var_44046_to_fp16)[name = tensor("aw_chunk_4341_cast_fp16")]; tensor var_44048_to_fp16 = const()[name = tensor("op_44048_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4343_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4343_cast_fp16, y = var_44048_to_fp16)[name = tensor("aw_chunk_4343_cast_fp16")]; tensor var_44050_to_fp16 = const()[name = tensor("op_44050_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4345_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4345_cast_fp16, y = var_44050_to_fp16)[name = tensor("aw_chunk_4345_cast_fp16")]; tensor var_44052_to_fp16 = const()[name = tensor("op_44052_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4347_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4347_cast_fp16, y = var_44052_to_fp16)[name = tensor("aw_chunk_4347_cast_fp16")]; tensor var_44054_to_fp16 = const()[name = tensor("op_44054_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4349_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4349_cast_fp16, y = var_44054_to_fp16)[name = tensor("aw_chunk_4349_cast_fp16")]; tensor var_44056_to_fp16 = const()[name = tensor("op_44056_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4351_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4351_cast_fp16, y = var_44056_to_fp16)[name = tensor("aw_chunk_4351_cast_fp16")]; tensor var_44058_to_fp16 = const()[name = tensor("op_44058_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4353_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4353_cast_fp16, y = var_44058_to_fp16)[name = tensor("aw_chunk_4353_cast_fp16")]; tensor var_44060_to_fp16 = const()[name = tensor("op_44060_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4355_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4355_cast_fp16, y = var_44060_to_fp16)[name = tensor("aw_chunk_4355_cast_fp16")]; tensor var_44062_to_fp16 = const()[name = tensor("op_44062_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4357_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4357_cast_fp16, y = var_44062_to_fp16)[name = tensor("aw_chunk_4357_cast_fp16")]; tensor var_44064_to_fp16 = const()[name = tensor("op_44064_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4359_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4359_cast_fp16, y = var_44064_to_fp16)[name = tensor("aw_chunk_4359_cast_fp16")]; tensor var_44066_to_fp16 = const()[name = tensor("op_44066_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4361_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4361_cast_fp16, y = var_44066_to_fp16)[name = tensor("aw_chunk_4361_cast_fp16")]; tensor var_44068_to_fp16 = const()[name = tensor("op_44068_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4363_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4363_cast_fp16, y = var_44068_to_fp16)[name = tensor("aw_chunk_4363_cast_fp16")]; tensor var_44070_to_fp16 = const()[name = tensor("op_44070_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4365_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4365_cast_fp16, y = var_44070_to_fp16)[name = tensor("aw_chunk_4365_cast_fp16")]; tensor var_44072_to_fp16 = const()[name = tensor("op_44072_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4367_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4367_cast_fp16, y = var_44072_to_fp16)[name = tensor("aw_chunk_4367_cast_fp16")]; tensor var_44074_to_fp16 = const()[name = tensor("op_44074_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4369_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4369_cast_fp16, y = var_44074_to_fp16)[name = tensor("aw_chunk_4369_cast_fp16")]; tensor var_44076_to_fp16 = const()[name = tensor("op_44076_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4371_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4371_cast_fp16, y = var_44076_to_fp16)[name = tensor("aw_chunk_4371_cast_fp16")]; tensor var_44078_to_fp16 = const()[name = tensor("op_44078_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4373_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4373_cast_fp16, y = var_44078_to_fp16)[name = tensor("aw_chunk_4373_cast_fp16")]; tensor var_44080_to_fp16 = const()[name = tensor("op_44080_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4375_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4375_cast_fp16, y = var_44080_to_fp16)[name = tensor("aw_chunk_4375_cast_fp16")]; tensor var_44082_to_fp16 = const()[name = tensor("op_44082_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4377_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4377_cast_fp16, y = var_44082_to_fp16)[name = tensor("aw_chunk_4377_cast_fp16")]; tensor var_44084_to_fp16 = const()[name = tensor("op_44084_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4379_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4379_cast_fp16, y = var_44084_to_fp16)[name = tensor("aw_chunk_4379_cast_fp16")]; tensor var_44086_to_fp16 = const()[name = tensor("op_44086_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4381_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4381_cast_fp16, y = var_44086_to_fp16)[name = tensor("aw_chunk_4381_cast_fp16")]; tensor var_44088_to_fp16 = const()[name = tensor("op_44088_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4383_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4383_cast_fp16, y = var_44088_to_fp16)[name = tensor("aw_chunk_4383_cast_fp16")]; tensor var_44090_to_fp16 = const()[name = tensor("op_44090_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4385_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4385_cast_fp16, y = var_44090_to_fp16)[name = tensor("aw_chunk_4385_cast_fp16")]; tensor var_44092_to_fp16 = const()[name = tensor("op_44092_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4387_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4387_cast_fp16, y = var_44092_to_fp16)[name = tensor("aw_chunk_4387_cast_fp16")]; tensor var_44094_to_fp16 = const()[name = tensor("op_44094_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4389_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4389_cast_fp16, y = var_44094_to_fp16)[name = tensor("aw_chunk_4389_cast_fp16")]; tensor var_44096_to_fp16 = const()[name = tensor("op_44096_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4391_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4391_cast_fp16, y = var_44096_to_fp16)[name = tensor("aw_chunk_4391_cast_fp16")]; tensor var_44098_to_fp16 = const()[name = tensor("op_44098_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4393_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4393_cast_fp16, y = var_44098_to_fp16)[name = tensor("aw_chunk_4393_cast_fp16")]; tensor var_44100_to_fp16 = const()[name = tensor("op_44100_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4395_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4395_cast_fp16, y = var_44100_to_fp16)[name = tensor("aw_chunk_4395_cast_fp16")]; tensor var_44102_to_fp16 = const()[name = tensor("op_44102_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4397_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4397_cast_fp16, y = var_44102_to_fp16)[name = tensor("aw_chunk_4397_cast_fp16")]; tensor var_44104_to_fp16 = const()[name = tensor("op_44104_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4399_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4399_cast_fp16, y = var_44104_to_fp16)[name = tensor("aw_chunk_4399_cast_fp16")]; tensor var_44106_to_fp16 = const()[name = tensor("op_44106_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4401_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4401_cast_fp16, y = var_44106_to_fp16)[name = tensor("aw_chunk_4401_cast_fp16")]; tensor var_44108_to_fp16 = const()[name = tensor("op_44108_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4403_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4403_cast_fp16, y = var_44108_to_fp16)[name = tensor("aw_chunk_4403_cast_fp16")]; tensor var_44110_to_fp16 = const()[name = tensor("op_44110_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4405_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4405_cast_fp16, y = var_44110_to_fp16)[name = tensor("aw_chunk_4405_cast_fp16")]; tensor var_44112_to_fp16 = const()[name = tensor("op_44112_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4407_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4407_cast_fp16, y = var_44112_to_fp16)[name = tensor("aw_chunk_4407_cast_fp16")]; tensor var_44114_to_fp16 = const()[name = tensor("op_44114_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4409_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4409_cast_fp16, y = var_44114_to_fp16)[name = tensor("aw_chunk_4409_cast_fp16")]; tensor var_44116_to_fp16 = const()[name = tensor("op_44116_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4411_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4411_cast_fp16, y = var_44116_to_fp16)[name = tensor("aw_chunk_4411_cast_fp16")]; tensor var_44118_to_fp16 = const()[name = tensor("op_44118_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4413_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4413_cast_fp16, y = var_44118_to_fp16)[name = tensor("aw_chunk_4413_cast_fp16")]; tensor var_44120_to_fp16 = const()[name = tensor("op_44120_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4415_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4415_cast_fp16, y = var_44120_to_fp16)[name = tensor("aw_chunk_4415_cast_fp16")]; tensor var_44122_to_fp16 = const()[name = tensor("op_44122_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4417_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4417_cast_fp16, y = var_44122_to_fp16)[name = tensor("aw_chunk_4417_cast_fp16")]; tensor var_44124_to_fp16 = const()[name = tensor("op_44124_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4419_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4419_cast_fp16, y = var_44124_to_fp16)[name = tensor("aw_chunk_4419_cast_fp16")]; tensor var_44126_to_fp16 = const()[name = tensor("op_44126_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4421_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4421_cast_fp16, y = var_44126_to_fp16)[name = tensor("aw_chunk_4421_cast_fp16")]; tensor var_44128_to_fp16 = const()[name = tensor("op_44128_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4423_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4423_cast_fp16, y = var_44128_to_fp16)[name = tensor("aw_chunk_4423_cast_fp16")]; tensor var_44130_to_fp16 = const()[name = tensor("op_44130_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4425_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4425_cast_fp16, y = var_44130_to_fp16)[name = tensor("aw_chunk_4425_cast_fp16")]; tensor var_44132_to_fp16 = const()[name = tensor("op_44132_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4427_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4427_cast_fp16, y = var_44132_to_fp16)[name = tensor("aw_chunk_4427_cast_fp16")]; tensor var_44134_to_fp16 = const()[name = tensor("op_44134_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4429_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4429_cast_fp16, y = var_44134_to_fp16)[name = tensor("aw_chunk_4429_cast_fp16")]; tensor var_44136_to_fp16 = const()[name = tensor("op_44136_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4431_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4431_cast_fp16, y = var_44136_to_fp16)[name = tensor("aw_chunk_4431_cast_fp16")]; tensor var_44138_to_fp16 = const()[name = tensor("op_44138_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4433_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4433_cast_fp16, y = var_44138_to_fp16)[name = tensor("aw_chunk_4433_cast_fp16")]; tensor var_44140_to_fp16 = const()[name = tensor("op_44140_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4435_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4435_cast_fp16, y = var_44140_to_fp16)[name = tensor("aw_chunk_4435_cast_fp16")]; tensor var_44142_to_fp16 = const()[name = tensor("op_44142_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4437_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4437_cast_fp16, y = var_44142_to_fp16)[name = tensor("aw_chunk_4437_cast_fp16")]; tensor var_44144_to_fp16 = const()[name = tensor("op_44144_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4439_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4439_cast_fp16, y = var_44144_to_fp16)[name = tensor("aw_chunk_4439_cast_fp16")]; tensor var_44146_to_fp16 = const()[name = tensor("op_44146_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4441_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4441_cast_fp16, y = var_44146_to_fp16)[name = tensor("aw_chunk_4441_cast_fp16")]; tensor var_44148_to_fp16 = const()[name = tensor("op_44148_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4443_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4443_cast_fp16, y = var_44148_to_fp16)[name = tensor("aw_chunk_4443_cast_fp16")]; tensor var_44150_to_fp16 = const()[name = tensor("op_44150_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4445_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4445_cast_fp16, y = var_44150_to_fp16)[name = tensor("aw_chunk_4445_cast_fp16")]; tensor var_44152_to_fp16 = const()[name = tensor("op_44152_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4447_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4447_cast_fp16, y = var_44152_to_fp16)[name = tensor("aw_chunk_4447_cast_fp16")]; tensor var_44154_to_fp16 = const()[name = tensor("op_44154_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4449_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4449_cast_fp16, y = var_44154_to_fp16)[name = tensor("aw_chunk_4449_cast_fp16")]; tensor var_44156_to_fp16 = const()[name = tensor("op_44156_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4451_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4451_cast_fp16, y = var_44156_to_fp16)[name = tensor("aw_chunk_4451_cast_fp16")]; tensor var_44158_to_fp16 = const()[name = tensor("op_44158_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4453_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4453_cast_fp16, y = var_44158_to_fp16)[name = tensor("aw_chunk_4453_cast_fp16")]; tensor var_44160_to_fp16 = const()[name = tensor("op_44160_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4455_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4455_cast_fp16, y = var_44160_to_fp16)[name = tensor("aw_chunk_4455_cast_fp16")]; tensor var_44162_to_fp16 = const()[name = tensor("op_44162_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4457_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4457_cast_fp16, y = var_44162_to_fp16)[name = tensor("aw_chunk_4457_cast_fp16")]; tensor var_44164_to_fp16 = const()[name = tensor("op_44164_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4459_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4459_cast_fp16, y = var_44164_to_fp16)[name = tensor("aw_chunk_4459_cast_fp16")]; tensor var_44166_to_fp16 = const()[name = tensor("op_44166_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4461_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4461_cast_fp16, y = var_44166_to_fp16)[name = tensor("aw_chunk_4461_cast_fp16")]; tensor var_44168_to_fp16 = const()[name = tensor("op_44168_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4463_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4463_cast_fp16, y = var_44168_to_fp16)[name = tensor("aw_chunk_4463_cast_fp16")]; tensor var_44170_to_fp16 = const()[name = tensor("op_44170_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4465_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4465_cast_fp16, y = var_44170_to_fp16)[name = tensor("aw_chunk_4465_cast_fp16")]; tensor var_44172_to_fp16 = const()[name = tensor("op_44172_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4467_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4467_cast_fp16, y = var_44172_to_fp16)[name = tensor("aw_chunk_4467_cast_fp16")]; tensor var_44174_to_fp16 = const()[name = tensor("op_44174_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4469_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4469_cast_fp16, y = var_44174_to_fp16)[name = tensor("aw_chunk_4469_cast_fp16")]; tensor var_44176_to_fp16 = const()[name = tensor("op_44176_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4471_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4471_cast_fp16, y = var_44176_to_fp16)[name = tensor("aw_chunk_4471_cast_fp16")]; tensor var_44178_to_fp16 = const()[name = tensor("op_44178_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4473_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4473_cast_fp16, y = var_44178_to_fp16)[name = tensor("aw_chunk_4473_cast_fp16")]; tensor var_44180_to_fp16 = const()[name = tensor("op_44180_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4475_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4475_cast_fp16, y = var_44180_to_fp16)[name = tensor("aw_chunk_4475_cast_fp16")]; tensor var_44182_to_fp16 = const()[name = tensor("op_44182_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4477_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4477_cast_fp16, y = var_44182_to_fp16)[name = tensor("aw_chunk_4477_cast_fp16")]; tensor var_44184_to_fp16 = const()[name = tensor("op_44184_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4479_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4479_cast_fp16, y = var_44184_to_fp16)[name = tensor("aw_chunk_4479_cast_fp16")]; tensor var_44186_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4321_cast_fp16)[name = tensor("op_44186_cast_fp16")]; tensor var_44187_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4323_cast_fp16)[name = tensor("op_44187_cast_fp16")]; tensor var_44188_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4325_cast_fp16)[name = tensor("op_44188_cast_fp16")]; tensor var_44189_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4327_cast_fp16)[name = tensor("op_44189_cast_fp16")]; tensor var_44190_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4329_cast_fp16)[name = tensor("op_44190_cast_fp16")]; tensor var_44191_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4331_cast_fp16)[name = tensor("op_44191_cast_fp16")]; tensor var_44192_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4333_cast_fp16)[name = tensor("op_44192_cast_fp16")]; tensor var_44193_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4335_cast_fp16)[name = tensor("op_44193_cast_fp16")]; tensor var_44194_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4337_cast_fp16)[name = tensor("op_44194_cast_fp16")]; tensor var_44195_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4339_cast_fp16)[name = tensor("op_44195_cast_fp16")]; tensor var_44196_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4341_cast_fp16)[name = tensor("op_44196_cast_fp16")]; tensor var_44197_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4343_cast_fp16)[name = tensor("op_44197_cast_fp16")]; tensor var_44198_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4345_cast_fp16)[name = tensor("op_44198_cast_fp16")]; tensor var_44199_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4347_cast_fp16)[name = tensor("op_44199_cast_fp16")]; tensor var_44200_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4349_cast_fp16)[name = tensor("op_44200_cast_fp16")]; tensor var_44201_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4351_cast_fp16)[name = tensor("op_44201_cast_fp16")]; tensor var_44202_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4353_cast_fp16)[name = tensor("op_44202_cast_fp16")]; tensor var_44203_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4355_cast_fp16)[name = tensor("op_44203_cast_fp16")]; tensor var_44204_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4357_cast_fp16)[name = tensor("op_44204_cast_fp16")]; tensor var_44205_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4359_cast_fp16)[name = tensor("op_44205_cast_fp16")]; tensor var_44206_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4361_cast_fp16)[name = tensor("op_44206_cast_fp16")]; tensor var_44207_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4363_cast_fp16)[name = tensor("op_44207_cast_fp16")]; tensor var_44208_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4365_cast_fp16)[name = tensor("op_44208_cast_fp16")]; tensor var_44209_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4367_cast_fp16)[name = tensor("op_44209_cast_fp16")]; tensor var_44210_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4369_cast_fp16)[name = tensor("op_44210_cast_fp16")]; tensor var_44211_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4371_cast_fp16)[name = tensor("op_44211_cast_fp16")]; tensor var_44212_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4373_cast_fp16)[name = tensor("op_44212_cast_fp16")]; tensor var_44213_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4375_cast_fp16)[name = tensor("op_44213_cast_fp16")]; tensor var_44214_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4377_cast_fp16)[name = tensor("op_44214_cast_fp16")]; tensor var_44215_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4379_cast_fp16)[name = tensor("op_44215_cast_fp16")]; tensor var_44216_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4381_cast_fp16)[name = tensor("op_44216_cast_fp16")]; tensor var_44217_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4383_cast_fp16)[name = tensor("op_44217_cast_fp16")]; tensor var_44218_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4385_cast_fp16)[name = tensor("op_44218_cast_fp16")]; tensor var_44219_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4387_cast_fp16)[name = tensor("op_44219_cast_fp16")]; tensor var_44220_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4389_cast_fp16)[name = tensor("op_44220_cast_fp16")]; tensor var_44221_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4391_cast_fp16)[name = tensor("op_44221_cast_fp16")]; tensor var_44222_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4393_cast_fp16)[name = tensor("op_44222_cast_fp16")]; tensor var_44223_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4395_cast_fp16)[name = tensor("op_44223_cast_fp16")]; tensor var_44224_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4397_cast_fp16)[name = tensor("op_44224_cast_fp16")]; tensor var_44225_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4399_cast_fp16)[name = tensor("op_44225_cast_fp16")]; tensor var_44226_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4401_cast_fp16)[name = tensor("op_44226_cast_fp16")]; tensor var_44227_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4403_cast_fp16)[name = tensor("op_44227_cast_fp16")]; tensor var_44228_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4405_cast_fp16)[name = tensor("op_44228_cast_fp16")]; tensor var_44229_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4407_cast_fp16)[name = tensor("op_44229_cast_fp16")]; tensor var_44230_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4409_cast_fp16)[name = tensor("op_44230_cast_fp16")]; tensor var_44231_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4411_cast_fp16)[name = tensor("op_44231_cast_fp16")]; tensor var_44232_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4413_cast_fp16)[name = tensor("op_44232_cast_fp16")]; tensor var_44233_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4415_cast_fp16)[name = tensor("op_44233_cast_fp16")]; tensor var_44234_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4417_cast_fp16)[name = tensor("op_44234_cast_fp16")]; tensor var_44235_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4419_cast_fp16)[name = tensor("op_44235_cast_fp16")]; tensor var_44236_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4421_cast_fp16)[name = tensor("op_44236_cast_fp16")]; tensor var_44237_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4423_cast_fp16)[name = tensor("op_44237_cast_fp16")]; tensor var_44238_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4425_cast_fp16)[name = tensor("op_44238_cast_fp16")]; tensor var_44239_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4427_cast_fp16)[name = tensor("op_44239_cast_fp16")]; tensor var_44240_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4429_cast_fp16)[name = tensor("op_44240_cast_fp16")]; tensor var_44241_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4431_cast_fp16)[name = tensor("op_44241_cast_fp16")]; tensor var_44242_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4433_cast_fp16)[name = tensor("op_44242_cast_fp16")]; tensor var_44243_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4435_cast_fp16)[name = tensor("op_44243_cast_fp16")]; tensor var_44244_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4437_cast_fp16)[name = tensor("op_44244_cast_fp16")]; tensor var_44245_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4439_cast_fp16)[name = tensor("op_44245_cast_fp16")]; tensor var_44246_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4441_cast_fp16)[name = tensor("op_44246_cast_fp16")]; tensor var_44247_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4443_cast_fp16)[name = tensor("op_44247_cast_fp16")]; tensor var_44248_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4445_cast_fp16)[name = tensor("op_44248_cast_fp16")]; tensor var_44249_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4447_cast_fp16)[name = tensor("op_44249_cast_fp16")]; tensor var_44250_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4449_cast_fp16)[name = tensor("op_44250_cast_fp16")]; tensor var_44251_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4451_cast_fp16)[name = tensor("op_44251_cast_fp16")]; tensor var_44252_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4453_cast_fp16)[name = tensor("op_44252_cast_fp16")]; tensor var_44253_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4455_cast_fp16)[name = tensor("op_44253_cast_fp16")]; tensor var_44254_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4457_cast_fp16)[name = tensor("op_44254_cast_fp16")]; tensor var_44255_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4459_cast_fp16)[name = tensor("op_44255_cast_fp16")]; tensor var_44256_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4461_cast_fp16)[name = tensor("op_44256_cast_fp16")]; tensor var_44257_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4463_cast_fp16)[name = tensor("op_44257_cast_fp16")]; tensor var_44258_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4465_cast_fp16)[name = tensor("op_44258_cast_fp16")]; tensor var_44259_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4467_cast_fp16)[name = tensor("op_44259_cast_fp16")]; tensor var_44260_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4469_cast_fp16)[name = tensor("op_44260_cast_fp16")]; tensor var_44261_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4471_cast_fp16)[name = tensor("op_44261_cast_fp16")]; tensor var_44262_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4473_cast_fp16)[name = tensor("op_44262_cast_fp16")]; tensor var_44263_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4475_cast_fp16)[name = tensor("op_44263_cast_fp16")]; tensor var_44264_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4477_cast_fp16)[name = tensor("op_44264_cast_fp16")]; tensor var_44265_cast_fp16 = softmax(axis = var_42984, x = aw_chunk_4479_cast_fp16)[name = tensor("op_44265_cast_fp16")]; tensor var_44267_equation_0 = const()[name = tensor("op_44267_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44267_cast_fp16 = einsum(equation = var_44267_equation_0, values = (var_43787_cast_fp16, var_44186_cast_fp16))[name = tensor("op_44267_cast_fp16")]; tensor var_44269_equation_0 = const()[name = tensor("op_44269_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44269_cast_fp16 = einsum(equation = var_44269_equation_0, values = (var_43787_cast_fp16, var_44187_cast_fp16))[name = tensor("op_44269_cast_fp16")]; tensor var_44271_equation_0 = const()[name = tensor("op_44271_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44271_cast_fp16 = einsum(equation = var_44271_equation_0, values = (var_43787_cast_fp16, var_44188_cast_fp16))[name = tensor("op_44271_cast_fp16")]; tensor var_44273_equation_0 = const()[name = tensor("op_44273_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44273_cast_fp16 = einsum(equation = var_44273_equation_0, values = (var_43787_cast_fp16, var_44189_cast_fp16))[name = tensor("op_44273_cast_fp16")]; tensor var_44275_equation_0 = const()[name = tensor("op_44275_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44275_cast_fp16 = einsum(equation = var_44275_equation_0, values = (var_43791_cast_fp16, var_44190_cast_fp16))[name = tensor("op_44275_cast_fp16")]; tensor var_44277_equation_0 = const()[name = tensor("op_44277_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44277_cast_fp16 = einsum(equation = var_44277_equation_0, values = (var_43791_cast_fp16, var_44191_cast_fp16))[name = tensor("op_44277_cast_fp16")]; tensor var_44279_equation_0 = const()[name = tensor("op_44279_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44279_cast_fp16 = einsum(equation = var_44279_equation_0, values = (var_43791_cast_fp16, var_44192_cast_fp16))[name = tensor("op_44279_cast_fp16")]; tensor var_44281_equation_0 = const()[name = tensor("op_44281_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44281_cast_fp16 = einsum(equation = var_44281_equation_0, values = (var_43791_cast_fp16, var_44193_cast_fp16))[name = tensor("op_44281_cast_fp16")]; tensor var_44283_equation_0 = const()[name = tensor("op_44283_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44283_cast_fp16 = einsum(equation = var_44283_equation_0, values = (var_43795_cast_fp16, var_44194_cast_fp16))[name = tensor("op_44283_cast_fp16")]; tensor var_44285_equation_0 = const()[name = tensor("op_44285_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44285_cast_fp16 = einsum(equation = var_44285_equation_0, values = (var_43795_cast_fp16, var_44195_cast_fp16))[name = tensor("op_44285_cast_fp16")]; tensor var_44287_equation_0 = const()[name = tensor("op_44287_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44287_cast_fp16 = einsum(equation = var_44287_equation_0, values = (var_43795_cast_fp16, var_44196_cast_fp16))[name = tensor("op_44287_cast_fp16")]; tensor var_44289_equation_0 = const()[name = tensor("op_44289_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44289_cast_fp16 = einsum(equation = var_44289_equation_0, values = (var_43795_cast_fp16, var_44197_cast_fp16))[name = tensor("op_44289_cast_fp16")]; tensor var_44291_equation_0 = const()[name = tensor("op_44291_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44291_cast_fp16 = einsum(equation = var_44291_equation_0, values = (var_43799_cast_fp16, var_44198_cast_fp16))[name = tensor("op_44291_cast_fp16")]; tensor var_44293_equation_0 = const()[name = tensor("op_44293_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44293_cast_fp16 = einsum(equation = var_44293_equation_0, values = (var_43799_cast_fp16, var_44199_cast_fp16))[name = tensor("op_44293_cast_fp16")]; tensor var_44295_equation_0 = const()[name = tensor("op_44295_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44295_cast_fp16 = einsum(equation = var_44295_equation_0, values = (var_43799_cast_fp16, var_44200_cast_fp16))[name = tensor("op_44295_cast_fp16")]; tensor var_44297_equation_0 = const()[name = tensor("op_44297_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44297_cast_fp16 = einsum(equation = var_44297_equation_0, values = (var_43799_cast_fp16, var_44201_cast_fp16))[name = tensor("op_44297_cast_fp16")]; tensor var_44299_equation_0 = const()[name = tensor("op_44299_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44299_cast_fp16 = einsum(equation = var_44299_equation_0, values = (var_43803_cast_fp16, var_44202_cast_fp16))[name = tensor("op_44299_cast_fp16")]; tensor var_44301_equation_0 = const()[name = tensor("op_44301_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44301_cast_fp16 = einsum(equation = var_44301_equation_0, values = (var_43803_cast_fp16, var_44203_cast_fp16))[name = tensor("op_44301_cast_fp16")]; tensor var_44303_equation_0 = const()[name = tensor("op_44303_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44303_cast_fp16 = einsum(equation = var_44303_equation_0, values = (var_43803_cast_fp16, var_44204_cast_fp16))[name = tensor("op_44303_cast_fp16")]; tensor var_44305_equation_0 = const()[name = tensor("op_44305_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44305_cast_fp16 = einsum(equation = var_44305_equation_0, values = (var_43803_cast_fp16, var_44205_cast_fp16))[name = tensor("op_44305_cast_fp16")]; tensor var_44307_equation_0 = const()[name = tensor("op_44307_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44307_cast_fp16 = einsum(equation = var_44307_equation_0, values = (var_43807_cast_fp16, var_44206_cast_fp16))[name = tensor("op_44307_cast_fp16")]; tensor var_44309_equation_0 = const()[name = tensor("op_44309_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44309_cast_fp16 = einsum(equation = var_44309_equation_0, values = (var_43807_cast_fp16, var_44207_cast_fp16))[name = tensor("op_44309_cast_fp16")]; tensor var_44311_equation_0 = const()[name = tensor("op_44311_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44311_cast_fp16 = einsum(equation = var_44311_equation_0, values = (var_43807_cast_fp16, var_44208_cast_fp16))[name = tensor("op_44311_cast_fp16")]; tensor var_44313_equation_0 = const()[name = tensor("op_44313_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44313_cast_fp16 = einsum(equation = var_44313_equation_0, values = (var_43807_cast_fp16, var_44209_cast_fp16))[name = tensor("op_44313_cast_fp16")]; tensor var_44315_equation_0 = const()[name = tensor("op_44315_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44315_cast_fp16 = einsum(equation = var_44315_equation_0, values = (var_43811_cast_fp16, var_44210_cast_fp16))[name = tensor("op_44315_cast_fp16")]; tensor var_44317_equation_0 = const()[name = tensor("op_44317_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44317_cast_fp16 = einsum(equation = var_44317_equation_0, values = (var_43811_cast_fp16, var_44211_cast_fp16))[name = tensor("op_44317_cast_fp16")]; tensor var_44319_equation_0 = const()[name = tensor("op_44319_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44319_cast_fp16 = einsum(equation = var_44319_equation_0, values = (var_43811_cast_fp16, var_44212_cast_fp16))[name = tensor("op_44319_cast_fp16")]; tensor var_44321_equation_0 = const()[name = tensor("op_44321_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44321_cast_fp16 = einsum(equation = var_44321_equation_0, values = (var_43811_cast_fp16, var_44213_cast_fp16))[name = tensor("op_44321_cast_fp16")]; tensor var_44323_equation_0 = const()[name = tensor("op_44323_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44323_cast_fp16 = einsum(equation = var_44323_equation_0, values = (var_43815_cast_fp16, var_44214_cast_fp16))[name = tensor("op_44323_cast_fp16")]; tensor var_44325_equation_0 = const()[name = tensor("op_44325_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44325_cast_fp16 = einsum(equation = var_44325_equation_0, values = (var_43815_cast_fp16, var_44215_cast_fp16))[name = tensor("op_44325_cast_fp16")]; tensor var_44327_equation_0 = const()[name = tensor("op_44327_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44327_cast_fp16 = einsum(equation = var_44327_equation_0, values = (var_43815_cast_fp16, var_44216_cast_fp16))[name = tensor("op_44327_cast_fp16")]; tensor var_44329_equation_0 = const()[name = tensor("op_44329_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44329_cast_fp16 = einsum(equation = var_44329_equation_0, values = (var_43815_cast_fp16, var_44217_cast_fp16))[name = tensor("op_44329_cast_fp16")]; tensor var_44331_equation_0 = const()[name = tensor("op_44331_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44331_cast_fp16 = einsum(equation = var_44331_equation_0, values = (var_43819_cast_fp16, var_44218_cast_fp16))[name = tensor("op_44331_cast_fp16")]; tensor var_44333_equation_0 = const()[name = tensor("op_44333_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44333_cast_fp16 = einsum(equation = var_44333_equation_0, values = (var_43819_cast_fp16, var_44219_cast_fp16))[name = tensor("op_44333_cast_fp16")]; tensor var_44335_equation_0 = const()[name = tensor("op_44335_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44335_cast_fp16 = einsum(equation = var_44335_equation_0, values = (var_43819_cast_fp16, var_44220_cast_fp16))[name = tensor("op_44335_cast_fp16")]; tensor var_44337_equation_0 = const()[name = tensor("op_44337_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44337_cast_fp16 = einsum(equation = var_44337_equation_0, values = (var_43819_cast_fp16, var_44221_cast_fp16))[name = tensor("op_44337_cast_fp16")]; tensor var_44339_equation_0 = const()[name = tensor("op_44339_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44339_cast_fp16 = einsum(equation = var_44339_equation_0, values = (var_43823_cast_fp16, var_44222_cast_fp16))[name = tensor("op_44339_cast_fp16")]; tensor var_44341_equation_0 = const()[name = tensor("op_44341_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44341_cast_fp16 = einsum(equation = var_44341_equation_0, values = (var_43823_cast_fp16, var_44223_cast_fp16))[name = tensor("op_44341_cast_fp16")]; tensor var_44343_equation_0 = const()[name = tensor("op_44343_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44343_cast_fp16 = einsum(equation = var_44343_equation_0, values = (var_43823_cast_fp16, var_44224_cast_fp16))[name = tensor("op_44343_cast_fp16")]; tensor var_44345_equation_0 = const()[name = tensor("op_44345_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44345_cast_fp16 = einsum(equation = var_44345_equation_0, values = (var_43823_cast_fp16, var_44225_cast_fp16))[name = tensor("op_44345_cast_fp16")]; tensor var_44347_equation_0 = const()[name = tensor("op_44347_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44347_cast_fp16 = einsum(equation = var_44347_equation_0, values = (var_43827_cast_fp16, var_44226_cast_fp16))[name = tensor("op_44347_cast_fp16")]; tensor var_44349_equation_0 = const()[name = tensor("op_44349_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44349_cast_fp16 = einsum(equation = var_44349_equation_0, values = (var_43827_cast_fp16, var_44227_cast_fp16))[name = tensor("op_44349_cast_fp16")]; tensor var_44351_equation_0 = const()[name = tensor("op_44351_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44351_cast_fp16 = einsum(equation = var_44351_equation_0, values = (var_43827_cast_fp16, var_44228_cast_fp16))[name = tensor("op_44351_cast_fp16")]; tensor var_44353_equation_0 = const()[name = tensor("op_44353_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44353_cast_fp16 = einsum(equation = var_44353_equation_0, values = (var_43827_cast_fp16, var_44229_cast_fp16))[name = tensor("op_44353_cast_fp16")]; tensor var_44355_equation_0 = const()[name = tensor("op_44355_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44355_cast_fp16 = einsum(equation = var_44355_equation_0, values = (var_43831_cast_fp16, var_44230_cast_fp16))[name = tensor("op_44355_cast_fp16")]; tensor var_44357_equation_0 = const()[name = tensor("op_44357_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44357_cast_fp16 = einsum(equation = var_44357_equation_0, values = (var_43831_cast_fp16, var_44231_cast_fp16))[name = tensor("op_44357_cast_fp16")]; tensor var_44359_equation_0 = const()[name = tensor("op_44359_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44359_cast_fp16 = einsum(equation = var_44359_equation_0, values = (var_43831_cast_fp16, var_44232_cast_fp16))[name = tensor("op_44359_cast_fp16")]; tensor var_44361_equation_0 = const()[name = tensor("op_44361_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44361_cast_fp16 = einsum(equation = var_44361_equation_0, values = (var_43831_cast_fp16, var_44233_cast_fp16))[name = tensor("op_44361_cast_fp16")]; tensor var_44363_equation_0 = const()[name = tensor("op_44363_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44363_cast_fp16 = einsum(equation = var_44363_equation_0, values = (var_43835_cast_fp16, var_44234_cast_fp16))[name = tensor("op_44363_cast_fp16")]; tensor var_44365_equation_0 = const()[name = tensor("op_44365_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44365_cast_fp16 = einsum(equation = var_44365_equation_0, values = (var_43835_cast_fp16, var_44235_cast_fp16))[name = tensor("op_44365_cast_fp16")]; tensor var_44367_equation_0 = const()[name = tensor("op_44367_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44367_cast_fp16 = einsum(equation = var_44367_equation_0, values = (var_43835_cast_fp16, var_44236_cast_fp16))[name = tensor("op_44367_cast_fp16")]; tensor var_44369_equation_0 = const()[name = tensor("op_44369_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44369_cast_fp16 = einsum(equation = var_44369_equation_0, values = (var_43835_cast_fp16, var_44237_cast_fp16))[name = tensor("op_44369_cast_fp16")]; tensor var_44371_equation_0 = const()[name = tensor("op_44371_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44371_cast_fp16 = einsum(equation = var_44371_equation_0, values = (var_43839_cast_fp16, var_44238_cast_fp16))[name = tensor("op_44371_cast_fp16")]; tensor var_44373_equation_0 = const()[name = tensor("op_44373_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44373_cast_fp16 = einsum(equation = var_44373_equation_0, values = (var_43839_cast_fp16, var_44239_cast_fp16))[name = tensor("op_44373_cast_fp16")]; tensor var_44375_equation_0 = const()[name = tensor("op_44375_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44375_cast_fp16 = einsum(equation = var_44375_equation_0, values = (var_43839_cast_fp16, var_44240_cast_fp16))[name = tensor("op_44375_cast_fp16")]; tensor var_44377_equation_0 = const()[name = tensor("op_44377_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44377_cast_fp16 = einsum(equation = var_44377_equation_0, values = (var_43839_cast_fp16, var_44241_cast_fp16))[name = tensor("op_44377_cast_fp16")]; tensor var_44379_equation_0 = const()[name = tensor("op_44379_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44379_cast_fp16 = einsum(equation = var_44379_equation_0, values = (var_43843_cast_fp16, var_44242_cast_fp16))[name = tensor("op_44379_cast_fp16")]; tensor var_44381_equation_0 = const()[name = tensor("op_44381_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44381_cast_fp16 = einsum(equation = var_44381_equation_0, values = (var_43843_cast_fp16, var_44243_cast_fp16))[name = tensor("op_44381_cast_fp16")]; tensor var_44383_equation_0 = const()[name = tensor("op_44383_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44383_cast_fp16 = einsum(equation = var_44383_equation_0, values = (var_43843_cast_fp16, var_44244_cast_fp16))[name = tensor("op_44383_cast_fp16")]; tensor var_44385_equation_0 = const()[name = tensor("op_44385_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44385_cast_fp16 = einsum(equation = var_44385_equation_0, values = (var_43843_cast_fp16, var_44245_cast_fp16))[name = tensor("op_44385_cast_fp16")]; tensor var_44387_equation_0 = const()[name = tensor("op_44387_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44387_cast_fp16 = einsum(equation = var_44387_equation_0, values = (var_43847_cast_fp16, var_44246_cast_fp16))[name = tensor("op_44387_cast_fp16")]; tensor var_44389_equation_0 = const()[name = tensor("op_44389_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44389_cast_fp16 = einsum(equation = var_44389_equation_0, values = (var_43847_cast_fp16, var_44247_cast_fp16))[name = tensor("op_44389_cast_fp16")]; tensor var_44391_equation_0 = const()[name = tensor("op_44391_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44391_cast_fp16 = einsum(equation = var_44391_equation_0, values = (var_43847_cast_fp16, var_44248_cast_fp16))[name = tensor("op_44391_cast_fp16")]; tensor var_44393_equation_0 = const()[name = tensor("op_44393_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44393_cast_fp16 = einsum(equation = var_44393_equation_0, values = (var_43847_cast_fp16, var_44249_cast_fp16))[name = tensor("op_44393_cast_fp16")]; tensor var_44395_equation_0 = const()[name = tensor("op_44395_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44395_cast_fp16 = einsum(equation = var_44395_equation_0, values = (var_43851_cast_fp16, var_44250_cast_fp16))[name = tensor("op_44395_cast_fp16")]; tensor var_44397_equation_0 = const()[name = tensor("op_44397_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44397_cast_fp16 = einsum(equation = var_44397_equation_0, values = (var_43851_cast_fp16, var_44251_cast_fp16))[name = tensor("op_44397_cast_fp16")]; tensor var_44399_equation_0 = const()[name = tensor("op_44399_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44399_cast_fp16 = einsum(equation = var_44399_equation_0, values = (var_43851_cast_fp16, var_44252_cast_fp16))[name = tensor("op_44399_cast_fp16")]; tensor var_44401_equation_0 = const()[name = tensor("op_44401_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44401_cast_fp16 = einsum(equation = var_44401_equation_0, values = (var_43851_cast_fp16, var_44253_cast_fp16))[name = tensor("op_44401_cast_fp16")]; tensor var_44403_equation_0 = const()[name = tensor("op_44403_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44403_cast_fp16 = einsum(equation = var_44403_equation_0, values = (var_43855_cast_fp16, var_44254_cast_fp16))[name = tensor("op_44403_cast_fp16")]; tensor var_44405_equation_0 = const()[name = tensor("op_44405_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44405_cast_fp16 = einsum(equation = var_44405_equation_0, values = (var_43855_cast_fp16, var_44255_cast_fp16))[name = tensor("op_44405_cast_fp16")]; tensor var_44407_equation_0 = const()[name = tensor("op_44407_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44407_cast_fp16 = einsum(equation = var_44407_equation_0, values = (var_43855_cast_fp16, var_44256_cast_fp16))[name = tensor("op_44407_cast_fp16")]; tensor var_44409_equation_0 = const()[name = tensor("op_44409_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44409_cast_fp16 = einsum(equation = var_44409_equation_0, values = (var_43855_cast_fp16, var_44257_cast_fp16))[name = tensor("op_44409_cast_fp16")]; tensor var_44411_equation_0 = const()[name = tensor("op_44411_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44411_cast_fp16 = einsum(equation = var_44411_equation_0, values = (var_43859_cast_fp16, var_44258_cast_fp16))[name = tensor("op_44411_cast_fp16")]; tensor var_44413_equation_0 = const()[name = tensor("op_44413_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44413_cast_fp16 = einsum(equation = var_44413_equation_0, values = (var_43859_cast_fp16, var_44259_cast_fp16))[name = tensor("op_44413_cast_fp16")]; tensor var_44415_equation_0 = const()[name = tensor("op_44415_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44415_cast_fp16 = einsum(equation = var_44415_equation_0, values = (var_43859_cast_fp16, var_44260_cast_fp16))[name = tensor("op_44415_cast_fp16")]; tensor var_44417_equation_0 = const()[name = tensor("op_44417_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44417_cast_fp16 = einsum(equation = var_44417_equation_0, values = (var_43859_cast_fp16, var_44261_cast_fp16))[name = tensor("op_44417_cast_fp16")]; tensor var_44419_equation_0 = const()[name = tensor("op_44419_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44419_cast_fp16 = einsum(equation = var_44419_equation_0, values = (var_43863_cast_fp16, var_44262_cast_fp16))[name = tensor("op_44419_cast_fp16")]; tensor var_44421_equation_0 = const()[name = tensor("op_44421_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44421_cast_fp16 = einsum(equation = var_44421_equation_0, values = (var_43863_cast_fp16, var_44263_cast_fp16))[name = tensor("op_44421_cast_fp16")]; tensor var_44423_equation_0 = const()[name = tensor("op_44423_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44423_cast_fp16 = einsum(equation = var_44423_equation_0, values = (var_43863_cast_fp16, var_44264_cast_fp16))[name = tensor("op_44423_cast_fp16")]; tensor var_44425_equation_0 = const()[name = tensor("op_44425_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_44425_cast_fp16 = einsum(equation = var_44425_equation_0, values = (var_43863_cast_fp16, var_44265_cast_fp16))[name = tensor("op_44425_cast_fp16")]; tensor var_44427_interleave_0 = const()[name = tensor("op_44427_interleave_0"), val = tensor(false)]; tensor var_44427_cast_fp16 = concat(axis = var_42959, interleave = var_44427_interleave_0, values = (var_44267_cast_fp16, var_44269_cast_fp16, var_44271_cast_fp16, var_44273_cast_fp16))[name = tensor("op_44427_cast_fp16")]; tensor var_44429_interleave_0 = const()[name = tensor("op_44429_interleave_0"), val = tensor(false)]; tensor var_44429_cast_fp16 = concat(axis = var_42959, interleave = var_44429_interleave_0, values = (var_44275_cast_fp16, var_44277_cast_fp16, var_44279_cast_fp16, var_44281_cast_fp16))[name = tensor("op_44429_cast_fp16")]; tensor var_44431_interleave_0 = const()[name = tensor("op_44431_interleave_0"), val = tensor(false)]; tensor var_44431_cast_fp16 = concat(axis = var_42959, interleave = var_44431_interleave_0, values = (var_44283_cast_fp16, var_44285_cast_fp16, var_44287_cast_fp16, var_44289_cast_fp16))[name = tensor("op_44431_cast_fp16")]; tensor var_44433_interleave_0 = const()[name = tensor("op_44433_interleave_0"), val = tensor(false)]; tensor var_44433_cast_fp16 = concat(axis = var_42959, interleave = var_44433_interleave_0, values = (var_44291_cast_fp16, var_44293_cast_fp16, var_44295_cast_fp16, var_44297_cast_fp16))[name = tensor("op_44433_cast_fp16")]; tensor var_44435_interleave_0 = const()[name = tensor("op_44435_interleave_0"), val = tensor(false)]; tensor var_44435_cast_fp16 = concat(axis = var_42959, interleave = var_44435_interleave_0, values = (var_44299_cast_fp16, var_44301_cast_fp16, var_44303_cast_fp16, var_44305_cast_fp16))[name = tensor("op_44435_cast_fp16")]; tensor var_44437_interleave_0 = const()[name = tensor("op_44437_interleave_0"), val = tensor(false)]; tensor var_44437_cast_fp16 = concat(axis = var_42959, interleave = var_44437_interleave_0, values = (var_44307_cast_fp16, var_44309_cast_fp16, var_44311_cast_fp16, var_44313_cast_fp16))[name = tensor("op_44437_cast_fp16")]; tensor var_44439_interleave_0 = const()[name = tensor("op_44439_interleave_0"), val = tensor(false)]; tensor var_44439_cast_fp16 = concat(axis = var_42959, interleave = var_44439_interleave_0, values = (var_44315_cast_fp16, var_44317_cast_fp16, var_44319_cast_fp16, var_44321_cast_fp16))[name = tensor("op_44439_cast_fp16")]; tensor var_44441_interleave_0 = const()[name = tensor("op_44441_interleave_0"), val = tensor(false)]; tensor var_44441_cast_fp16 = concat(axis = var_42959, interleave = var_44441_interleave_0, values = (var_44323_cast_fp16, var_44325_cast_fp16, var_44327_cast_fp16, var_44329_cast_fp16))[name = tensor("op_44441_cast_fp16")]; tensor var_44443_interleave_0 = const()[name = tensor("op_44443_interleave_0"), val = tensor(false)]; tensor var_44443_cast_fp16 = concat(axis = var_42959, interleave = var_44443_interleave_0, values = (var_44331_cast_fp16, var_44333_cast_fp16, var_44335_cast_fp16, var_44337_cast_fp16))[name = tensor("op_44443_cast_fp16")]; tensor var_44445_interleave_0 = const()[name = tensor("op_44445_interleave_0"), val = tensor(false)]; tensor var_44445_cast_fp16 = concat(axis = var_42959, interleave = var_44445_interleave_0, values = (var_44339_cast_fp16, var_44341_cast_fp16, var_44343_cast_fp16, var_44345_cast_fp16))[name = tensor("op_44445_cast_fp16")]; tensor var_44447_interleave_0 = const()[name = tensor("op_44447_interleave_0"), val = tensor(false)]; tensor var_44447_cast_fp16 = concat(axis = var_42959, interleave = var_44447_interleave_0, values = (var_44347_cast_fp16, var_44349_cast_fp16, var_44351_cast_fp16, var_44353_cast_fp16))[name = tensor("op_44447_cast_fp16")]; tensor var_44449_interleave_0 = const()[name = tensor("op_44449_interleave_0"), val = tensor(false)]; tensor var_44449_cast_fp16 = concat(axis = var_42959, interleave = var_44449_interleave_0, values = (var_44355_cast_fp16, var_44357_cast_fp16, var_44359_cast_fp16, var_44361_cast_fp16))[name = tensor("op_44449_cast_fp16")]; tensor var_44451_interleave_0 = const()[name = tensor("op_44451_interleave_0"), val = tensor(false)]; tensor var_44451_cast_fp16 = concat(axis = var_42959, interleave = var_44451_interleave_0, values = (var_44363_cast_fp16, var_44365_cast_fp16, var_44367_cast_fp16, var_44369_cast_fp16))[name = tensor("op_44451_cast_fp16")]; tensor var_44453_interleave_0 = const()[name = tensor("op_44453_interleave_0"), val = tensor(false)]; tensor var_44453_cast_fp16 = concat(axis = var_42959, interleave = var_44453_interleave_0, values = (var_44371_cast_fp16, var_44373_cast_fp16, var_44375_cast_fp16, var_44377_cast_fp16))[name = tensor("op_44453_cast_fp16")]; tensor var_44455_interleave_0 = const()[name = tensor("op_44455_interleave_0"), val = tensor(false)]; tensor var_44455_cast_fp16 = concat(axis = var_42959, interleave = var_44455_interleave_0, values = (var_44379_cast_fp16, var_44381_cast_fp16, var_44383_cast_fp16, var_44385_cast_fp16))[name = tensor("op_44455_cast_fp16")]; tensor var_44457_interleave_0 = const()[name = tensor("op_44457_interleave_0"), val = tensor(false)]; tensor var_44457_cast_fp16 = concat(axis = var_42959, interleave = var_44457_interleave_0, values = (var_44387_cast_fp16, var_44389_cast_fp16, var_44391_cast_fp16, var_44393_cast_fp16))[name = tensor("op_44457_cast_fp16")]; tensor var_44459_interleave_0 = const()[name = tensor("op_44459_interleave_0"), val = tensor(false)]; tensor var_44459_cast_fp16 = concat(axis = var_42959, interleave = var_44459_interleave_0, values = (var_44395_cast_fp16, var_44397_cast_fp16, var_44399_cast_fp16, var_44401_cast_fp16))[name = tensor("op_44459_cast_fp16")]; tensor var_44461_interleave_0 = const()[name = tensor("op_44461_interleave_0"), val = tensor(false)]; tensor var_44461_cast_fp16 = concat(axis = var_42959, interleave = var_44461_interleave_0, values = (var_44403_cast_fp16, var_44405_cast_fp16, var_44407_cast_fp16, var_44409_cast_fp16))[name = tensor("op_44461_cast_fp16")]; tensor var_44463_interleave_0 = const()[name = tensor("op_44463_interleave_0"), val = tensor(false)]; tensor var_44463_cast_fp16 = concat(axis = var_42959, interleave = var_44463_interleave_0, values = (var_44411_cast_fp16, var_44413_cast_fp16, var_44415_cast_fp16, var_44417_cast_fp16))[name = tensor("op_44463_cast_fp16")]; tensor var_44465_interleave_0 = const()[name = tensor("op_44465_interleave_0"), val = tensor(false)]; tensor var_44465_cast_fp16 = concat(axis = var_42959, interleave = var_44465_interleave_0, values = (var_44419_cast_fp16, var_44421_cast_fp16, var_44423_cast_fp16, var_44425_cast_fp16))[name = tensor("op_44465_cast_fp16")]; tensor input_217_interleave_0 = const()[name = tensor("input_217_interleave_0"), val = tensor(false)]; tensor input_217_cast_fp16 = concat(axis = var_42984, interleave = input_217_interleave_0, values = (var_44427_cast_fp16, var_44429_cast_fp16, var_44431_cast_fp16, var_44433_cast_fp16, var_44435_cast_fp16, var_44437_cast_fp16, var_44439_cast_fp16, var_44441_cast_fp16, var_44443_cast_fp16, var_44445_cast_fp16, var_44447_cast_fp16, var_44449_cast_fp16, var_44451_cast_fp16, var_44453_cast_fp16, var_44455_cast_fp16, var_44457_cast_fp16, var_44459_cast_fp16, var_44461_cast_fp16, var_44463_cast_fp16, var_44465_cast_fp16))[name = tensor("input_217_cast_fp16")]; tensor var_44476_pad_type_0 = const()[name = tensor("op_44476_pad_type_0"), val = tensor("valid")]; tensor var_44476_strides_0 = const()[name = tensor("op_44476_strides_0"), val = tensor([1, 1])]; tensor var_44476_pad_0 = const()[name = tensor("op_44476_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44476_dilations_0 = const()[name = tensor("op_44476_dilations_0"), val = tensor([1, 1])]; tensor var_44476_groups_0 = const()[name = tensor("op_44476_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(362564416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363383680))), name = tensor("layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_27_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363383808)))]; tensor var_44476_cast_fp16 = conv(bias = layers_27_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_44476_dilations_0, groups = var_44476_groups_0, pad = var_44476_pad_0, pad_type = var_44476_pad_type_0, strides = var_44476_strides_0, weight = layers_27_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_217_cast_fp16)[name = tensor("op_44476_cast_fp16")]; tensor var_44482_pad_type_0 = const()[name = tensor("op_44482_pad_type_0"), val = tensor("valid")]; tensor var_44482_strides_0 = const()[name = tensor("op_44482_strides_0"), val = tensor([1, 1])]; tensor var_44482_pad_0 = const()[name = tensor("op_44482_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44482_dilations_0 = const()[name = tensor("op_44482_dilations_0"), val = tensor([1, 1])]; tensor var_44482_groups_0 = const()[name = tensor("op_44482_groups_0"), val = tensor(1)]; tensor layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363396544))), name = tensor("layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363386432))), shape = tensor([1280, 1280, 1, 1])]; tensor var_44482_cast_fp16 = conv(dilations = var_44482_dilations_0, groups = var_44482_groups_0, pad = var_44482_pad_0, pad_type = var_44482_pad_type_0, strides = var_44482_strides_0, weight = layers_27_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_217_cast_fp16)[name = tensor("op_44482_cast_fp16")]; tensor obj_111_cast_fp16 = add(x = var_44476_cast_fp16, y = var_44482_cast_fp16)[name = tensor("obj_111_cast_fp16")]; tensor inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = obj_111_cast_fp16)[name = tensor("inputs_111_cast_fp16")]; tensor out_111_axes_0 = const()[name = tensor("out_111_axes_0"), val = tensor([1])]; tensor var_44493_to_fp16 = const()[name = tensor("op_44493_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_111_cast_fp16 = layer_norm(axes = out_111_axes_0, epsilon = var_44493_to_fp16, x = inputs_111_cast_fp16)[name = tensor("out_111_cast_fp16")]; tensor input_219_gamma_0_to_fp16 = const()[name = tensor("input_219_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363601408)))]; tensor input_219_beta_0_to_fp16 = const()[name = tensor("input_219_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363604032)))]; tensor input_219_epsilon_0_to_fp16 = const()[name = tensor("input_219_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_219_cast_fp16 = batch_norm(beta = input_219_beta_0_to_fp16, epsilon = input_219_epsilon_0_to_fp16, gamma = input_219_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_111_cast_fp16)[name = tensor("input_219_cast_fp16")]; tensor var_44511_pad_type_0 = const()[name = tensor("op_44511_pad_type_0"), val = tensor("valid")]; tensor var_44511_strides_0 = const()[name = tensor("op_44511_strides_0"), val = tensor([1, 1])]; tensor var_44511_pad_0 = const()[name = tensor("op_44511_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44511_dilations_0 = const()[name = tensor("op_44511_dilations_0"), val = tensor([1, 1])]; tensor var_44511_groups_0 = const()[name = tensor("op_44511_groups_0"), val = tensor(1)]; tensor layers_27_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(363606656))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366883520))), name = tensor("layers_27_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_27_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366883648)))]; tensor var_44511_cast_fp16 = conv(bias = layers_27_fc1_inlier_module_bias_to_fp16, dilations = var_44511_dilations_0, groups = var_44511_groups_0, pad = var_44511_pad_0, pad_type = var_44511_pad_type_0, strides = var_44511_strides_0, weight = layers_27_fc1_inlier_module_weight_to_fp16_palettized, x = input_219_cast_fp16)[name = tensor("op_44511_cast_fp16")]; tensor var_44517_pad_type_0 = const()[name = tensor("op_44517_pad_type_0"), val = tensor("valid")]; tensor var_44517_strides_0 = const()[name = tensor("op_44517_strides_0"), val = tensor([1, 1])]; tensor var_44517_pad_0 = const()[name = tensor("op_44517_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44517_dilations_0 = const()[name = tensor("op_44517_dilations_0"), val = tensor([1, 1])]; tensor var_44517_groups_0 = const()[name = tensor("op_44517_groups_0"), val = tensor(1)]; tensor layers_27_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366948672))), name = tensor("layers_27_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(366893952))), shape = tensor([5120, 1280, 1, 1])]; tensor var_44517_cast_fp16 = conv(dilations = var_44517_dilations_0, groups = var_44517_groups_0, pad = var_44517_pad_0, pad_type = var_44517_pad_type_0, strides = var_44517_strides_0, weight = layers_27_fc1_outlier_module_weight_to_fp16_sparsified, x = input_219_cast_fp16)[name = tensor("op_44517_cast_fp16")]; tensor input_221_cast_fp16 = add(x = var_44511_cast_fp16, y = var_44517_cast_fp16)[name = tensor("input_221_cast_fp16")]; tensor input_223_mode_0 = const()[name = tensor("input_223_mode_0"), val = tensor("EXACT")]; tensor input_223_cast_fp16 = gelu(mode = input_223_mode_0, x = input_221_cast_fp16)[name = tensor("input_223_cast_fp16")]; tensor var_44528_pad_type_0 = const()[name = tensor("op_44528_pad_type_0"), val = tensor("valid")]; tensor var_44528_strides_0 = const()[name = tensor("op_44528_strides_0"), val = tensor([1, 1])]; tensor var_44528_pad_0 = const()[name = tensor("op_44528_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44528_dilations_0 = const()[name = tensor("op_44528_dilations_0"), val = tensor([1, 1])]; tensor var_44528_groups_0 = const()[name = tensor("op_44528_groups_0"), val = tensor(1)]; tensor layers_27_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(367767936))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371044800))), name = tensor("layers_27_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_27_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_27_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371044928)))]; tensor var_44528_cast_fp16 = conv(bias = layers_27_fc2_inlier_module_bias_to_fp16, dilations = var_44528_dilations_0, groups = var_44528_groups_0, pad = var_44528_pad_0, pad_type = var_44528_pad_type_0, strides = var_44528_strides_0, weight = layers_27_fc2_inlier_module_weight_to_fp16_palettized, x = input_223_cast_fp16)[name = tensor("op_44528_cast_fp16")]; tensor var_44534_pad_type_0 = const()[name = tensor("op_44534_pad_type_0"), val = tensor("valid")]; tensor var_44534_strides_0 = const()[name = tensor("op_44534_strides_0"), val = tensor([1, 1])]; tensor var_44534_pad_0 = const()[name = tensor("op_44534_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44534_dilations_0 = const()[name = tensor("op_44534_dilations_0"), val = tensor([1, 1])]; tensor var_44534_groups_0 = const()[name = tensor("op_44534_groups_0"), val = tensor(1)]; tensor layers_27_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371098048))), name = tensor("layers_27_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371047552))), shape = tensor([1280, 5120, 1, 1])]; tensor var_44534_cast_fp16 = conv(dilations = var_44534_dilations_0, groups = var_44534_groups_0, pad = var_44534_pad_0, pad_type = var_44534_pad_type_0, strides = var_44534_strides_0, weight = layers_27_fc2_outlier_module_weight_to_fp16_sparsified, x = input_223_cast_fp16)[name = tensor("op_44534_cast_fp16")]; tensor hidden_states_59_cast_fp16 = add(x = var_44528_cast_fp16, y = var_44534_cast_fp16)[name = tensor("hidden_states_59_cast_fp16")]; tensor inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = hidden_states_59_cast_fp16)[name = tensor("inputs_113_cast_fp16")]; tensor var_44540 = const()[name = tensor("op_44540"), val = tensor(3)]; tensor var_44565 = const()[name = tensor("op_44565"), val = tensor(1)]; tensor out_113_axes_0 = const()[name = tensor("out_113_axes_0"), val = tensor([1])]; tensor var_44582_to_fp16 = const()[name = tensor("op_44582_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_113_cast_fp16 = layer_norm(axes = out_113_axes_0, epsilon = var_44582_to_fp16, x = inputs_113_cast_fp16)[name = tensor("out_113_cast_fp16")]; tensor obj_113_gamma_0_to_fp16 = const()[name = tensor("obj_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371917312)))]; tensor obj_113_beta_0_to_fp16 = const()[name = tensor("obj_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371919936)))]; tensor obj_113_epsilon_0_to_fp16 = const()[name = tensor("obj_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_113_cast_fp16)[name = tensor("obj_113_cast_fp16")]; tensor var_44604_pad_type_0 = const()[name = tensor("op_44604_pad_type_0"), val = tensor("valid")]; tensor var_44604_strides_0 = const()[name = tensor("op_44604_strides_0"), val = tensor([1, 1])]; tensor var_44604_pad_0 = const()[name = tensor("op_44604_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44604_dilations_0 = const()[name = tensor("op_44604_dilations_0"), val = tensor([1, 1])]; tensor var_44604_groups_0 = const()[name = tensor("op_44604_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371922560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372741824))), name = tensor("layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_28_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372741952)))]; tensor var_44604_cast_fp16 = conv(bias = layers_28_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_44604_dilations_0, groups = var_44604_groups_0, pad = var_44604_pad_0, pad_type = var_44604_pad_type_0, strides = var_44604_strides_0, weight = layers_28_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_44604_cast_fp16")]; tensor var_44610_pad_type_0 = const()[name = tensor("op_44610_pad_type_0"), val = tensor("valid")]; tensor var_44610_strides_0 = const()[name = tensor("op_44610_strides_0"), val = tensor([1, 1])]; tensor var_44610_pad_0 = const()[name = tensor("op_44610_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44610_dilations_0 = const()[name = tensor("op_44610_dilations_0"), val = tensor([1, 1])]; tensor var_44610_groups_0 = const()[name = tensor("op_44610_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372772992))), name = tensor("layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372744576))), shape = tensor([1280, 1280, 1, 1])]; tensor var_44610_cast_fp16 = conv(dilations = var_44610_dilations_0, groups = var_44610_groups_0, pad = var_44610_pad_0, pad_type = var_44610_pad_type_0, strides = var_44610_strides_0, weight = layers_28_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_44610_cast_fp16")]; tensor query_57_cast_fp16 = add(x = var_44604_cast_fp16, y = var_44610_cast_fp16)[name = tensor("query_57_cast_fp16")]; tensor var_44619_pad_type_0 = const()[name = tensor("op_44619_pad_type_0"), val = tensor("valid")]; tensor var_44619_strides_0 = const()[name = tensor("op_44619_strides_0"), val = tensor([1, 1])]; tensor var_44619_pad_0 = const()[name = tensor("op_44619_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44619_dilations_0 = const()[name = tensor("op_44619_dilations_0"), val = tensor([1, 1])]; tensor var_44619_groups_0 = const()[name = tensor("op_44619_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(372977856))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373797120))), name = tensor("layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_44619_cast_fp16 = conv(dilations = var_44619_dilations_0, groups = var_44619_groups_0, pad = var_44619_pad_0, pad_type = var_44619_pad_type_0, strides = var_44619_strides_0, weight = layers_28_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_44619_cast_fp16")]; tensor var_44625_pad_type_0 = const()[name = tensor("op_44625_pad_type_0"), val = tensor("valid")]; tensor var_44625_strides_0 = const()[name = tensor("op_44625_strides_0"), val = tensor([1, 1])]; tensor var_44625_pad_0 = const()[name = tensor("op_44625_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44625_dilations_0 = const()[name = tensor("op_44625_dilations_0"), val = tensor([1, 1])]; tensor var_44625_groups_0 = const()[name = tensor("op_44625_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373819328))), name = tensor("layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(373797248))), shape = tensor([1280, 1280, 1, 1])]; tensor var_44625_cast_fp16 = conv(dilations = var_44625_dilations_0, groups = var_44625_groups_0, pad = var_44625_pad_0, pad_type = var_44625_pad_type_0, strides = var_44625_strides_0, weight = layers_28_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_44625_cast_fp16")]; tensor key_57_cast_fp16 = add(x = var_44619_cast_fp16, y = var_44625_cast_fp16)[name = tensor("key_57_cast_fp16")]; tensor var_44635_pad_type_0 = const()[name = tensor("op_44635_pad_type_0"), val = tensor("valid")]; tensor var_44635_strides_0 = const()[name = tensor("op_44635_strides_0"), val = tensor([1, 1])]; tensor var_44635_pad_0 = const()[name = tensor("op_44635_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44635_dilations_0 = const()[name = tensor("op_44635_dilations_0"), val = tensor([1, 1])]; tensor var_44635_groups_0 = const()[name = tensor("op_44635_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374024192))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374843456))), name = tensor("layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_28_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374843584)))]; tensor var_44635_cast_fp16 = conv(bias = layers_28_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_44635_dilations_0, groups = var_44635_groups_0, pad = var_44635_pad_0, pad_type = var_44635_pad_type_0, strides = var_44635_strides_0, weight = layers_28_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_113_cast_fp16)[name = tensor("op_44635_cast_fp16")]; tensor var_44641_pad_type_0 = const()[name = tensor("op_44641_pad_type_0"), val = tensor("valid")]; tensor var_44641_strides_0 = const()[name = tensor("op_44641_strides_0"), val = tensor([1, 1])]; tensor var_44641_pad_0 = const()[name = tensor("op_44641_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_44641_dilations_0 = const()[name = tensor("op_44641_dilations_0"), val = tensor([1, 1])]; tensor var_44641_groups_0 = const()[name = tensor("op_44641_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374858048))), name = tensor("layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(374846208))), shape = tensor([1280, 1280, 1, 1])]; tensor var_44641_cast_fp16 = conv(dilations = var_44641_dilations_0, groups = var_44641_groups_0, pad = var_44641_pad_0, pad_type = var_44641_pad_type_0, strides = var_44641_strides_0, weight = layers_28_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_113_cast_fp16)[name = tensor("op_44641_cast_fp16")]; tensor value_57_cast_fp16 = add(x = var_44635_cast_fp16, y = var_44641_cast_fp16)[name = tensor("value_57_cast_fp16")]; tensor var_44647_begin_0 = const()[name = tensor("op_44647_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44647_end_0 = const()[name = tensor("op_44647_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44647_end_mask_0 = const()[name = tensor("op_44647_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44647_cast_fp16 = slice_by_index(begin = var_44647_begin_0, end = var_44647_end_0, end_mask = var_44647_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44647_cast_fp16")]; tensor var_44651_begin_0 = const()[name = tensor("op_44651_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_44651_end_0 = const()[name = tensor("op_44651_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_44651_end_mask_0 = const()[name = tensor("op_44651_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44651_cast_fp16 = slice_by_index(begin = var_44651_begin_0, end = var_44651_end_0, end_mask = var_44651_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44651_cast_fp16")]; tensor var_44655_begin_0 = const()[name = tensor("op_44655_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_44655_end_0 = const()[name = tensor("op_44655_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_44655_end_mask_0 = const()[name = tensor("op_44655_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44655_cast_fp16 = slice_by_index(begin = var_44655_begin_0, end = var_44655_end_0, end_mask = var_44655_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44655_cast_fp16")]; tensor var_44659_begin_0 = const()[name = tensor("op_44659_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_44659_end_0 = const()[name = tensor("op_44659_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_44659_end_mask_0 = const()[name = tensor("op_44659_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44659_cast_fp16 = slice_by_index(begin = var_44659_begin_0, end = var_44659_end_0, end_mask = var_44659_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44659_cast_fp16")]; tensor var_44663_begin_0 = const()[name = tensor("op_44663_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_44663_end_0 = const()[name = tensor("op_44663_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_44663_end_mask_0 = const()[name = tensor("op_44663_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44663_cast_fp16 = slice_by_index(begin = var_44663_begin_0, end = var_44663_end_0, end_mask = var_44663_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44663_cast_fp16")]; tensor var_44667_begin_0 = const()[name = tensor("op_44667_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_44667_end_0 = const()[name = tensor("op_44667_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_44667_end_mask_0 = const()[name = tensor("op_44667_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44667_cast_fp16 = slice_by_index(begin = var_44667_begin_0, end = var_44667_end_0, end_mask = var_44667_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44667_cast_fp16")]; tensor var_44671_begin_0 = const()[name = tensor("op_44671_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_44671_end_0 = const()[name = tensor("op_44671_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_44671_end_mask_0 = const()[name = tensor("op_44671_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44671_cast_fp16 = slice_by_index(begin = var_44671_begin_0, end = var_44671_end_0, end_mask = var_44671_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44671_cast_fp16")]; tensor var_44675_begin_0 = const()[name = tensor("op_44675_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_44675_end_0 = const()[name = tensor("op_44675_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_44675_end_mask_0 = const()[name = tensor("op_44675_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44675_cast_fp16 = slice_by_index(begin = var_44675_begin_0, end = var_44675_end_0, end_mask = var_44675_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44675_cast_fp16")]; tensor var_44679_begin_0 = const()[name = tensor("op_44679_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_44679_end_0 = const()[name = tensor("op_44679_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_44679_end_mask_0 = const()[name = tensor("op_44679_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44679_cast_fp16 = slice_by_index(begin = var_44679_begin_0, end = var_44679_end_0, end_mask = var_44679_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44679_cast_fp16")]; tensor var_44683_begin_0 = const()[name = tensor("op_44683_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_44683_end_0 = const()[name = tensor("op_44683_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_44683_end_mask_0 = const()[name = tensor("op_44683_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44683_cast_fp16 = slice_by_index(begin = var_44683_begin_0, end = var_44683_end_0, end_mask = var_44683_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44683_cast_fp16")]; tensor var_44687_begin_0 = const()[name = tensor("op_44687_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_44687_end_0 = const()[name = tensor("op_44687_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_44687_end_mask_0 = const()[name = tensor("op_44687_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44687_cast_fp16 = slice_by_index(begin = var_44687_begin_0, end = var_44687_end_0, end_mask = var_44687_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44687_cast_fp16")]; tensor var_44691_begin_0 = const()[name = tensor("op_44691_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_44691_end_0 = const()[name = tensor("op_44691_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_44691_end_mask_0 = const()[name = tensor("op_44691_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44691_cast_fp16 = slice_by_index(begin = var_44691_begin_0, end = var_44691_end_0, end_mask = var_44691_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44691_cast_fp16")]; tensor var_44695_begin_0 = const()[name = tensor("op_44695_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_44695_end_0 = const()[name = tensor("op_44695_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_44695_end_mask_0 = const()[name = tensor("op_44695_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44695_cast_fp16 = slice_by_index(begin = var_44695_begin_0, end = var_44695_end_0, end_mask = var_44695_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44695_cast_fp16")]; tensor var_44699_begin_0 = const()[name = tensor("op_44699_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_44699_end_0 = const()[name = tensor("op_44699_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_44699_end_mask_0 = const()[name = tensor("op_44699_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44699_cast_fp16 = slice_by_index(begin = var_44699_begin_0, end = var_44699_end_0, end_mask = var_44699_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44699_cast_fp16")]; tensor var_44703_begin_0 = const()[name = tensor("op_44703_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_44703_end_0 = const()[name = tensor("op_44703_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_44703_end_mask_0 = const()[name = tensor("op_44703_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44703_cast_fp16 = slice_by_index(begin = var_44703_begin_0, end = var_44703_end_0, end_mask = var_44703_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44703_cast_fp16")]; tensor var_44707_begin_0 = const()[name = tensor("op_44707_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_44707_end_0 = const()[name = tensor("op_44707_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_44707_end_mask_0 = const()[name = tensor("op_44707_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44707_cast_fp16 = slice_by_index(begin = var_44707_begin_0, end = var_44707_end_0, end_mask = var_44707_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44707_cast_fp16")]; tensor var_44711_begin_0 = const()[name = tensor("op_44711_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_44711_end_0 = const()[name = tensor("op_44711_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_44711_end_mask_0 = const()[name = tensor("op_44711_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44711_cast_fp16 = slice_by_index(begin = var_44711_begin_0, end = var_44711_end_0, end_mask = var_44711_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44711_cast_fp16")]; tensor var_44715_begin_0 = const()[name = tensor("op_44715_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_44715_end_0 = const()[name = tensor("op_44715_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_44715_end_mask_0 = const()[name = tensor("op_44715_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44715_cast_fp16 = slice_by_index(begin = var_44715_begin_0, end = var_44715_end_0, end_mask = var_44715_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44715_cast_fp16")]; tensor var_44719_begin_0 = const()[name = tensor("op_44719_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_44719_end_0 = const()[name = tensor("op_44719_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_44719_end_mask_0 = const()[name = tensor("op_44719_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44719_cast_fp16 = slice_by_index(begin = var_44719_begin_0, end = var_44719_end_0, end_mask = var_44719_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44719_cast_fp16")]; tensor var_44723_begin_0 = const()[name = tensor("op_44723_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_44723_end_0 = const()[name = tensor("op_44723_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_44723_end_mask_0 = const()[name = tensor("op_44723_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_44723_cast_fp16 = slice_by_index(begin = var_44723_begin_0, end = var_44723_end_0, end_mask = var_44723_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_44723_cast_fp16")]; tensor var_44732_begin_0 = const()[name = tensor("op_44732_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44732_end_0 = const()[name = tensor("op_44732_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44732_end_mask_0 = const()[name = tensor("op_44732_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44732_cast_fp16 = slice_by_index(begin = var_44732_begin_0, end = var_44732_end_0, end_mask = var_44732_end_mask_0, x = var_44647_cast_fp16)[name = tensor("op_44732_cast_fp16")]; tensor var_44739_begin_0 = const()[name = tensor("op_44739_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44739_end_0 = const()[name = tensor("op_44739_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44739_end_mask_0 = const()[name = tensor("op_44739_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44739_cast_fp16 = slice_by_index(begin = var_44739_begin_0, end = var_44739_end_0, end_mask = var_44739_end_mask_0, x = var_44647_cast_fp16)[name = tensor("op_44739_cast_fp16")]; tensor var_44746_begin_0 = const()[name = tensor("op_44746_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44746_end_0 = const()[name = tensor("op_44746_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44746_end_mask_0 = const()[name = tensor("op_44746_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44746_cast_fp16 = slice_by_index(begin = var_44746_begin_0, end = var_44746_end_0, end_mask = var_44746_end_mask_0, x = var_44647_cast_fp16)[name = tensor("op_44746_cast_fp16")]; tensor var_44753_begin_0 = const()[name = tensor("op_44753_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44753_end_0 = const()[name = tensor("op_44753_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44753_end_mask_0 = const()[name = tensor("op_44753_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44753_cast_fp16 = slice_by_index(begin = var_44753_begin_0, end = var_44753_end_0, end_mask = var_44753_end_mask_0, x = var_44647_cast_fp16)[name = tensor("op_44753_cast_fp16")]; tensor var_44760_begin_0 = const()[name = tensor("op_44760_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44760_end_0 = const()[name = tensor("op_44760_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44760_end_mask_0 = const()[name = tensor("op_44760_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44760_cast_fp16 = slice_by_index(begin = var_44760_begin_0, end = var_44760_end_0, end_mask = var_44760_end_mask_0, x = var_44651_cast_fp16)[name = tensor("op_44760_cast_fp16")]; tensor var_44767_begin_0 = const()[name = tensor("op_44767_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44767_end_0 = const()[name = tensor("op_44767_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44767_end_mask_0 = const()[name = tensor("op_44767_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44767_cast_fp16 = slice_by_index(begin = var_44767_begin_0, end = var_44767_end_0, end_mask = var_44767_end_mask_0, x = var_44651_cast_fp16)[name = tensor("op_44767_cast_fp16")]; tensor var_44774_begin_0 = const()[name = tensor("op_44774_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44774_end_0 = const()[name = tensor("op_44774_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44774_end_mask_0 = const()[name = tensor("op_44774_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44774_cast_fp16 = slice_by_index(begin = var_44774_begin_0, end = var_44774_end_0, end_mask = var_44774_end_mask_0, x = var_44651_cast_fp16)[name = tensor("op_44774_cast_fp16")]; tensor var_44781_begin_0 = const()[name = tensor("op_44781_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44781_end_0 = const()[name = tensor("op_44781_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44781_end_mask_0 = const()[name = tensor("op_44781_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44781_cast_fp16 = slice_by_index(begin = var_44781_begin_0, end = var_44781_end_0, end_mask = var_44781_end_mask_0, x = var_44651_cast_fp16)[name = tensor("op_44781_cast_fp16")]; tensor var_44788_begin_0 = const()[name = tensor("op_44788_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44788_end_0 = const()[name = tensor("op_44788_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44788_end_mask_0 = const()[name = tensor("op_44788_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44788_cast_fp16 = slice_by_index(begin = var_44788_begin_0, end = var_44788_end_0, end_mask = var_44788_end_mask_0, x = var_44655_cast_fp16)[name = tensor("op_44788_cast_fp16")]; tensor var_44795_begin_0 = const()[name = tensor("op_44795_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44795_end_0 = const()[name = tensor("op_44795_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44795_end_mask_0 = const()[name = tensor("op_44795_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44795_cast_fp16 = slice_by_index(begin = var_44795_begin_0, end = var_44795_end_0, end_mask = var_44795_end_mask_0, x = var_44655_cast_fp16)[name = tensor("op_44795_cast_fp16")]; tensor var_44802_begin_0 = const()[name = tensor("op_44802_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44802_end_0 = const()[name = tensor("op_44802_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44802_end_mask_0 = const()[name = tensor("op_44802_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44802_cast_fp16 = slice_by_index(begin = var_44802_begin_0, end = var_44802_end_0, end_mask = var_44802_end_mask_0, x = var_44655_cast_fp16)[name = tensor("op_44802_cast_fp16")]; tensor var_44809_begin_0 = const()[name = tensor("op_44809_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44809_end_0 = const()[name = tensor("op_44809_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44809_end_mask_0 = const()[name = tensor("op_44809_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44809_cast_fp16 = slice_by_index(begin = var_44809_begin_0, end = var_44809_end_0, end_mask = var_44809_end_mask_0, x = var_44655_cast_fp16)[name = tensor("op_44809_cast_fp16")]; tensor var_44816_begin_0 = const()[name = tensor("op_44816_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44816_end_0 = const()[name = tensor("op_44816_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44816_end_mask_0 = const()[name = tensor("op_44816_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44816_cast_fp16 = slice_by_index(begin = var_44816_begin_0, end = var_44816_end_0, end_mask = var_44816_end_mask_0, x = var_44659_cast_fp16)[name = tensor("op_44816_cast_fp16")]; tensor var_44823_begin_0 = const()[name = tensor("op_44823_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44823_end_0 = const()[name = tensor("op_44823_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44823_end_mask_0 = const()[name = tensor("op_44823_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44823_cast_fp16 = slice_by_index(begin = var_44823_begin_0, end = var_44823_end_0, end_mask = var_44823_end_mask_0, x = var_44659_cast_fp16)[name = tensor("op_44823_cast_fp16")]; tensor var_44830_begin_0 = const()[name = tensor("op_44830_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44830_end_0 = const()[name = tensor("op_44830_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44830_end_mask_0 = const()[name = tensor("op_44830_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44830_cast_fp16 = slice_by_index(begin = var_44830_begin_0, end = var_44830_end_0, end_mask = var_44830_end_mask_0, x = var_44659_cast_fp16)[name = tensor("op_44830_cast_fp16")]; tensor var_44837_begin_0 = const()[name = tensor("op_44837_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44837_end_0 = const()[name = tensor("op_44837_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44837_end_mask_0 = const()[name = tensor("op_44837_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44837_cast_fp16 = slice_by_index(begin = var_44837_begin_0, end = var_44837_end_0, end_mask = var_44837_end_mask_0, x = var_44659_cast_fp16)[name = tensor("op_44837_cast_fp16")]; tensor var_44844_begin_0 = const()[name = tensor("op_44844_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44844_end_0 = const()[name = tensor("op_44844_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44844_end_mask_0 = const()[name = tensor("op_44844_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44844_cast_fp16 = slice_by_index(begin = var_44844_begin_0, end = var_44844_end_0, end_mask = var_44844_end_mask_0, x = var_44663_cast_fp16)[name = tensor("op_44844_cast_fp16")]; tensor var_44851_begin_0 = const()[name = tensor("op_44851_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44851_end_0 = const()[name = tensor("op_44851_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44851_end_mask_0 = const()[name = tensor("op_44851_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44851_cast_fp16 = slice_by_index(begin = var_44851_begin_0, end = var_44851_end_0, end_mask = var_44851_end_mask_0, x = var_44663_cast_fp16)[name = tensor("op_44851_cast_fp16")]; tensor var_44858_begin_0 = const()[name = tensor("op_44858_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44858_end_0 = const()[name = tensor("op_44858_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44858_end_mask_0 = const()[name = tensor("op_44858_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44858_cast_fp16 = slice_by_index(begin = var_44858_begin_0, end = var_44858_end_0, end_mask = var_44858_end_mask_0, x = var_44663_cast_fp16)[name = tensor("op_44858_cast_fp16")]; tensor var_44865_begin_0 = const()[name = tensor("op_44865_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44865_end_0 = const()[name = tensor("op_44865_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44865_end_mask_0 = const()[name = tensor("op_44865_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44865_cast_fp16 = slice_by_index(begin = var_44865_begin_0, end = var_44865_end_0, end_mask = var_44865_end_mask_0, x = var_44663_cast_fp16)[name = tensor("op_44865_cast_fp16")]; tensor var_44872_begin_0 = const()[name = tensor("op_44872_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44872_end_0 = const()[name = tensor("op_44872_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44872_end_mask_0 = const()[name = tensor("op_44872_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44872_cast_fp16 = slice_by_index(begin = var_44872_begin_0, end = var_44872_end_0, end_mask = var_44872_end_mask_0, x = var_44667_cast_fp16)[name = tensor("op_44872_cast_fp16")]; tensor var_44879_begin_0 = const()[name = tensor("op_44879_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44879_end_0 = const()[name = tensor("op_44879_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44879_end_mask_0 = const()[name = tensor("op_44879_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44879_cast_fp16 = slice_by_index(begin = var_44879_begin_0, end = var_44879_end_0, end_mask = var_44879_end_mask_0, x = var_44667_cast_fp16)[name = tensor("op_44879_cast_fp16")]; tensor var_44886_begin_0 = const()[name = tensor("op_44886_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44886_end_0 = const()[name = tensor("op_44886_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44886_end_mask_0 = const()[name = tensor("op_44886_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44886_cast_fp16 = slice_by_index(begin = var_44886_begin_0, end = var_44886_end_0, end_mask = var_44886_end_mask_0, x = var_44667_cast_fp16)[name = tensor("op_44886_cast_fp16")]; tensor var_44893_begin_0 = const()[name = tensor("op_44893_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44893_end_0 = const()[name = tensor("op_44893_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44893_end_mask_0 = const()[name = tensor("op_44893_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44893_cast_fp16 = slice_by_index(begin = var_44893_begin_0, end = var_44893_end_0, end_mask = var_44893_end_mask_0, x = var_44667_cast_fp16)[name = tensor("op_44893_cast_fp16")]; tensor var_44900_begin_0 = const()[name = tensor("op_44900_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44900_end_0 = const()[name = tensor("op_44900_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44900_end_mask_0 = const()[name = tensor("op_44900_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44900_cast_fp16 = slice_by_index(begin = var_44900_begin_0, end = var_44900_end_0, end_mask = var_44900_end_mask_0, x = var_44671_cast_fp16)[name = tensor("op_44900_cast_fp16")]; tensor var_44907_begin_0 = const()[name = tensor("op_44907_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44907_end_0 = const()[name = tensor("op_44907_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44907_end_mask_0 = const()[name = tensor("op_44907_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44907_cast_fp16 = slice_by_index(begin = var_44907_begin_0, end = var_44907_end_0, end_mask = var_44907_end_mask_0, x = var_44671_cast_fp16)[name = tensor("op_44907_cast_fp16")]; tensor var_44914_begin_0 = const()[name = tensor("op_44914_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44914_end_0 = const()[name = tensor("op_44914_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44914_end_mask_0 = const()[name = tensor("op_44914_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44914_cast_fp16 = slice_by_index(begin = var_44914_begin_0, end = var_44914_end_0, end_mask = var_44914_end_mask_0, x = var_44671_cast_fp16)[name = tensor("op_44914_cast_fp16")]; tensor var_44921_begin_0 = const()[name = tensor("op_44921_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44921_end_0 = const()[name = tensor("op_44921_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44921_end_mask_0 = const()[name = tensor("op_44921_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44921_cast_fp16 = slice_by_index(begin = var_44921_begin_0, end = var_44921_end_0, end_mask = var_44921_end_mask_0, x = var_44671_cast_fp16)[name = tensor("op_44921_cast_fp16")]; tensor var_44928_begin_0 = const()[name = tensor("op_44928_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44928_end_0 = const()[name = tensor("op_44928_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44928_end_mask_0 = const()[name = tensor("op_44928_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44928_cast_fp16 = slice_by_index(begin = var_44928_begin_0, end = var_44928_end_0, end_mask = var_44928_end_mask_0, x = var_44675_cast_fp16)[name = tensor("op_44928_cast_fp16")]; tensor var_44935_begin_0 = const()[name = tensor("op_44935_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44935_end_0 = const()[name = tensor("op_44935_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44935_end_mask_0 = const()[name = tensor("op_44935_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44935_cast_fp16 = slice_by_index(begin = var_44935_begin_0, end = var_44935_end_0, end_mask = var_44935_end_mask_0, x = var_44675_cast_fp16)[name = tensor("op_44935_cast_fp16")]; tensor var_44942_begin_0 = const()[name = tensor("op_44942_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44942_end_0 = const()[name = tensor("op_44942_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44942_end_mask_0 = const()[name = tensor("op_44942_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44942_cast_fp16 = slice_by_index(begin = var_44942_begin_0, end = var_44942_end_0, end_mask = var_44942_end_mask_0, x = var_44675_cast_fp16)[name = tensor("op_44942_cast_fp16")]; tensor var_44949_begin_0 = const()[name = tensor("op_44949_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44949_end_0 = const()[name = tensor("op_44949_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44949_end_mask_0 = const()[name = tensor("op_44949_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44949_cast_fp16 = slice_by_index(begin = var_44949_begin_0, end = var_44949_end_0, end_mask = var_44949_end_mask_0, x = var_44675_cast_fp16)[name = tensor("op_44949_cast_fp16")]; tensor var_44956_begin_0 = const()[name = tensor("op_44956_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44956_end_0 = const()[name = tensor("op_44956_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44956_end_mask_0 = const()[name = tensor("op_44956_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44956_cast_fp16 = slice_by_index(begin = var_44956_begin_0, end = var_44956_end_0, end_mask = var_44956_end_mask_0, x = var_44679_cast_fp16)[name = tensor("op_44956_cast_fp16")]; tensor var_44963_begin_0 = const()[name = tensor("op_44963_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44963_end_0 = const()[name = tensor("op_44963_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44963_end_mask_0 = const()[name = tensor("op_44963_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44963_cast_fp16 = slice_by_index(begin = var_44963_begin_0, end = var_44963_end_0, end_mask = var_44963_end_mask_0, x = var_44679_cast_fp16)[name = tensor("op_44963_cast_fp16")]; tensor var_44970_begin_0 = const()[name = tensor("op_44970_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44970_end_0 = const()[name = tensor("op_44970_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44970_end_mask_0 = const()[name = tensor("op_44970_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44970_cast_fp16 = slice_by_index(begin = var_44970_begin_0, end = var_44970_end_0, end_mask = var_44970_end_mask_0, x = var_44679_cast_fp16)[name = tensor("op_44970_cast_fp16")]; tensor var_44977_begin_0 = const()[name = tensor("op_44977_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_44977_end_0 = const()[name = tensor("op_44977_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_44977_end_mask_0 = const()[name = tensor("op_44977_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44977_cast_fp16 = slice_by_index(begin = var_44977_begin_0, end = var_44977_end_0, end_mask = var_44977_end_mask_0, x = var_44679_cast_fp16)[name = tensor("op_44977_cast_fp16")]; tensor var_44984_begin_0 = const()[name = tensor("op_44984_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_44984_end_0 = const()[name = tensor("op_44984_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_44984_end_mask_0 = const()[name = tensor("op_44984_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44984_cast_fp16 = slice_by_index(begin = var_44984_begin_0, end = var_44984_end_0, end_mask = var_44984_end_mask_0, x = var_44683_cast_fp16)[name = tensor("op_44984_cast_fp16")]; tensor var_44991_begin_0 = const()[name = tensor("op_44991_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_44991_end_0 = const()[name = tensor("op_44991_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_44991_end_mask_0 = const()[name = tensor("op_44991_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44991_cast_fp16 = slice_by_index(begin = var_44991_begin_0, end = var_44991_end_0, end_mask = var_44991_end_mask_0, x = var_44683_cast_fp16)[name = tensor("op_44991_cast_fp16")]; tensor var_44998_begin_0 = const()[name = tensor("op_44998_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_44998_end_0 = const()[name = tensor("op_44998_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_44998_end_mask_0 = const()[name = tensor("op_44998_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_44998_cast_fp16 = slice_by_index(begin = var_44998_begin_0, end = var_44998_end_0, end_mask = var_44998_end_mask_0, x = var_44683_cast_fp16)[name = tensor("op_44998_cast_fp16")]; tensor var_45005_begin_0 = const()[name = tensor("op_45005_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45005_end_0 = const()[name = tensor("op_45005_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45005_end_mask_0 = const()[name = tensor("op_45005_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45005_cast_fp16 = slice_by_index(begin = var_45005_begin_0, end = var_45005_end_0, end_mask = var_45005_end_mask_0, x = var_44683_cast_fp16)[name = tensor("op_45005_cast_fp16")]; tensor var_45012_begin_0 = const()[name = tensor("op_45012_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45012_end_0 = const()[name = tensor("op_45012_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45012_end_mask_0 = const()[name = tensor("op_45012_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45012_cast_fp16 = slice_by_index(begin = var_45012_begin_0, end = var_45012_end_0, end_mask = var_45012_end_mask_0, x = var_44687_cast_fp16)[name = tensor("op_45012_cast_fp16")]; tensor var_45019_begin_0 = const()[name = tensor("op_45019_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45019_end_0 = const()[name = tensor("op_45019_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45019_end_mask_0 = const()[name = tensor("op_45019_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45019_cast_fp16 = slice_by_index(begin = var_45019_begin_0, end = var_45019_end_0, end_mask = var_45019_end_mask_0, x = var_44687_cast_fp16)[name = tensor("op_45019_cast_fp16")]; tensor var_45026_begin_0 = const()[name = tensor("op_45026_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45026_end_0 = const()[name = tensor("op_45026_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45026_end_mask_0 = const()[name = tensor("op_45026_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45026_cast_fp16 = slice_by_index(begin = var_45026_begin_0, end = var_45026_end_0, end_mask = var_45026_end_mask_0, x = var_44687_cast_fp16)[name = tensor("op_45026_cast_fp16")]; tensor var_45033_begin_0 = const()[name = tensor("op_45033_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45033_end_0 = const()[name = tensor("op_45033_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45033_end_mask_0 = const()[name = tensor("op_45033_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45033_cast_fp16 = slice_by_index(begin = var_45033_begin_0, end = var_45033_end_0, end_mask = var_45033_end_mask_0, x = var_44687_cast_fp16)[name = tensor("op_45033_cast_fp16")]; tensor var_45040_begin_0 = const()[name = tensor("op_45040_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45040_end_0 = const()[name = tensor("op_45040_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45040_end_mask_0 = const()[name = tensor("op_45040_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45040_cast_fp16 = slice_by_index(begin = var_45040_begin_0, end = var_45040_end_0, end_mask = var_45040_end_mask_0, x = var_44691_cast_fp16)[name = tensor("op_45040_cast_fp16")]; tensor var_45047_begin_0 = const()[name = tensor("op_45047_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45047_end_0 = const()[name = tensor("op_45047_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45047_end_mask_0 = const()[name = tensor("op_45047_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45047_cast_fp16 = slice_by_index(begin = var_45047_begin_0, end = var_45047_end_0, end_mask = var_45047_end_mask_0, x = var_44691_cast_fp16)[name = tensor("op_45047_cast_fp16")]; tensor var_45054_begin_0 = const()[name = tensor("op_45054_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45054_end_0 = const()[name = tensor("op_45054_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45054_end_mask_0 = const()[name = tensor("op_45054_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45054_cast_fp16 = slice_by_index(begin = var_45054_begin_0, end = var_45054_end_0, end_mask = var_45054_end_mask_0, x = var_44691_cast_fp16)[name = tensor("op_45054_cast_fp16")]; tensor var_45061_begin_0 = const()[name = tensor("op_45061_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45061_end_0 = const()[name = tensor("op_45061_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45061_end_mask_0 = const()[name = tensor("op_45061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45061_cast_fp16 = slice_by_index(begin = var_45061_begin_0, end = var_45061_end_0, end_mask = var_45061_end_mask_0, x = var_44691_cast_fp16)[name = tensor("op_45061_cast_fp16")]; tensor var_45068_begin_0 = const()[name = tensor("op_45068_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45068_end_0 = const()[name = tensor("op_45068_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45068_end_mask_0 = const()[name = tensor("op_45068_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45068_cast_fp16 = slice_by_index(begin = var_45068_begin_0, end = var_45068_end_0, end_mask = var_45068_end_mask_0, x = var_44695_cast_fp16)[name = tensor("op_45068_cast_fp16")]; tensor var_45075_begin_0 = const()[name = tensor("op_45075_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45075_end_0 = const()[name = tensor("op_45075_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45075_end_mask_0 = const()[name = tensor("op_45075_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45075_cast_fp16 = slice_by_index(begin = var_45075_begin_0, end = var_45075_end_0, end_mask = var_45075_end_mask_0, x = var_44695_cast_fp16)[name = tensor("op_45075_cast_fp16")]; tensor var_45082_begin_0 = const()[name = tensor("op_45082_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45082_end_0 = const()[name = tensor("op_45082_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45082_end_mask_0 = const()[name = tensor("op_45082_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45082_cast_fp16 = slice_by_index(begin = var_45082_begin_0, end = var_45082_end_0, end_mask = var_45082_end_mask_0, x = var_44695_cast_fp16)[name = tensor("op_45082_cast_fp16")]; tensor var_45089_begin_0 = const()[name = tensor("op_45089_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45089_end_0 = const()[name = tensor("op_45089_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45089_end_mask_0 = const()[name = tensor("op_45089_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45089_cast_fp16 = slice_by_index(begin = var_45089_begin_0, end = var_45089_end_0, end_mask = var_45089_end_mask_0, x = var_44695_cast_fp16)[name = tensor("op_45089_cast_fp16")]; tensor var_45096_begin_0 = const()[name = tensor("op_45096_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45096_end_0 = const()[name = tensor("op_45096_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45096_end_mask_0 = const()[name = tensor("op_45096_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45096_cast_fp16 = slice_by_index(begin = var_45096_begin_0, end = var_45096_end_0, end_mask = var_45096_end_mask_0, x = var_44699_cast_fp16)[name = tensor("op_45096_cast_fp16")]; tensor var_45103_begin_0 = const()[name = tensor("op_45103_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45103_end_0 = const()[name = tensor("op_45103_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45103_end_mask_0 = const()[name = tensor("op_45103_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45103_cast_fp16 = slice_by_index(begin = var_45103_begin_0, end = var_45103_end_0, end_mask = var_45103_end_mask_0, x = var_44699_cast_fp16)[name = tensor("op_45103_cast_fp16")]; tensor var_45110_begin_0 = const()[name = tensor("op_45110_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45110_end_0 = const()[name = tensor("op_45110_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45110_end_mask_0 = const()[name = tensor("op_45110_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45110_cast_fp16 = slice_by_index(begin = var_45110_begin_0, end = var_45110_end_0, end_mask = var_45110_end_mask_0, x = var_44699_cast_fp16)[name = tensor("op_45110_cast_fp16")]; tensor var_45117_begin_0 = const()[name = tensor("op_45117_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45117_end_0 = const()[name = tensor("op_45117_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45117_end_mask_0 = const()[name = tensor("op_45117_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45117_cast_fp16 = slice_by_index(begin = var_45117_begin_0, end = var_45117_end_0, end_mask = var_45117_end_mask_0, x = var_44699_cast_fp16)[name = tensor("op_45117_cast_fp16")]; tensor var_45124_begin_0 = const()[name = tensor("op_45124_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45124_end_0 = const()[name = tensor("op_45124_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45124_end_mask_0 = const()[name = tensor("op_45124_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45124_cast_fp16 = slice_by_index(begin = var_45124_begin_0, end = var_45124_end_0, end_mask = var_45124_end_mask_0, x = var_44703_cast_fp16)[name = tensor("op_45124_cast_fp16")]; tensor var_45131_begin_0 = const()[name = tensor("op_45131_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45131_end_0 = const()[name = tensor("op_45131_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45131_end_mask_0 = const()[name = tensor("op_45131_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45131_cast_fp16 = slice_by_index(begin = var_45131_begin_0, end = var_45131_end_0, end_mask = var_45131_end_mask_0, x = var_44703_cast_fp16)[name = tensor("op_45131_cast_fp16")]; tensor var_45138_begin_0 = const()[name = tensor("op_45138_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45138_end_0 = const()[name = tensor("op_45138_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45138_end_mask_0 = const()[name = tensor("op_45138_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45138_cast_fp16 = slice_by_index(begin = var_45138_begin_0, end = var_45138_end_0, end_mask = var_45138_end_mask_0, x = var_44703_cast_fp16)[name = tensor("op_45138_cast_fp16")]; tensor var_45145_begin_0 = const()[name = tensor("op_45145_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45145_end_0 = const()[name = tensor("op_45145_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45145_end_mask_0 = const()[name = tensor("op_45145_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45145_cast_fp16 = slice_by_index(begin = var_45145_begin_0, end = var_45145_end_0, end_mask = var_45145_end_mask_0, x = var_44703_cast_fp16)[name = tensor("op_45145_cast_fp16")]; tensor var_45152_begin_0 = const()[name = tensor("op_45152_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45152_end_0 = const()[name = tensor("op_45152_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45152_end_mask_0 = const()[name = tensor("op_45152_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45152_cast_fp16 = slice_by_index(begin = var_45152_begin_0, end = var_45152_end_0, end_mask = var_45152_end_mask_0, x = var_44707_cast_fp16)[name = tensor("op_45152_cast_fp16")]; tensor var_45159_begin_0 = const()[name = tensor("op_45159_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45159_end_0 = const()[name = tensor("op_45159_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45159_end_mask_0 = const()[name = tensor("op_45159_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45159_cast_fp16 = slice_by_index(begin = var_45159_begin_0, end = var_45159_end_0, end_mask = var_45159_end_mask_0, x = var_44707_cast_fp16)[name = tensor("op_45159_cast_fp16")]; tensor var_45166_begin_0 = const()[name = tensor("op_45166_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45166_end_0 = const()[name = tensor("op_45166_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45166_end_mask_0 = const()[name = tensor("op_45166_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45166_cast_fp16 = slice_by_index(begin = var_45166_begin_0, end = var_45166_end_0, end_mask = var_45166_end_mask_0, x = var_44707_cast_fp16)[name = tensor("op_45166_cast_fp16")]; tensor var_45173_begin_0 = const()[name = tensor("op_45173_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45173_end_0 = const()[name = tensor("op_45173_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45173_end_mask_0 = const()[name = tensor("op_45173_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45173_cast_fp16 = slice_by_index(begin = var_45173_begin_0, end = var_45173_end_0, end_mask = var_45173_end_mask_0, x = var_44707_cast_fp16)[name = tensor("op_45173_cast_fp16")]; tensor var_45180_begin_0 = const()[name = tensor("op_45180_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45180_end_0 = const()[name = tensor("op_45180_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45180_end_mask_0 = const()[name = tensor("op_45180_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45180_cast_fp16 = slice_by_index(begin = var_45180_begin_0, end = var_45180_end_0, end_mask = var_45180_end_mask_0, x = var_44711_cast_fp16)[name = tensor("op_45180_cast_fp16")]; tensor var_45187_begin_0 = const()[name = tensor("op_45187_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45187_end_0 = const()[name = tensor("op_45187_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45187_end_mask_0 = const()[name = tensor("op_45187_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45187_cast_fp16 = slice_by_index(begin = var_45187_begin_0, end = var_45187_end_0, end_mask = var_45187_end_mask_0, x = var_44711_cast_fp16)[name = tensor("op_45187_cast_fp16")]; tensor var_45194_begin_0 = const()[name = tensor("op_45194_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45194_end_0 = const()[name = tensor("op_45194_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45194_end_mask_0 = const()[name = tensor("op_45194_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45194_cast_fp16 = slice_by_index(begin = var_45194_begin_0, end = var_45194_end_0, end_mask = var_45194_end_mask_0, x = var_44711_cast_fp16)[name = tensor("op_45194_cast_fp16")]; tensor var_45201_begin_0 = const()[name = tensor("op_45201_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45201_end_0 = const()[name = tensor("op_45201_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45201_end_mask_0 = const()[name = tensor("op_45201_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45201_cast_fp16 = slice_by_index(begin = var_45201_begin_0, end = var_45201_end_0, end_mask = var_45201_end_mask_0, x = var_44711_cast_fp16)[name = tensor("op_45201_cast_fp16")]; tensor var_45208_begin_0 = const()[name = tensor("op_45208_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45208_end_0 = const()[name = tensor("op_45208_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45208_end_mask_0 = const()[name = tensor("op_45208_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45208_cast_fp16 = slice_by_index(begin = var_45208_begin_0, end = var_45208_end_0, end_mask = var_45208_end_mask_0, x = var_44715_cast_fp16)[name = tensor("op_45208_cast_fp16")]; tensor var_45215_begin_0 = const()[name = tensor("op_45215_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45215_end_0 = const()[name = tensor("op_45215_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45215_end_mask_0 = const()[name = tensor("op_45215_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45215_cast_fp16 = slice_by_index(begin = var_45215_begin_0, end = var_45215_end_0, end_mask = var_45215_end_mask_0, x = var_44715_cast_fp16)[name = tensor("op_45215_cast_fp16")]; tensor var_45222_begin_0 = const()[name = tensor("op_45222_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45222_end_0 = const()[name = tensor("op_45222_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45222_end_mask_0 = const()[name = tensor("op_45222_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45222_cast_fp16 = slice_by_index(begin = var_45222_begin_0, end = var_45222_end_0, end_mask = var_45222_end_mask_0, x = var_44715_cast_fp16)[name = tensor("op_45222_cast_fp16")]; tensor var_45229_begin_0 = const()[name = tensor("op_45229_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45229_end_0 = const()[name = tensor("op_45229_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45229_end_mask_0 = const()[name = tensor("op_45229_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45229_cast_fp16 = slice_by_index(begin = var_45229_begin_0, end = var_45229_end_0, end_mask = var_45229_end_mask_0, x = var_44715_cast_fp16)[name = tensor("op_45229_cast_fp16")]; tensor var_45236_begin_0 = const()[name = tensor("op_45236_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45236_end_0 = const()[name = tensor("op_45236_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45236_end_mask_0 = const()[name = tensor("op_45236_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45236_cast_fp16 = slice_by_index(begin = var_45236_begin_0, end = var_45236_end_0, end_mask = var_45236_end_mask_0, x = var_44719_cast_fp16)[name = tensor("op_45236_cast_fp16")]; tensor var_45243_begin_0 = const()[name = tensor("op_45243_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45243_end_0 = const()[name = tensor("op_45243_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45243_end_mask_0 = const()[name = tensor("op_45243_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45243_cast_fp16 = slice_by_index(begin = var_45243_begin_0, end = var_45243_end_0, end_mask = var_45243_end_mask_0, x = var_44719_cast_fp16)[name = tensor("op_45243_cast_fp16")]; tensor var_45250_begin_0 = const()[name = tensor("op_45250_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45250_end_0 = const()[name = tensor("op_45250_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45250_end_mask_0 = const()[name = tensor("op_45250_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45250_cast_fp16 = slice_by_index(begin = var_45250_begin_0, end = var_45250_end_0, end_mask = var_45250_end_mask_0, x = var_44719_cast_fp16)[name = tensor("op_45250_cast_fp16")]; tensor var_45257_begin_0 = const()[name = tensor("op_45257_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45257_end_0 = const()[name = tensor("op_45257_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45257_end_mask_0 = const()[name = tensor("op_45257_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45257_cast_fp16 = slice_by_index(begin = var_45257_begin_0, end = var_45257_end_0, end_mask = var_45257_end_mask_0, x = var_44719_cast_fp16)[name = tensor("op_45257_cast_fp16")]; tensor var_45264_begin_0 = const()[name = tensor("op_45264_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45264_end_0 = const()[name = tensor("op_45264_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_45264_end_mask_0 = const()[name = tensor("op_45264_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45264_cast_fp16 = slice_by_index(begin = var_45264_begin_0, end = var_45264_end_0, end_mask = var_45264_end_mask_0, x = var_44723_cast_fp16)[name = tensor("op_45264_cast_fp16")]; tensor var_45271_begin_0 = const()[name = tensor("op_45271_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_45271_end_0 = const()[name = tensor("op_45271_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_45271_end_mask_0 = const()[name = tensor("op_45271_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45271_cast_fp16 = slice_by_index(begin = var_45271_begin_0, end = var_45271_end_0, end_mask = var_45271_end_mask_0, x = var_44723_cast_fp16)[name = tensor("op_45271_cast_fp16")]; tensor var_45278_begin_0 = const()[name = tensor("op_45278_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_45278_end_0 = const()[name = tensor("op_45278_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_45278_end_mask_0 = const()[name = tensor("op_45278_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45278_cast_fp16 = slice_by_index(begin = var_45278_begin_0, end = var_45278_end_0, end_mask = var_45278_end_mask_0, x = var_44723_cast_fp16)[name = tensor("op_45278_cast_fp16")]; tensor var_45285_begin_0 = const()[name = tensor("op_45285_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_45285_end_0 = const()[name = tensor("op_45285_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45285_end_mask_0 = const()[name = tensor("op_45285_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45285_cast_fp16 = slice_by_index(begin = var_45285_begin_0, end = var_45285_end_0, end_mask = var_45285_end_mask_0, x = var_44723_cast_fp16)[name = tensor("op_45285_cast_fp16")]; tensor k_57_perm_0 = const()[name = tensor("k_57_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_45290_begin_0 = const()[name = tensor("op_45290_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45290_end_0 = const()[name = tensor("op_45290_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_45290_end_mask_0 = const()[name = tensor("op_45290_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_57_cast_fp16 = transpose(perm = k_57_perm_0, x = key_57_cast_fp16)[name = tensor("transpose_3")]; tensor var_45290_cast_fp16 = slice_by_index(begin = var_45290_begin_0, end = var_45290_end_0, end_mask = var_45290_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45290_cast_fp16")]; tensor var_45294_begin_0 = const()[name = tensor("op_45294_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_45294_end_0 = const()[name = tensor("op_45294_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_45294_end_mask_0 = const()[name = tensor("op_45294_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45294_cast_fp16 = slice_by_index(begin = var_45294_begin_0, end = var_45294_end_0, end_mask = var_45294_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45294_cast_fp16")]; tensor var_45298_begin_0 = const()[name = tensor("op_45298_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_45298_end_0 = const()[name = tensor("op_45298_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_45298_end_mask_0 = const()[name = tensor("op_45298_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45298_cast_fp16 = slice_by_index(begin = var_45298_begin_0, end = var_45298_end_0, end_mask = var_45298_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45298_cast_fp16")]; tensor var_45302_begin_0 = const()[name = tensor("op_45302_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_45302_end_0 = const()[name = tensor("op_45302_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_45302_end_mask_0 = const()[name = tensor("op_45302_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45302_cast_fp16 = slice_by_index(begin = var_45302_begin_0, end = var_45302_end_0, end_mask = var_45302_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45302_cast_fp16")]; tensor var_45306_begin_0 = const()[name = tensor("op_45306_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_45306_end_0 = const()[name = tensor("op_45306_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_45306_end_mask_0 = const()[name = tensor("op_45306_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45306_cast_fp16 = slice_by_index(begin = var_45306_begin_0, end = var_45306_end_0, end_mask = var_45306_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45306_cast_fp16")]; tensor var_45310_begin_0 = const()[name = tensor("op_45310_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_45310_end_0 = const()[name = tensor("op_45310_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_45310_end_mask_0 = const()[name = tensor("op_45310_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45310_cast_fp16 = slice_by_index(begin = var_45310_begin_0, end = var_45310_end_0, end_mask = var_45310_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45310_cast_fp16")]; tensor var_45314_begin_0 = const()[name = tensor("op_45314_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_45314_end_0 = const()[name = tensor("op_45314_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_45314_end_mask_0 = const()[name = tensor("op_45314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45314_cast_fp16 = slice_by_index(begin = var_45314_begin_0, end = var_45314_end_0, end_mask = var_45314_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45314_cast_fp16")]; tensor var_45318_begin_0 = const()[name = tensor("op_45318_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_45318_end_0 = const()[name = tensor("op_45318_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_45318_end_mask_0 = const()[name = tensor("op_45318_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45318_cast_fp16 = slice_by_index(begin = var_45318_begin_0, end = var_45318_end_0, end_mask = var_45318_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45318_cast_fp16")]; tensor var_45322_begin_0 = const()[name = tensor("op_45322_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_45322_end_0 = const()[name = tensor("op_45322_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_45322_end_mask_0 = const()[name = tensor("op_45322_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45322_cast_fp16 = slice_by_index(begin = var_45322_begin_0, end = var_45322_end_0, end_mask = var_45322_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45322_cast_fp16")]; tensor var_45326_begin_0 = const()[name = tensor("op_45326_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_45326_end_0 = const()[name = tensor("op_45326_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_45326_end_mask_0 = const()[name = tensor("op_45326_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45326_cast_fp16 = slice_by_index(begin = var_45326_begin_0, end = var_45326_end_0, end_mask = var_45326_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45326_cast_fp16")]; tensor var_45330_begin_0 = const()[name = tensor("op_45330_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_45330_end_0 = const()[name = tensor("op_45330_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_45330_end_mask_0 = const()[name = tensor("op_45330_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45330_cast_fp16 = slice_by_index(begin = var_45330_begin_0, end = var_45330_end_0, end_mask = var_45330_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45330_cast_fp16")]; tensor var_45334_begin_0 = const()[name = tensor("op_45334_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_45334_end_0 = const()[name = tensor("op_45334_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_45334_end_mask_0 = const()[name = tensor("op_45334_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45334_cast_fp16 = slice_by_index(begin = var_45334_begin_0, end = var_45334_end_0, end_mask = var_45334_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45334_cast_fp16")]; tensor var_45338_begin_0 = const()[name = tensor("op_45338_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_45338_end_0 = const()[name = tensor("op_45338_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_45338_end_mask_0 = const()[name = tensor("op_45338_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45338_cast_fp16 = slice_by_index(begin = var_45338_begin_0, end = var_45338_end_0, end_mask = var_45338_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45338_cast_fp16")]; tensor var_45342_begin_0 = const()[name = tensor("op_45342_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_45342_end_0 = const()[name = tensor("op_45342_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_45342_end_mask_0 = const()[name = tensor("op_45342_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45342_cast_fp16 = slice_by_index(begin = var_45342_begin_0, end = var_45342_end_0, end_mask = var_45342_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45342_cast_fp16")]; tensor var_45346_begin_0 = const()[name = tensor("op_45346_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_45346_end_0 = const()[name = tensor("op_45346_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_45346_end_mask_0 = const()[name = tensor("op_45346_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45346_cast_fp16 = slice_by_index(begin = var_45346_begin_0, end = var_45346_end_0, end_mask = var_45346_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45346_cast_fp16")]; tensor var_45350_begin_0 = const()[name = tensor("op_45350_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_45350_end_0 = const()[name = tensor("op_45350_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_45350_end_mask_0 = const()[name = tensor("op_45350_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45350_cast_fp16 = slice_by_index(begin = var_45350_begin_0, end = var_45350_end_0, end_mask = var_45350_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45350_cast_fp16")]; tensor var_45354_begin_0 = const()[name = tensor("op_45354_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_45354_end_0 = const()[name = tensor("op_45354_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_45354_end_mask_0 = const()[name = tensor("op_45354_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45354_cast_fp16 = slice_by_index(begin = var_45354_begin_0, end = var_45354_end_0, end_mask = var_45354_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45354_cast_fp16")]; tensor var_45358_begin_0 = const()[name = tensor("op_45358_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_45358_end_0 = const()[name = tensor("op_45358_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_45358_end_mask_0 = const()[name = tensor("op_45358_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45358_cast_fp16 = slice_by_index(begin = var_45358_begin_0, end = var_45358_end_0, end_mask = var_45358_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45358_cast_fp16")]; tensor var_45362_begin_0 = const()[name = tensor("op_45362_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_45362_end_0 = const()[name = tensor("op_45362_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_45362_end_mask_0 = const()[name = tensor("op_45362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45362_cast_fp16 = slice_by_index(begin = var_45362_begin_0, end = var_45362_end_0, end_mask = var_45362_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45362_cast_fp16")]; tensor var_45366_begin_0 = const()[name = tensor("op_45366_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_45366_end_0 = const()[name = tensor("op_45366_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_45366_end_mask_0 = const()[name = tensor("op_45366_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_45366_cast_fp16 = slice_by_index(begin = var_45366_begin_0, end = var_45366_end_0, end_mask = var_45366_end_mask_0, x = k_57_cast_fp16)[name = tensor("op_45366_cast_fp16")]; tensor var_45368_begin_0 = const()[name = tensor("op_45368_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_45368_end_0 = const()[name = tensor("op_45368_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_45368_end_mask_0 = const()[name = tensor("op_45368_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45368_cast_fp16 = slice_by_index(begin = var_45368_begin_0, end = var_45368_end_0, end_mask = var_45368_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45368_cast_fp16")]; tensor var_45372_begin_0 = const()[name = tensor("op_45372_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_45372_end_0 = const()[name = tensor("op_45372_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_45372_end_mask_0 = const()[name = tensor("op_45372_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45372_cast_fp16 = slice_by_index(begin = var_45372_begin_0, end = var_45372_end_0, end_mask = var_45372_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45372_cast_fp16")]; tensor var_45376_begin_0 = const()[name = tensor("op_45376_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_45376_end_0 = const()[name = tensor("op_45376_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_45376_end_mask_0 = const()[name = tensor("op_45376_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45376_cast_fp16 = slice_by_index(begin = var_45376_begin_0, end = var_45376_end_0, end_mask = var_45376_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45376_cast_fp16")]; tensor var_45380_begin_0 = const()[name = tensor("op_45380_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_45380_end_0 = const()[name = tensor("op_45380_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_45380_end_mask_0 = const()[name = tensor("op_45380_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45380_cast_fp16 = slice_by_index(begin = var_45380_begin_0, end = var_45380_end_0, end_mask = var_45380_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45380_cast_fp16")]; tensor var_45384_begin_0 = const()[name = tensor("op_45384_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_45384_end_0 = const()[name = tensor("op_45384_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_45384_end_mask_0 = const()[name = tensor("op_45384_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45384_cast_fp16 = slice_by_index(begin = var_45384_begin_0, end = var_45384_end_0, end_mask = var_45384_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45384_cast_fp16")]; tensor var_45388_begin_0 = const()[name = tensor("op_45388_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_45388_end_0 = const()[name = tensor("op_45388_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_45388_end_mask_0 = const()[name = tensor("op_45388_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45388_cast_fp16 = slice_by_index(begin = var_45388_begin_0, end = var_45388_end_0, end_mask = var_45388_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45388_cast_fp16")]; tensor var_45392_begin_0 = const()[name = tensor("op_45392_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_45392_end_0 = const()[name = tensor("op_45392_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_45392_end_mask_0 = const()[name = tensor("op_45392_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45392_cast_fp16 = slice_by_index(begin = var_45392_begin_0, end = var_45392_end_0, end_mask = var_45392_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45392_cast_fp16")]; tensor var_45396_begin_0 = const()[name = tensor("op_45396_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_45396_end_0 = const()[name = tensor("op_45396_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_45396_end_mask_0 = const()[name = tensor("op_45396_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45396_cast_fp16 = slice_by_index(begin = var_45396_begin_0, end = var_45396_end_0, end_mask = var_45396_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45396_cast_fp16")]; tensor var_45400_begin_0 = const()[name = tensor("op_45400_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_45400_end_0 = const()[name = tensor("op_45400_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_45400_end_mask_0 = const()[name = tensor("op_45400_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45400_cast_fp16 = slice_by_index(begin = var_45400_begin_0, end = var_45400_end_0, end_mask = var_45400_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45400_cast_fp16")]; tensor var_45404_begin_0 = const()[name = tensor("op_45404_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_45404_end_0 = const()[name = tensor("op_45404_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_45404_end_mask_0 = const()[name = tensor("op_45404_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45404_cast_fp16 = slice_by_index(begin = var_45404_begin_0, end = var_45404_end_0, end_mask = var_45404_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45404_cast_fp16")]; tensor var_45408_begin_0 = const()[name = tensor("op_45408_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_45408_end_0 = const()[name = tensor("op_45408_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_45408_end_mask_0 = const()[name = tensor("op_45408_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45408_cast_fp16 = slice_by_index(begin = var_45408_begin_0, end = var_45408_end_0, end_mask = var_45408_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45408_cast_fp16")]; tensor var_45412_begin_0 = const()[name = tensor("op_45412_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_45412_end_0 = const()[name = tensor("op_45412_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_45412_end_mask_0 = const()[name = tensor("op_45412_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45412_cast_fp16 = slice_by_index(begin = var_45412_begin_0, end = var_45412_end_0, end_mask = var_45412_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45412_cast_fp16")]; tensor var_45416_begin_0 = const()[name = tensor("op_45416_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_45416_end_0 = const()[name = tensor("op_45416_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_45416_end_mask_0 = const()[name = tensor("op_45416_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45416_cast_fp16 = slice_by_index(begin = var_45416_begin_0, end = var_45416_end_0, end_mask = var_45416_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45416_cast_fp16")]; tensor var_45420_begin_0 = const()[name = tensor("op_45420_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_45420_end_0 = const()[name = tensor("op_45420_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_45420_end_mask_0 = const()[name = tensor("op_45420_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45420_cast_fp16 = slice_by_index(begin = var_45420_begin_0, end = var_45420_end_0, end_mask = var_45420_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45420_cast_fp16")]; tensor var_45424_begin_0 = const()[name = tensor("op_45424_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_45424_end_0 = const()[name = tensor("op_45424_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_45424_end_mask_0 = const()[name = tensor("op_45424_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45424_cast_fp16 = slice_by_index(begin = var_45424_begin_0, end = var_45424_end_0, end_mask = var_45424_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45424_cast_fp16")]; tensor var_45428_begin_0 = const()[name = tensor("op_45428_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_45428_end_0 = const()[name = tensor("op_45428_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_45428_end_mask_0 = const()[name = tensor("op_45428_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45428_cast_fp16 = slice_by_index(begin = var_45428_begin_0, end = var_45428_end_0, end_mask = var_45428_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45428_cast_fp16")]; tensor var_45432_begin_0 = const()[name = tensor("op_45432_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_45432_end_0 = const()[name = tensor("op_45432_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_45432_end_mask_0 = const()[name = tensor("op_45432_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45432_cast_fp16 = slice_by_index(begin = var_45432_begin_0, end = var_45432_end_0, end_mask = var_45432_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45432_cast_fp16")]; tensor var_45436_begin_0 = const()[name = tensor("op_45436_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_45436_end_0 = const()[name = tensor("op_45436_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_45436_end_mask_0 = const()[name = tensor("op_45436_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45436_cast_fp16 = slice_by_index(begin = var_45436_begin_0, end = var_45436_end_0, end_mask = var_45436_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45436_cast_fp16")]; tensor var_45440_begin_0 = const()[name = tensor("op_45440_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_45440_end_0 = const()[name = tensor("op_45440_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_45440_end_mask_0 = const()[name = tensor("op_45440_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45440_cast_fp16 = slice_by_index(begin = var_45440_begin_0, end = var_45440_end_0, end_mask = var_45440_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45440_cast_fp16")]; tensor var_45444_begin_0 = const()[name = tensor("op_45444_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_45444_end_0 = const()[name = tensor("op_45444_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_45444_end_mask_0 = const()[name = tensor("op_45444_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_45444_cast_fp16 = slice_by_index(begin = var_45444_begin_0, end = var_45444_end_0, end_mask = var_45444_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_45444_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4481_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4481_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4481_equation_0, values = (var_45290_cast_fp16, var_44732_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4481_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4483_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4483_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4483_equation_0, values = (var_45290_cast_fp16, var_44739_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4483_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4485_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4485_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4485_equation_0, values = (var_45290_cast_fp16, var_44746_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4485_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4487_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4487_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4487_equation_0, values = (var_45290_cast_fp16, var_44753_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4487_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4489_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4489_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4489_equation_0, values = (var_45294_cast_fp16, var_44760_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4489_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4491_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4491_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4491_equation_0, values = (var_45294_cast_fp16, var_44767_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4491_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4493_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4493_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4493_equation_0, values = (var_45294_cast_fp16, var_44774_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4493_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4495_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4495_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4495_equation_0, values = (var_45294_cast_fp16, var_44781_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4495_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4497_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4497_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4497_equation_0, values = (var_45298_cast_fp16, var_44788_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4497_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4499_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4499_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4499_equation_0, values = (var_45298_cast_fp16, var_44795_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4499_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4501_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4501_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4501_equation_0, values = (var_45298_cast_fp16, var_44802_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4501_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4503_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4503_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4503_equation_0, values = (var_45298_cast_fp16, var_44809_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4503_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4505_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4505_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4505_equation_0, values = (var_45302_cast_fp16, var_44816_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4505_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4507_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4507_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4507_equation_0, values = (var_45302_cast_fp16, var_44823_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4507_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4509_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4509_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4509_equation_0, values = (var_45302_cast_fp16, var_44830_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4509_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4511_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4511_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4511_equation_0, values = (var_45302_cast_fp16, var_44837_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4511_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4513_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4513_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4513_equation_0, values = (var_45306_cast_fp16, var_44844_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4513_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4515_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4515_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4515_equation_0, values = (var_45306_cast_fp16, var_44851_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4515_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4517_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4517_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4517_equation_0, values = (var_45306_cast_fp16, var_44858_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4517_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4519_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4519_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4519_equation_0, values = (var_45306_cast_fp16, var_44865_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4519_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4521_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4521_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4521_equation_0, values = (var_45310_cast_fp16, var_44872_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4521_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4523_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4523_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4523_equation_0, values = (var_45310_cast_fp16, var_44879_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4523_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4525_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4525_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4525_equation_0, values = (var_45310_cast_fp16, var_44886_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4525_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4527_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4527_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4527_equation_0, values = (var_45310_cast_fp16, var_44893_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4527_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4529_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4529_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4529_equation_0, values = (var_45314_cast_fp16, var_44900_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4529_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4531_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4531_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4531_equation_0, values = (var_45314_cast_fp16, var_44907_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4531_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4533_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4533_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4533_equation_0, values = (var_45314_cast_fp16, var_44914_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4533_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4535_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4535_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4535_equation_0, values = (var_45314_cast_fp16, var_44921_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4535_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4537_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4537_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4537_equation_0, values = (var_45318_cast_fp16, var_44928_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4537_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4539_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4539_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4539_equation_0, values = (var_45318_cast_fp16, var_44935_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4539_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4541_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4541_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4541_equation_0, values = (var_45318_cast_fp16, var_44942_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4541_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4543_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4543_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4543_equation_0, values = (var_45318_cast_fp16, var_44949_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4543_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4545_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4545_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4545_equation_0, values = (var_45322_cast_fp16, var_44956_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4545_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4547_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4547_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4547_equation_0, values = (var_45322_cast_fp16, var_44963_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4547_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4549_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4549_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4549_equation_0, values = (var_45322_cast_fp16, var_44970_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4549_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4551_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4551_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4551_equation_0, values = (var_45322_cast_fp16, var_44977_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4551_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4553_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4553_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4553_equation_0, values = (var_45326_cast_fp16, var_44984_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4553_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4555_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4555_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4555_equation_0, values = (var_45326_cast_fp16, var_44991_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4555_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4557_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4557_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4557_equation_0, values = (var_45326_cast_fp16, var_44998_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4557_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4559_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4559_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4559_equation_0, values = (var_45326_cast_fp16, var_45005_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4559_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4561_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4561_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4561_equation_0, values = (var_45330_cast_fp16, var_45012_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4561_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4563_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4563_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4563_equation_0, values = (var_45330_cast_fp16, var_45019_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4563_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4565_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4565_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4565_equation_0, values = (var_45330_cast_fp16, var_45026_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4565_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4567_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4567_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4567_equation_0, values = (var_45330_cast_fp16, var_45033_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4567_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4569_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4569_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4569_equation_0, values = (var_45334_cast_fp16, var_45040_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4569_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4571_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4571_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4571_equation_0, values = (var_45334_cast_fp16, var_45047_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4571_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4573_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4573_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4573_equation_0, values = (var_45334_cast_fp16, var_45054_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4573_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4575_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4575_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4575_equation_0, values = (var_45334_cast_fp16, var_45061_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4575_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4577_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4577_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4577_equation_0, values = (var_45338_cast_fp16, var_45068_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4577_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4579_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4579_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4579_equation_0, values = (var_45338_cast_fp16, var_45075_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4579_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4581_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4581_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4581_equation_0, values = (var_45338_cast_fp16, var_45082_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4581_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4583_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4583_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4583_equation_0, values = (var_45338_cast_fp16, var_45089_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4583_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4585_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4585_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4585_equation_0, values = (var_45342_cast_fp16, var_45096_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4585_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4587_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4587_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4587_equation_0, values = (var_45342_cast_fp16, var_45103_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4587_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4589_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4589_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4589_equation_0, values = (var_45342_cast_fp16, var_45110_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4589_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4591_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4591_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4591_equation_0, values = (var_45342_cast_fp16, var_45117_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4591_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4593_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4593_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4593_equation_0, values = (var_45346_cast_fp16, var_45124_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4593_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4595_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4595_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4595_equation_0, values = (var_45346_cast_fp16, var_45131_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4595_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4597_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4597_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4597_equation_0, values = (var_45346_cast_fp16, var_45138_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4597_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4599_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4599_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4599_equation_0, values = (var_45346_cast_fp16, var_45145_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4599_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4601_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4601_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4601_equation_0, values = (var_45350_cast_fp16, var_45152_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4601_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4603_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4603_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4603_equation_0, values = (var_45350_cast_fp16, var_45159_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4603_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4605_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4605_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4605_equation_0, values = (var_45350_cast_fp16, var_45166_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4605_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4607_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4607_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4607_equation_0, values = (var_45350_cast_fp16, var_45173_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4607_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4609_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4609_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4609_equation_0, values = (var_45354_cast_fp16, var_45180_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4609_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4611_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4611_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4611_equation_0, values = (var_45354_cast_fp16, var_45187_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4611_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4613_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4613_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4613_equation_0, values = (var_45354_cast_fp16, var_45194_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4613_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4615_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4615_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4615_equation_0, values = (var_45354_cast_fp16, var_45201_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4615_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4617_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4617_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4617_equation_0, values = (var_45358_cast_fp16, var_45208_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4617_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4619_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4619_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4619_equation_0, values = (var_45358_cast_fp16, var_45215_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4619_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4621_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4621_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4621_equation_0, values = (var_45358_cast_fp16, var_45222_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4621_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4623_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4623_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4623_equation_0, values = (var_45358_cast_fp16, var_45229_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4623_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4625_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4625_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4625_equation_0, values = (var_45362_cast_fp16, var_45236_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4625_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4627_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4627_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4627_equation_0, values = (var_45362_cast_fp16, var_45243_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4627_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4629_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4629_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4629_equation_0, values = (var_45362_cast_fp16, var_45250_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4629_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4631_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4631_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4631_equation_0, values = (var_45362_cast_fp16, var_45257_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4631_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4633_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4633_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4633_equation_0, values = (var_45366_cast_fp16, var_45264_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4633_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4635_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4635_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4635_equation_0, values = (var_45366_cast_fp16, var_45271_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4635_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4637_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4637_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4637_equation_0, values = (var_45366_cast_fp16, var_45278_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4637_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4639_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4639_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4639_equation_0, values = (var_45366_cast_fp16, var_45285_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4639_cast_fp16")]; tensor var_45607_to_fp16 = const()[name = tensor("op_45607_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4481_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4481_cast_fp16, y = var_45607_to_fp16)[name = tensor("aw_chunk_4481_cast_fp16")]; tensor var_45609_to_fp16 = const()[name = tensor("op_45609_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4483_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4483_cast_fp16, y = var_45609_to_fp16)[name = tensor("aw_chunk_4483_cast_fp16")]; tensor var_45611_to_fp16 = const()[name = tensor("op_45611_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4485_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4485_cast_fp16, y = var_45611_to_fp16)[name = tensor("aw_chunk_4485_cast_fp16")]; tensor var_45613_to_fp16 = const()[name = tensor("op_45613_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4487_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4487_cast_fp16, y = var_45613_to_fp16)[name = tensor("aw_chunk_4487_cast_fp16")]; tensor var_45615_to_fp16 = const()[name = tensor("op_45615_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4489_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4489_cast_fp16, y = var_45615_to_fp16)[name = tensor("aw_chunk_4489_cast_fp16")]; tensor var_45617_to_fp16 = const()[name = tensor("op_45617_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4491_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4491_cast_fp16, y = var_45617_to_fp16)[name = tensor("aw_chunk_4491_cast_fp16")]; tensor var_45619_to_fp16 = const()[name = tensor("op_45619_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4493_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4493_cast_fp16, y = var_45619_to_fp16)[name = tensor("aw_chunk_4493_cast_fp16")]; tensor var_45621_to_fp16 = const()[name = tensor("op_45621_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4495_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4495_cast_fp16, y = var_45621_to_fp16)[name = tensor("aw_chunk_4495_cast_fp16")]; tensor var_45623_to_fp16 = const()[name = tensor("op_45623_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4497_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4497_cast_fp16, y = var_45623_to_fp16)[name = tensor("aw_chunk_4497_cast_fp16")]; tensor var_45625_to_fp16 = const()[name = tensor("op_45625_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4499_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4499_cast_fp16, y = var_45625_to_fp16)[name = tensor("aw_chunk_4499_cast_fp16")]; tensor var_45627_to_fp16 = const()[name = tensor("op_45627_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4501_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4501_cast_fp16, y = var_45627_to_fp16)[name = tensor("aw_chunk_4501_cast_fp16")]; tensor var_45629_to_fp16 = const()[name = tensor("op_45629_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4503_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4503_cast_fp16, y = var_45629_to_fp16)[name = tensor("aw_chunk_4503_cast_fp16")]; tensor var_45631_to_fp16 = const()[name = tensor("op_45631_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4505_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4505_cast_fp16, y = var_45631_to_fp16)[name = tensor("aw_chunk_4505_cast_fp16")]; tensor var_45633_to_fp16 = const()[name = tensor("op_45633_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4507_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4507_cast_fp16, y = var_45633_to_fp16)[name = tensor("aw_chunk_4507_cast_fp16")]; tensor var_45635_to_fp16 = const()[name = tensor("op_45635_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4509_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4509_cast_fp16, y = var_45635_to_fp16)[name = tensor("aw_chunk_4509_cast_fp16")]; tensor var_45637_to_fp16 = const()[name = tensor("op_45637_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4511_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4511_cast_fp16, y = var_45637_to_fp16)[name = tensor("aw_chunk_4511_cast_fp16")]; tensor var_45639_to_fp16 = const()[name = tensor("op_45639_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4513_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4513_cast_fp16, y = var_45639_to_fp16)[name = tensor("aw_chunk_4513_cast_fp16")]; tensor var_45641_to_fp16 = const()[name = tensor("op_45641_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4515_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4515_cast_fp16, y = var_45641_to_fp16)[name = tensor("aw_chunk_4515_cast_fp16")]; tensor var_45643_to_fp16 = const()[name = tensor("op_45643_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4517_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4517_cast_fp16, y = var_45643_to_fp16)[name = tensor("aw_chunk_4517_cast_fp16")]; tensor var_45645_to_fp16 = const()[name = tensor("op_45645_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4519_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4519_cast_fp16, y = var_45645_to_fp16)[name = tensor("aw_chunk_4519_cast_fp16")]; tensor var_45647_to_fp16 = const()[name = tensor("op_45647_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4521_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4521_cast_fp16, y = var_45647_to_fp16)[name = tensor("aw_chunk_4521_cast_fp16")]; tensor var_45649_to_fp16 = const()[name = tensor("op_45649_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4523_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4523_cast_fp16, y = var_45649_to_fp16)[name = tensor("aw_chunk_4523_cast_fp16")]; tensor var_45651_to_fp16 = const()[name = tensor("op_45651_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4525_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4525_cast_fp16, y = var_45651_to_fp16)[name = tensor("aw_chunk_4525_cast_fp16")]; tensor var_45653_to_fp16 = const()[name = tensor("op_45653_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4527_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4527_cast_fp16, y = var_45653_to_fp16)[name = tensor("aw_chunk_4527_cast_fp16")]; tensor var_45655_to_fp16 = const()[name = tensor("op_45655_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4529_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4529_cast_fp16, y = var_45655_to_fp16)[name = tensor("aw_chunk_4529_cast_fp16")]; tensor var_45657_to_fp16 = const()[name = tensor("op_45657_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4531_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4531_cast_fp16, y = var_45657_to_fp16)[name = tensor("aw_chunk_4531_cast_fp16")]; tensor var_45659_to_fp16 = const()[name = tensor("op_45659_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4533_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4533_cast_fp16, y = var_45659_to_fp16)[name = tensor("aw_chunk_4533_cast_fp16")]; tensor var_45661_to_fp16 = const()[name = tensor("op_45661_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4535_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4535_cast_fp16, y = var_45661_to_fp16)[name = tensor("aw_chunk_4535_cast_fp16")]; tensor var_45663_to_fp16 = const()[name = tensor("op_45663_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4537_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4537_cast_fp16, y = var_45663_to_fp16)[name = tensor("aw_chunk_4537_cast_fp16")]; tensor var_45665_to_fp16 = const()[name = tensor("op_45665_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4539_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4539_cast_fp16, y = var_45665_to_fp16)[name = tensor("aw_chunk_4539_cast_fp16")]; tensor var_45667_to_fp16 = const()[name = tensor("op_45667_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4541_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4541_cast_fp16, y = var_45667_to_fp16)[name = tensor("aw_chunk_4541_cast_fp16")]; tensor var_45669_to_fp16 = const()[name = tensor("op_45669_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4543_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4543_cast_fp16, y = var_45669_to_fp16)[name = tensor("aw_chunk_4543_cast_fp16")]; tensor var_45671_to_fp16 = const()[name = tensor("op_45671_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4545_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4545_cast_fp16, y = var_45671_to_fp16)[name = tensor("aw_chunk_4545_cast_fp16")]; tensor var_45673_to_fp16 = const()[name = tensor("op_45673_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4547_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4547_cast_fp16, y = var_45673_to_fp16)[name = tensor("aw_chunk_4547_cast_fp16")]; tensor var_45675_to_fp16 = const()[name = tensor("op_45675_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4549_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4549_cast_fp16, y = var_45675_to_fp16)[name = tensor("aw_chunk_4549_cast_fp16")]; tensor var_45677_to_fp16 = const()[name = tensor("op_45677_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4551_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4551_cast_fp16, y = var_45677_to_fp16)[name = tensor("aw_chunk_4551_cast_fp16")]; tensor var_45679_to_fp16 = const()[name = tensor("op_45679_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4553_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4553_cast_fp16, y = var_45679_to_fp16)[name = tensor("aw_chunk_4553_cast_fp16")]; tensor var_45681_to_fp16 = const()[name = tensor("op_45681_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4555_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4555_cast_fp16, y = var_45681_to_fp16)[name = tensor("aw_chunk_4555_cast_fp16")]; tensor var_45683_to_fp16 = const()[name = tensor("op_45683_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4557_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4557_cast_fp16, y = var_45683_to_fp16)[name = tensor("aw_chunk_4557_cast_fp16")]; tensor var_45685_to_fp16 = const()[name = tensor("op_45685_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4559_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4559_cast_fp16, y = var_45685_to_fp16)[name = tensor("aw_chunk_4559_cast_fp16")]; tensor var_45687_to_fp16 = const()[name = tensor("op_45687_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4561_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4561_cast_fp16, y = var_45687_to_fp16)[name = tensor("aw_chunk_4561_cast_fp16")]; tensor var_45689_to_fp16 = const()[name = tensor("op_45689_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4563_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4563_cast_fp16, y = var_45689_to_fp16)[name = tensor("aw_chunk_4563_cast_fp16")]; tensor var_45691_to_fp16 = const()[name = tensor("op_45691_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4565_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4565_cast_fp16, y = var_45691_to_fp16)[name = tensor("aw_chunk_4565_cast_fp16")]; tensor var_45693_to_fp16 = const()[name = tensor("op_45693_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4567_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4567_cast_fp16, y = var_45693_to_fp16)[name = tensor("aw_chunk_4567_cast_fp16")]; tensor var_45695_to_fp16 = const()[name = tensor("op_45695_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4569_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4569_cast_fp16, y = var_45695_to_fp16)[name = tensor("aw_chunk_4569_cast_fp16")]; tensor var_45697_to_fp16 = const()[name = tensor("op_45697_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4571_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4571_cast_fp16, y = var_45697_to_fp16)[name = tensor("aw_chunk_4571_cast_fp16")]; tensor var_45699_to_fp16 = const()[name = tensor("op_45699_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4573_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4573_cast_fp16, y = var_45699_to_fp16)[name = tensor("aw_chunk_4573_cast_fp16")]; tensor var_45701_to_fp16 = const()[name = tensor("op_45701_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4575_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4575_cast_fp16, y = var_45701_to_fp16)[name = tensor("aw_chunk_4575_cast_fp16")]; tensor var_45703_to_fp16 = const()[name = tensor("op_45703_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4577_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4577_cast_fp16, y = var_45703_to_fp16)[name = tensor("aw_chunk_4577_cast_fp16")]; tensor var_45705_to_fp16 = const()[name = tensor("op_45705_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4579_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4579_cast_fp16, y = var_45705_to_fp16)[name = tensor("aw_chunk_4579_cast_fp16")]; tensor var_45707_to_fp16 = const()[name = tensor("op_45707_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4581_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4581_cast_fp16, y = var_45707_to_fp16)[name = tensor("aw_chunk_4581_cast_fp16")]; tensor var_45709_to_fp16 = const()[name = tensor("op_45709_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4583_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4583_cast_fp16, y = var_45709_to_fp16)[name = tensor("aw_chunk_4583_cast_fp16")]; tensor var_45711_to_fp16 = const()[name = tensor("op_45711_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4585_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4585_cast_fp16, y = var_45711_to_fp16)[name = tensor("aw_chunk_4585_cast_fp16")]; tensor var_45713_to_fp16 = const()[name = tensor("op_45713_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4587_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4587_cast_fp16, y = var_45713_to_fp16)[name = tensor("aw_chunk_4587_cast_fp16")]; tensor var_45715_to_fp16 = const()[name = tensor("op_45715_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4589_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4589_cast_fp16, y = var_45715_to_fp16)[name = tensor("aw_chunk_4589_cast_fp16")]; tensor var_45717_to_fp16 = const()[name = tensor("op_45717_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4591_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4591_cast_fp16, y = var_45717_to_fp16)[name = tensor("aw_chunk_4591_cast_fp16")]; tensor var_45719_to_fp16 = const()[name = tensor("op_45719_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4593_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4593_cast_fp16, y = var_45719_to_fp16)[name = tensor("aw_chunk_4593_cast_fp16")]; tensor var_45721_to_fp16 = const()[name = tensor("op_45721_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4595_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4595_cast_fp16, y = var_45721_to_fp16)[name = tensor("aw_chunk_4595_cast_fp16")]; tensor var_45723_to_fp16 = const()[name = tensor("op_45723_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4597_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4597_cast_fp16, y = var_45723_to_fp16)[name = tensor("aw_chunk_4597_cast_fp16")]; tensor var_45725_to_fp16 = const()[name = tensor("op_45725_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4599_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4599_cast_fp16, y = var_45725_to_fp16)[name = tensor("aw_chunk_4599_cast_fp16")]; tensor var_45727_to_fp16 = const()[name = tensor("op_45727_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4601_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4601_cast_fp16, y = var_45727_to_fp16)[name = tensor("aw_chunk_4601_cast_fp16")]; tensor var_45729_to_fp16 = const()[name = tensor("op_45729_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4603_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4603_cast_fp16, y = var_45729_to_fp16)[name = tensor("aw_chunk_4603_cast_fp16")]; tensor var_45731_to_fp16 = const()[name = tensor("op_45731_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4605_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4605_cast_fp16, y = var_45731_to_fp16)[name = tensor("aw_chunk_4605_cast_fp16")]; tensor var_45733_to_fp16 = const()[name = tensor("op_45733_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4607_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4607_cast_fp16, y = var_45733_to_fp16)[name = tensor("aw_chunk_4607_cast_fp16")]; tensor var_45735_to_fp16 = const()[name = tensor("op_45735_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4609_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4609_cast_fp16, y = var_45735_to_fp16)[name = tensor("aw_chunk_4609_cast_fp16")]; tensor var_45737_to_fp16 = const()[name = tensor("op_45737_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4611_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4611_cast_fp16, y = var_45737_to_fp16)[name = tensor("aw_chunk_4611_cast_fp16")]; tensor var_45739_to_fp16 = const()[name = tensor("op_45739_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4613_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4613_cast_fp16, y = var_45739_to_fp16)[name = tensor("aw_chunk_4613_cast_fp16")]; tensor var_45741_to_fp16 = const()[name = tensor("op_45741_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4615_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4615_cast_fp16, y = var_45741_to_fp16)[name = tensor("aw_chunk_4615_cast_fp16")]; tensor var_45743_to_fp16 = const()[name = tensor("op_45743_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4617_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4617_cast_fp16, y = var_45743_to_fp16)[name = tensor("aw_chunk_4617_cast_fp16")]; tensor var_45745_to_fp16 = const()[name = tensor("op_45745_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4619_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4619_cast_fp16, y = var_45745_to_fp16)[name = tensor("aw_chunk_4619_cast_fp16")]; tensor var_45747_to_fp16 = const()[name = tensor("op_45747_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4621_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4621_cast_fp16, y = var_45747_to_fp16)[name = tensor("aw_chunk_4621_cast_fp16")]; tensor var_45749_to_fp16 = const()[name = tensor("op_45749_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4623_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4623_cast_fp16, y = var_45749_to_fp16)[name = tensor("aw_chunk_4623_cast_fp16")]; tensor var_45751_to_fp16 = const()[name = tensor("op_45751_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4625_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4625_cast_fp16, y = var_45751_to_fp16)[name = tensor("aw_chunk_4625_cast_fp16")]; tensor var_45753_to_fp16 = const()[name = tensor("op_45753_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4627_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4627_cast_fp16, y = var_45753_to_fp16)[name = tensor("aw_chunk_4627_cast_fp16")]; tensor var_45755_to_fp16 = const()[name = tensor("op_45755_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4629_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4629_cast_fp16, y = var_45755_to_fp16)[name = tensor("aw_chunk_4629_cast_fp16")]; tensor var_45757_to_fp16 = const()[name = tensor("op_45757_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4631_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4631_cast_fp16, y = var_45757_to_fp16)[name = tensor("aw_chunk_4631_cast_fp16")]; tensor var_45759_to_fp16 = const()[name = tensor("op_45759_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4633_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4633_cast_fp16, y = var_45759_to_fp16)[name = tensor("aw_chunk_4633_cast_fp16")]; tensor var_45761_to_fp16 = const()[name = tensor("op_45761_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4635_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4635_cast_fp16, y = var_45761_to_fp16)[name = tensor("aw_chunk_4635_cast_fp16")]; tensor var_45763_to_fp16 = const()[name = tensor("op_45763_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4637_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4637_cast_fp16, y = var_45763_to_fp16)[name = tensor("aw_chunk_4637_cast_fp16")]; tensor var_45765_to_fp16 = const()[name = tensor("op_45765_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4639_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4639_cast_fp16, y = var_45765_to_fp16)[name = tensor("aw_chunk_4639_cast_fp16")]; tensor var_45767_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4481_cast_fp16)[name = tensor("op_45767_cast_fp16")]; tensor var_45768_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4483_cast_fp16)[name = tensor("op_45768_cast_fp16")]; tensor var_45769_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4485_cast_fp16)[name = tensor("op_45769_cast_fp16")]; tensor var_45770_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4487_cast_fp16)[name = tensor("op_45770_cast_fp16")]; tensor var_45771_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4489_cast_fp16)[name = tensor("op_45771_cast_fp16")]; tensor var_45772_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4491_cast_fp16)[name = tensor("op_45772_cast_fp16")]; tensor var_45773_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4493_cast_fp16)[name = tensor("op_45773_cast_fp16")]; tensor var_45774_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4495_cast_fp16)[name = tensor("op_45774_cast_fp16")]; tensor var_45775_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4497_cast_fp16)[name = tensor("op_45775_cast_fp16")]; tensor var_45776_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4499_cast_fp16)[name = tensor("op_45776_cast_fp16")]; tensor var_45777_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4501_cast_fp16)[name = tensor("op_45777_cast_fp16")]; tensor var_45778_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4503_cast_fp16)[name = tensor("op_45778_cast_fp16")]; tensor var_45779_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4505_cast_fp16)[name = tensor("op_45779_cast_fp16")]; tensor var_45780_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4507_cast_fp16)[name = tensor("op_45780_cast_fp16")]; tensor var_45781_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4509_cast_fp16)[name = tensor("op_45781_cast_fp16")]; tensor var_45782_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4511_cast_fp16)[name = tensor("op_45782_cast_fp16")]; tensor var_45783_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4513_cast_fp16)[name = tensor("op_45783_cast_fp16")]; tensor var_45784_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4515_cast_fp16)[name = tensor("op_45784_cast_fp16")]; tensor var_45785_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4517_cast_fp16)[name = tensor("op_45785_cast_fp16")]; tensor var_45786_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4519_cast_fp16)[name = tensor("op_45786_cast_fp16")]; tensor var_45787_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4521_cast_fp16)[name = tensor("op_45787_cast_fp16")]; tensor var_45788_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4523_cast_fp16)[name = tensor("op_45788_cast_fp16")]; tensor var_45789_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4525_cast_fp16)[name = tensor("op_45789_cast_fp16")]; tensor var_45790_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4527_cast_fp16)[name = tensor("op_45790_cast_fp16")]; tensor var_45791_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4529_cast_fp16)[name = tensor("op_45791_cast_fp16")]; tensor var_45792_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4531_cast_fp16)[name = tensor("op_45792_cast_fp16")]; tensor var_45793_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4533_cast_fp16)[name = tensor("op_45793_cast_fp16")]; tensor var_45794_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4535_cast_fp16)[name = tensor("op_45794_cast_fp16")]; tensor var_45795_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4537_cast_fp16)[name = tensor("op_45795_cast_fp16")]; tensor var_45796_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4539_cast_fp16)[name = tensor("op_45796_cast_fp16")]; tensor var_45797_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4541_cast_fp16)[name = tensor("op_45797_cast_fp16")]; tensor var_45798_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4543_cast_fp16)[name = tensor("op_45798_cast_fp16")]; tensor var_45799_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4545_cast_fp16)[name = tensor("op_45799_cast_fp16")]; tensor var_45800_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4547_cast_fp16)[name = tensor("op_45800_cast_fp16")]; tensor var_45801_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4549_cast_fp16)[name = tensor("op_45801_cast_fp16")]; tensor var_45802_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4551_cast_fp16)[name = tensor("op_45802_cast_fp16")]; tensor var_45803_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4553_cast_fp16)[name = tensor("op_45803_cast_fp16")]; tensor var_45804_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4555_cast_fp16)[name = tensor("op_45804_cast_fp16")]; tensor var_45805_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4557_cast_fp16)[name = tensor("op_45805_cast_fp16")]; tensor var_45806_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4559_cast_fp16)[name = tensor("op_45806_cast_fp16")]; tensor var_45807_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4561_cast_fp16)[name = tensor("op_45807_cast_fp16")]; tensor var_45808_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4563_cast_fp16)[name = tensor("op_45808_cast_fp16")]; tensor var_45809_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4565_cast_fp16)[name = tensor("op_45809_cast_fp16")]; tensor var_45810_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4567_cast_fp16)[name = tensor("op_45810_cast_fp16")]; tensor var_45811_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4569_cast_fp16)[name = tensor("op_45811_cast_fp16")]; tensor var_45812_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4571_cast_fp16)[name = tensor("op_45812_cast_fp16")]; tensor var_45813_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4573_cast_fp16)[name = tensor("op_45813_cast_fp16")]; tensor var_45814_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4575_cast_fp16)[name = tensor("op_45814_cast_fp16")]; tensor var_45815_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4577_cast_fp16)[name = tensor("op_45815_cast_fp16")]; tensor var_45816_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4579_cast_fp16)[name = tensor("op_45816_cast_fp16")]; tensor var_45817_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4581_cast_fp16)[name = tensor("op_45817_cast_fp16")]; tensor var_45818_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4583_cast_fp16)[name = tensor("op_45818_cast_fp16")]; tensor var_45819_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4585_cast_fp16)[name = tensor("op_45819_cast_fp16")]; tensor var_45820_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4587_cast_fp16)[name = tensor("op_45820_cast_fp16")]; tensor var_45821_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4589_cast_fp16)[name = tensor("op_45821_cast_fp16")]; tensor var_45822_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4591_cast_fp16)[name = tensor("op_45822_cast_fp16")]; tensor var_45823_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4593_cast_fp16)[name = tensor("op_45823_cast_fp16")]; tensor var_45824_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4595_cast_fp16)[name = tensor("op_45824_cast_fp16")]; tensor var_45825_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4597_cast_fp16)[name = tensor("op_45825_cast_fp16")]; tensor var_45826_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4599_cast_fp16)[name = tensor("op_45826_cast_fp16")]; tensor var_45827_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4601_cast_fp16)[name = tensor("op_45827_cast_fp16")]; tensor var_45828_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4603_cast_fp16)[name = tensor("op_45828_cast_fp16")]; tensor var_45829_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4605_cast_fp16)[name = tensor("op_45829_cast_fp16")]; tensor var_45830_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4607_cast_fp16)[name = tensor("op_45830_cast_fp16")]; tensor var_45831_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4609_cast_fp16)[name = tensor("op_45831_cast_fp16")]; tensor var_45832_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4611_cast_fp16)[name = tensor("op_45832_cast_fp16")]; tensor var_45833_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4613_cast_fp16)[name = tensor("op_45833_cast_fp16")]; tensor var_45834_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4615_cast_fp16)[name = tensor("op_45834_cast_fp16")]; tensor var_45835_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4617_cast_fp16)[name = tensor("op_45835_cast_fp16")]; tensor var_45836_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4619_cast_fp16)[name = tensor("op_45836_cast_fp16")]; tensor var_45837_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4621_cast_fp16)[name = tensor("op_45837_cast_fp16")]; tensor var_45838_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4623_cast_fp16)[name = tensor("op_45838_cast_fp16")]; tensor var_45839_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4625_cast_fp16)[name = tensor("op_45839_cast_fp16")]; tensor var_45840_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4627_cast_fp16)[name = tensor("op_45840_cast_fp16")]; tensor var_45841_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4629_cast_fp16)[name = tensor("op_45841_cast_fp16")]; tensor var_45842_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4631_cast_fp16)[name = tensor("op_45842_cast_fp16")]; tensor var_45843_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4633_cast_fp16)[name = tensor("op_45843_cast_fp16")]; tensor var_45844_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4635_cast_fp16)[name = tensor("op_45844_cast_fp16")]; tensor var_45845_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4637_cast_fp16)[name = tensor("op_45845_cast_fp16")]; tensor var_45846_cast_fp16 = softmax(axis = var_44565, x = aw_chunk_4639_cast_fp16)[name = tensor("op_45846_cast_fp16")]; tensor var_45848_equation_0 = const()[name = tensor("op_45848_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45848_cast_fp16 = einsum(equation = var_45848_equation_0, values = (var_45368_cast_fp16, var_45767_cast_fp16))[name = tensor("op_45848_cast_fp16")]; tensor var_45850_equation_0 = const()[name = tensor("op_45850_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45850_cast_fp16 = einsum(equation = var_45850_equation_0, values = (var_45368_cast_fp16, var_45768_cast_fp16))[name = tensor("op_45850_cast_fp16")]; tensor var_45852_equation_0 = const()[name = tensor("op_45852_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45852_cast_fp16 = einsum(equation = var_45852_equation_0, values = (var_45368_cast_fp16, var_45769_cast_fp16))[name = tensor("op_45852_cast_fp16")]; tensor var_45854_equation_0 = const()[name = tensor("op_45854_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45854_cast_fp16 = einsum(equation = var_45854_equation_0, values = (var_45368_cast_fp16, var_45770_cast_fp16))[name = tensor("op_45854_cast_fp16")]; tensor var_45856_equation_0 = const()[name = tensor("op_45856_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45856_cast_fp16 = einsum(equation = var_45856_equation_0, values = (var_45372_cast_fp16, var_45771_cast_fp16))[name = tensor("op_45856_cast_fp16")]; tensor var_45858_equation_0 = const()[name = tensor("op_45858_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45858_cast_fp16 = einsum(equation = var_45858_equation_0, values = (var_45372_cast_fp16, var_45772_cast_fp16))[name = tensor("op_45858_cast_fp16")]; tensor var_45860_equation_0 = const()[name = tensor("op_45860_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45860_cast_fp16 = einsum(equation = var_45860_equation_0, values = (var_45372_cast_fp16, var_45773_cast_fp16))[name = tensor("op_45860_cast_fp16")]; tensor var_45862_equation_0 = const()[name = tensor("op_45862_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45862_cast_fp16 = einsum(equation = var_45862_equation_0, values = (var_45372_cast_fp16, var_45774_cast_fp16))[name = tensor("op_45862_cast_fp16")]; tensor var_45864_equation_0 = const()[name = tensor("op_45864_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45864_cast_fp16 = einsum(equation = var_45864_equation_0, values = (var_45376_cast_fp16, var_45775_cast_fp16))[name = tensor("op_45864_cast_fp16")]; tensor var_45866_equation_0 = const()[name = tensor("op_45866_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45866_cast_fp16 = einsum(equation = var_45866_equation_0, values = (var_45376_cast_fp16, var_45776_cast_fp16))[name = tensor("op_45866_cast_fp16")]; tensor var_45868_equation_0 = const()[name = tensor("op_45868_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45868_cast_fp16 = einsum(equation = var_45868_equation_0, values = (var_45376_cast_fp16, var_45777_cast_fp16))[name = tensor("op_45868_cast_fp16")]; tensor var_45870_equation_0 = const()[name = tensor("op_45870_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45870_cast_fp16 = einsum(equation = var_45870_equation_0, values = (var_45376_cast_fp16, var_45778_cast_fp16))[name = tensor("op_45870_cast_fp16")]; tensor var_45872_equation_0 = const()[name = tensor("op_45872_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45872_cast_fp16 = einsum(equation = var_45872_equation_0, values = (var_45380_cast_fp16, var_45779_cast_fp16))[name = tensor("op_45872_cast_fp16")]; tensor var_45874_equation_0 = const()[name = tensor("op_45874_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45874_cast_fp16 = einsum(equation = var_45874_equation_0, values = (var_45380_cast_fp16, var_45780_cast_fp16))[name = tensor("op_45874_cast_fp16")]; tensor var_45876_equation_0 = const()[name = tensor("op_45876_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45876_cast_fp16 = einsum(equation = var_45876_equation_0, values = (var_45380_cast_fp16, var_45781_cast_fp16))[name = tensor("op_45876_cast_fp16")]; tensor var_45878_equation_0 = const()[name = tensor("op_45878_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45878_cast_fp16 = einsum(equation = var_45878_equation_0, values = (var_45380_cast_fp16, var_45782_cast_fp16))[name = tensor("op_45878_cast_fp16")]; tensor var_45880_equation_0 = const()[name = tensor("op_45880_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45880_cast_fp16 = einsum(equation = var_45880_equation_0, values = (var_45384_cast_fp16, var_45783_cast_fp16))[name = tensor("op_45880_cast_fp16")]; tensor var_45882_equation_0 = const()[name = tensor("op_45882_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45882_cast_fp16 = einsum(equation = var_45882_equation_0, values = (var_45384_cast_fp16, var_45784_cast_fp16))[name = tensor("op_45882_cast_fp16")]; tensor var_45884_equation_0 = const()[name = tensor("op_45884_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45884_cast_fp16 = einsum(equation = var_45884_equation_0, values = (var_45384_cast_fp16, var_45785_cast_fp16))[name = tensor("op_45884_cast_fp16")]; tensor var_45886_equation_0 = const()[name = tensor("op_45886_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45886_cast_fp16 = einsum(equation = var_45886_equation_0, values = (var_45384_cast_fp16, var_45786_cast_fp16))[name = tensor("op_45886_cast_fp16")]; tensor var_45888_equation_0 = const()[name = tensor("op_45888_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45888_cast_fp16 = einsum(equation = var_45888_equation_0, values = (var_45388_cast_fp16, var_45787_cast_fp16))[name = tensor("op_45888_cast_fp16")]; tensor var_45890_equation_0 = const()[name = tensor("op_45890_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45890_cast_fp16 = einsum(equation = var_45890_equation_0, values = (var_45388_cast_fp16, var_45788_cast_fp16))[name = tensor("op_45890_cast_fp16")]; tensor var_45892_equation_0 = const()[name = tensor("op_45892_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45892_cast_fp16 = einsum(equation = var_45892_equation_0, values = (var_45388_cast_fp16, var_45789_cast_fp16))[name = tensor("op_45892_cast_fp16")]; tensor var_45894_equation_0 = const()[name = tensor("op_45894_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45894_cast_fp16 = einsum(equation = var_45894_equation_0, values = (var_45388_cast_fp16, var_45790_cast_fp16))[name = tensor("op_45894_cast_fp16")]; tensor var_45896_equation_0 = const()[name = tensor("op_45896_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45896_cast_fp16 = einsum(equation = var_45896_equation_0, values = (var_45392_cast_fp16, var_45791_cast_fp16))[name = tensor("op_45896_cast_fp16")]; tensor var_45898_equation_0 = const()[name = tensor("op_45898_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45898_cast_fp16 = einsum(equation = var_45898_equation_0, values = (var_45392_cast_fp16, var_45792_cast_fp16))[name = tensor("op_45898_cast_fp16")]; tensor var_45900_equation_0 = const()[name = tensor("op_45900_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45900_cast_fp16 = einsum(equation = var_45900_equation_0, values = (var_45392_cast_fp16, var_45793_cast_fp16))[name = tensor("op_45900_cast_fp16")]; tensor var_45902_equation_0 = const()[name = tensor("op_45902_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45902_cast_fp16 = einsum(equation = var_45902_equation_0, values = (var_45392_cast_fp16, var_45794_cast_fp16))[name = tensor("op_45902_cast_fp16")]; tensor var_45904_equation_0 = const()[name = tensor("op_45904_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45904_cast_fp16 = einsum(equation = var_45904_equation_0, values = (var_45396_cast_fp16, var_45795_cast_fp16))[name = tensor("op_45904_cast_fp16")]; tensor var_45906_equation_0 = const()[name = tensor("op_45906_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45906_cast_fp16 = einsum(equation = var_45906_equation_0, values = (var_45396_cast_fp16, var_45796_cast_fp16))[name = tensor("op_45906_cast_fp16")]; tensor var_45908_equation_0 = const()[name = tensor("op_45908_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45908_cast_fp16 = einsum(equation = var_45908_equation_0, values = (var_45396_cast_fp16, var_45797_cast_fp16))[name = tensor("op_45908_cast_fp16")]; tensor var_45910_equation_0 = const()[name = tensor("op_45910_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45910_cast_fp16 = einsum(equation = var_45910_equation_0, values = (var_45396_cast_fp16, var_45798_cast_fp16))[name = tensor("op_45910_cast_fp16")]; tensor var_45912_equation_0 = const()[name = tensor("op_45912_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45912_cast_fp16 = einsum(equation = var_45912_equation_0, values = (var_45400_cast_fp16, var_45799_cast_fp16))[name = tensor("op_45912_cast_fp16")]; tensor var_45914_equation_0 = const()[name = tensor("op_45914_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45914_cast_fp16 = einsum(equation = var_45914_equation_0, values = (var_45400_cast_fp16, var_45800_cast_fp16))[name = tensor("op_45914_cast_fp16")]; tensor var_45916_equation_0 = const()[name = tensor("op_45916_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45916_cast_fp16 = einsum(equation = var_45916_equation_0, values = (var_45400_cast_fp16, var_45801_cast_fp16))[name = tensor("op_45916_cast_fp16")]; tensor var_45918_equation_0 = const()[name = tensor("op_45918_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45918_cast_fp16 = einsum(equation = var_45918_equation_0, values = (var_45400_cast_fp16, var_45802_cast_fp16))[name = tensor("op_45918_cast_fp16")]; tensor var_45920_equation_0 = const()[name = tensor("op_45920_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45920_cast_fp16 = einsum(equation = var_45920_equation_0, values = (var_45404_cast_fp16, var_45803_cast_fp16))[name = tensor("op_45920_cast_fp16")]; tensor var_45922_equation_0 = const()[name = tensor("op_45922_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45922_cast_fp16 = einsum(equation = var_45922_equation_0, values = (var_45404_cast_fp16, var_45804_cast_fp16))[name = tensor("op_45922_cast_fp16")]; tensor var_45924_equation_0 = const()[name = tensor("op_45924_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45924_cast_fp16 = einsum(equation = var_45924_equation_0, values = (var_45404_cast_fp16, var_45805_cast_fp16))[name = tensor("op_45924_cast_fp16")]; tensor var_45926_equation_0 = const()[name = tensor("op_45926_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45926_cast_fp16 = einsum(equation = var_45926_equation_0, values = (var_45404_cast_fp16, var_45806_cast_fp16))[name = tensor("op_45926_cast_fp16")]; tensor var_45928_equation_0 = const()[name = tensor("op_45928_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45928_cast_fp16 = einsum(equation = var_45928_equation_0, values = (var_45408_cast_fp16, var_45807_cast_fp16))[name = tensor("op_45928_cast_fp16")]; tensor var_45930_equation_0 = const()[name = tensor("op_45930_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45930_cast_fp16 = einsum(equation = var_45930_equation_0, values = (var_45408_cast_fp16, var_45808_cast_fp16))[name = tensor("op_45930_cast_fp16")]; tensor var_45932_equation_0 = const()[name = tensor("op_45932_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45932_cast_fp16 = einsum(equation = var_45932_equation_0, values = (var_45408_cast_fp16, var_45809_cast_fp16))[name = tensor("op_45932_cast_fp16")]; tensor var_45934_equation_0 = const()[name = tensor("op_45934_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45934_cast_fp16 = einsum(equation = var_45934_equation_0, values = (var_45408_cast_fp16, var_45810_cast_fp16))[name = tensor("op_45934_cast_fp16")]; tensor var_45936_equation_0 = const()[name = tensor("op_45936_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45936_cast_fp16 = einsum(equation = var_45936_equation_0, values = (var_45412_cast_fp16, var_45811_cast_fp16))[name = tensor("op_45936_cast_fp16")]; tensor var_45938_equation_0 = const()[name = tensor("op_45938_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45938_cast_fp16 = einsum(equation = var_45938_equation_0, values = (var_45412_cast_fp16, var_45812_cast_fp16))[name = tensor("op_45938_cast_fp16")]; tensor var_45940_equation_0 = const()[name = tensor("op_45940_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45940_cast_fp16 = einsum(equation = var_45940_equation_0, values = (var_45412_cast_fp16, var_45813_cast_fp16))[name = tensor("op_45940_cast_fp16")]; tensor var_45942_equation_0 = const()[name = tensor("op_45942_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45942_cast_fp16 = einsum(equation = var_45942_equation_0, values = (var_45412_cast_fp16, var_45814_cast_fp16))[name = tensor("op_45942_cast_fp16")]; tensor var_45944_equation_0 = const()[name = tensor("op_45944_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45944_cast_fp16 = einsum(equation = var_45944_equation_0, values = (var_45416_cast_fp16, var_45815_cast_fp16))[name = tensor("op_45944_cast_fp16")]; tensor var_45946_equation_0 = const()[name = tensor("op_45946_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45946_cast_fp16 = einsum(equation = var_45946_equation_0, values = (var_45416_cast_fp16, var_45816_cast_fp16))[name = tensor("op_45946_cast_fp16")]; tensor var_45948_equation_0 = const()[name = tensor("op_45948_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45948_cast_fp16 = einsum(equation = var_45948_equation_0, values = (var_45416_cast_fp16, var_45817_cast_fp16))[name = tensor("op_45948_cast_fp16")]; tensor var_45950_equation_0 = const()[name = tensor("op_45950_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45950_cast_fp16 = einsum(equation = var_45950_equation_0, values = (var_45416_cast_fp16, var_45818_cast_fp16))[name = tensor("op_45950_cast_fp16")]; tensor var_45952_equation_0 = const()[name = tensor("op_45952_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45952_cast_fp16 = einsum(equation = var_45952_equation_0, values = (var_45420_cast_fp16, var_45819_cast_fp16))[name = tensor("op_45952_cast_fp16")]; tensor var_45954_equation_0 = const()[name = tensor("op_45954_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45954_cast_fp16 = einsum(equation = var_45954_equation_0, values = (var_45420_cast_fp16, var_45820_cast_fp16))[name = tensor("op_45954_cast_fp16")]; tensor var_45956_equation_0 = const()[name = tensor("op_45956_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45956_cast_fp16 = einsum(equation = var_45956_equation_0, values = (var_45420_cast_fp16, var_45821_cast_fp16))[name = tensor("op_45956_cast_fp16")]; tensor var_45958_equation_0 = const()[name = tensor("op_45958_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45958_cast_fp16 = einsum(equation = var_45958_equation_0, values = (var_45420_cast_fp16, var_45822_cast_fp16))[name = tensor("op_45958_cast_fp16")]; tensor var_45960_equation_0 = const()[name = tensor("op_45960_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45960_cast_fp16 = einsum(equation = var_45960_equation_0, values = (var_45424_cast_fp16, var_45823_cast_fp16))[name = tensor("op_45960_cast_fp16")]; tensor var_45962_equation_0 = const()[name = tensor("op_45962_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45962_cast_fp16 = einsum(equation = var_45962_equation_0, values = (var_45424_cast_fp16, var_45824_cast_fp16))[name = tensor("op_45962_cast_fp16")]; tensor var_45964_equation_0 = const()[name = tensor("op_45964_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45964_cast_fp16 = einsum(equation = var_45964_equation_0, values = (var_45424_cast_fp16, var_45825_cast_fp16))[name = tensor("op_45964_cast_fp16")]; tensor var_45966_equation_0 = const()[name = tensor("op_45966_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45966_cast_fp16 = einsum(equation = var_45966_equation_0, values = (var_45424_cast_fp16, var_45826_cast_fp16))[name = tensor("op_45966_cast_fp16")]; tensor var_45968_equation_0 = const()[name = tensor("op_45968_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45968_cast_fp16 = einsum(equation = var_45968_equation_0, values = (var_45428_cast_fp16, var_45827_cast_fp16))[name = tensor("op_45968_cast_fp16")]; tensor var_45970_equation_0 = const()[name = tensor("op_45970_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45970_cast_fp16 = einsum(equation = var_45970_equation_0, values = (var_45428_cast_fp16, var_45828_cast_fp16))[name = tensor("op_45970_cast_fp16")]; tensor var_45972_equation_0 = const()[name = tensor("op_45972_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45972_cast_fp16 = einsum(equation = var_45972_equation_0, values = (var_45428_cast_fp16, var_45829_cast_fp16))[name = tensor("op_45972_cast_fp16")]; tensor var_45974_equation_0 = const()[name = tensor("op_45974_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45974_cast_fp16 = einsum(equation = var_45974_equation_0, values = (var_45428_cast_fp16, var_45830_cast_fp16))[name = tensor("op_45974_cast_fp16")]; tensor var_45976_equation_0 = const()[name = tensor("op_45976_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45976_cast_fp16 = einsum(equation = var_45976_equation_0, values = (var_45432_cast_fp16, var_45831_cast_fp16))[name = tensor("op_45976_cast_fp16")]; tensor var_45978_equation_0 = const()[name = tensor("op_45978_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45978_cast_fp16 = einsum(equation = var_45978_equation_0, values = (var_45432_cast_fp16, var_45832_cast_fp16))[name = tensor("op_45978_cast_fp16")]; tensor var_45980_equation_0 = const()[name = tensor("op_45980_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45980_cast_fp16 = einsum(equation = var_45980_equation_0, values = (var_45432_cast_fp16, var_45833_cast_fp16))[name = tensor("op_45980_cast_fp16")]; tensor var_45982_equation_0 = const()[name = tensor("op_45982_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45982_cast_fp16 = einsum(equation = var_45982_equation_0, values = (var_45432_cast_fp16, var_45834_cast_fp16))[name = tensor("op_45982_cast_fp16")]; tensor var_45984_equation_0 = const()[name = tensor("op_45984_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45984_cast_fp16 = einsum(equation = var_45984_equation_0, values = (var_45436_cast_fp16, var_45835_cast_fp16))[name = tensor("op_45984_cast_fp16")]; tensor var_45986_equation_0 = const()[name = tensor("op_45986_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45986_cast_fp16 = einsum(equation = var_45986_equation_0, values = (var_45436_cast_fp16, var_45836_cast_fp16))[name = tensor("op_45986_cast_fp16")]; tensor var_45988_equation_0 = const()[name = tensor("op_45988_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45988_cast_fp16 = einsum(equation = var_45988_equation_0, values = (var_45436_cast_fp16, var_45837_cast_fp16))[name = tensor("op_45988_cast_fp16")]; tensor var_45990_equation_0 = const()[name = tensor("op_45990_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45990_cast_fp16 = einsum(equation = var_45990_equation_0, values = (var_45436_cast_fp16, var_45838_cast_fp16))[name = tensor("op_45990_cast_fp16")]; tensor var_45992_equation_0 = const()[name = tensor("op_45992_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45992_cast_fp16 = einsum(equation = var_45992_equation_0, values = (var_45440_cast_fp16, var_45839_cast_fp16))[name = tensor("op_45992_cast_fp16")]; tensor var_45994_equation_0 = const()[name = tensor("op_45994_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45994_cast_fp16 = einsum(equation = var_45994_equation_0, values = (var_45440_cast_fp16, var_45840_cast_fp16))[name = tensor("op_45994_cast_fp16")]; tensor var_45996_equation_0 = const()[name = tensor("op_45996_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45996_cast_fp16 = einsum(equation = var_45996_equation_0, values = (var_45440_cast_fp16, var_45841_cast_fp16))[name = tensor("op_45996_cast_fp16")]; tensor var_45998_equation_0 = const()[name = tensor("op_45998_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_45998_cast_fp16 = einsum(equation = var_45998_equation_0, values = (var_45440_cast_fp16, var_45842_cast_fp16))[name = tensor("op_45998_cast_fp16")]; tensor var_46000_equation_0 = const()[name = tensor("op_46000_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_46000_cast_fp16 = einsum(equation = var_46000_equation_0, values = (var_45444_cast_fp16, var_45843_cast_fp16))[name = tensor("op_46000_cast_fp16")]; tensor var_46002_equation_0 = const()[name = tensor("op_46002_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_46002_cast_fp16 = einsum(equation = var_46002_equation_0, values = (var_45444_cast_fp16, var_45844_cast_fp16))[name = tensor("op_46002_cast_fp16")]; tensor var_46004_equation_0 = const()[name = tensor("op_46004_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_46004_cast_fp16 = einsum(equation = var_46004_equation_0, values = (var_45444_cast_fp16, var_45845_cast_fp16))[name = tensor("op_46004_cast_fp16")]; tensor var_46006_equation_0 = const()[name = tensor("op_46006_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_46006_cast_fp16 = einsum(equation = var_46006_equation_0, values = (var_45444_cast_fp16, var_45846_cast_fp16))[name = tensor("op_46006_cast_fp16")]; tensor var_46008_interleave_0 = const()[name = tensor("op_46008_interleave_0"), val = tensor(false)]; tensor var_46008_cast_fp16 = concat(axis = var_44540, interleave = var_46008_interleave_0, values = (var_45848_cast_fp16, var_45850_cast_fp16, var_45852_cast_fp16, var_45854_cast_fp16))[name = tensor("op_46008_cast_fp16")]; tensor var_46010_interleave_0 = const()[name = tensor("op_46010_interleave_0"), val = tensor(false)]; tensor var_46010_cast_fp16 = concat(axis = var_44540, interleave = var_46010_interleave_0, values = (var_45856_cast_fp16, var_45858_cast_fp16, var_45860_cast_fp16, var_45862_cast_fp16))[name = tensor("op_46010_cast_fp16")]; tensor var_46012_interleave_0 = const()[name = tensor("op_46012_interleave_0"), val = tensor(false)]; tensor var_46012_cast_fp16 = concat(axis = var_44540, interleave = var_46012_interleave_0, values = (var_45864_cast_fp16, var_45866_cast_fp16, var_45868_cast_fp16, var_45870_cast_fp16))[name = tensor("op_46012_cast_fp16")]; tensor var_46014_interleave_0 = const()[name = tensor("op_46014_interleave_0"), val = tensor(false)]; tensor var_46014_cast_fp16 = concat(axis = var_44540, interleave = var_46014_interleave_0, values = (var_45872_cast_fp16, var_45874_cast_fp16, var_45876_cast_fp16, var_45878_cast_fp16))[name = tensor("op_46014_cast_fp16")]; tensor var_46016_interleave_0 = const()[name = tensor("op_46016_interleave_0"), val = tensor(false)]; tensor var_46016_cast_fp16 = concat(axis = var_44540, interleave = var_46016_interleave_0, values = (var_45880_cast_fp16, var_45882_cast_fp16, var_45884_cast_fp16, var_45886_cast_fp16))[name = tensor("op_46016_cast_fp16")]; tensor var_46018_interleave_0 = const()[name = tensor("op_46018_interleave_0"), val = tensor(false)]; tensor var_46018_cast_fp16 = concat(axis = var_44540, interleave = var_46018_interleave_0, values = (var_45888_cast_fp16, var_45890_cast_fp16, var_45892_cast_fp16, var_45894_cast_fp16))[name = tensor("op_46018_cast_fp16")]; tensor var_46020_interleave_0 = const()[name = tensor("op_46020_interleave_0"), val = tensor(false)]; tensor var_46020_cast_fp16 = concat(axis = var_44540, interleave = var_46020_interleave_0, values = (var_45896_cast_fp16, var_45898_cast_fp16, var_45900_cast_fp16, var_45902_cast_fp16))[name = tensor("op_46020_cast_fp16")]; tensor var_46022_interleave_0 = const()[name = tensor("op_46022_interleave_0"), val = tensor(false)]; tensor var_46022_cast_fp16 = concat(axis = var_44540, interleave = var_46022_interleave_0, values = (var_45904_cast_fp16, var_45906_cast_fp16, var_45908_cast_fp16, var_45910_cast_fp16))[name = tensor("op_46022_cast_fp16")]; tensor var_46024_interleave_0 = const()[name = tensor("op_46024_interleave_0"), val = tensor(false)]; tensor var_46024_cast_fp16 = concat(axis = var_44540, interleave = var_46024_interleave_0, values = (var_45912_cast_fp16, var_45914_cast_fp16, var_45916_cast_fp16, var_45918_cast_fp16))[name = tensor("op_46024_cast_fp16")]; tensor var_46026_interleave_0 = const()[name = tensor("op_46026_interleave_0"), val = tensor(false)]; tensor var_46026_cast_fp16 = concat(axis = var_44540, interleave = var_46026_interleave_0, values = (var_45920_cast_fp16, var_45922_cast_fp16, var_45924_cast_fp16, var_45926_cast_fp16))[name = tensor("op_46026_cast_fp16")]; tensor var_46028_interleave_0 = const()[name = tensor("op_46028_interleave_0"), val = tensor(false)]; tensor var_46028_cast_fp16 = concat(axis = var_44540, interleave = var_46028_interleave_0, values = (var_45928_cast_fp16, var_45930_cast_fp16, var_45932_cast_fp16, var_45934_cast_fp16))[name = tensor("op_46028_cast_fp16")]; tensor var_46030_interleave_0 = const()[name = tensor("op_46030_interleave_0"), val = tensor(false)]; tensor var_46030_cast_fp16 = concat(axis = var_44540, interleave = var_46030_interleave_0, values = (var_45936_cast_fp16, var_45938_cast_fp16, var_45940_cast_fp16, var_45942_cast_fp16))[name = tensor("op_46030_cast_fp16")]; tensor var_46032_interleave_0 = const()[name = tensor("op_46032_interleave_0"), val = tensor(false)]; tensor var_46032_cast_fp16 = concat(axis = var_44540, interleave = var_46032_interleave_0, values = (var_45944_cast_fp16, var_45946_cast_fp16, var_45948_cast_fp16, var_45950_cast_fp16))[name = tensor("op_46032_cast_fp16")]; tensor var_46034_interleave_0 = const()[name = tensor("op_46034_interleave_0"), val = tensor(false)]; tensor var_46034_cast_fp16 = concat(axis = var_44540, interleave = var_46034_interleave_0, values = (var_45952_cast_fp16, var_45954_cast_fp16, var_45956_cast_fp16, var_45958_cast_fp16))[name = tensor("op_46034_cast_fp16")]; tensor var_46036_interleave_0 = const()[name = tensor("op_46036_interleave_0"), val = tensor(false)]; tensor var_46036_cast_fp16 = concat(axis = var_44540, interleave = var_46036_interleave_0, values = (var_45960_cast_fp16, var_45962_cast_fp16, var_45964_cast_fp16, var_45966_cast_fp16))[name = tensor("op_46036_cast_fp16")]; tensor var_46038_interleave_0 = const()[name = tensor("op_46038_interleave_0"), val = tensor(false)]; tensor var_46038_cast_fp16 = concat(axis = var_44540, interleave = var_46038_interleave_0, values = (var_45968_cast_fp16, var_45970_cast_fp16, var_45972_cast_fp16, var_45974_cast_fp16))[name = tensor("op_46038_cast_fp16")]; tensor var_46040_interleave_0 = const()[name = tensor("op_46040_interleave_0"), val = tensor(false)]; tensor var_46040_cast_fp16 = concat(axis = var_44540, interleave = var_46040_interleave_0, values = (var_45976_cast_fp16, var_45978_cast_fp16, var_45980_cast_fp16, var_45982_cast_fp16))[name = tensor("op_46040_cast_fp16")]; tensor var_46042_interleave_0 = const()[name = tensor("op_46042_interleave_0"), val = tensor(false)]; tensor var_46042_cast_fp16 = concat(axis = var_44540, interleave = var_46042_interleave_0, values = (var_45984_cast_fp16, var_45986_cast_fp16, var_45988_cast_fp16, var_45990_cast_fp16))[name = tensor("op_46042_cast_fp16")]; tensor var_46044_interleave_0 = const()[name = tensor("op_46044_interleave_0"), val = tensor(false)]; tensor var_46044_cast_fp16 = concat(axis = var_44540, interleave = var_46044_interleave_0, values = (var_45992_cast_fp16, var_45994_cast_fp16, var_45996_cast_fp16, var_45998_cast_fp16))[name = tensor("op_46044_cast_fp16")]; tensor var_46046_interleave_0 = const()[name = tensor("op_46046_interleave_0"), val = tensor(false)]; tensor var_46046_cast_fp16 = concat(axis = var_44540, interleave = var_46046_interleave_0, values = (var_46000_cast_fp16, var_46002_cast_fp16, var_46004_cast_fp16, var_46006_cast_fp16))[name = tensor("op_46046_cast_fp16")]; tensor input_225_interleave_0 = const()[name = tensor("input_225_interleave_0"), val = tensor(false)]; tensor input_225_cast_fp16 = concat(axis = var_44565, interleave = input_225_interleave_0, values = (var_46008_cast_fp16, var_46010_cast_fp16, var_46012_cast_fp16, var_46014_cast_fp16, var_46016_cast_fp16, var_46018_cast_fp16, var_46020_cast_fp16, var_46022_cast_fp16, var_46024_cast_fp16, var_46026_cast_fp16, var_46028_cast_fp16, var_46030_cast_fp16, var_46032_cast_fp16, var_46034_cast_fp16, var_46036_cast_fp16, var_46038_cast_fp16, var_46040_cast_fp16, var_46042_cast_fp16, var_46044_cast_fp16, var_46046_cast_fp16))[name = tensor("input_225_cast_fp16")]; tensor var_46057_pad_type_0 = const()[name = tensor("op_46057_pad_type_0"), val = tensor("valid")]; tensor var_46057_strides_0 = const()[name = tensor("op_46057_strides_0"), val = tensor([1, 1])]; tensor var_46057_pad_0 = const()[name = tensor("op_46057_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46057_dilations_0 = const()[name = tensor("op_46057_dilations_0"), val = tensor([1, 1])]; tensor var_46057_groups_0 = const()[name = tensor("op_46057_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375062912))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375882176))), name = tensor("layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_28_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375882304)))]; tensor var_46057_cast_fp16 = conv(bias = layers_28_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_46057_dilations_0, groups = var_46057_groups_0, pad = var_46057_pad_0, pad_type = var_46057_pad_type_0, strides = var_46057_strides_0, weight = layers_28_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_225_cast_fp16)[name = tensor("op_46057_cast_fp16")]; tensor var_46063_pad_type_0 = const()[name = tensor("op_46063_pad_type_0"), val = tensor("valid")]; tensor var_46063_strides_0 = const()[name = tensor("op_46063_strides_0"), val = tensor([1, 1])]; tensor var_46063_pad_0 = const()[name = tensor("op_46063_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46063_dilations_0 = const()[name = tensor("op_46063_dilations_0"), val = tensor([1, 1])]; tensor var_46063_groups_0 = const()[name = tensor("op_46063_groups_0"), val = tensor(1)]; tensor layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375896192))), name = tensor("layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375884928))), shape = tensor([1280, 1280, 1, 1])]; tensor var_46063_cast_fp16 = conv(dilations = var_46063_dilations_0, groups = var_46063_groups_0, pad = var_46063_pad_0, pad_type = var_46063_pad_type_0, strides = var_46063_strides_0, weight = layers_28_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_225_cast_fp16)[name = tensor("op_46063_cast_fp16")]; tensor obj_115_cast_fp16 = add(x = var_46057_cast_fp16, y = var_46063_cast_fp16)[name = tensor("obj_115_cast_fp16")]; tensor inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = obj_115_cast_fp16)[name = tensor("inputs_115_cast_fp16")]; tensor out_115_axes_0 = const()[name = tensor("out_115_axes_0"), val = tensor([1])]; tensor var_46074_to_fp16 = const()[name = tensor("op_46074_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_115_cast_fp16 = layer_norm(axes = out_115_axes_0, epsilon = var_46074_to_fp16, x = inputs_115_cast_fp16)[name = tensor("out_115_cast_fp16")]; tensor input_227_gamma_0_to_fp16 = const()[name = tensor("input_227_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(376101056)))]; tensor input_227_beta_0_to_fp16 = const()[name = tensor("input_227_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(376103680)))]; tensor input_227_epsilon_0_to_fp16 = const()[name = tensor("input_227_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_227_cast_fp16 = batch_norm(beta = input_227_beta_0_to_fp16, epsilon = input_227_epsilon_0_to_fp16, gamma = input_227_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_115_cast_fp16)[name = tensor("input_227_cast_fp16")]; tensor var_46092_pad_type_0 = const()[name = tensor("op_46092_pad_type_0"), val = tensor("valid")]; tensor var_46092_strides_0 = const()[name = tensor("op_46092_strides_0"), val = tensor([1, 1])]; tensor var_46092_pad_0 = const()[name = tensor("op_46092_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46092_dilations_0 = const()[name = tensor("op_46092_dilations_0"), val = tensor([1, 1])]; tensor var_46092_groups_0 = const()[name = tensor("op_46092_groups_0"), val = tensor(1)]; tensor layers_28_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(376106304))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379383168))), name = tensor("layers_28_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_28_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379383296)))]; tensor var_46092_cast_fp16 = conv(bias = layers_28_fc1_inlier_module_bias_to_fp16, dilations = var_46092_dilations_0, groups = var_46092_groups_0, pad = var_46092_pad_0, pad_type = var_46092_pad_type_0, strides = var_46092_strides_0, weight = layers_28_fc1_inlier_module_weight_to_fp16_palettized, x = input_227_cast_fp16)[name = tensor("op_46092_cast_fp16")]; tensor var_46098_pad_type_0 = const()[name = tensor("op_46098_pad_type_0"), val = tensor("valid")]; tensor var_46098_strides_0 = const()[name = tensor("op_46098_strides_0"), val = tensor([1, 1])]; tensor var_46098_pad_0 = const()[name = tensor("op_46098_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46098_dilations_0 = const()[name = tensor("op_46098_dilations_0"), val = tensor([1, 1])]; tensor var_46098_groups_0 = const()[name = tensor("op_46098_groups_0"), val = tensor(1)]; tensor layers_28_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379450816))), name = tensor("layers_28_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(379393600))), shape = tensor([5120, 1280, 1, 1])]; tensor var_46098_cast_fp16 = conv(dilations = var_46098_dilations_0, groups = var_46098_groups_0, pad = var_46098_pad_0, pad_type = var_46098_pad_type_0, strides = var_46098_strides_0, weight = layers_28_fc1_outlier_module_weight_to_fp16_sparsified, x = input_227_cast_fp16)[name = tensor("op_46098_cast_fp16")]; tensor input_229_cast_fp16 = add(x = var_46092_cast_fp16, y = var_46098_cast_fp16)[name = tensor("input_229_cast_fp16")]; tensor input_231_mode_0 = const()[name = tensor("input_231_mode_0"), val = tensor("EXACT")]; tensor input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor("input_231_cast_fp16")]; tensor var_46109_pad_type_0 = const()[name = tensor("op_46109_pad_type_0"), val = tensor("valid")]; tensor var_46109_strides_0 = const()[name = tensor("op_46109_strides_0"), val = tensor([1, 1])]; tensor var_46109_pad_0 = const()[name = tensor("op_46109_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46109_dilations_0 = const()[name = tensor("op_46109_dilations_0"), val = tensor([1, 1])]; tensor var_46109_groups_0 = const()[name = tensor("op_46109_groups_0"), val = tensor(1)]; tensor layers_28_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(380270080))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383546944))), name = tensor("layers_28_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_28_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_28_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383547072)))]; tensor var_46109_cast_fp16 = conv(bias = layers_28_fc2_inlier_module_bias_to_fp16, dilations = var_46109_dilations_0, groups = var_46109_groups_0, pad = var_46109_pad_0, pad_type = var_46109_pad_type_0, strides = var_46109_strides_0, weight = layers_28_fc2_inlier_module_weight_to_fp16_palettized, x = input_231_cast_fp16)[name = tensor("op_46109_cast_fp16")]; tensor var_46115_pad_type_0 = const()[name = tensor("op_46115_pad_type_0"), val = tensor("valid")]; tensor var_46115_strides_0 = const()[name = tensor("op_46115_strides_0"), val = tensor([1, 1])]; tensor var_46115_pad_0 = const()[name = tensor("op_46115_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46115_dilations_0 = const()[name = tensor("op_46115_dilations_0"), val = tensor([1, 1])]; tensor var_46115_groups_0 = const()[name = tensor("op_46115_groups_0"), val = tensor(1)]; tensor layers_28_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383608320))), name = tensor("layers_28_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(383549696))), shape = tensor([1280, 5120, 1, 1])]; tensor var_46115_cast_fp16 = conv(dilations = var_46115_dilations_0, groups = var_46115_groups_0, pad = var_46115_pad_0, pad_type = var_46115_pad_type_0, strides = var_46115_strides_0, weight = layers_28_fc2_outlier_module_weight_to_fp16_sparsified, x = input_231_cast_fp16)[name = tensor("op_46115_cast_fp16")]; tensor hidden_states_61_cast_fp16 = add(x = var_46109_cast_fp16, y = var_46115_cast_fp16)[name = tensor("hidden_states_61_cast_fp16")]; tensor inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = hidden_states_61_cast_fp16)[name = tensor("inputs_117_cast_fp16")]; tensor var_46121 = const()[name = tensor("op_46121"), val = tensor(3)]; tensor var_46146 = const()[name = tensor("op_46146"), val = tensor(1)]; tensor out_117_axes_0 = const()[name = tensor("out_117_axes_0"), val = tensor([1])]; tensor var_46163_to_fp16 = const()[name = tensor("op_46163_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_117_cast_fp16 = layer_norm(axes = out_117_axes_0, epsilon = var_46163_to_fp16, x = inputs_117_cast_fp16)[name = tensor("out_117_cast_fp16")]; tensor obj_117_gamma_0_to_fp16 = const()[name = tensor("obj_117_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384427584)))]; tensor obj_117_beta_0_to_fp16 = const()[name = tensor("obj_117_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384430208)))]; tensor obj_117_epsilon_0_to_fp16 = const()[name = tensor("obj_117_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_117_cast_fp16 = batch_norm(beta = obj_117_beta_0_to_fp16, epsilon = obj_117_epsilon_0_to_fp16, gamma = obj_117_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_117_cast_fp16)[name = tensor("obj_117_cast_fp16")]; tensor var_46185_pad_type_0 = const()[name = tensor("op_46185_pad_type_0"), val = tensor("valid")]; tensor var_46185_strides_0 = const()[name = tensor("op_46185_strides_0"), val = tensor([1, 1])]; tensor var_46185_pad_0 = const()[name = tensor("op_46185_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46185_dilations_0 = const()[name = tensor("op_46185_dilations_0"), val = tensor([1, 1])]; tensor var_46185_groups_0 = const()[name = tensor("op_46185_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(384432832))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385252096))), name = tensor("layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_29_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385252224)))]; tensor var_46185_cast_fp16 = conv(bias = layers_29_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_46185_dilations_0, groups = var_46185_groups_0, pad = var_46185_pad_0, pad_type = var_46185_pad_type_0, strides = var_46185_strides_0, weight = layers_29_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = tensor("op_46185_cast_fp16")]; tensor var_46191_pad_type_0 = const()[name = tensor("op_46191_pad_type_0"), val = tensor("valid")]; tensor var_46191_strides_0 = const()[name = tensor("op_46191_strides_0"), val = tensor([1, 1])]; tensor var_46191_pad_0 = const()[name = tensor("op_46191_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46191_dilations_0 = const()[name = tensor("op_46191_dilations_0"), val = tensor([1, 1])]; tensor var_46191_groups_0 = const()[name = tensor("op_46191_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385277312))), name = tensor("layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385254848))), shape = tensor([1280, 1280, 1, 1])]; tensor var_46191_cast_fp16 = conv(dilations = var_46191_dilations_0, groups = var_46191_groups_0, pad = var_46191_pad_0, pad_type = var_46191_pad_type_0, strides = var_46191_strides_0, weight = layers_29_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = tensor("op_46191_cast_fp16")]; tensor query_59_cast_fp16 = add(x = var_46185_cast_fp16, y = var_46191_cast_fp16)[name = tensor("query_59_cast_fp16")]; tensor var_46200_pad_type_0 = const()[name = tensor("op_46200_pad_type_0"), val = tensor("valid")]; tensor var_46200_strides_0 = const()[name = tensor("op_46200_strides_0"), val = tensor([1, 1])]; tensor var_46200_pad_0 = const()[name = tensor("op_46200_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46200_dilations_0 = const()[name = tensor("op_46200_dilations_0"), val = tensor([1, 1])]; tensor var_46200_groups_0 = const()[name = tensor("op_46200_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(385482176))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386301440))), name = tensor("layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_46200_cast_fp16 = conv(dilations = var_46200_dilations_0, groups = var_46200_groups_0, pad = var_46200_pad_0, pad_type = var_46200_pad_type_0, strides = var_46200_strides_0, weight = layers_29_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = tensor("op_46200_cast_fp16")]; tensor var_46206_pad_type_0 = const()[name = tensor("op_46206_pad_type_0"), val = tensor("valid")]; tensor var_46206_strides_0 = const()[name = tensor("op_46206_strides_0"), val = tensor([1, 1])]; tensor var_46206_pad_0 = const()[name = tensor("op_46206_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46206_dilations_0 = const()[name = tensor("op_46206_dilations_0"), val = tensor([1, 1])]; tensor var_46206_groups_0 = const()[name = tensor("op_46206_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386323840))), name = tensor("layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386301568))), shape = tensor([1280, 1280, 1, 1])]; tensor var_46206_cast_fp16 = conv(dilations = var_46206_dilations_0, groups = var_46206_groups_0, pad = var_46206_pad_0, pad_type = var_46206_pad_type_0, strides = var_46206_strides_0, weight = layers_29_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = tensor("op_46206_cast_fp16")]; tensor key_59_cast_fp16 = add(x = var_46200_cast_fp16, y = var_46206_cast_fp16)[name = tensor("key_59_cast_fp16")]; tensor var_46216_pad_type_0 = const()[name = tensor("op_46216_pad_type_0"), val = tensor("valid")]; tensor var_46216_strides_0 = const()[name = tensor("op_46216_strides_0"), val = tensor([1, 1])]; tensor var_46216_pad_0 = const()[name = tensor("op_46216_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46216_dilations_0 = const()[name = tensor("op_46216_dilations_0"), val = tensor([1, 1])]; tensor var_46216_groups_0 = const()[name = tensor("op_46216_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(386528704))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387347968))), name = tensor("layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_29_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387348096)))]; tensor var_46216_cast_fp16 = conv(bias = layers_29_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_46216_dilations_0, groups = var_46216_groups_0, pad = var_46216_pad_0, pad_type = var_46216_pad_type_0, strides = var_46216_strides_0, weight = layers_29_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_117_cast_fp16)[name = tensor("op_46216_cast_fp16")]; tensor var_46222_pad_type_0 = const()[name = tensor("op_46222_pad_type_0"), val = tensor("valid")]; tensor var_46222_strides_0 = const()[name = tensor("op_46222_strides_0"), val = tensor([1, 1])]; tensor var_46222_pad_0 = const()[name = tensor("op_46222_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_46222_dilations_0 = const()[name = tensor("op_46222_dilations_0"), val = tensor([1, 1])]; tensor var_46222_groups_0 = const()[name = tensor("op_46222_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387363392))), name = tensor("layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387350720))), shape = tensor([1280, 1280, 1, 1])]; tensor var_46222_cast_fp16 = conv(dilations = var_46222_dilations_0, groups = var_46222_groups_0, pad = var_46222_pad_0, pad_type = var_46222_pad_type_0, strides = var_46222_strides_0, weight = layers_29_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_117_cast_fp16)[name = tensor("op_46222_cast_fp16")]; tensor value_59_cast_fp16 = add(x = var_46216_cast_fp16, y = var_46222_cast_fp16)[name = tensor("value_59_cast_fp16")]; tensor var_46228_begin_0 = const()[name = tensor("op_46228_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46228_end_0 = const()[name = tensor("op_46228_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46228_end_mask_0 = const()[name = tensor("op_46228_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46228_cast_fp16 = slice_by_index(begin = var_46228_begin_0, end = var_46228_end_0, end_mask = var_46228_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46228_cast_fp16")]; tensor var_46232_begin_0 = const()[name = tensor("op_46232_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_46232_end_0 = const()[name = tensor("op_46232_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_46232_end_mask_0 = const()[name = tensor("op_46232_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46232_cast_fp16 = slice_by_index(begin = var_46232_begin_0, end = var_46232_end_0, end_mask = var_46232_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46232_cast_fp16")]; tensor var_46236_begin_0 = const()[name = tensor("op_46236_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_46236_end_0 = const()[name = tensor("op_46236_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_46236_end_mask_0 = const()[name = tensor("op_46236_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46236_cast_fp16 = slice_by_index(begin = var_46236_begin_0, end = var_46236_end_0, end_mask = var_46236_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46236_cast_fp16")]; tensor var_46240_begin_0 = const()[name = tensor("op_46240_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_46240_end_0 = const()[name = tensor("op_46240_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_46240_end_mask_0 = const()[name = tensor("op_46240_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46240_cast_fp16 = slice_by_index(begin = var_46240_begin_0, end = var_46240_end_0, end_mask = var_46240_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46240_cast_fp16")]; tensor var_46244_begin_0 = const()[name = tensor("op_46244_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_46244_end_0 = const()[name = tensor("op_46244_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_46244_end_mask_0 = const()[name = tensor("op_46244_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46244_cast_fp16 = slice_by_index(begin = var_46244_begin_0, end = var_46244_end_0, end_mask = var_46244_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46244_cast_fp16")]; tensor var_46248_begin_0 = const()[name = tensor("op_46248_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_46248_end_0 = const()[name = tensor("op_46248_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_46248_end_mask_0 = const()[name = tensor("op_46248_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46248_cast_fp16 = slice_by_index(begin = var_46248_begin_0, end = var_46248_end_0, end_mask = var_46248_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46248_cast_fp16")]; tensor var_46252_begin_0 = const()[name = tensor("op_46252_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_46252_end_0 = const()[name = tensor("op_46252_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_46252_end_mask_0 = const()[name = tensor("op_46252_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46252_cast_fp16 = slice_by_index(begin = var_46252_begin_0, end = var_46252_end_0, end_mask = var_46252_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46252_cast_fp16")]; tensor var_46256_begin_0 = const()[name = tensor("op_46256_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_46256_end_0 = const()[name = tensor("op_46256_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_46256_end_mask_0 = const()[name = tensor("op_46256_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46256_cast_fp16 = slice_by_index(begin = var_46256_begin_0, end = var_46256_end_0, end_mask = var_46256_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46256_cast_fp16")]; tensor var_46260_begin_0 = const()[name = tensor("op_46260_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_46260_end_0 = const()[name = tensor("op_46260_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_46260_end_mask_0 = const()[name = tensor("op_46260_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46260_cast_fp16 = slice_by_index(begin = var_46260_begin_0, end = var_46260_end_0, end_mask = var_46260_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46260_cast_fp16")]; tensor var_46264_begin_0 = const()[name = tensor("op_46264_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_46264_end_0 = const()[name = tensor("op_46264_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_46264_end_mask_0 = const()[name = tensor("op_46264_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46264_cast_fp16 = slice_by_index(begin = var_46264_begin_0, end = var_46264_end_0, end_mask = var_46264_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46264_cast_fp16")]; tensor var_46268_begin_0 = const()[name = tensor("op_46268_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_46268_end_0 = const()[name = tensor("op_46268_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_46268_end_mask_0 = const()[name = tensor("op_46268_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46268_cast_fp16 = slice_by_index(begin = var_46268_begin_0, end = var_46268_end_0, end_mask = var_46268_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46268_cast_fp16")]; tensor var_46272_begin_0 = const()[name = tensor("op_46272_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_46272_end_0 = const()[name = tensor("op_46272_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_46272_end_mask_0 = const()[name = tensor("op_46272_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46272_cast_fp16 = slice_by_index(begin = var_46272_begin_0, end = var_46272_end_0, end_mask = var_46272_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46272_cast_fp16")]; tensor var_46276_begin_0 = const()[name = tensor("op_46276_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_46276_end_0 = const()[name = tensor("op_46276_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_46276_end_mask_0 = const()[name = tensor("op_46276_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46276_cast_fp16 = slice_by_index(begin = var_46276_begin_0, end = var_46276_end_0, end_mask = var_46276_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46276_cast_fp16")]; tensor var_46280_begin_0 = const()[name = tensor("op_46280_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_46280_end_0 = const()[name = tensor("op_46280_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_46280_end_mask_0 = const()[name = tensor("op_46280_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46280_cast_fp16 = slice_by_index(begin = var_46280_begin_0, end = var_46280_end_0, end_mask = var_46280_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46280_cast_fp16")]; tensor var_46284_begin_0 = const()[name = tensor("op_46284_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_46284_end_0 = const()[name = tensor("op_46284_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_46284_end_mask_0 = const()[name = tensor("op_46284_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46284_cast_fp16 = slice_by_index(begin = var_46284_begin_0, end = var_46284_end_0, end_mask = var_46284_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46284_cast_fp16")]; tensor var_46288_begin_0 = const()[name = tensor("op_46288_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_46288_end_0 = const()[name = tensor("op_46288_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_46288_end_mask_0 = const()[name = tensor("op_46288_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46288_cast_fp16 = slice_by_index(begin = var_46288_begin_0, end = var_46288_end_0, end_mask = var_46288_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46288_cast_fp16")]; tensor var_46292_begin_0 = const()[name = tensor("op_46292_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_46292_end_0 = const()[name = tensor("op_46292_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_46292_end_mask_0 = const()[name = tensor("op_46292_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46292_cast_fp16 = slice_by_index(begin = var_46292_begin_0, end = var_46292_end_0, end_mask = var_46292_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46292_cast_fp16")]; tensor var_46296_begin_0 = const()[name = tensor("op_46296_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_46296_end_0 = const()[name = tensor("op_46296_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_46296_end_mask_0 = const()[name = tensor("op_46296_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46296_cast_fp16 = slice_by_index(begin = var_46296_begin_0, end = var_46296_end_0, end_mask = var_46296_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46296_cast_fp16")]; tensor var_46300_begin_0 = const()[name = tensor("op_46300_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_46300_end_0 = const()[name = tensor("op_46300_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_46300_end_mask_0 = const()[name = tensor("op_46300_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46300_cast_fp16 = slice_by_index(begin = var_46300_begin_0, end = var_46300_end_0, end_mask = var_46300_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46300_cast_fp16")]; tensor var_46304_begin_0 = const()[name = tensor("op_46304_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_46304_end_0 = const()[name = tensor("op_46304_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_46304_end_mask_0 = const()[name = tensor("op_46304_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46304_cast_fp16 = slice_by_index(begin = var_46304_begin_0, end = var_46304_end_0, end_mask = var_46304_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_46304_cast_fp16")]; tensor var_46313_begin_0 = const()[name = tensor("op_46313_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46313_end_0 = const()[name = tensor("op_46313_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46313_end_mask_0 = const()[name = tensor("op_46313_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46313_cast_fp16 = slice_by_index(begin = var_46313_begin_0, end = var_46313_end_0, end_mask = var_46313_end_mask_0, x = var_46228_cast_fp16)[name = tensor("op_46313_cast_fp16")]; tensor var_46320_begin_0 = const()[name = tensor("op_46320_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46320_end_0 = const()[name = tensor("op_46320_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46320_end_mask_0 = const()[name = tensor("op_46320_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46320_cast_fp16 = slice_by_index(begin = var_46320_begin_0, end = var_46320_end_0, end_mask = var_46320_end_mask_0, x = var_46228_cast_fp16)[name = tensor("op_46320_cast_fp16")]; tensor var_46327_begin_0 = const()[name = tensor("op_46327_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46327_end_0 = const()[name = tensor("op_46327_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46327_end_mask_0 = const()[name = tensor("op_46327_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46327_cast_fp16 = slice_by_index(begin = var_46327_begin_0, end = var_46327_end_0, end_mask = var_46327_end_mask_0, x = var_46228_cast_fp16)[name = tensor("op_46327_cast_fp16")]; tensor var_46334_begin_0 = const()[name = tensor("op_46334_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46334_end_0 = const()[name = tensor("op_46334_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46334_end_mask_0 = const()[name = tensor("op_46334_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46334_cast_fp16 = slice_by_index(begin = var_46334_begin_0, end = var_46334_end_0, end_mask = var_46334_end_mask_0, x = var_46228_cast_fp16)[name = tensor("op_46334_cast_fp16")]; tensor var_46341_begin_0 = const()[name = tensor("op_46341_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46341_end_0 = const()[name = tensor("op_46341_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46341_end_mask_0 = const()[name = tensor("op_46341_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46341_cast_fp16 = slice_by_index(begin = var_46341_begin_0, end = var_46341_end_0, end_mask = var_46341_end_mask_0, x = var_46232_cast_fp16)[name = tensor("op_46341_cast_fp16")]; tensor var_46348_begin_0 = const()[name = tensor("op_46348_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46348_end_0 = const()[name = tensor("op_46348_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46348_end_mask_0 = const()[name = tensor("op_46348_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46348_cast_fp16 = slice_by_index(begin = var_46348_begin_0, end = var_46348_end_0, end_mask = var_46348_end_mask_0, x = var_46232_cast_fp16)[name = tensor("op_46348_cast_fp16")]; tensor var_46355_begin_0 = const()[name = tensor("op_46355_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46355_end_0 = const()[name = tensor("op_46355_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46355_end_mask_0 = const()[name = tensor("op_46355_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46355_cast_fp16 = slice_by_index(begin = var_46355_begin_0, end = var_46355_end_0, end_mask = var_46355_end_mask_0, x = var_46232_cast_fp16)[name = tensor("op_46355_cast_fp16")]; tensor var_46362_begin_0 = const()[name = tensor("op_46362_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46362_end_0 = const()[name = tensor("op_46362_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46362_end_mask_0 = const()[name = tensor("op_46362_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46362_cast_fp16 = slice_by_index(begin = var_46362_begin_0, end = var_46362_end_0, end_mask = var_46362_end_mask_0, x = var_46232_cast_fp16)[name = tensor("op_46362_cast_fp16")]; tensor var_46369_begin_0 = const()[name = tensor("op_46369_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46369_end_0 = const()[name = tensor("op_46369_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46369_end_mask_0 = const()[name = tensor("op_46369_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46369_cast_fp16 = slice_by_index(begin = var_46369_begin_0, end = var_46369_end_0, end_mask = var_46369_end_mask_0, x = var_46236_cast_fp16)[name = tensor("op_46369_cast_fp16")]; tensor var_46376_begin_0 = const()[name = tensor("op_46376_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46376_end_0 = const()[name = tensor("op_46376_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46376_end_mask_0 = const()[name = tensor("op_46376_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46376_cast_fp16 = slice_by_index(begin = var_46376_begin_0, end = var_46376_end_0, end_mask = var_46376_end_mask_0, x = var_46236_cast_fp16)[name = tensor("op_46376_cast_fp16")]; tensor var_46383_begin_0 = const()[name = tensor("op_46383_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46383_end_0 = const()[name = tensor("op_46383_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46383_end_mask_0 = const()[name = tensor("op_46383_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46383_cast_fp16 = slice_by_index(begin = var_46383_begin_0, end = var_46383_end_0, end_mask = var_46383_end_mask_0, x = var_46236_cast_fp16)[name = tensor("op_46383_cast_fp16")]; tensor var_46390_begin_0 = const()[name = tensor("op_46390_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46390_end_0 = const()[name = tensor("op_46390_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46390_end_mask_0 = const()[name = tensor("op_46390_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46390_cast_fp16 = slice_by_index(begin = var_46390_begin_0, end = var_46390_end_0, end_mask = var_46390_end_mask_0, x = var_46236_cast_fp16)[name = tensor("op_46390_cast_fp16")]; tensor var_46397_begin_0 = const()[name = tensor("op_46397_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46397_end_0 = const()[name = tensor("op_46397_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46397_end_mask_0 = const()[name = tensor("op_46397_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46397_cast_fp16 = slice_by_index(begin = var_46397_begin_0, end = var_46397_end_0, end_mask = var_46397_end_mask_0, x = var_46240_cast_fp16)[name = tensor("op_46397_cast_fp16")]; tensor var_46404_begin_0 = const()[name = tensor("op_46404_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46404_end_0 = const()[name = tensor("op_46404_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46404_end_mask_0 = const()[name = tensor("op_46404_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46404_cast_fp16 = slice_by_index(begin = var_46404_begin_0, end = var_46404_end_0, end_mask = var_46404_end_mask_0, x = var_46240_cast_fp16)[name = tensor("op_46404_cast_fp16")]; tensor var_46411_begin_0 = const()[name = tensor("op_46411_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46411_end_0 = const()[name = tensor("op_46411_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46411_end_mask_0 = const()[name = tensor("op_46411_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46411_cast_fp16 = slice_by_index(begin = var_46411_begin_0, end = var_46411_end_0, end_mask = var_46411_end_mask_0, x = var_46240_cast_fp16)[name = tensor("op_46411_cast_fp16")]; tensor var_46418_begin_0 = const()[name = tensor("op_46418_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46418_end_0 = const()[name = tensor("op_46418_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46418_end_mask_0 = const()[name = tensor("op_46418_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46418_cast_fp16 = slice_by_index(begin = var_46418_begin_0, end = var_46418_end_0, end_mask = var_46418_end_mask_0, x = var_46240_cast_fp16)[name = tensor("op_46418_cast_fp16")]; tensor var_46425_begin_0 = const()[name = tensor("op_46425_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46425_end_0 = const()[name = tensor("op_46425_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46425_end_mask_0 = const()[name = tensor("op_46425_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46425_cast_fp16 = slice_by_index(begin = var_46425_begin_0, end = var_46425_end_0, end_mask = var_46425_end_mask_0, x = var_46244_cast_fp16)[name = tensor("op_46425_cast_fp16")]; tensor var_46432_begin_0 = const()[name = tensor("op_46432_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46432_end_0 = const()[name = tensor("op_46432_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46432_end_mask_0 = const()[name = tensor("op_46432_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46432_cast_fp16 = slice_by_index(begin = var_46432_begin_0, end = var_46432_end_0, end_mask = var_46432_end_mask_0, x = var_46244_cast_fp16)[name = tensor("op_46432_cast_fp16")]; tensor var_46439_begin_0 = const()[name = tensor("op_46439_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46439_end_0 = const()[name = tensor("op_46439_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46439_end_mask_0 = const()[name = tensor("op_46439_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46439_cast_fp16 = slice_by_index(begin = var_46439_begin_0, end = var_46439_end_0, end_mask = var_46439_end_mask_0, x = var_46244_cast_fp16)[name = tensor("op_46439_cast_fp16")]; tensor var_46446_begin_0 = const()[name = tensor("op_46446_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46446_end_0 = const()[name = tensor("op_46446_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46446_end_mask_0 = const()[name = tensor("op_46446_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46446_cast_fp16 = slice_by_index(begin = var_46446_begin_0, end = var_46446_end_0, end_mask = var_46446_end_mask_0, x = var_46244_cast_fp16)[name = tensor("op_46446_cast_fp16")]; tensor var_46453_begin_0 = const()[name = tensor("op_46453_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46453_end_0 = const()[name = tensor("op_46453_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46453_end_mask_0 = const()[name = tensor("op_46453_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46453_cast_fp16 = slice_by_index(begin = var_46453_begin_0, end = var_46453_end_0, end_mask = var_46453_end_mask_0, x = var_46248_cast_fp16)[name = tensor("op_46453_cast_fp16")]; tensor var_46460_begin_0 = const()[name = tensor("op_46460_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46460_end_0 = const()[name = tensor("op_46460_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46460_end_mask_0 = const()[name = tensor("op_46460_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46460_cast_fp16 = slice_by_index(begin = var_46460_begin_0, end = var_46460_end_0, end_mask = var_46460_end_mask_0, x = var_46248_cast_fp16)[name = tensor("op_46460_cast_fp16")]; tensor var_46467_begin_0 = const()[name = tensor("op_46467_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46467_end_0 = const()[name = tensor("op_46467_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46467_end_mask_0 = const()[name = tensor("op_46467_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46467_cast_fp16 = slice_by_index(begin = var_46467_begin_0, end = var_46467_end_0, end_mask = var_46467_end_mask_0, x = var_46248_cast_fp16)[name = tensor("op_46467_cast_fp16")]; tensor var_46474_begin_0 = const()[name = tensor("op_46474_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46474_end_0 = const()[name = tensor("op_46474_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46474_end_mask_0 = const()[name = tensor("op_46474_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46474_cast_fp16 = slice_by_index(begin = var_46474_begin_0, end = var_46474_end_0, end_mask = var_46474_end_mask_0, x = var_46248_cast_fp16)[name = tensor("op_46474_cast_fp16")]; tensor var_46481_begin_0 = const()[name = tensor("op_46481_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46481_end_0 = const()[name = tensor("op_46481_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46481_end_mask_0 = const()[name = tensor("op_46481_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46481_cast_fp16 = slice_by_index(begin = var_46481_begin_0, end = var_46481_end_0, end_mask = var_46481_end_mask_0, x = var_46252_cast_fp16)[name = tensor("op_46481_cast_fp16")]; tensor var_46488_begin_0 = const()[name = tensor("op_46488_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46488_end_0 = const()[name = tensor("op_46488_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46488_end_mask_0 = const()[name = tensor("op_46488_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46488_cast_fp16 = slice_by_index(begin = var_46488_begin_0, end = var_46488_end_0, end_mask = var_46488_end_mask_0, x = var_46252_cast_fp16)[name = tensor("op_46488_cast_fp16")]; tensor var_46495_begin_0 = const()[name = tensor("op_46495_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46495_end_0 = const()[name = tensor("op_46495_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46495_end_mask_0 = const()[name = tensor("op_46495_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46495_cast_fp16 = slice_by_index(begin = var_46495_begin_0, end = var_46495_end_0, end_mask = var_46495_end_mask_0, x = var_46252_cast_fp16)[name = tensor("op_46495_cast_fp16")]; tensor var_46502_begin_0 = const()[name = tensor("op_46502_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46502_end_0 = const()[name = tensor("op_46502_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46502_end_mask_0 = const()[name = tensor("op_46502_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46502_cast_fp16 = slice_by_index(begin = var_46502_begin_0, end = var_46502_end_0, end_mask = var_46502_end_mask_0, x = var_46252_cast_fp16)[name = tensor("op_46502_cast_fp16")]; tensor var_46509_begin_0 = const()[name = tensor("op_46509_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46509_end_0 = const()[name = tensor("op_46509_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46509_end_mask_0 = const()[name = tensor("op_46509_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46509_cast_fp16 = slice_by_index(begin = var_46509_begin_0, end = var_46509_end_0, end_mask = var_46509_end_mask_0, x = var_46256_cast_fp16)[name = tensor("op_46509_cast_fp16")]; tensor var_46516_begin_0 = const()[name = tensor("op_46516_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46516_end_0 = const()[name = tensor("op_46516_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46516_end_mask_0 = const()[name = tensor("op_46516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46516_cast_fp16 = slice_by_index(begin = var_46516_begin_0, end = var_46516_end_0, end_mask = var_46516_end_mask_0, x = var_46256_cast_fp16)[name = tensor("op_46516_cast_fp16")]; tensor var_46523_begin_0 = const()[name = tensor("op_46523_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46523_end_0 = const()[name = tensor("op_46523_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46523_end_mask_0 = const()[name = tensor("op_46523_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46523_cast_fp16 = slice_by_index(begin = var_46523_begin_0, end = var_46523_end_0, end_mask = var_46523_end_mask_0, x = var_46256_cast_fp16)[name = tensor("op_46523_cast_fp16")]; tensor var_46530_begin_0 = const()[name = tensor("op_46530_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46530_end_0 = const()[name = tensor("op_46530_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46530_end_mask_0 = const()[name = tensor("op_46530_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46530_cast_fp16 = slice_by_index(begin = var_46530_begin_0, end = var_46530_end_0, end_mask = var_46530_end_mask_0, x = var_46256_cast_fp16)[name = tensor("op_46530_cast_fp16")]; tensor var_46537_begin_0 = const()[name = tensor("op_46537_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46537_end_0 = const()[name = tensor("op_46537_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46537_end_mask_0 = const()[name = tensor("op_46537_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46537_cast_fp16 = slice_by_index(begin = var_46537_begin_0, end = var_46537_end_0, end_mask = var_46537_end_mask_0, x = var_46260_cast_fp16)[name = tensor("op_46537_cast_fp16")]; tensor var_46544_begin_0 = const()[name = tensor("op_46544_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46544_end_0 = const()[name = tensor("op_46544_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46544_end_mask_0 = const()[name = tensor("op_46544_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46544_cast_fp16 = slice_by_index(begin = var_46544_begin_0, end = var_46544_end_0, end_mask = var_46544_end_mask_0, x = var_46260_cast_fp16)[name = tensor("op_46544_cast_fp16")]; tensor var_46551_begin_0 = const()[name = tensor("op_46551_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46551_end_0 = const()[name = tensor("op_46551_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46551_end_mask_0 = const()[name = tensor("op_46551_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46551_cast_fp16 = slice_by_index(begin = var_46551_begin_0, end = var_46551_end_0, end_mask = var_46551_end_mask_0, x = var_46260_cast_fp16)[name = tensor("op_46551_cast_fp16")]; tensor var_46558_begin_0 = const()[name = tensor("op_46558_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46558_end_0 = const()[name = tensor("op_46558_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46558_end_mask_0 = const()[name = tensor("op_46558_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46558_cast_fp16 = slice_by_index(begin = var_46558_begin_0, end = var_46558_end_0, end_mask = var_46558_end_mask_0, x = var_46260_cast_fp16)[name = tensor("op_46558_cast_fp16")]; tensor var_46565_begin_0 = const()[name = tensor("op_46565_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46565_end_0 = const()[name = tensor("op_46565_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46565_end_mask_0 = const()[name = tensor("op_46565_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46565_cast_fp16 = slice_by_index(begin = var_46565_begin_0, end = var_46565_end_0, end_mask = var_46565_end_mask_0, x = var_46264_cast_fp16)[name = tensor("op_46565_cast_fp16")]; tensor var_46572_begin_0 = const()[name = tensor("op_46572_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46572_end_0 = const()[name = tensor("op_46572_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46572_end_mask_0 = const()[name = tensor("op_46572_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46572_cast_fp16 = slice_by_index(begin = var_46572_begin_0, end = var_46572_end_0, end_mask = var_46572_end_mask_0, x = var_46264_cast_fp16)[name = tensor("op_46572_cast_fp16")]; tensor var_46579_begin_0 = const()[name = tensor("op_46579_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46579_end_0 = const()[name = tensor("op_46579_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46579_end_mask_0 = const()[name = tensor("op_46579_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46579_cast_fp16 = slice_by_index(begin = var_46579_begin_0, end = var_46579_end_0, end_mask = var_46579_end_mask_0, x = var_46264_cast_fp16)[name = tensor("op_46579_cast_fp16")]; tensor var_46586_begin_0 = const()[name = tensor("op_46586_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46586_end_0 = const()[name = tensor("op_46586_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46586_end_mask_0 = const()[name = tensor("op_46586_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46586_cast_fp16 = slice_by_index(begin = var_46586_begin_0, end = var_46586_end_0, end_mask = var_46586_end_mask_0, x = var_46264_cast_fp16)[name = tensor("op_46586_cast_fp16")]; tensor var_46593_begin_0 = const()[name = tensor("op_46593_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46593_end_0 = const()[name = tensor("op_46593_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46593_end_mask_0 = const()[name = tensor("op_46593_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46593_cast_fp16 = slice_by_index(begin = var_46593_begin_0, end = var_46593_end_0, end_mask = var_46593_end_mask_0, x = var_46268_cast_fp16)[name = tensor("op_46593_cast_fp16")]; tensor var_46600_begin_0 = const()[name = tensor("op_46600_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46600_end_0 = const()[name = tensor("op_46600_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46600_end_mask_0 = const()[name = tensor("op_46600_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46600_cast_fp16 = slice_by_index(begin = var_46600_begin_0, end = var_46600_end_0, end_mask = var_46600_end_mask_0, x = var_46268_cast_fp16)[name = tensor("op_46600_cast_fp16")]; tensor var_46607_begin_0 = const()[name = tensor("op_46607_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46607_end_0 = const()[name = tensor("op_46607_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46607_end_mask_0 = const()[name = tensor("op_46607_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46607_cast_fp16 = slice_by_index(begin = var_46607_begin_0, end = var_46607_end_0, end_mask = var_46607_end_mask_0, x = var_46268_cast_fp16)[name = tensor("op_46607_cast_fp16")]; tensor var_46614_begin_0 = const()[name = tensor("op_46614_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46614_end_0 = const()[name = tensor("op_46614_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46614_end_mask_0 = const()[name = tensor("op_46614_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46614_cast_fp16 = slice_by_index(begin = var_46614_begin_0, end = var_46614_end_0, end_mask = var_46614_end_mask_0, x = var_46268_cast_fp16)[name = tensor("op_46614_cast_fp16")]; tensor var_46621_begin_0 = const()[name = tensor("op_46621_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46621_end_0 = const()[name = tensor("op_46621_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46621_end_mask_0 = const()[name = tensor("op_46621_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46621_cast_fp16 = slice_by_index(begin = var_46621_begin_0, end = var_46621_end_0, end_mask = var_46621_end_mask_0, x = var_46272_cast_fp16)[name = tensor("op_46621_cast_fp16")]; tensor var_46628_begin_0 = const()[name = tensor("op_46628_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46628_end_0 = const()[name = tensor("op_46628_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46628_end_mask_0 = const()[name = tensor("op_46628_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46628_cast_fp16 = slice_by_index(begin = var_46628_begin_0, end = var_46628_end_0, end_mask = var_46628_end_mask_0, x = var_46272_cast_fp16)[name = tensor("op_46628_cast_fp16")]; tensor var_46635_begin_0 = const()[name = tensor("op_46635_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46635_end_0 = const()[name = tensor("op_46635_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46635_end_mask_0 = const()[name = tensor("op_46635_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46635_cast_fp16 = slice_by_index(begin = var_46635_begin_0, end = var_46635_end_0, end_mask = var_46635_end_mask_0, x = var_46272_cast_fp16)[name = tensor("op_46635_cast_fp16")]; tensor var_46642_begin_0 = const()[name = tensor("op_46642_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46642_end_0 = const()[name = tensor("op_46642_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46642_end_mask_0 = const()[name = tensor("op_46642_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46642_cast_fp16 = slice_by_index(begin = var_46642_begin_0, end = var_46642_end_0, end_mask = var_46642_end_mask_0, x = var_46272_cast_fp16)[name = tensor("op_46642_cast_fp16")]; tensor var_46649_begin_0 = const()[name = tensor("op_46649_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46649_end_0 = const()[name = tensor("op_46649_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46649_end_mask_0 = const()[name = tensor("op_46649_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46649_cast_fp16 = slice_by_index(begin = var_46649_begin_0, end = var_46649_end_0, end_mask = var_46649_end_mask_0, x = var_46276_cast_fp16)[name = tensor("op_46649_cast_fp16")]; tensor var_46656_begin_0 = const()[name = tensor("op_46656_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46656_end_0 = const()[name = tensor("op_46656_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46656_end_mask_0 = const()[name = tensor("op_46656_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46656_cast_fp16 = slice_by_index(begin = var_46656_begin_0, end = var_46656_end_0, end_mask = var_46656_end_mask_0, x = var_46276_cast_fp16)[name = tensor("op_46656_cast_fp16")]; tensor var_46663_begin_0 = const()[name = tensor("op_46663_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46663_end_0 = const()[name = tensor("op_46663_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46663_end_mask_0 = const()[name = tensor("op_46663_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46663_cast_fp16 = slice_by_index(begin = var_46663_begin_0, end = var_46663_end_0, end_mask = var_46663_end_mask_0, x = var_46276_cast_fp16)[name = tensor("op_46663_cast_fp16")]; tensor var_46670_begin_0 = const()[name = tensor("op_46670_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46670_end_0 = const()[name = tensor("op_46670_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46670_end_mask_0 = const()[name = tensor("op_46670_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46670_cast_fp16 = slice_by_index(begin = var_46670_begin_0, end = var_46670_end_0, end_mask = var_46670_end_mask_0, x = var_46276_cast_fp16)[name = tensor("op_46670_cast_fp16")]; tensor var_46677_begin_0 = const()[name = tensor("op_46677_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46677_end_0 = const()[name = tensor("op_46677_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46677_end_mask_0 = const()[name = tensor("op_46677_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46677_cast_fp16 = slice_by_index(begin = var_46677_begin_0, end = var_46677_end_0, end_mask = var_46677_end_mask_0, x = var_46280_cast_fp16)[name = tensor("op_46677_cast_fp16")]; tensor var_46684_begin_0 = const()[name = tensor("op_46684_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46684_end_0 = const()[name = tensor("op_46684_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46684_end_mask_0 = const()[name = tensor("op_46684_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46684_cast_fp16 = slice_by_index(begin = var_46684_begin_0, end = var_46684_end_0, end_mask = var_46684_end_mask_0, x = var_46280_cast_fp16)[name = tensor("op_46684_cast_fp16")]; tensor var_46691_begin_0 = const()[name = tensor("op_46691_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46691_end_0 = const()[name = tensor("op_46691_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46691_end_mask_0 = const()[name = tensor("op_46691_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46691_cast_fp16 = slice_by_index(begin = var_46691_begin_0, end = var_46691_end_0, end_mask = var_46691_end_mask_0, x = var_46280_cast_fp16)[name = tensor("op_46691_cast_fp16")]; tensor var_46698_begin_0 = const()[name = tensor("op_46698_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46698_end_0 = const()[name = tensor("op_46698_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46698_end_mask_0 = const()[name = tensor("op_46698_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46698_cast_fp16 = slice_by_index(begin = var_46698_begin_0, end = var_46698_end_0, end_mask = var_46698_end_mask_0, x = var_46280_cast_fp16)[name = tensor("op_46698_cast_fp16")]; tensor var_46705_begin_0 = const()[name = tensor("op_46705_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46705_end_0 = const()[name = tensor("op_46705_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46705_end_mask_0 = const()[name = tensor("op_46705_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46705_cast_fp16 = slice_by_index(begin = var_46705_begin_0, end = var_46705_end_0, end_mask = var_46705_end_mask_0, x = var_46284_cast_fp16)[name = tensor("op_46705_cast_fp16")]; tensor var_46712_begin_0 = const()[name = tensor("op_46712_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46712_end_0 = const()[name = tensor("op_46712_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46712_end_mask_0 = const()[name = tensor("op_46712_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46712_cast_fp16 = slice_by_index(begin = var_46712_begin_0, end = var_46712_end_0, end_mask = var_46712_end_mask_0, x = var_46284_cast_fp16)[name = tensor("op_46712_cast_fp16")]; tensor var_46719_begin_0 = const()[name = tensor("op_46719_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46719_end_0 = const()[name = tensor("op_46719_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46719_end_mask_0 = const()[name = tensor("op_46719_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46719_cast_fp16 = slice_by_index(begin = var_46719_begin_0, end = var_46719_end_0, end_mask = var_46719_end_mask_0, x = var_46284_cast_fp16)[name = tensor("op_46719_cast_fp16")]; tensor var_46726_begin_0 = const()[name = tensor("op_46726_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46726_end_0 = const()[name = tensor("op_46726_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46726_end_mask_0 = const()[name = tensor("op_46726_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46726_cast_fp16 = slice_by_index(begin = var_46726_begin_0, end = var_46726_end_0, end_mask = var_46726_end_mask_0, x = var_46284_cast_fp16)[name = tensor("op_46726_cast_fp16")]; tensor var_46733_begin_0 = const()[name = tensor("op_46733_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46733_end_0 = const()[name = tensor("op_46733_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46733_end_mask_0 = const()[name = tensor("op_46733_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46733_cast_fp16 = slice_by_index(begin = var_46733_begin_0, end = var_46733_end_0, end_mask = var_46733_end_mask_0, x = var_46288_cast_fp16)[name = tensor("op_46733_cast_fp16")]; tensor var_46740_begin_0 = const()[name = tensor("op_46740_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46740_end_0 = const()[name = tensor("op_46740_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46740_end_mask_0 = const()[name = tensor("op_46740_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46740_cast_fp16 = slice_by_index(begin = var_46740_begin_0, end = var_46740_end_0, end_mask = var_46740_end_mask_0, x = var_46288_cast_fp16)[name = tensor("op_46740_cast_fp16")]; tensor var_46747_begin_0 = const()[name = tensor("op_46747_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46747_end_0 = const()[name = tensor("op_46747_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46747_end_mask_0 = const()[name = tensor("op_46747_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46747_cast_fp16 = slice_by_index(begin = var_46747_begin_0, end = var_46747_end_0, end_mask = var_46747_end_mask_0, x = var_46288_cast_fp16)[name = tensor("op_46747_cast_fp16")]; tensor var_46754_begin_0 = const()[name = tensor("op_46754_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46754_end_0 = const()[name = tensor("op_46754_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46754_end_mask_0 = const()[name = tensor("op_46754_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46754_cast_fp16 = slice_by_index(begin = var_46754_begin_0, end = var_46754_end_0, end_mask = var_46754_end_mask_0, x = var_46288_cast_fp16)[name = tensor("op_46754_cast_fp16")]; tensor var_46761_begin_0 = const()[name = tensor("op_46761_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46761_end_0 = const()[name = tensor("op_46761_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46761_end_mask_0 = const()[name = tensor("op_46761_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46761_cast_fp16 = slice_by_index(begin = var_46761_begin_0, end = var_46761_end_0, end_mask = var_46761_end_mask_0, x = var_46292_cast_fp16)[name = tensor("op_46761_cast_fp16")]; tensor var_46768_begin_0 = const()[name = tensor("op_46768_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46768_end_0 = const()[name = tensor("op_46768_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46768_end_mask_0 = const()[name = tensor("op_46768_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46768_cast_fp16 = slice_by_index(begin = var_46768_begin_0, end = var_46768_end_0, end_mask = var_46768_end_mask_0, x = var_46292_cast_fp16)[name = tensor("op_46768_cast_fp16")]; tensor var_46775_begin_0 = const()[name = tensor("op_46775_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46775_end_0 = const()[name = tensor("op_46775_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46775_end_mask_0 = const()[name = tensor("op_46775_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46775_cast_fp16 = slice_by_index(begin = var_46775_begin_0, end = var_46775_end_0, end_mask = var_46775_end_mask_0, x = var_46292_cast_fp16)[name = tensor("op_46775_cast_fp16")]; tensor var_46782_begin_0 = const()[name = tensor("op_46782_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46782_end_0 = const()[name = tensor("op_46782_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46782_end_mask_0 = const()[name = tensor("op_46782_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46782_cast_fp16 = slice_by_index(begin = var_46782_begin_0, end = var_46782_end_0, end_mask = var_46782_end_mask_0, x = var_46292_cast_fp16)[name = tensor("op_46782_cast_fp16")]; tensor var_46789_begin_0 = const()[name = tensor("op_46789_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46789_end_0 = const()[name = tensor("op_46789_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46789_end_mask_0 = const()[name = tensor("op_46789_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46789_cast_fp16 = slice_by_index(begin = var_46789_begin_0, end = var_46789_end_0, end_mask = var_46789_end_mask_0, x = var_46296_cast_fp16)[name = tensor("op_46789_cast_fp16")]; tensor var_46796_begin_0 = const()[name = tensor("op_46796_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46796_end_0 = const()[name = tensor("op_46796_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46796_end_mask_0 = const()[name = tensor("op_46796_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46796_cast_fp16 = slice_by_index(begin = var_46796_begin_0, end = var_46796_end_0, end_mask = var_46796_end_mask_0, x = var_46296_cast_fp16)[name = tensor("op_46796_cast_fp16")]; tensor var_46803_begin_0 = const()[name = tensor("op_46803_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46803_end_0 = const()[name = tensor("op_46803_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46803_end_mask_0 = const()[name = tensor("op_46803_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46803_cast_fp16 = slice_by_index(begin = var_46803_begin_0, end = var_46803_end_0, end_mask = var_46803_end_mask_0, x = var_46296_cast_fp16)[name = tensor("op_46803_cast_fp16")]; tensor var_46810_begin_0 = const()[name = tensor("op_46810_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46810_end_0 = const()[name = tensor("op_46810_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46810_end_mask_0 = const()[name = tensor("op_46810_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46810_cast_fp16 = slice_by_index(begin = var_46810_begin_0, end = var_46810_end_0, end_mask = var_46810_end_mask_0, x = var_46296_cast_fp16)[name = tensor("op_46810_cast_fp16")]; tensor var_46817_begin_0 = const()[name = tensor("op_46817_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46817_end_0 = const()[name = tensor("op_46817_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46817_end_mask_0 = const()[name = tensor("op_46817_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46817_cast_fp16 = slice_by_index(begin = var_46817_begin_0, end = var_46817_end_0, end_mask = var_46817_end_mask_0, x = var_46300_cast_fp16)[name = tensor("op_46817_cast_fp16")]; tensor var_46824_begin_0 = const()[name = tensor("op_46824_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46824_end_0 = const()[name = tensor("op_46824_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46824_end_mask_0 = const()[name = tensor("op_46824_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46824_cast_fp16 = slice_by_index(begin = var_46824_begin_0, end = var_46824_end_0, end_mask = var_46824_end_mask_0, x = var_46300_cast_fp16)[name = tensor("op_46824_cast_fp16")]; tensor var_46831_begin_0 = const()[name = tensor("op_46831_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46831_end_0 = const()[name = tensor("op_46831_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46831_end_mask_0 = const()[name = tensor("op_46831_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46831_cast_fp16 = slice_by_index(begin = var_46831_begin_0, end = var_46831_end_0, end_mask = var_46831_end_mask_0, x = var_46300_cast_fp16)[name = tensor("op_46831_cast_fp16")]; tensor var_46838_begin_0 = const()[name = tensor("op_46838_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46838_end_0 = const()[name = tensor("op_46838_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46838_end_mask_0 = const()[name = tensor("op_46838_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46838_cast_fp16 = slice_by_index(begin = var_46838_begin_0, end = var_46838_end_0, end_mask = var_46838_end_mask_0, x = var_46300_cast_fp16)[name = tensor("op_46838_cast_fp16")]; tensor var_46845_begin_0 = const()[name = tensor("op_46845_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46845_end_0 = const()[name = tensor("op_46845_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_46845_end_mask_0 = const()[name = tensor("op_46845_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46845_cast_fp16 = slice_by_index(begin = var_46845_begin_0, end = var_46845_end_0, end_mask = var_46845_end_mask_0, x = var_46304_cast_fp16)[name = tensor("op_46845_cast_fp16")]; tensor var_46852_begin_0 = const()[name = tensor("op_46852_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_46852_end_0 = const()[name = tensor("op_46852_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_46852_end_mask_0 = const()[name = tensor("op_46852_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46852_cast_fp16 = slice_by_index(begin = var_46852_begin_0, end = var_46852_end_0, end_mask = var_46852_end_mask_0, x = var_46304_cast_fp16)[name = tensor("op_46852_cast_fp16")]; tensor var_46859_begin_0 = const()[name = tensor("op_46859_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_46859_end_0 = const()[name = tensor("op_46859_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_46859_end_mask_0 = const()[name = tensor("op_46859_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46859_cast_fp16 = slice_by_index(begin = var_46859_begin_0, end = var_46859_end_0, end_mask = var_46859_end_mask_0, x = var_46304_cast_fp16)[name = tensor("op_46859_cast_fp16")]; tensor var_46866_begin_0 = const()[name = tensor("op_46866_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_46866_end_0 = const()[name = tensor("op_46866_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46866_end_mask_0 = const()[name = tensor("op_46866_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46866_cast_fp16 = slice_by_index(begin = var_46866_begin_0, end = var_46866_end_0, end_mask = var_46866_end_mask_0, x = var_46304_cast_fp16)[name = tensor("op_46866_cast_fp16")]; tensor k_59_perm_0 = const()[name = tensor("k_59_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_46871_begin_0 = const()[name = tensor("op_46871_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46871_end_0 = const()[name = tensor("op_46871_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_46871_end_mask_0 = const()[name = tensor("op_46871_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_59_cast_fp16 = transpose(perm = k_59_perm_0, x = key_59_cast_fp16)[name = tensor("transpose_2")]; tensor var_46871_cast_fp16 = slice_by_index(begin = var_46871_begin_0, end = var_46871_end_0, end_mask = var_46871_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46871_cast_fp16")]; tensor var_46875_begin_0 = const()[name = tensor("op_46875_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_46875_end_0 = const()[name = tensor("op_46875_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_46875_end_mask_0 = const()[name = tensor("op_46875_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46875_cast_fp16 = slice_by_index(begin = var_46875_begin_0, end = var_46875_end_0, end_mask = var_46875_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46875_cast_fp16")]; tensor var_46879_begin_0 = const()[name = tensor("op_46879_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_46879_end_0 = const()[name = tensor("op_46879_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_46879_end_mask_0 = const()[name = tensor("op_46879_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46879_cast_fp16 = slice_by_index(begin = var_46879_begin_0, end = var_46879_end_0, end_mask = var_46879_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46879_cast_fp16")]; tensor var_46883_begin_0 = const()[name = tensor("op_46883_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_46883_end_0 = const()[name = tensor("op_46883_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_46883_end_mask_0 = const()[name = tensor("op_46883_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46883_cast_fp16 = slice_by_index(begin = var_46883_begin_0, end = var_46883_end_0, end_mask = var_46883_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46883_cast_fp16")]; tensor var_46887_begin_0 = const()[name = tensor("op_46887_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_46887_end_0 = const()[name = tensor("op_46887_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_46887_end_mask_0 = const()[name = tensor("op_46887_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46887_cast_fp16 = slice_by_index(begin = var_46887_begin_0, end = var_46887_end_0, end_mask = var_46887_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46887_cast_fp16")]; tensor var_46891_begin_0 = const()[name = tensor("op_46891_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_46891_end_0 = const()[name = tensor("op_46891_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_46891_end_mask_0 = const()[name = tensor("op_46891_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46891_cast_fp16 = slice_by_index(begin = var_46891_begin_0, end = var_46891_end_0, end_mask = var_46891_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46891_cast_fp16")]; tensor var_46895_begin_0 = const()[name = tensor("op_46895_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_46895_end_0 = const()[name = tensor("op_46895_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_46895_end_mask_0 = const()[name = tensor("op_46895_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46895_cast_fp16 = slice_by_index(begin = var_46895_begin_0, end = var_46895_end_0, end_mask = var_46895_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46895_cast_fp16")]; tensor var_46899_begin_0 = const()[name = tensor("op_46899_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_46899_end_0 = const()[name = tensor("op_46899_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_46899_end_mask_0 = const()[name = tensor("op_46899_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46899_cast_fp16 = slice_by_index(begin = var_46899_begin_0, end = var_46899_end_0, end_mask = var_46899_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46899_cast_fp16")]; tensor var_46903_begin_0 = const()[name = tensor("op_46903_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_46903_end_0 = const()[name = tensor("op_46903_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_46903_end_mask_0 = const()[name = tensor("op_46903_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46903_cast_fp16 = slice_by_index(begin = var_46903_begin_0, end = var_46903_end_0, end_mask = var_46903_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46903_cast_fp16")]; tensor var_46907_begin_0 = const()[name = tensor("op_46907_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_46907_end_0 = const()[name = tensor("op_46907_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_46907_end_mask_0 = const()[name = tensor("op_46907_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46907_cast_fp16 = slice_by_index(begin = var_46907_begin_0, end = var_46907_end_0, end_mask = var_46907_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46907_cast_fp16")]; tensor var_46911_begin_0 = const()[name = tensor("op_46911_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_46911_end_0 = const()[name = tensor("op_46911_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_46911_end_mask_0 = const()[name = tensor("op_46911_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46911_cast_fp16 = slice_by_index(begin = var_46911_begin_0, end = var_46911_end_0, end_mask = var_46911_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46911_cast_fp16")]; tensor var_46915_begin_0 = const()[name = tensor("op_46915_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_46915_end_0 = const()[name = tensor("op_46915_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_46915_end_mask_0 = const()[name = tensor("op_46915_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46915_cast_fp16 = slice_by_index(begin = var_46915_begin_0, end = var_46915_end_0, end_mask = var_46915_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46915_cast_fp16")]; tensor var_46919_begin_0 = const()[name = tensor("op_46919_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_46919_end_0 = const()[name = tensor("op_46919_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_46919_end_mask_0 = const()[name = tensor("op_46919_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46919_cast_fp16 = slice_by_index(begin = var_46919_begin_0, end = var_46919_end_0, end_mask = var_46919_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46919_cast_fp16")]; tensor var_46923_begin_0 = const()[name = tensor("op_46923_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_46923_end_0 = const()[name = tensor("op_46923_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_46923_end_mask_0 = const()[name = tensor("op_46923_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46923_cast_fp16 = slice_by_index(begin = var_46923_begin_0, end = var_46923_end_0, end_mask = var_46923_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46923_cast_fp16")]; tensor var_46927_begin_0 = const()[name = tensor("op_46927_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_46927_end_0 = const()[name = tensor("op_46927_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_46927_end_mask_0 = const()[name = tensor("op_46927_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46927_cast_fp16 = slice_by_index(begin = var_46927_begin_0, end = var_46927_end_0, end_mask = var_46927_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46927_cast_fp16")]; tensor var_46931_begin_0 = const()[name = tensor("op_46931_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_46931_end_0 = const()[name = tensor("op_46931_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_46931_end_mask_0 = const()[name = tensor("op_46931_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46931_cast_fp16 = slice_by_index(begin = var_46931_begin_0, end = var_46931_end_0, end_mask = var_46931_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46931_cast_fp16")]; tensor var_46935_begin_0 = const()[name = tensor("op_46935_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_46935_end_0 = const()[name = tensor("op_46935_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_46935_end_mask_0 = const()[name = tensor("op_46935_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46935_cast_fp16 = slice_by_index(begin = var_46935_begin_0, end = var_46935_end_0, end_mask = var_46935_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46935_cast_fp16")]; tensor var_46939_begin_0 = const()[name = tensor("op_46939_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_46939_end_0 = const()[name = tensor("op_46939_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_46939_end_mask_0 = const()[name = tensor("op_46939_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46939_cast_fp16 = slice_by_index(begin = var_46939_begin_0, end = var_46939_end_0, end_mask = var_46939_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46939_cast_fp16")]; tensor var_46943_begin_0 = const()[name = tensor("op_46943_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_46943_end_0 = const()[name = tensor("op_46943_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_46943_end_mask_0 = const()[name = tensor("op_46943_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46943_cast_fp16 = slice_by_index(begin = var_46943_begin_0, end = var_46943_end_0, end_mask = var_46943_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46943_cast_fp16")]; tensor var_46947_begin_0 = const()[name = tensor("op_46947_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_46947_end_0 = const()[name = tensor("op_46947_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_46947_end_mask_0 = const()[name = tensor("op_46947_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_46947_cast_fp16 = slice_by_index(begin = var_46947_begin_0, end = var_46947_end_0, end_mask = var_46947_end_mask_0, x = k_59_cast_fp16)[name = tensor("op_46947_cast_fp16")]; tensor var_46949_begin_0 = const()[name = tensor("op_46949_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_46949_end_0 = const()[name = tensor("op_46949_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_46949_end_mask_0 = const()[name = tensor("op_46949_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46949_cast_fp16 = slice_by_index(begin = var_46949_begin_0, end = var_46949_end_0, end_mask = var_46949_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46949_cast_fp16")]; tensor var_46953_begin_0 = const()[name = tensor("op_46953_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_46953_end_0 = const()[name = tensor("op_46953_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_46953_end_mask_0 = const()[name = tensor("op_46953_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46953_cast_fp16 = slice_by_index(begin = var_46953_begin_0, end = var_46953_end_0, end_mask = var_46953_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46953_cast_fp16")]; tensor var_46957_begin_0 = const()[name = tensor("op_46957_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_46957_end_0 = const()[name = tensor("op_46957_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_46957_end_mask_0 = const()[name = tensor("op_46957_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46957_cast_fp16 = slice_by_index(begin = var_46957_begin_0, end = var_46957_end_0, end_mask = var_46957_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46957_cast_fp16")]; tensor var_46961_begin_0 = const()[name = tensor("op_46961_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_46961_end_0 = const()[name = tensor("op_46961_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_46961_end_mask_0 = const()[name = tensor("op_46961_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46961_cast_fp16 = slice_by_index(begin = var_46961_begin_0, end = var_46961_end_0, end_mask = var_46961_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46961_cast_fp16")]; tensor var_46965_begin_0 = const()[name = tensor("op_46965_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_46965_end_0 = const()[name = tensor("op_46965_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_46965_end_mask_0 = const()[name = tensor("op_46965_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46965_cast_fp16 = slice_by_index(begin = var_46965_begin_0, end = var_46965_end_0, end_mask = var_46965_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46965_cast_fp16")]; tensor var_46969_begin_0 = const()[name = tensor("op_46969_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_46969_end_0 = const()[name = tensor("op_46969_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_46969_end_mask_0 = const()[name = tensor("op_46969_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46969_cast_fp16 = slice_by_index(begin = var_46969_begin_0, end = var_46969_end_0, end_mask = var_46969_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46969_cast_fp16")]; tensor var_46973_begin_0 = const()[name = tensor("op_46973_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_46973_end_0 = const()[name = tensor("op_46973_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_46973_end_mask_0 = const()[name = tensor("op_46973_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46973_cast_fp16 = slice_by_index(begin = var_46973_begin_0, end = var_46973_end_0, end_mask = var_46973_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46973_cast_fp16")]; tensor var_46977_begin_0 = const()[name = tensor("op_46977_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_46977_end_0 = const()[name = tensor("op_46977_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_46977_end_mask_0 = const()[name = tensor("op_46977_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46977_cast_fp16 = slice_by_index(begin = var_46977_begin_0, end = var_46977_end_0, end_mask = var_46977_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46977_cast_fp16")]; tensor var_46981_begin_0 = const()[name = tensor("op_46981_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_46981_end_0 = const()[name = tensor("op_46981_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_46981_end_mask_0 = const()[name = tensor("op_46981_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46981_cast_fp16 = slice_by_index(begin = var_46981_begin_0, end = var_46981_end_0, end_mask = var_46981_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46981_cast_fp16")]; tensor var_46985_begin_0 = const()[name = tensor("op_46985_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_46985_end_0 = const()[name = tensor("op_46985_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_46985_end_mask_0 = const()[name = tensor("op_46985_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46985_cast_fp16 = slice_by_index(begin = var_46985_begin_0, end = var_46985_end_0, end_mask = var_46985_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46985_cast_fp16")]; tensor var_46989_begin_0 = const()[name = tensor("op_46989_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_46989_end_0 = const()[name = tensor("op_46989_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_46989_end_mask_0 = const()[name = tensor("op_46989_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46989_cast_fp16 = slice_by_index(begin = var_46989_begin_0, end = var_46989_end_0, end_mask = var_46989_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46989_cast_fp16")]; tensor var_46993_begin_0 = const()[name = tensor("op_46993_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_46993_end_0 = const()[name = tensor("op_46993_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_46993_end_mask_0 = const()[name = tensor("op_46993_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46993_cast_fp16 = slice_by_index(begin = var_46993_begin_0, end = var_46993_end_0, end_mask = var_46993_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46993_cast_fp16")]; tensor var_46997_begin_0 = const()[name = tensor("op_46997_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_46997_end_0 = const()[name = tensor("op_46997_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_46997_end_mask_0 = const()[name = tensor("op_46997_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_46997_cast_fp16 = slice_by_index(begin = var_46997_begin_0, end = var_46997_end_0, end_mask = var_46997_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_46997_cast_fp16")]; tensor var_47001_begin_0 = const()[name = tensor("op_47001_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_47001_end_0 = const()[name = tensor("op_47001_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_47001_end_mask_0 = const()[name = tensor("op_47001_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47001_cast_fp16 = slice_by_index(begin = var_47001_begin_0, end = var_47001_end_0, end_mask = var_47001_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_47001_cast_fp16")]; tensor var_47005_begin_0 = const()[name = tensor("op_47005_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_47005_end_0 = const()[name = tensor("op_47005_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_47005_end_mask_0 = const()[name = tensor("op_47005_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47005_cast_fp16 = slice_by_index(begin = var_47005_begin_0, end = var_47005_end_0, end_mask = var_47005_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_47005_cast_fp16")]; tensor var_47009_begin_0 = const()[name = tensor("op_47009_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_47009_end_0 = const()[name = tensor("op_47009_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_47009_end_mask_0 = const()[name = tensor("op_47009_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47009_cast_fp16 = slice_by_index(begin = var_47009_begin_0, end = var_47009_end_0, end_mask = var_47009_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_47009_cast_fp16")]; tensor var_47013_begin_0 = const()[name = tensor("op_47013_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_47013_end_0 = const()[name = tensor("op_47013_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_47013_end_mask_0 = const()[name = tensor("op_47013_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47013_cast_fp16 = slice_by_index(begin = var_47013_begin_0, end = var_47013_end_0, end_mask = var_47013_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_47013_cast_fp16")]; tensor var_47017_begin_0 = const()[name = tensor("op_47017_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_47017_end_0 = const()[name = tensor("op_47017_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_47017_end_mask_0 = const()[name = tensor("op_47017_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47017_cast_fp16 = slice_by_index(begin = var_47017_begin_0, end = var_47017_end_0, end_mask = var_47017_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_47017_cast_fp16")]; tensor var_47021_begin_0 = const()[name = tensor("op_47021_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_47021_end_0 = const()[name = tensor("op_47021_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_47021_end_mask_0 = const()[name = tensor("op_47021_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47021_cast_fp16 = slice_by_index(begin = var_47021_begin_0, end = var_47021_end_0, end_mask = var_47021_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_47021_cast_fp16")]; tensor var_47025_begin_0 = const()[name = tensor("op_47025_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_47025_end_0 = const()[name = tensor("op_47025_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_47025_end_mask_0 = const()[name = tensor("op_47025_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47025_cast_fp16 = slice_by_index(begin = var_47025_begin_0, end = var_47025_end_0, end_mask = var_47025_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_47025_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4641_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4641_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4641_equation_0, values = (var_46871_cast_fp16, var_46313_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4641_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4643_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4643_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4643_equation_0, values = (var_46871_cast_fp16, var_46320_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4643_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4645_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4645_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4645_equation_0, values = (var_46871_cast_fp16, var_46327_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4645_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4647_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4647_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4647_equation_0, values = (var_46871_cast_fp16, var_46334_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4647_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4649_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4649_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4649_equation_0, values = (var_46875_cast_fp16, var_46341_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4649_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4651_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4651_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4651_equation_0, values = (var_46875_cast_fp16, var_46348_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4651_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4653_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4653_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4653_equation_0, values = (var_46875_cast_fp16, var_46355_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4653_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4655_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4655_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4655_equation_0, values = (var_46875_cast_fp16, var_46362_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4655_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4657_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4657_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4657_equation_0, values = (var_46879_cast_fp16, var_46369_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4657_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4659_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4659_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4659_equation_0, values = (var_46879_cast_fp16, var_46376_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4659_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4661_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4661_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4661_equation_0, values = (var_46879_cast_fp16, var_46383_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4661_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4663_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4663_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4663_equation_0, values = (var_46879_cast_fp16, var_46390_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4663_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4665_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4665_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4665_equation_0, values = (var_46883_cast_fp16, var_46397_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4665_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4667_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4667_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4667_equation_0, values = (var_46883_cast_fp16, var_46404_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4667_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4669_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4669_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4669_equation_0, values = (var_46883_cast_fp16, var_46411_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4669_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4671_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4671_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4671_equation_0, values = (var_46883_cast_fp16, var_46418_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4671_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4673_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4673_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4673_equation_0, values = (var_46887_cast_fp16, var_46425_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4673_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4675_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4675_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4675_equation_0, values = (var_46887_cast_fp16, var_46432_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4675_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4677_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4677_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4677_equation_0, values = (var_46887_cast_fp16, var_46439_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4677_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4679_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4679_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4679_equation_0, values = (var_46887_cast_fp16, var_46446_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4679_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4681_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4681_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4681_equation_0, values = (var_46891_cast_fp16, var_46453_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4681_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4683_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4683_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4683_equation_0, values = (var_46891_cast_fp16, var_46460_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4683_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4685_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4685_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4685_equation_0, values = (var_46891_cast_fp16, var_46467_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4685_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4687_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4687_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4687_equation_0, values = (var_46891_cast_fp16, var_46474_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4687_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4689_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4689_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4689_equation_0, values = (var_46895_cast_fp16, var_46481_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4689_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4691_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4691_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4691_equation_0, values = (var_46895_cast_fp16, var_46488_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4691_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4693_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4693_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4693_equation_0, values = (var_46895_cast_fp16, var_46495_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4693_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4695_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4695_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4695_equation_0, values = (var_46895_cast_fp16, var_46502_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4695_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4697_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4697_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4697_equation_0, values = (var_46899_cast_fp16, var_46509_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4697_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4699_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4699_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4699_equation_0, values = (var_46899_cast_fp16, var_46516_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4699_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4701_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4701_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4701_equation_0, values = (var_46899_cast_fp16, var_46523_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4701_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4703_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4703_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4703_equation_0, values = (var_46899_cast_fp16, var_46530_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4703_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4705_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4705_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4705_equation_0, values = (var_46903_cast_fp16, var_46537_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4705_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4707_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4707_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4707_equation_0, values = (var_46903_cast_fp16, var_46544_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4707_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4709_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4709_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4709_equation_0, values = (var_46903_cast_fp16, var_46551_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4709_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4711_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4711_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4711_equation_0, values = (var_46903_cast_fp16, var_46558_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4711_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4713_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4713_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4713_equation_0, values = (var_46907_cast_fp16, var_46565_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4713_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4715_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4715_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4715_equation_0, values = (var_46907_cast_fp16, var_46572_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4715_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4717_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4717_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4717_equation_0, values = (var_46907_cast_fp16, var_46579_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4717_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4719_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4719_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4719_equation_0, values = (var_46907_cast_fp16, var_46586_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4719_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4721_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4721_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4721_equation_0, values = (var_46911_cast_fp16, var_46593_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4721_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4723_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4723_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4723_equation_0, values = (var_46911_cast_fp16, var_46600_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4723_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4725_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4725_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4725_equation_0, values = (var_46911_cast_fp16, var_46607_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4725_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4727_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4727_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4727_equation_0, values = (var_46911_cast_fp16, var_46614_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4727_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4729_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4729_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4729_equation_0, values = (var_46915_cast_fp16, var_46621_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4729_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4731_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4731_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4731_equation_0, values = (var_46915_cast_fp16, var_46628_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4731_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4733_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4733_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4733_equation_0, values = (var_46915_cast_fp16, var_46635_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4733_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4735_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4735_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4735_equation_0, values = (var_46915_cast_fp16, var_46642_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4735_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4737_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4737_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4737_equation_0, values = (var_46919_cast_fp16, var_46649_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4737_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4739_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4739_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4739_equation_0, values = (var_46919_cast_fp16, var_46656_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4739_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4741_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4741_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4741_equation_0, values = (var_46919_cast_fp16, var_46663_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4741_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4743_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4743_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4743_equation_0, values = (var_46919_cast_fp16, var_46670_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4743_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4745_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4745_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4745_equation_0, values = (var_46923_cast_fp16, var_46677_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4745_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4747_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4747_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4747_equation_0, values = (var_46923_cast_fp16, var_46684_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4747_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4749_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4749_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4749_equation_0, values = (var_46923_cast_fp16, var_46691_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4749_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4751_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4751_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4751_equation_0, values = (var_46923_cast_fp16, var_46698_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4751_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4753_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4753_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4753_equation_0, values = (var_46927_cast_fp16, var_46705_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4753_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4755_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4755_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4755_equation_0, values = (var_46927_cast_fp16, var_46712_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4755_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4757_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4757_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4757_equation_0, values = (var_46927_cast_fp16, var_46719_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4757_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4759_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4759_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4759_equation_0, values = (var_46927_cast_fp16, var_46726_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4759_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4761_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4761_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4761_equation_0, values = (var_46931_cast_fp16, var_46733_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4761_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4763_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4763_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4763_equation_0, values = (var_46931_cast_fp16, var_46740_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4763_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4765_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4765_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4765_equation_0, values = (var_46931_cast_fp16, var_46747_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4765_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4767_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4767_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4767_equation_0, values = (var_46931_cast_fp16, var_46754_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4767_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4769_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4769_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4769_equation_0, values = (var_46935_cast_fp16, var_46761_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4769_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4771_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4771_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4771_equation_0, values = (var_46935_cast_fp16, var_46768_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4771_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4773_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4773_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4773_equation_0, values = (var_46935_cast_fp16, var_46775_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4773_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4775_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4775_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4775_equation_0, values = (var_46935_cast_fp16, var_46782_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4775_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4777_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4777_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4777_equation_0, values = (var_46939_cast_fp16, var_46789_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4777_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4779_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4779_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4779_equation_0, values = (var_46939_cast_fp16, var_46796_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4779_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4781_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4781_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4781_equation_0, values = (var_46939_cast_fp16, var_46803_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4781_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4783_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4783_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4783_equation_0, values = (var_46939_cast_fp16, var_46810_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4783_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4785_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4785_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4785_equation_0, values = (var_46943_cast_fp16, var_46817_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4785_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4787_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4787_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4787_equation_0, values = (var_46943_cast_fp16, var_46824_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4787_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4789_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4789_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4789_equation_0, values = (var_46943_cast_fp16, var_46831_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4789_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4791_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4791_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4791_equation_0, values = (var_46943_cast_fp16, var_46838_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4791_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4793_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4793_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4793_equation_0, values = (var_46947_cast_fp16, var_46845_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4793_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4795_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4795_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4795_equation_0, values = (var_46947_cast_fp16, var_46852_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4795_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4797_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4797_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4797_equation_0, values = (var_46947_cast_fp16, var_46859_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4797_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4799_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4799_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4799_equation_0, values = (var_46947_cast_fp16, var_46866_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4799_cast_fp16")]; tensor var_47188_to_fp16 = const()[name = tensor("op_47188_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4641_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4641_cast_fp16, y = var_47188_to_fp16)[name = tensor("aw_chunk_4641_cast_fp16")]; tensor var_47190_to_fp16 = const()[name = tensor("op_47190_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4643_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4643_cast_fp16, y = var_47190_to_fp16)[name = tensor("aw_chunk_4643_cast_fp16")]; tensor var_47192_to_fp16 = const()[name = tensor("op_47192_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4645_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4645_cast_fp16, y = var_47192_to_fp16)[name = tensor("aw_chunk_4645_cast_fp16")]; tensor var_47194_to_fp16 = const()[name = tensor("op_47194_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4647_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4647_cast_fp16, y = var_47194_to_fp16)[name = tensor("aw_chunk_4647_cast_fp16")]; tensor var_47196_to_fp16 = const()[name = tensor("op_47196_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4649_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4649_cast_fp16, y = var_47196_to_fp16)[name = tensor("aw_chunk_4649_cast_fp16")]; tensor var_47198_to_fp16 = const()[name = tensor("op_47198_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4651_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4651_cast_fp16, y = var_47198_to_fp16)[name = tensor("aw_chunk_4651_cast_fp16")]; tensor var_47200_to_fp16 = const()[name = tensor("op_47200_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4653_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4653_cast_fp16, y = var_47200_to_fp16)[name = tensor("aw_chunk_4653_cast_fp16")]; tensor var_47202_to_fp16 = const()[name = tensor("op_47202_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4655_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4655_cast_fp16, y = var_47202_to_fp16)[name = tensor("aw_chunk_4655_cast_fp16")]; tensor var_47204_to_fp16 = const()[name = tensor("op_47204_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4657_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4657_cast_fp16, y = var_47204_to_fp16)[name = tensor("aw_chunk_4657_cast_fp16")]; tensor var_47206_to_fp16 = const()[name = tensor("op_47206_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4659_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4659_cast_fp16, y = var_47206_to_fp16)[name = tensor("aw_chunk_4659_cast_fp16")]; tensor var_47208_to_fp16 = const()[name = tensor("op_47208_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4661_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4661_cast_fp16, y = var_47208_to_fp16)[name = tensor("aw_chunk_4661_cast_fp16")]; tensor var_47210_to_fp16 = const()[name = tensor("op_47210_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4663_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4663_cast_fp16, y = var_47210_to_fp16)[name = tensor("aw_chunk_4663_cast_fp16")]; tensor var_47212_to_fp16 = const()[name = tensor("op_47212_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4665_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4665_cast_fp16, y = var_47212_to_fp16)[name = tensor("aw_chunk_4665_cast_fp16")]; tensor var_47214_to_fp16 = const()[name = tensor("op_47214_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4667_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4667_cast_fp16, y = var_47214_to_fp16)[name = tensor("aw_chunk_4667_cast_fp16")]; tensor var_47216_to_fp16 = const()[name = tensor("op_47216_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4669_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4669_cast_fp16, y = var_47216_to_fp16)[name = tensor("aw_chunk_4669_cast_fp16")]; tensor var_47218_to_fp16 = const()[name = tensor("op_47218_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4671_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4671_cast_fp16, y = var_47218_to_fp16)[name = tensor("aw_chunk_4671_cast_fp16")]; tensor var_47220_to_fp16 = const()[name = tensor("op_47220_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4673_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4673_cast_fp16, y = var_47220_to_fp16)[name = tensor("aw_chunk_4673_cast_fp16")]; tensor var_47222_to_fp16 = const()[name = tensor("op_47222_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4675_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4675_cast_fp16, y = var_47222_to_fp16)[name = tensor("aw_chunk_4675_cast_fp16")]; tensor var_47224_to_fp16 = const()[name = tensor("op_47224_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4677_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4677_cast_fp16, y = var_47224_to_fp16)[name = tensor("aw_chunk_4677_cast_fp16")]; tensor var_47226_to_fp16 = const()[name = tensor("op_47226_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4679_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4679_cast_fp16, y = var_47226_to_fp16)[name = tensor("aw_chunk_4679_cast_fp16")]; tensor var_47228_to_fp16 = const()[name = tensor("op_47228_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4681_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4681_cast_fp16, y = var_47228_to_fp16)[name = tensor("aw_chunk_4681_cast_fp16")]; tensor var_47230_to_fp16 = const()[name = tensor("op_47230_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4683_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4683_cast_fp16, y = var_47230_to_fp16)[name = tensor("aw_chunk_4683_cast_fp16")]; tensor var_47232_to_fp16 = const()[name = tensor("op_47232_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4685_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4685_cast_fp16, y = var_47232_to_fp16)[name = tensor("aw_chunk_4685_cast_fp16")]; tensor var_47234_to_fp16 = const()[name = tensor("op_47234_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4687_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4687_cast_fp16, y = var_47234_to_fp16)[name = tensor("aw_chunk_4687_cast_fp16")]; tensor var_47236_to_fp16 = const()[name = tensor("op_47236_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4689_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4689_cast_fp16, y = var_47236_to_fp16)[name = tensor("aw_chunk_4689_cast_fp16")]; tensor var_47238_to_fp16 = const()[name = tensor("op_47238_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4691_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4691_cast_fp16, y = var_47238_to_fp16)[name = tensor("aw_chunk_4691_cast_fp16")]; tensor var_47240_to_fp16 = const()[name = tensor("op_47240_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4693_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4693_cast_fp16, y = var_47240_to_fp16)[name = tensor("aw_chunk_4693_cast_fp16")]; tensor var_47242_to_fp16 = const()[name = tensor("op_47242_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4695_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4695_cast_fp16, y = var_47242_to_fp16)[name = tensor("aw_chunk_4695_cast_fp16")]; tensor var_47244_to_fp16 = const()[name = tensor("op_47244_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4697_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4697_cast_fp16, y = var_47244_to_fp16)[name = tensor("aw_chunk_4697_cast_fp16")]; tensor var_47246_to_fp16 = const()[name = tensor("op_47246_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4699_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4699_cast_fp16, y = var_47246_to_fp16)[name = tensor("aw_chunk_4699_cast_fp16")]; tensor var_47248_to_fp16 = const()[name = tensor("op_47248_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4701_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4701_cast_fp16, y = var_47248_to_fp16)[name = tensor("aw_chunk_4701_cast_fp16")]; tensor var_47250_to_fp16 = const()[name = tensor("op_47250_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4703_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4703_cast_fp16, y = var_47250_to_fp16)[name = tensor("aw_chunk_4703_cast_fp16")]; tensor var_47252_to_fp16 = const()[name = tensor("op_47252_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4705_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4705_cast_fp16, y = var_47252_to_fp16)[name = tensor("aw_chunk_4705_cast_fp16")]; tensor var_47254_to_fp16 = const()[name = tensor("op_47254_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4707_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4707_cast_fp16, y = var_47254_to_fp16)[name = tensor("aw_chunk_4707_cast_fp16")]; tensor var_47256_to_fp16 = const()[name = tensor("op_47256_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4709_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4709_cast_fp16, y = var_47256_to_fp16)[name = tensor("aw_chunk_4709_cast_fp16")]; tensor var_47258_to_fp16 = const()[name = tensor("op_47258_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4711_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4711_cast_fp16, y = var_47258_to_fp16)[name = tensor("aw_chunk_4711_cast_fp16")]; tensor var_47260_to_fp16 = const()[name = tensor("op_47260_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4713_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4713_cast_fp16, y = var_47260_to_fp16)[name = tensor("aw_chunk_4713_cast_fp16")]; tensor var_47262_to_fp16 = const()[name = tensor("op_47262_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4715_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4715_cast_fp16, y = var_47262_to_fp16)[name = tensor("aw_chunk_4715_cast_fp16")]; tensor var_47264_to_fp16 = const()[name = tensor("op_47264_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4717_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4717_cast_fp16, y = var_47264_to_fp16)[name = tensor("aw_chunk_4717_cast_fp16")]; tensor var_47266_to_fp16 = const()[name = tensor("op_47266_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4719_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4719_cast_fp16, y = var_47266_to_fp16)[name = tensor("aw_chunk_4719_cast_fp16")]; tensor var_47268_to_fp16 = const()[name = tensor("op_47268_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4721_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4721_cast_fp16, y = var_47268_to_fp16)[name = tensor("aw_chunk_4721_cast_fp16")]; tensor var_47270_to_fp16 = const()[name = tensor("op_47270_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4723_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4723_cast_fp16, y = var_47270_to_fp16)[name = tensor("aw_chunk_4723_cast_fp16")]; tensor var_47272_to_fp16 = const()[name = tensor("op_47272_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4725_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4725_cast_fp16, y = var_47272_to_fp16)[name = tensor("aw_chunk_4725_cast_fp16")]; tensor var_47274_to_fp16 = const()[name = tensor("op_47274_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4727_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4727_cast_fp16, y = var_47274_to_fp16)[name = tensor("aw_chunk_4727_cast_fp16")]; tensor var_47276_to_fp16 = const()[name = tensor("op_47276_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4729_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4729_cast_fp16, y = var_47276_to_fp16)[name = tensor("aw_chunk_4729_cast_fp16")]; tensor var_47278_to_fp16 = const()[name = tensor("op_47278_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4731_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4731_cast_fp16, y = var_47278_to_fp16)[name = tensor("aw_chunk_4731_cast_fp16")]; tensor var_47280_to_fp16 = const()[name = tensor("op_47280_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4733_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4733_cast_fp16, y = var_47280_to_fp16)[name = tensor("aw_chunk_4733_cast_fp16")]; tensor var_47282_to_fp16 = const()[name = tensor("op_47282_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4735_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4735_cast_fp16, y = var_47282_to_fp16)[name = tensor("aw_chunk_4735_cast_fp16")]; tensor var_47284_to_fp16 = const()[name = tensor("op_47284_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4737_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4737_cast_fp16, y = var_47284_to_fp16)[name = tensor("aw_chunk_4737_cast_fp16")]; tensor var_47286_to_fp16 = const()[name = tensor("op_47286_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4739_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4739_cast_fp16, y = var_47286_to_fp16)[name = tensor("aw_chunk_4739_cast_fp16")]; tensor var_47288_to_fp16 = const()[name = tensor("op_47288_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4741_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4741_cast_fp16, y = var_47288_to_fp16)[name = tensor("aw_chunk_4741_cast_fp16")]; tensor var_47290_to_fp16 = const()[name = tensor("op_47290_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4743_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4743_cast_fp16, y = var_47290_to_fp16)[name = tensor("aw_chunk_4743_cast_fp16")]; tensor var_47292_to_fp16 = const()[name = tensor("op_47292_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4745_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4745_cast_fp16, y = var_47292_to_fp16)[name = tensor("aw_chunk_4745_cast_fp16")]; tensor var_47294_to_fp16 = const()[name = tensor("op_47294_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4747_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4747_cast_fp16, y = var_47294_to_fp16)[name = tensor("aw_chunk_4747_cast_fp16")]; tensor var_47296_to_fp16 = const()[name = tensor("op_47296_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4749_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4749_cast_fp16, y = var_47296_to_fp16)[name = tensor("aw_chunk_4749_cast_fp16")]; tensor var_47298_to_fp16 = const()[name = tensor("op_47298_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4751_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4751_cast_fp16, y = var_47298_to_fp16)[name = tensor("aw_chunk_4751_cast_fp16")]; tensor var_47300_to_fp16 = const()[name = tensor("op_47300_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4753_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4753_cast_fp16, y = var_47300_to_fp16)[name = tensor("aw_chunk_4753_cast_fp16")]; tensor var_47302_to_fp16 = const()[name = tensor("op_47302_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4755_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4755_cast_fp16, y = var_47302_to_fp16)[name = tensor("aw_chunk_4755_cast_fp16")]; tensor var_47304_to_fp16 = const()[name = tensor("op_47304_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4757_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4757_cast_fp16, y = var_47304_to_fp16)[name = tensor("aw_chunk_4757_cast_fp16")]; tensor var_47306_to_fp16 = const()[name = tensor("op_47306_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4759_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4759_cast_fp16, y = var_47306_to_fp16)[name = tensor("aw_chunk_4759_cast_fp16")]; tensor var_47308_to_fp16 = const()[name = tensor("op_47308_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4761_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4761_cast_fp16, y = var_47308_to_fp16)[name = tensor("aw_chunk_4761_cast_fp16")]; tensor var_47310_to_fp16 = const()[name = tensor("op_47310_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4763_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4763_cast_fp16, y = var_47310_to_fp16)[name = tensor("aw_chunk_4763_cast_fp16")]; tensor var_47312_to_fp16 = const()[name = tensor("op_47312_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4765_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4765_cast_fp16, y = var_47312_to_fp16)[name = tensor("aw_chunk_4765_cast_fp16")]; tensor var_47314_to_fp16 = const()[name = tensor("op_47314_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4767_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4767_cast_fp16, y = var_47314_to_fp16)[name = tensor("aw_chunk_4767_cast_fp16")]; tensor var_47316_to_fp16 = const()[name = tensor("op_47316_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4769_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4769_cast_fp16, y = var_47316_to_fp16)[name = tensor("aw_chunk_4769_cast_fp16")]; tensor var_47318_to_fp16 = const()[name = tensor("op_47318_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4771_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4771_cast_fp16, y = var_47318_to_fp16)[name = tensor("aw_chunk_4771_cast_fp16")]; tensor var_47320_to_fp16 = const()[name = tensor("op_47320_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4773_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4773_cast_fp16, y = var_47320_to_fp16)[name = tensor("aw_chunk_4773_cast_fp16")]; tensor var_47322_to_fp16 = const()[name = tensor("op_47322_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4775_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4775_cast_fp16, y = var_47322_to_fp16)[name = tensor("aw_chunk_4775_cast_fp16")]; tensor var_47324_to_fp16 = const()[name = tensor("op_47324_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4777_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4777_cast_fp16, y = var_47324_to_fp16)[name = tensor("aw_chunk_4777_cast_fp16")]; tensor var_47326_to_fp16 = const()[name = tensor("op_47326_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4779_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4779_cast_fp16, y = var_47326_to_fp16)[name = tensor("aw_chunk_4779_cast_fp16")]; tensor var_47328_to_fp16 = const()[name = tensor("op_47328_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4781_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4781_cast_fp16, y = var_47328_to_fp16)[name = tensor("aw_chunk_4781_cast_fp16")]; tensor var_47330_to_fp16 = const()[name = tensor("op_47330_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4783_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4783_cast_fp16, y = var_47330_to_fp16)[name = tensor("aw_chunk_4783_cast_fp16")]; tensor var_47332_to_fp16 = const()[name = tensor("op_47332_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4785_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4785_cast_fp16, y = var_47332_to_fp16)[name = tensor("aw_chunk_4785_cast_fp16")]; tensor var_47334_to_fp16 = const()[name = tensor("op_47334_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4787_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4787_cast_fp16, y = var_47334_to_fp16)[name = tensor("aw_chunk_4787_cast_fp16")]; tensor var_47336_to_fp16 = const()[name = tensor("op_47336_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4789_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4789_cast_fp16, y = var_47336_to_fp16)[name = tensor("aw_chunk_4789_cast_fp16")]; tensor var_47338_to_fp16 = const()[name = tensor("op_47338_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4791_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4791_cast_fp16, y = var_47338_to_fp16)[name = tensor("aw_chunk_4791_cast_fp16")]; tensor var_47340_to_fp16 = const()[name = tensor("op_47340_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4793_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4793_cast_fp16, y = var_47340_to_fp16)[name = tensor("aw_chunk_4793_cast_fp16")]; tensor var_47342_to_fp16 = const()[name = tensor("op_47342_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4795_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4795_cast_fp16, y = var_47342_to_fp16)[name = tensor("aw_chunk_4795_cast_fp16")]; tensor var_47344_to_fp16 = const()[name = tensor("op_47344_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4797_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4797_cast_fp16, y = var_47344_to_fp16)[name = tensor("aw_chunk_4797_cast_fp16")]; tensor var_47346_to_fp16 = const()[name = tensor("op_47346_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4799_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4799_cast_fp16, y = var_47346_to_fp16)[name = tensor("aw_chunk_4799_cast_fp16")]; tensor var_47348_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4641_cast_fp16)[name = tensor("op_47348_cast_fp16")]; tensor var_47349_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4643_cast_fp16)[name = tensor("op_47349_cast_fp16")]; tensor var_47350_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4645_cast_fp16)[name = tensor("op_47350_cast_fp16")]; tensor var_47351_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4647_cast_fp16)[name = tensor("op_47351_cast_fp16")]; tensor var_47352_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4649_cast_fp16)[name = tensor("op_47352_cast_fp16")]; tensor var_47353_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4651_cast_fp16)[name = tensor("op_47353_cast_fp16")]; tensor var_47354_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4653_cast_fp16)[name = tensor("op_47354_cast_fp16")]; tensor var_47355_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4655_cast_fp16)[name = tensor("op_47355_cast_fp16")]; tensor var_47356_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4657_cast_fp16)[name = tensor("op_47356_cast_fp16")]; tensor var_47357_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4659_cast_fp16)[name = tensor("op_47357_cast_fp16")]; tensor var_47358_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4661_cast_fp16)[name = tensor("op_47358_cast_fp16")]; tensor var_47359_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4663_cast_fp16)[name = tensor("op_47359_cast_fp16")]; tensor var_47360_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4665_cast_fp16)[name = tensor("op_47360_cast_fp16")]; tensor var_47361_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4667_cast_fp16)[name = tensor("op_47361_cast_fp16")]; tensor var_47362_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4669_cast_fp16)[name = tensor("op_47362_cast_fp16")]; tensor var_47363_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4671_cast_fp16)[name = tensor("op_47363_cast_fp16")]; tensor var_47364_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4673_cast_fp16)[name = tensor("op_47364_cast_fp16")]; tensor var_47365_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4675_cast_fp16)[name = tensor("op_47365_cast_fp16")]; tensor var_47366_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4677_cast_fp16)[name = tensor("op_47366_cast_fp16")]; tensor var_47367_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4679_cast_fp16)[name = tensor("op_47367_cast_fp16")]; tensor var_47368_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4681_cast_fp16)[name = tensor("op_47368_cast_fp16")]; tensor var_47369_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4683_cast_fp16)[name = tensor("op_47369_cast_fp16")]; tensor var_47370_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4685_cast_fp16)[name = tensor("op_47370_cast_fp16")]; tensor var_47371_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4687_cast_fp16)[name = tensor("op_47371_cast_fp16")]; tensor var_47372_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4689_cast_fp16)[name = tensor("op_47372_cast_fp16")]; tensor var_47373_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4691_cast_fp16)[name = tensor("op_47373_cast_fp16")]; tensor var_47374_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4693_cast_fp16)[name = tensor("op_47374_cast_fp16")]; tensor var_47375_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4695_cast_fp16)[name = tensor("op_47375_cast_fp16")]; tensor var_47376_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4697_cast_fp16)[name = tensor("op_47376_cast_fp16")]; tensor var_47377_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4699_cast_fp16)[name = tensor("op_47377_cast_fp16")]; tensor var_47378_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4701_cast_fp16)[name = tensor("op_47378_cast_fp16")]; tensor var_47379_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4703_cast_fp16)[name = tensor("op_47379_cast_fp16")]; tensor var_47380_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4705_cast_fp16)[name = tensor("op_47380_cast_fp16")]; tensor var_47381_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4707_cast_fp16)[name = tensor("op_47381_cast_fp16")]; tensor var_47382_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4709_cast_fp16)[name = tensor("op_47382_cast_fp16")]; tensor var_47383_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4711_cast_fp16)[name = tensor("op_47383_cast_fp16")]; tensor var_47384_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4713_cast_fp16)[name = tensor("op_47384_cast_fp16")]; tensor var_47385_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4715_cast_fp16)[name = tensor("op_47385_cast_fp16")]; tensor var_47386_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4717_cast_fp16)[name = tensor("op_47386_cast_fp16")]; tensor var_47387_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4719_cast_fp16)[name = tensor("op_47387_cast_fp16")]; tensor var_47388_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4721_cast_fp16)[name = tensor("op_47388_cast_fp16")]; tensor var_47389_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4723_cast_fp16)[name = tensor("op_47389_cast_fp16")]; tensor var_47390_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4725_cast_fp16)[name = tensor("op_47390_cast_fp16")]; tensor var_47391_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4727_cast_fp16)[name = tensor("op_47391_cast_fp16")]; tensor var_47392_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4729_cast_fp16)[name = tensor("op_47392_cast_fp16")]; tensor var_47393_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4731_cast_fp16)[name = tensor("op_47393_cast_fp16")]; tensor var_47394_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4733_cast_fp16)[name = tensor("op_47394_cast_fp16")]; tensor var_47395_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4735_cast_fp16)[name = tensor("op_47395_cast_fp16")]; tensor var_47396_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4737_cast_fp16)[name = tensor("op_47396_cast_fp16")]; tensor var_47397_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4739_cast_fp16)[name = tensor("op_47397_cast_fp16")]; tensor var_47398_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4741_cast_fp16)[name = tensor("op_47398_cast_fp16")]; tensor var_47399_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4743_cast_fp16)[name = tensor("op_47399_cast_fp16")]; tensor var_47400_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4745_cast_fp16)[name = tensor("op_47400_cast_fp16")]; tensor var_47401_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4747_cast_fp16)[name = tensor("op_47401_cast_fp16")]; tensor var_47402_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4749_cast_fp16)[name = tensor("op_47402_cast_fp16")]; tensor var_47403_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4751_cast_fp16)[name = tensor("op_47403_cast_fp16")]; tensor var_47404_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4753_cast_fp16)[name = tensor("op_47404_cast_fp16")]; tensor var_47405_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4755_cast_fp16)[name = tensor("op_47405_cast_fp16")]; tensor var_47406_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4757_cast_fp16)[name = tensor("op_47406_cast_fp16")]; tensor var_47407_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4759_cast_fp16)[name = tensor("op_47407_cast_fp16")]; tensor var_47408_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4761_cast_fp16)[name = tensor("op_47408_cast_fp16")]; tensor var_47409_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4763_cast_fp16)[name = tensor("op_47409_cast_fp16")]; tensor var_47410_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4765_cast_fp16)[name = tensor("op_47410_cast_fp16")]; tensor var_47411_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4767_cast_fp16)[name = tensor("op_47411_cast_fp16")]; tensor var_47412_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4769_cast_fp16)[name = tensor("op_47412_cast_fp16")]; tensor var_47413_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4771_cast_fp16)[name = tensor("op_47413_cast_fp16")]; tensor var_47414_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4773_cast_fp16)[name = tensor("op_47414_cast_fp16")]; tensor var_47415_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4775_cast_fp16)[name = tensor("op_47415_cast_fp16")]; tensor var_47416_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4777_cast_fp16)[name = tensor("op_47416_cast_fp16")]; tensor var_47417_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4779_cast_fp16)[name = tensor("op_47417_cast_fp16")]; tensor var_47418_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4781_cast_fp16)[name = tensor("op_47418_cast_fp16")]; tensor var_47419_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4783_cast_fp16)[name = tensor("op_47419_cast_fp16")]; tensor var_47420_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4785_cast_fp16)[name = tensor("op_47420_cast_fp16")]; tensor var_47421_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4787_cast_fp16)[name = tensor("op_47421_cast_fp16")]; tensor var_47422_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4789_cast_fp16)[name = tensor("op_47422_cast_fp16")]; tensor var_47423_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4791_cast_fp16)[name = tensor("op_47423_cast_fp16")]; tensor var_47424_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4793_cast_fp16)[name = tensor("op_47424_cast_fp16")]; tensor var_47425_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4795_cast_fp16)[name = tensor("op_47425_cast_fp16")]; tensor var_47426_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4797_cast_fp16)[name = tensor("op_47426_cast_fp16")]; tensor var_47427_cast_fp16 = softmax(axis = var_46146, x = aw_chunk_4799_cast_fp16)[name = tensor("op_47427_cast_fp16")]; tensor var_47429_equation_0 = const()[name = tensor("op_47429_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47429_cast_fp16 = einsum(equation = var_47429_equation_0, values = (var_46949_cast_fp16, var_47348_cast_fp16))[name = tensor("op_47429_cast_fp16")]; tensor var_47431_equation_0 = const()[name = tensor("op_47431_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47431_cast_fp16 = einsum(equation = var_47431_equation_0, values = (var_46949_cast_fp16, var_47349_cast_fp16))[name = tensor("op_47431_cast_fp16")]; tensor var_47433_equation_0 = const()[name = tensor("op_47433_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47433_cast_fp16 = einsum(equation = var_47433_equation_0, values = (var_46949_cast_fp16, var_47350_cast_fp16))[name = tensor("op_47433_cast_fp16")]; tensor var_47435_equation_0 = const()[name = tensor("op_47435_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47435_cast_fp16 = einsum(equation = var_47435_equation_0, values = (var_46949_cast_fp16, var_47351_cast_fp16))[name = tensor("op_47435_cast_fp16")]; tensor var_47437_equation_0 = const()[name = tensor("op_47437_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47437_cast_fp16 = einsum(equation = var_47437_equation_0, values = (var_46953_cast_fp16, var_47352_cast_fp16))[name = tensor("op_47437_cast_fp16")]; tensor var_47439_equation_0 = const()[name = tensor("op_47439_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47439_cast_fp16 = einsum(equation = var_47439_equation_0, values = (var_46953_cast_fp16, var_47353_cast_fp16))[name = tensor("op_47439_cast_fp16")]; tensor var_47441_equation_0 = const()[name = tensor("op_47441_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47441_cast_fp16 = einsum(equation = var_47441_equation_0, values = (var_46953_cast_fp16, var_47354_cast_fp16))[name = tensor("op_47441_cast_fp16")]; tensor var_47443_equation_0 = const()[name = tensor("op_47443_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47443_cast_fp16 = einsum(equation = var_47443_equation_0, values = (var_46953_cast_fp16, var_47355_cast_fp16))[name = tensor("op_47443_cast_fp16")]; tensor var_47445_equation_0 = const()[name = tensor("op_47445_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47445_cast_fp16 = einsum(equation = var_47445_equation_0, values = (var_46957_cast_fp16, var_47356_cast_fp16))[name = tensor("op_47445_cast_fp16")]; tensor var_47447_equation_0 = const()[name = tensor("op_47447_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47447_cast_fp16 = einsum(equation = var_47447_equation_0, values = (var_46957_cast_fp16, var_47357_cast_fp16))[name = tensor("op_47447_cast_fp16")]; tensor var_47449_equation_0 = const()[name = tensor("op_47449_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47449_cast_fp16 = einsum(equation = var_47449_equation_0, values = (var_46957_cast_fp16, var_47358_cast_fp16))[name = tensor("op_47449_cast_fp16")]; tensor var_47451_equation_0 = const()[name = tensor("op_47451_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47451_cast_fp16 = einsum(equation = var_47451_equation_0, values = (var_46957_cast_fp16, var_47359_cast_fp16))[name = tensor("op_47451_cast_fp16")]; tensor var_47453_equation_0 = const()[name = tensor("op_47453_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47453_cast_fp16 = einsum(equation = var_47453_equation_0, values = (var_46961_cast_fp16, var_47360_cast_fp16))[name = tensor("op_47453_cast_fp16")]; tensor var_47455_equation_0 = const()[name = tensor("op_47455_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47455_cast_fp16 = einsum(equation = var_47455_equation_0, values = (var_46961_cast_fp16, var_47361_cast_fp16))[name = tensor("op_47455_cast_fp16")]; tensor var_47457_equation_0 = const()[name = tensor("op_47457_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47457_cast_fp16 = einsum(equation = var_47457_equation_0, values = (var_46961_cast_fp16, var_47362_cast_fp16))[name = tensor("op_47457_cast_fp16")]; tensor var_47459_equation_0 = const()[name = tensor("op_47459_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47459_cast_fp16 = einsum(equation = var_47459_equation_0, values = (var_46961_cast_fp16, var_47363_cast_fp16))[name = tensor("op_47459_cast_fp16")]; tensor var_47461_equation_0 = const()[name = tensor("op_47461_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47461_cast_fp16 = einsum(equation = var_47461_equation_0, values = (var_46965_cast_fp16, var_47364_cast_fp16))[name = tensor("op_47461_cast_fp16")]; tensor var_47463_equation_0 = const()[name = tensor("op_47463_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47463_cast_fp16 = einsum(equation = var_47463_equation_0, values = (var_46965_cast_fp16, var_47365_cast_fp16))[name = tensor("op_47463_cast_fp16")]; tensor var_47465_equation_0 = const()[name = tensor("op_47465_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47465_cast_fp16 = einsum(equation = var_47465_equation_0, values = (var_46965_cast_fp16, var_47366_cast_fp16))[name = tensor("op_47465_cast_fp16")]; tensor var_47467_equation_0 = const()[name = tensor("op_47467_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47467_cast_fp16 = einsum(equation = var_47467_equation_0, values = (var_46965_cast_fp16, var_47367_cast_fp16))[name = tensor("op_47467_cast_fp16")]; tensor var_47469_equation_0 = const()[name = tensor("op_47469_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47469_cast_fp16 = einsum(equation = var_47469_equation_0, values = (var_46969_cast_fp16, var_47368_cast_fp16))[name = tensor("op_47469_cast_fp16")]; tensor var_47471_equation_0 = const()[name = tensor("op_47471_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47471_cast_fp16 = einsum(equation = var_47471_equation_0, values = (var_46969_cast_fp16, var_47369_cast_fp16))[name = tensor("op_47471_cast_fp16")]; tensor var_47473_equation_0 = const()[name = tensor("op_47473_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47473_cast_fp16 = einsum(equation = var_47473_equation_0, values = (var_46969_cast_fp16, var_47370_cast_fp16))[name = tensor("op_47473_cast_fp16")]; tensor var_47475_equation_0 = const()[name = tensor("op_47475_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47475_cast_fp16 = einsum(equation = var_47475_equation_0, values = (var_46969_cast_fp16, var_47371_cast_fp16))[name = tensor("op_47475_cast_fp16")]; tensor var_47477_equation_0 = const()[name = tensor("op_47477_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47477_cast_fp16 = einsum(equation = var_47477_equation_0, values = (var_46973_cast_fp16, var_47372_cast_fp16))[name = tensor("op_47477_cast_fp16")]; tensor var_47479_equation_0 = const()[name = tensor("op_47479_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47479_cast_fp16 = einsum(equation = var_47479_equation_0, values = (var_46973_cast_fp16, var_47373_cast_fp16))[name = tensor("op_47479_cast_fp16")]; tensor var_47481_equation_0 = const()[name = tensor("op_47481_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47481_cast_fp16 = einsum(equation = var_47481_equation_0, values = (var_46973_cast_fp16, var_47374_cast_fp16))[name = tensor("op_47481_cast_fp16")]; tensor var_47483_equation_0 = const()[name = tensor("op_47483_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47483_cast_fp16 = einsum(equation = var_47483_equation_0, values = (var_46973_cast_fp16, var_47375_cast_fp16))[name = tensor("op_47483_cast_fp16")]; tensor var_47485_equation_0 = const()[name = tensor("op_47485_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47485_cast_fp16 = einsum(equation = var_47485_equation_0, values = (var_46977_cast_fp16, var_47376_cast_fp16))[name = tensor("op_47485_cast_fp16")]; tensor var_47487_equation_0 = const()[name = tensor("op_47487_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47487_cast_fp16 = einsum(equation = var_47487_equation_0, values = (var_46977_cast_fp16, var_47377_cast_fp16))[name = tensor("op_47487_cast_fp16")]; tensor var_47489_equation_0 = const()[name = tensor("op_47489_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47489_cast_fp16 = einsum(equation = var_47489_equation_0, values = (var_46977_cast_fp16, var_47378_cast_fp16))[name = tensor("op_47489_cast_fp16")]; tensor var_47491_equation_0 = const()[name = tensor("op_47491_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47491_cast_fp16 = einsum(equation = var_47491_equation_0, values = (var_46977_cast_fp16, var_47379_cast_fp16))[name = tensor("op_47491_cast_fp16")]; tensor var_47493_equation_0 = const()[name = tensor("op_47493_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47493_cast_fp16 = einsum(equation = var_47493_equation_0, values = (var_46981_cast_fp16, var_47380_cast_fp16))[name = tensor("op_47493_cast_fp16")]; tensor var_47495_equation_0 = const()[name = tensor("op_47495_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47495_cast_fp16 = einsum(equation = var_47495_equation_0, values = (var_46981_cast_fp16, var_47381_cast_fp16))[name = tensor("op_47495_cast_fp16")]; tensor var_47497_equation_0 = const()[name = tensor("op_47497_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47497_cast_fp16 = einsum(equation = var_47497_equation_0, values = (var_46981_cast_fp16, var_47382_cast_fp16))[name = tensor("op_47497_cast_fp16")]; tensor var_47499_equation_0 = const()[name = tensor("op_47499_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47499_cast_fp16 = einsum(equation = var_47499_equation_0, values = (var_46981_cast_fp16, var_47383_cast_fp16))[name = tensor("op_47499_cast_fp16")]; tensor var_47501_equation_0 = const()[name = tensor("op_47501_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47501_cast_fp16 = einsum(equation = var_47501_equation_0, values = (var_46985_cast_fp16, var_47384_cast_fp16))[name = tensor("op_47501_cast_fp16")]; tensor var_47503_equation_0 = const()[name = tensor("op_47503_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47503_cast_fp16 = einsum(equation = var_47503_equation_0, values = (var_46985_cast_fp16, var_47385_cast_fp16))[name = tensor("op_47503_cast_fp16")]; tensor var_47505_equation_0 = const()[name = tensor("op_47505_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47505_cast_fp16 = einsum(equation = var_47505_equation_0, values = (var_46985_cast_fp16, var_47386_cast_fp16))[name = tensor("op_47505_cast_fp16")]; tensor var_47507_equation_0 = const()[name = tensor("op_47507_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47507_cast_fp16 = einsum(equation = var_47507_equation_0, values = (var_46985_cast_fp16, var_47387_cast_fp16))[name = tensor("op_47507_cast_fp16")]; tensor var_47509_equation_0 = const()[name = tensor("op_47509_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47509_cast_fp16 = einsum(equation = var_47509_equation_0, values = (var_46989_cast_fp16, var_47388_cast_fp16))[name = tensor("op_47509_cast_fp16")]; tensor var_47511_equation_0 = const()[name = tensor("op_47511_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47511_cast_fp16 = einsum(equation = var_47511_equation_0, values = (var_46989_cast_fp16, var_47389_cast_fp16))[name = tensor("op_47511_cast_fp16")]; tensor var_47513_equation_0 = const()[name = tensor("op_47513_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47513_cast_fp16 = einsum(equation = var_47513_equation_0, values = (var_46989_cast_fp16, var_47390_cast_fp16))[name = tensor("op_47513_cast_fp16")]; tensor var_47515_equation_0 = const()[name = tensor("op_47515_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47515_cast_fp16 = einsum(equation = var_47515_equation_0, values = (var_46989_cast_fp16, var_47391_cast_fp16))[name = tensor("op_47515_cast_fp16")]; tensor var_47517_equation_0 = const()[name = tensor("op_47517_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47517_cast_fp16 = einsum(equation = var_47517_equation_0, values = (var_46993_cast_fp16, var_47392_cast_fp16))[name = tensor("op_47517_cast_fp16")]; tensor var_47519_equation_0 = const()[name = tensor("op_47519_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47519_cast_fp16 = einsum(equation = var_47519_equation_0, values = (var_46993_cast_fp16, var_47393_cast_fp16))[name = tensor("op_47519_cast_fp16")]; tensor var_47521_equation_0 = const()[name = tensor("op_47521_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47521_cast_fp16 = einsum(equation = var_47521_equation_0, values = (var_46993_cast_fp16, var_47394_cast_fp16))[name = tensor("op_47521_cast_fp16")]; tensor var_47523_equation_0 = const()[name = tensor("op_47523_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47523_cast_fp16 = einsum(equation = var_47523_equation_0, values = (var_46993_cast_fp16, var_47395_cast_fp16))[name = tensor("op_47523_cast_fp16")]; tensor var_47525_equation_0 = const()[name = tensor("op_47525_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47525_cast_fp16 = einsum(equation = var_47525_equation_0, values = (var_46997_cast_fp16, var_47396_cast_fp16))[name = tensor("op_47525_cast_fp16")]; tensor var_47527_equation_0 = const()[name = tensor("op_47527_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47527_cast_fp16 = einsum(equation = var_47527_equation_0, values = (var_46997_cast_fp16, var_47397_cast_fp16))[name = tensor("op_47527_cast_fp16")]; tensor var_47529_equation_0 = const()[name = tensor("op_47529_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47529_cast_fp16 = einsum(equation = var_47529_equation_0, values = (var_46997_cast_fp16, var_47398_cast_fp16))[name = tensor("op_47529_cast_fp16")]; tensor var_47531_equation_0 = const()[name = tensor("op_47531_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47531_cast_fp16 = einsum(equation = var_47531_equation_0, values = (var_46997_cast_fp16, var_47399_cast_fp16))[name = tensor("op_47531_cast_fp16")]; tensor var_47533_equation_0 = const()[name = tensor("op_47533_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47533_cast_fp16 = einsum(equation = var_47533_equation_0, values = (var_47001_cast_fp16, var_47400_cast_fp16))[name = tensor("op_47533_cast_fp16")]; tensor var_47535_equation_0 = const()[name = tensor("op_47535_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47535_cast_fp16 = einsum(equation = var_47535_equation_0, values = (var_47001_cast_fp16, var_47401_cast_fp16))[name = tensor("op_47535_cast_fp16")]; tensor var_47537_equation_0 = const()[name = tensor("op_47537_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47537_cast_fp16 = einsum(equation = var_47537_equation_0, values = (var_47001_cast_fp16, var_47402_cast_fp16))[name = tensor("op_47537_cast_fp16")]; tensor var_47539_equation_0 = const()[name = tensor("op_47539_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47539_cast_fp16 = einsum(equation = var_47539_equation_0, values = (var_47001_cast_fp16, var_47403_cast_fp16))[name = tensor("op_47539_cast_fp16")]; tensor var_47541_equation_0 = const()[name = tensor("op_47541_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47541_cast_fp16 = einsum(equation = var_47541_equation_0, values = (var_47005_cast_fp16, var_47404_cast_fp16))[name = tensor("op_47541_cast_fp16")]; tensor var_47543_equation_0 = const()[name = tensor("op_47543_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47543_cast_fp16 = einsum(equation = var_47543_equation_0, values = (var_47005_cast_fp16, var_47405_cast_fp16))[name = tensor("op_47543_cast_fp16")]; tensor var_47545_equation_0 = const()[name = tensor("op_47545_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47545_cast_fp16 = einsum(equation = var_47545_equation_0, values = (var_47005_cast_fp16, var_47406_cast_fp16))[name = tensor("op_47545_cast_fp16")]; tensor var_47547_equation_0 = const()[name = tensor("op_47547_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47547_cast_fp16 = einsum(equation = var_47547_equation_0, values = (var_47005_cast_fp16, var_47407_cast_fp16))[name = tensor("op_47547_cast_fp16")]; tensor var_47549_equation_0 = const()[name = tensor("op_47549_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47549_cast_fp16 = einsum(equation = var_47549_equation_0, values = (var_47009_cast_fp16, var_47408_cast_fp16))[name = tensor("op_47549_cast_fp16")]; tensor var_47551_equation_0 = const()[name = tensor("op_47551_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47551_cast_fp16 = einsum(equation = var_47551_equation_0, values = (var_47009_cast_fp16, var_47409_cast_fp16))[name = tensor("op_47551_cast_fp16")]; tensor var_47553_equation_0 = const()[name = tensor("op_47553_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47553_cast_fp16 = einsum(equation = var_47553_equation_0, values = (var_47009_cast_fp16, var_47410_cast_fp16))[name = tensor("op_47553_cast_fp16")]; tensor var_47555_equation_0 = const()[name = tensor("op_47555_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47555_cast_fp16 = einsum(equation = var_47555_equation_0, values = (var_47009_cast_fp16, var_47411_cast_fp16))[name = tensor("op_47555_cast_fp16")]; tensor var_47557_equation_0 = const()[name = tensor("op_47557_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47557_cast_fp16 = einsum(equation = var_47557_equation_0, values = (var_47013_cast_fp16, var_47412_cast_fp16))[name = tensor("op_47557_cast_fp16")]; tensor var_47559_equation_0 = const()[name = tensor("op_47559_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47559_cast_fp16 = einsum(equation = var_47559_equation_0, values = (var_47013_cast_fp16, var_47413_cast_fp16))[name = tensor("op_47559_cast_fp16")]; tensor var_47561_equation_0 = const()[name = tensor("op_47561_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47561_cast_fp16 = einsum(equation = var_47561_equation_0, values = (var_47013_cast_fp16, var_47414_cast_fp16))[name = tensor("op_47561_cast_fp16")]; tensor var_47563_equation_0 = const()[name = tensor("op_47563_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47563_cast_fp16 = einsum(equation = var_47563_equation_0, values = (var_47013_cast_fp16, var_47415_cast_fp16))[name = tensor("op_47563_cast_fp16")]; tensor var_47565_equation_0 = const()[name = tensor("op_47565_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47565_cast_fp16 = einsum(equation = var_47565_equation_0, values = (var_47017_cast_fp16, var_47416_cast_fp16))[name = tensor("op_47565_cast_fp16")]; tensor var_47567_equation_0 = const()[name = tensor("op_47567_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47567_cast_fp16 = einsum(equation = var_47567_equation_0, values = (var_47017_cast_fp16, var_47417_cast_fp16))[name = tensor("op_47567_cast_fp16")]; tensor var_47569_equation_0 = const()[name = tensor("op_47569_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47569_cast_fp16 = einsum(equation = var_47569_equation_0, values = (var_47017_cast_fp16, var_47418_cast_fp16))[name = tensor("op_47569_cast_fp16")]; tensor var_47571_equation_0 = const()[name = tensor("op_47571_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47571_cast_fp16 = einsum(equation = var_47571_equation_0, values = (var_47017_cast_fp16, var_47419_cast_fp16))[name = tensor("op_47571_cast_fp16")]; tensor var_47573_equation_0 = const()[name = tensor("op_47573_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47573_cast_fp16 = einsum(equation = var_47573_equation_0, values = (var_47021_cast_fp16, var_47420_cast_fp16))[name = tensor("op_47573_cast_fp16")]; tensor var_47575_equation_0 = const()[name = tensor("op_47575_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47575_cast_fp16 = einsum(equation = var_47575_equation_0, values = (var_47021_cast_fp16, var_47421_cast_fp16))[name = tensor("op_47575_cast_fp16")]; tensor var_47577_equation_0 = const()[name = tensor("op_47577_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47577_cast_fp16 = einsum(equation = var_47577_equation_0, values = (var_47021_cast_fp16, var_47422_cast_fp16))[name = tensor("op_47577_cast_fp16")]; tensor var_47579_equation_0 = const()[name = tensor("op_47579_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47579_cast_fp16 = einsum(equation = var_47579_equation_0, values = (var_47021_cast_fp16, var_47423_cast_fp16))[name = tensor("op_47579_cast_fp16")]; tensor var_47581_equation_0 = const()[name = tensor("op_47581_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47581_cast_fp16 = einsum(equation = var_47581_equation_0, values = (var_47025_cast_fp16, var_47424_cast_fp16))[name = tensor("op_47581_cast_fp16")]; tensor var_47583_equation_0 = const()[name = tensor("op_47583_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47583_cast_fp16 = einsum(equation = var_47583_equation_0, values = (var_47025_cast_fp16, var_47425_cast_fp16))[name = tensor("op_47583_cast_fp16")]; tensor var_47585_equation_0 = const()[name = tensor("op_47585_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47585_cast_fp16 = einsum(equation = var_47585_equation_0, values = (var_47025_cast_fp16, var_47426_cast_fp16))[name = tensor("op_47585_cast_fp16")]; tensor var_47587_equation_0 = const()[name = tensor("op_47587_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_47587_cast_fp16 = einsum(equation = var_47587_equation_0, values = (var_47025_cast_fp16, var_47427_cast_fp16))[name = tensor("op_47587_cast_fp16")]; tensor var_47589_interleave_0 = const()[name = tensor("op_47589_interleave_0"), val = tensor(false)]; tensor var_47589_cast_fp16 = concat(axis = var_46121, interleave = var_47589_interleave_0, values = (var_47429_cast_fp16, var_47431_cast_fp16, var_47433_cast_fp16, var_47435_cast_fp16))[name = tensor("op_47589_cast_fp16")]; tensor var_47591_interleave_0 = const()[name = tensor("op_47591_interleave_0"), val = tensor(false)]; tensor var_47591_cast_fp16 = concat(axis = var_46121, interleave = var_47591_interleave_0, values = (var_47437_cast_fp16, var_47439_cast_fp16, var_47441_cast_fp16, var_47443_cast_fp16))[name = tensor("op_47591_cast_fp16")]; tensor var_47593_interleave_0 = const()[name = tensor("op_47593_interleave_0"), val = tensor(false)]; tensor var_47593_cast_fp16 = concat(axis = var_46121, interleave = var_47593_interleave_0, values = (var_47445_cast_fp16, var_47447_cast_fp16, var_47449_cast_fp16, var_47451_cast_fp16))[name = tensor("op_47593_cast_fp16")]; tensor var_47595_interleave_0 = const()[name = tensor("op_47595_interleave_0"), val = tensor(false)]; tensor var_47595_cast_fp16 = concat(axis = var_46121, interleave = var_47595_interleave_0, values = (var_47453_cast_fp16, var_47455_cast_fp16, var_47457_cast_fp16, var_47459_cast_fp16))[name = tensor("op_47595_cast_fp16")]; tensor var_47597_interleave_0 = const()[name = tensor("op_47597_interleave_0"), val = tensor(false)]; tensor var_47597_cast_fp16 = concat(axis = var_46121, interleave = var_47597_interleave_0, values = (var_47461_cast_fp16, var_47463_cast_fp16, var_47465_cast_fp16, var_47467_cast_fp16))[name = tensor("op_47597_cast_fp16")]; tensor var_47599_interleave_0 = const()[name = tensor("op_47599_interleave_0"), val = tensor(false)]; tensor var_47599_cast_fp16 = concat(axis = var_46121, interleave = var_47599_interleave_0, values = (var_47469_cast_fp16, var_47471_cast_fp16, var_47473_cast_fp16, var_47475_cast_fp16))[name = tensor("op_47599_cast_fp16")]; tensor var_47601_interleave_0 = const()[name = tensor("op_47601_interleave_0"), val = tensor(false)]; tensor var_47601_cast_fp16 = concat(axis = var_46121, interleave = var_47601_interleave_0, values = (var_47477_cast_fp16, var_47479_cast_fp16, var_47481_cast_fp16, var_47483_cast_fp16))[name = tensor("op_47601_cast_fp16")]; tensor var_47603_interleave_0 = const()[name = tensor("op_47603_interleave_0"), val = tensor(false)]; tensor var_47603_cast_fp16 = concat(axis = var_46121, interleave = var_47603_interleave_0, values = (var_47485_cast_fp16, var_47487_cast_fp16, var_47489_cast_fp16, var_47491_cast_fp16))[name = tensor("op_47603_cast_fp16")]; tensor var_47605_interleave_0 = const()[name = tensor("op_47605_interleave_0"), val = tensor(false)]; tensor var_47605_cast_fp16 = concat(axis = var_46121, interleave = var_47605_interleave_0, values = (var_47493_cast_fp16, var_47495_cast_fp16, var_47497_cast_fp16, var_47499_cast_fp16))[name = tensor("op_47605_cast_fp16")]; tensor var_47607_interleave_0 = const()[name = tensor("op_47607_interleave_0"), val = tensor(false)]; tensor var_47607_cast_fp16 = concat(axis = var_46121, interleave = var_47607_interleave_0, values = (var_47501_cast_fp16, var_47503_cast_fp16, var_47505_cast_fp16, var_47507_cast_fp16))[name = tensor("op_47607_cast_fp16")]; tensor var_47609_interleave_0 = const()[name = tensor("op_47609_interleave_0"), val = tensor(false)]; tensor var_47609_cast_fp16 = concat(axis = var_46121, interleave = var_47609_interleave_0, values = (var_47509_cast_fp16, var_47511_cast_fp16, var_47513_cast_fp16, var_47515_cast_fp16))[name = tensor("op_47609_cast_fp16")]; tensor var_47611_interleave_0 = const()[name = tensor("op_47611_interleave_0"), val = tensor(false)]; tensor var_47611_cast_fp16 = concat(axis = var_46121, interleave = var_47611_interleave_0, values = (var_47517_cast_fp16, var_47519_cast_fp16, var_47521_cast_fp16, var_47523_cast_fp16))[name = tensor("op_47611_cast_fp16")]; tensor var_47613_interleave_0 = const()[name = tensor("op_47613_interleave_0"), val = tensor(false)]; tensor var_47613_cast_fp16 = concat(axis = var_46121, interleave = var_47613_interleave_0, values = (var_47525_cast_fp16, var_47527_cast_fp16, var_47529_cast_fp16, var_47531_cast_fp16))[name = tensor("op_47613_cast_fp16")]; tensor var_47615_interleave_0 = const()[name = tensor("op_47615_interleave_0"), val = tensor(false)]; tensor var_47615_cast_fp16 = concat(axis = var_46121, interleave = var_47615_interleave_0, values = (var_47533_cast_fp16, var_47535_cast_fp16, var_47537_cast_fp16, var_47539_cast_fp16))[name = tensor("op_47615_cast_fp16")]; tensor var_47617_interleave_0 = const()[name = tensor("op_47617_interleave_0"), val = tensor(false)]; tensor var_47617_cast_fp16 = concat(axis = var_46121, interleave = var_47617_interleave_0, values = (var_47541_cast_fp16, var_47543_cast_fp16, var_47545_cast_fp16, var_47547_cast_fp16))[name = tensor("op_47617_cast_fp16")]; tensor var_47619_interleave_0 = const()[name = tensor("op_47619_interleave_0"), val = tensor(false)]; tensor var_47619_cast_fp16 = concat(axis = var_46121, interleave = var_47619_interleave_0, values = (var_47549_cast_fp16, var_47551_cast_fp16, var_47553_cast_fp16, var_47555_cast_fp16))[name = tensor("op_47619_cast_fp16")]; tensor var_47621_interleave_0 = const()[name = tensor("op_47621_interleave_0"), val = tensor(false)]; tensor var_47621_cast_fp16 = concat(axis = var_46121, interleave = var_47621_interleave_0, values = (var_47557_cast_fp16, var_47559_cast_fp16, var_47561_cast_fp16, var_47563_cast_fp16))[name = tensor("op_47621_cast_fp16")]; tensor var_47623_interleave_0 = const()[name = tensor("op_47623_interleave_0"), val = tensor(false)]; tensor var_47623_cast_fp16 = concat(axis = var_46121, interleave = var_47623_interleave_0, values = (var_47565_cast_fp16, var_47567_cast_fp16, var_47569_cast_fp16, var_47571_cast_fp16))[name = tensor("op_47623_cast_fp16")]; tensor var_47625_interleave_0 = const()[name = tensor("op_47625_interleave_0"), val = tensor(false)]; tensor var_47625_cast_fp16 = concat(axis = var_46121, interleave = var_47625_interleave_0, values = (var_47573_cast_fp16, var_47575_cast_fp16, var_47577_cast_fp16, var_47579_cast_fp16))[name = tensor("op_47625_cast_fp16")]; tensor var_47627_interleave_0 = const()[name = tensor("op_47627_interleave_0"), val = tensor(false)]; tensor var_47627_cast_fp16 = concat(axis = var_46121, interleave = var_47627_interleave_0, values = (var_47581_cast_fp16, var_47583_cast_fp16, var_47585_cast_fp16, var_47587_cast_fp16))[name = tensor("op_47627_cast_fp16")]; tensor input_233_interleave_0 = const()[name = tensor("input_233_interleave_0"), val = tensor(false)]; tensor input_233_cast_fp16 = concat(axis = var_46146, interleave = input_233_interleave_0, values = (var_47589_cast_fp16, var_47591_cast_fp16, var_47593_cast_fp16, var_47595_cast_fp16, var_47597_cast_fp16, var_47599_cast_fp16, var_47601_cast_fp16, var_47603_cast_fp16, var_47605_cast_fp16, var_47607_cast_fp16, var_47609_cast_fp16, var_47611_cast_fp16, var_47613_cast_fp16, var_47615_cast_fp16, var_47617_cast_fp16, var_47619_cast_fp16, var_47621_cast_fp16, var_47623_cast_fp16, var_47625_cast_fp16, var_47627_cast_fp16))[name = tensor("input_233_cast_fp16")]; tensor var_47638_pad_type_0 = const()[name = tensor("op_47638_pad_type_0"), val = tensor("valid")]; tensor var_47638_strides_0 = const()[name = tensor("op_47638_strides_0"), val = tensor([1, 1])]; tensor var_47638_pad_0 = const()[name = tensor("op_47638_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47638_dilations_0 = const()[name = tensor("op_47638_dilations_0"), val = tensor([1, 1])]; tensor var_47638_groups_0 = const()[name = tensor("op_47638_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(387568256))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388387520))), name = tensor("layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_29_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388387648)))]; tensor var_47638_cast_fp16 = conv(bias = layers_29_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_47638_dilations_0, groups = var_47638_groups_0, pad = var_47638_pad_0, pad_type = var_47638_pad_type_0, strides = var_47638_strides_0, weight = layers_29_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_233_cast_fp16)[name = tensor("op_47638_cast_fp16")]; tensor var_47644_pad_type_0 = const()[name = tensor("op_47644_pad_type_0"), val = tensor("valid")]; tensor var_47644_strides_0 = const()[name = tensor("op_47644_strides_0"), val = tensor([1, 1])]; tensor var_47644_pad_0 = const()[name = tensor("op_47644_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47644_dilations_0 = const()[name = tensor("op_47644_dilations_0"), val = tensor([1, 1])]; tensor var_47644_groups_0 = const()[name = tensor("op_47644_groups_0"), val = tensor(1)]; tensor layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388402304))), name = tensor("layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388390272))), shape = tensor([1280, 1280, 1, 1])]; tensor var_47644_cast_fp16 = conv(dilations = var_47644_dilations_0, groups = var_47644_groups_0, pad = var_47644_pad_0, pad_type = var_47644_pad_type_0, strides = var_47644_strides_0, weight = layers_29_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_233_cast_fp16)[name = tensor("op_47644_cast_fp16")]; tensor obj_119_cast_fp16 = add(x = var_47638_cast_fp16, y = var_47644_cast_fp16)[name = tensor("obj_119_cast_fp16")]; tensor inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = obj_119_cast_fp16)[name = tensor("inputs_119_cast_fp16")]; tensor out_119_axes_0 = const()[name = tensor("out_119_axes_0"), val = tensor([1])]; tensor var_47655_to_fp16 = const()[name = tensor("op_47655_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_119_cast_fp16 = layer_norm(axes = out_119_axes_0, epsilon = var_47655_to_fp16, x = inputs_119_cast_fp16)[name = tensor("out_119_cast_fp16")]; tensor input_235_gamma_0_to_fp16 = const()[name = tensor("input_235_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388607168)))]; tensor input_235_beta_0_to_fp16 = const()[name = tensor("input_235_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388609792)))]; tensor input_235_epsilon_0_to_fp16 = const()[name = tensor("input_235_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_235_cast_fp16 = batch_norm(beta = input_235_beta_0_to_fp16, epsilon = input_235_epsilon_0_to_fp16, gamma = input_235_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_119_cast_fp16)[name = tensor("input_235_cast_fp16")]; tensor var_47673_pad_type_0 = const()[name = tensor("op_47673_pad_type_0"), val = tensor("valid")]; tensor var_47673_strides_0 = const()[name = tensor("op_47673_strides_0"), val = tensor([1, 1])]; tensor var_47673_pad_0 = const()[name = tensor("op_47673_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47673_dilations_0 = const()[name = tensor("op_47673_dilations_0"), val = tensor([1, 1])]; tensor var_47673_groups_0 = const()[name = tensor("op_47673_groups_0"), val = tensor(1)]; tensor layers_29_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(388612416))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391889280))), name = tensor("layers_29_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_29_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391889408)))]; tensor var_47673_cast_fp16 = conv(bias = layers_29_fc1_inlier_module_bias_to_fp16, dilations = var_47673_dilations_0, groups = var_47673_groups_0, pad = var_47673_pad_0, pad_type = var_47673_pad_type_0, strides = var_47673_strides_0, weight = layers_29_fc1_inlier_module_weight_to_fp16_palettized, x = input_235_cast_fp16)[name = tensor("op_47673_cast_fp16")]; tensor var_47679_pad_type_0 = const()[name = tensor("op_47679_pad_type_0"), val = tensor("valid")]; tensor var_47679_strides_0 = const()[name = tensor("op_47679_strides_0"), val = tensor([1, 1])]; tensor var_47679_pad_0 = const()[name = tensor("op_47679_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47679_dilations_0 = const()[name = tensor("op_47679_dilations_0"), val = tensor([1, 1])]; tensor var_47679_groups_0 = const()[name = tensor("op_47679_groups_0"), val = tensor(1)]; tensor layers_29_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391957888))), name = tensor("layers_29_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(391899712))), shape = tensor([5120, 1280, 1, 1])]; tensor var_47679_cast_fp16 = conv(dilations = var_47679_dilations_0, groups = var_47679_groups_0, pad = var_47679_pad_0, pad_type = var_47679_pad_type_0, strides = var_47679_strides_0, weight = layers_29_fc1_outlier_module_weight_to_fp16_sparsified, x = input_235_cast_fp16)[name = tensor("op_47679_cast_fp16")]; tensor input_237_cast_fp16 = add(x = var_47673_cast_fp16, y = var_47679_cast_fp16)[name = tensor("input_237_cast_fp16")]; tensor input_239_mode_0 = const()[name = tensor("input_239_mode_0"), val = tensor("EXACT")]; tensor input_239_cast_fp16 = gelu(mode = input_239_mode_0, x = input_237_cast_fp16)[name = tensor("input_239_cast_fp16")]; tensor var_47690_pad_type_0 = const()[name = tensor("op_47690_pad_type_0"), val = tensor("valid")]; tensor var_47690_strides_0 = const()[name = tensor("op_47690_strides_0"), val = tensor([1, 1])]; tensor var_47690_pad_0 = const()[name = tensor("op_47690_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47690_dilations_0 = const()[name = tensor("op_47690_dilations_0"), val = tensor([1, 1])]; tensor var_47690_groups_0 = const()[name = tensor("op_47690_groups_0"), val = tensor(1)]; tensor layers_29_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(392777152))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396054016))), name = tensor("layers_29_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_29_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_29_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396054144)))]; tensor var_47690_cast_fp16 = conv(bias = layers_29_fc2_inlier_module_bias_to_fp16, dilations = var_47690_dilations_0, groups = var_47690_groups_0, pad = var_47690_pad_0, pad_type = var_47690_pad_type_0, strides = var_47690_strides_0, weight = layers_29_fc2_inlier_module_weight_to_fp16_palettized, x = input_239_cast_fp16)[name = tensor("op_47690_cast_fp16")]; tensor var_47696_pad_type_0 = const()[name = tensor("op_47696_pad_type_0"), val = tensor("valid")]; tensor var_47696_strides_0 = const()[name = tensor("op_47696_strides_0"), val = tensor([1, 1])]; tensor var_47696_pad_0 = const()[name = tensor("op_47696_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47696_dilations_0 = const()[name = tensor("op_47696_dilations_0"), val = tensor([1, 1])]; tensor var_47696_groups_0 = const()[name = tensor("op_47696_groups_0"), val = tensor(1)]; tensor layers_29_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396118272))), name = tensor("layers_29_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396056768))), shape = tensor([1280, 5120, 1, 1])]; tensor var_47696_cast_fp16 = conv(dilations = var_47696_dilations_0, groups = var_47696_groups_0, pad = var_47696_pad_0, pad_type = var_47696_pad_type_0, strides = var_47696_strides_0, weight = layers_29_fc2_outlier_module_weight_to_fp16_sparsified, x = input_239_cast_fp16)[name = tensor("op_47696_cast_fp16")]; tensor hidden_states_63_cast_fp16 = add(x = var_47690_cast_fp16, y = var_47696_cast_fp16)[name = tensor("hidden_states_63_cast_fp16")]; tensor inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = hidden_states_63_cast_fp16)[name = tensor("inputs_121_cast_fp16")]; tensor var_47702 = const()[name = tensor("op_47702"), val = tensor(3)]; tensor var_47727 = const()[name = tensor("op_47727"), val = tensor(1)]; tensor out_121_axes_0 = const()[name = tensor("out_121_axes_0"), val = tensor([1])]; tensor var_47744_to_fp16 = const()[name = tensor("op_47744_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_121_cast_fp16 = layer_norm(axes = out_121_axes_0, epsilon = var_47744_to_fp16, x = inputs_121_cast_fp16)[name = tensor("out_121_cast_fp16")]; tensor obj_121_gamma_0_to_fp16 = const()[name = tensor("obj_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396937536)))]; tensor obj_121_beta_0_to_fp16 = const()[name = tensor("obj_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396940160)))]; tensor obj_121_epsilon_0_to_fp16 = const()[name = tensor("obj_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_121_cast_fp16)[name = tensor("obj_121_cast_fp16")]; tensor var_47766_pad_type_0 = const()[name = tensor("op_47766_pad_type_0"), val = tensor("valid")]; tensor var_47766_strides_0 = const()[name = tensor("op_47766_strides_0"), val = tensor([1, 1])]; tensor var_47766_pad_0 = const()[name = tensor("op_47766_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47766_dilations_0 = const()[name = tensor("op_47766_dilations_0"), val = tensor([1, 1])]; tensor var_47766_groups_0 = const()[name = tensor("op_47766_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(396942784))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397762048))), name = tensor("layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_30_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397762176)))]; tensor var_47766_cast_fp16 = conv(bias = layers_30_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_47766_dilations_0, groups = var_47766_groups_0, pad = var_47766_pad_0, pad_type = var_47766_pad_type_0, strides = var_47766_strides_0, weight = layers_30_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_47766_cast_fp16")]; tensor var_47772_pad_type_0 = const()[name = tensor("op_47772_pad_type_0"), val = tensor("valid")]; tensor var_47772_strides_0 = const()[name = tensor("op_47772_strides_0"), val = tensor([1, 1])]; tensor var_47772_pad_0 = const()[name = tensor("op_47772_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47772_dilations_0 = const()[name = tensor("op_47772_dilations_0"), val = tensor([1, 1])]; tensor var_47772_groups_0 = const()[name = tensor("op_47772_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397784960))), name = tensor("layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397764800))), shape = tensor([1280, 1280, 1, 1])]; tensor var_47772_cast_fp16 = conv(dilations = var_47772_dilations_0, groups = var_47772_groups_0, pad = var_47772_pad_0, pad_type = var_47772_pad_type_0, strides = var_47772_strides_0, weight = layers_30_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_47772_cast_fp16")]; tensor query_61_cast_fp16 = add(x = var_47766_cast_fp16, y = var_47772_cast_fp16)[name = tensor("query_61_cast_fp16")]; tensor var_47781_pad_type_0 = const()[name = tensor("op_47781_pad_type_0"), val = tensor("valid")]; tensor var_47781_strides_0 = const()[name = tensor("op_47781_strides_0"), val = tensor([1, 1])]; tensor var_47781_pad_0 = const()[name = tensor("op_47781_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47781_dilations_0 = const()[name = tensor("op_47781_dilations_0"), val = tensor([1, 1])]; tensor var_47781_groups_0 = const()[name = tensor("op_47781_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(397989824))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398809088))), name = tensor("layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_47781_cast_fp16 = conv(dilations = var_47781_dilations_0, groups = var_47781_groups_0, pad = var_47781_pad_0, pad_type = var_47781_pad_type_0, strides = var_47781_strides_0, weight = layers_30_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_47781_cast_fp16")]; tensor var_47787_pad_type_0 = const()[name = tensor("op_47787_pad_type_0"), val = tensor("valid")]; tensor var_47787_strides_0 = const()[name = tensor("op_47787_strides_0"), val = tensor([1, 1])]; tensor var_47787_pad_0 = const()[name = tensor("op_47787_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47787_dilations_0 = const()[name = tensor("op_47787_dilations_0"), val = tensor([1, 1])]; tensor var_47787_groups_0 = const()[name = tensor("op_47787_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398830400))), name = tensor("layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(398809216))), shape = tensor([1280, 1280, 1, 1])]; tensor var_47787_cast_fp16 = conv(dilations = var_47787_dilations_0, groups = var_47787_groups_0, pad = var_47787_pad_0, pad_type = var_47787_pad_type_0, strides = var_47787_strides_0, weight = layers_30_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_47787_cast_fp16")]; tensor key_61_cast_fp16 = add(x = var_47781_cast_fp16, y = var_47787_cast_fp16)[name = tensor("key_61_cast_fp16")]; tensor var_47797_pad_type_0 = const()[name = tensor("op_47797_pad_type_0"), val = tensor("valid")]; tensor var_47797_strides_0 = const()[name = tensor("op_47797_strides_0"), val = tensor([1, 1])]; tensor var_47797_pad_0 = const()[name = tensor("op_47797_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47797_dilations_0 = const()[name = tensor("op_47797_dilations_0"), val = tensor([1, 1])]; tensor var_47797_groups_0 = const()[name = tensor("op_47797_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399035264))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399854528))), name = tensor("layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_30_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399854656)))]; tensor var_47797_cast_fp16 = conv(bias = layers_30_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_47797_dilations_0, groups = var_47797_groups_0, pad = var_47797_pad_0, pad_type = var_47797_pad_type_0, strides = var_47797_strides_0, weight = layers_30_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_121_cast_fp16)[name = tensor("op_47797_cast_fp16")]; tensor var_47803_pad_type_0 = const()[name = tensor("op_47803_pad_type_0"), val = tensor("valid")]; tensor var_47803_strides_0 = const()[name = tensor("op_47803_strides_0"), val = tensor([1, 1])]; tensor var_47803_pad_0 = const()[name = tensor("op_47803_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_47803_dilations_0 = const()[name = tensor("op_47803_dilations_0"), val = tensor([1, 1])]; tensor var_47803_groups_0 = const()[name = tensor("op_47803_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399869696))), name = tensor("layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(399857280))), shape = tensor([1280, 1280, 1, 1])]; tensor var_47803_cast_fp16 = conv(dilations = var_47803_dilations_0, groups = var_47803_groups_0, pad = var_47803_pad_0, pad_type = var_47803_pad_type_0, strides = var_47803_strides_0, weight = layers_30_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_121_cast_fp16)[name = tensor("op_47803_cast_fp16")]; tensor value_61_cast_fp16 = add(x = var_47797_cast_fp16, y = var_47803_cast_fp16)[name = tensor("value_61_cast_fp16")]; tensor var_47809_begin_0 = const()[name = tensor("op_47809_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_47809_end_0 = const()[name = tensor("op_47809_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_47809_end_mask_0 = const()[name = tensor("op_47809_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47809_cast_fp16 = slice_by_index(begin = var_47809_begin_0, end = var_47809_end_0, end_mask = var_47809_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47809_cast_fp16")]; tensor var_47813_begin_0 = const()[name = tensor("op_47813_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_47813_end_0 = const()[name = tensor("op_47813_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_47813_end_mask_0 = const()[name = tensor("op_47813_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47813_cast_fp16 = slice_by_index(begin = var_47813_begin_0, end = var_47813_end_0, end_mask = var_47813_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47813_cast_fp16")]; tensor var_47817_begin_0 = const()[name = tensor("op_47817_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_47817_end_0 = const()[name = tensor("op_47817_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_47817_end_mask_0 = const()[name = tensor("op_47817_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47817_cast_fp16 = slice_by_index(begin = var_47817_begin_0, end = var_47817_end_0, end_mask = var_47817_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47817_cast_fp16")]; tensor var_47821_begin_0 = const()[name = tensor("op_47821_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_47821_end_0 = const()[name = tensor("op_47821_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_47821_end_mask_0 = const()[name = tensor("op_47821_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47821_cast_fp16 = slice_by_index(begin = var_47821_begin_0, end = var_47821_end_0, end_mask = var_47821_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47821_cast_fp16")]; tensor var_47825_begin_0 = const()[name = tensor("op_47825_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_47825_end_0 = const()[name = tensor("op_47825_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_47825_end_mask_0 = const()[name = tensor("op_47825_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47825_cast_fp16 = slice_by_index(begin = var_47825_begin_0, end = var_47825_end_0, end_mask = var_47825_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47825_cast_fp16")]; tensor var_47829_begin_0 = const()[name = tensor("op_47829_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_47829_end_0 = const()[name = tensor("op_47829_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_47829_end_mask_0 = const()[name = tensor("op_47829_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47829_cast_fp16 = slice_by_index(begin = var_47829_begin_0, end = var_47829_end_0, end_mask = var_47829_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47829_cast_fp16")]; tensor var_47833_begin_0 = const()[name = tensor("op_47833_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_47833_end_0 = const()[name = tensor("op_47833_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_47833_end_mask_0 = const()[name = tensor("op_47833_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47833_cast_fp16 = slice_by_index(begin = var_47833_begin_0, end = var_47833_end_0, end_mask = var_47833_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47833_cast_fp16")]; tensor var_47837_begin_0 = const()[name = tensor("op_47837_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_47837_end_0 = const()[name = tensor("op_47837_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_47837_end_mask_0 = const()[name = tensor("op_47837_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47837_cast_fp16 = slice_by_index(begin = var_47837_begin_0, end = var_47837_end_0, end_mask = var_47837_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47837_cast_fp16")]; tensor var_47841_begin_0 = const()[name = tensor("op_47841_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_47841_end_0 = const()[name = tensor("op_47841_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_47841_end_mask_0 = const()[name = tensor("op_47841_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47841_cast_fp16 = slice_by_index(begin = var_47841_begin_0, end = var_47841_end_0, end_mask = var_47841_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47841_cast_fp16")]; tensor var_47845_begin_0 = const()[name = tensor("op_47845_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_47845_end_0 = const()[name = tensor("op_47845_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_47845_end_mask_0 = const()[name = tensor("op_47845_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47845_cast_fp16 = slice_by_index(begin = var_47845_begin_0, end = var_47845_end_0, end_mask = var_47845_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47845_cast_fp16")]; tensor var_47849_begin_0 = const()[name = tensor("op_47849_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_47849_end_0 = const()[name = tensor("op_47849_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_47849_end_mask_0 = const()[name = tensor("op_47849_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47849_cast_fp16 = slice_by_index(begin = var_47849_begin_0, end = var_47849_end_0, end_mask = var_47849_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47849_cast_fp16")]; tensor var_47853_begin_0 = const()[name = tensor("op_47853_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_47853_end_0 = const()[name = tensor("op_47853_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_47853_end_mask_0 = const()[name = tensor("op_47853_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47853_cast_fp16 = slice_by_index(begin = var_47853_begin_0, end = var_47853_end_0, end_mask = var_47853_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47853_cast_fp16")]; tensor var_47857_begin_0 = const()[name = tensor("op_47857_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_47857_end_0 = const()[name = tensor("op_47857_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_47857_end_mask_0 = const()[name = tensor("op_47857_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47857_cast_fp16 = slice_by_index(begin = var_47857_begin_0, end = var_47857_end_0, end_mask = var_47857_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47857_cast_fp16")]; tensor var_47861_begin_0 = const()[name = tensor("op_47861_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_47861_end_0 = const()[name = tensor("op_47861_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_47861_end_mask_0 = const()[name = tensor("op_47861_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47861_cast_fp16 = slice_by_index(begin = var_47861_begin_0, end = var_47861_end_0, end_mask = var_47861_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47861_cast_fp16")]; tensor var_47865_begin_0 = const()[name = tensor("op_47865_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_47865_end_0 = const()[name = tensor("op_47865_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_47865_end_mask_0 = const()[name = tensor("op_47865_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47865_cast_fp16 = slice_by_index(begin = var_47865_begin_0, end = var_47865_end_0, end_mask = var_47865_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47865_cast_fp16")]; tensor var_47869_begin_0 = const()[name = tensor("op_47869_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_47869_end_0 = const()[name = tensor("op_47869_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_47869_end_mask_0 = const()[name = tensor("op_47869_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47869_cast_fp16 = slice_by_index(begin = var_47869_begin_0, end = var_47869_end_0, end_mask = var_47869_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47869_cast_fp16")]; tensor var_47873_begin_0 = const()[name = tensor("op_47873_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_47873_end_0 = const()[name = tensor("op_47873_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_47873_end_mask_0 = const()[name = tensor("op_47873_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47873_cast_fp16 = slice_by_index(begin = var_47873_begin_0, end = var_47873_end_0, end_mask = var_47873_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47873_cast_fp16")]; tensor var_47877_begin_0 = const()[name = tensor("op_47877_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_47877_end_0 = const()[name = tensor("op_47877_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_47877_end_mask_0 = const()[name = tensor("op_47877_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47877_cast_fp16 = slice_by_index(begin = var_47877_begin_0, end = var_47877_end_0, end_mask = var_47877_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47877_cast_fp16")]; tensor var_47881_begin_0 = const()[name = tensor("op_47881_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_47881_end_0 = const()[name = tensor("op_47881_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_47881_end_mask_0 = const()[name = tensor("op_47881_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47881_cast_fp16 = slice_by_index(begin = var_47881_begin_0, end = var_47881_end_0, end_mask = var_47881_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47881_cast_fp16")]; tensor var_47885_begin_0 = const()[name = tensor("op_47885_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_47885_end_0 = const()[name = tensor("op_47885_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_47885_end_mask_0 = const()[name = tensor("op_47885_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_47885_cast_fp16 = slice_by_index(begin = var_47885_begin_0, end = var_47885_end_0, end_mask = var_47885_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_47885_cast_fp16")]; tensor var_47894_begin_0 = const()[name = tensor("op_47894_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_47894_end_0 = const()[name = tensor("op_47894_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_47894_end_mask_0 = const()[name = tensor("op_47894_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47894_cast_fp16 = slice_by_index(begin = var_47894_begin_0, end = var_47894_end_0, end_mask = var_47894_end_mask_0, x = var_47809_cast_fp16)[name = tensor("op_47894_cast_fp16")]; tensor var_47901_begin_0 = const()[name = tensor("op_47901_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_47901_end_0 = const()[name = tensor("op_47901_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_47901_end_mask_0 = const()[name = tensor("op_47901_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47901_cast_fp16 = slice_by_index(begin = var_47901_begin_0, end = var_47901_end_0, end_mask = var_47901_end_mask_0, x = var_47809_cast_fp16)[name = tensor("op_47901_cast_fp16")]; tensor var_47908_begin_0 = const()[name = tensor("op_47908_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_47908_end_0 = const()[name = tensor("op_47908_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_47908_end_mask_0 = const()[name = tensor("op_47908_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47908_cast_fp16 = slice_by_index(begin = var_47908_begin_0, end = var_47908_end_0, end_mask = var_47908_end_mask_0, x = var_47809_cast_fp16)[name = tensor("op_47908_cast_fp16")]; tensor var_47915_begin_0 = const()[name = tensor("op_47915_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_47915_end_0 = const()[name = tensor("op_47915_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_47915_end_mask_0 = const()[name = tensor("op_47915_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47915_cast_fp16 = slice_by_index(begin = var_47915_begin_0, end = var_47915_end_0, end_mask = var_47915_end_mask_0, x = var_47809_cast_fp16)[name = tensor("op_47915_cast_fp16")]; tensor var_47922_begin_0 = const()[name = tensor("op_47922_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_47922_end_0 = const()[name = tensor("op_47922_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_47922_end_mask_0 = const()[name = tensor("op_47922_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47922_cast_fp16 = slice_by_index(begin = var_47922_begin_0, end = var_47922_end_0, end_mask = var_47922_end_mask_0, x = var_47813_cast_fp16)[name = tensor("op_47922_cast_fp16")]; tensor var_47929_begin_0 = const()[name = tensor("op_47929_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_47929_end_0 = const()[name = tensor("op_47929_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_47929_end_mask_0 = const()[name = tensor("op_47929_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47929_cast_fp16 = slice_by_index(begin = var_47929_begin_0, end = var_47929_end_0, end_mask = var_47929_end_mask_0, x = var_47813_cast_fp16)[name = tensor("op_47929_cast_fp16")]; tensor var_47936_begin_0 = const()[name = tensor("op_47936_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_47936_end_0 = const()[name = tensor("op_47936_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_47936_end_mask_0 = const()[name = tensor("op_47936_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47936_cast_fp16 = slice_by_index(begin = var_47936_begin_0, end = var_47936_end_0, end_mask = var_47936_end_mask_0, x = var_47813_cast_fp16)[name = tensor("op_47936_cast_fp16")]; tensor var_47943_begin_0 = const()[name = tensor("op_47943_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_47943_end_0 = const()[name = tensor("op_47943_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_47943_end_mask_0 = const()[name = tensor("op_47943_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47943_cast_fp16 = slice_by_index(begin = var_47943_begin_0, end = var_47943_end_0, end_mask = var_47943_end_mask_0, x = var_47813_cast_fp16)[name = tensor("op_47943_cast_fp16")]; tensor var_47950_begin_0 = const()[name = tensor("op_47950_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_47950_end_0 = const()[name = tensor("op_47950_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_47950_end_mask_0 = const()[name = tensor("op_47950_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47950_cast_fp16 = slice_by_index(begin = var_47950_begin_0, end = var_47950_end_0, end_mask = var_47950_end_mask_0, x = var_47817_cast_fp16)[name = tensor("op_47950_cast_fp16")]; tensor var_47957_begin_0 = const()[name = tensor("op_47957_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_47957_end_0 = const()[name = tensor("op_47957_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_47957_end_mask_0 = const()[name = tensor("op_47957_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47957_cast_fp16 = slice_by_index(begin = var_47957_begin_0, end = var_47957_end_0, end_mask = var_47957_end_mask_0, x = var_47817_cast_fp16)[name = tensor("op_47957_cast_fp16")]; tensor var_47964_begin_0 = const()[name = tensor("op_47964_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_47964_end_0 = const()[name = tensor("op_47964_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_47964_end_mask_0 = const()[name = tensor("op_47964_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47964_cast_fp16 = slice_by_index(begin = var_47964_begin_0, end = var_47964_end_0, end_mask = var_47964_end_mask_0, x = var_47817_cast_fp16)[name = tensor("op_47964_cast_fp16")]; tensor var_47971_begin_0 = const()[name = tensor("op_47971_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_47971_end_0 = const()[name = tensor("op_47971_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_47971_end_mask_0 = const()[name = tensor("op_47971_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47971_cast_fp16 = slice_by_index(begin = var_47971_begin_0, end = var_47971_end_0, end_mask = var_47971_end_mask_0, x = var_47817_cast_fp16)[name = tensor("op_47971_cast_fp16")]; tensor var_47978_begin_0 = const()[name = tensor("op_47978_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_47978_end_0 = const()[name = tensor("op_47978_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_47978_end_mask_0 = const()[name = tensor("op_47978_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47978_cast_fp16 = slice_by_index(begin = var_47978_begin_0, end = var_47978_end_0, end_mask = var_47978_end_mask_0, x = var_47821_cast_fp16)[name = tensor("op_47978_cast_fp16")]; tensor var_47985_begin_0 = const()[name = tensor("op_47985_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_47985_end_0 = const()[name = tensor("op_47985_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_47985_end_mask_0 = const()[name = tensor("op_47985_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47985_cast_fp16 = slice_by_index(begin = var_47985_begin_0, end = var_47985_end_0, end_mask = var_47985_end_mask_0, x = var_47821_cast_fp16)[name = tensor("op_47985_cast_fp16")]; tensor var_47992_begin_0 = const()[name = tensor("op_47992_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_47992_end_0 = const()[name = tensor("op_47992_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_47992_end_mask_0 = const()[name = tensor("op_47992_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47992_cast_fp16 = slice_by_index(begin = var_47992_begin_0, end = var_47992_end_0, end_mask = var_47992_end_mask_0, x = var_47821_cast_fp16)[name = tensor("op_47992_cast_fp16")]; tensor var_47999_begin_0 = const()[name = tensor("op_47999_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_47999_end_0 = const()[name = tensor("op_47999_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_47999_end_mask_0 = const()[name = tensor("op_47999_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_47999_cast_fp16 = slice_by_index(begin = var_47999_begin_0, end = var_47999_end_0, end_mask = var_47999_end_mask_0, x = var_47821_cast_fp16)[name = tensor("op_47999_cast_fp16")]; tensor var_48006_begin_0 = const()[name = tensor("op_48006_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48006_end_0 = const()[name = tensor("op_48006_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48006_end_mask_0 = const()[name = tensor("op_48006_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48006_cast_fp16 = slice_by_index(begin = var_48006_begin_0, end = var_48006_end_0, end_mask = var_48006_end_mask_0, x = var_47825_cast_fp16)[name = tensor("op_48006_cast_fp16")]; tensor var_48013_begin_0 = const()[name = tensor("op_48013_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48013_end_0 = const()[name = tensor("op_48013_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48013_end_mask_0 = const()[name = tensor("op_48013_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48013_cast_fp16 = slice_by_index(begin = var_48013_begin_0, end = var_48013_end_0, end_mask = var_48013_end_mask_0, x = var_47825_cast_fp16)[name = tensor("op_48013_cast_fp16")]; tensor var_48020_begin_0 = const()[name = tensor("op_48020_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48020_end_0 = const()[name = tensor("op_48020_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48020_end_mask_0 = const()[name = tensor("op_48020_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48020_cast_fp16 = slice_by_index(begin = var_48020_begin_0, end = var_48020_end_0, end_mask = var_48020_end_mask_0, x = var_47825_cast_fp16)[name = tensor("op_48020_cast_fp16")]; tensor var_48027_begin_0 = const()[name = tensor("op_48027_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48027_end_0 = const()[name = tensor("op_48027_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48027_end_mask_0 = const()[name = tensor("op_48027_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48027_cast_fp16 = slice_by_index(begin = var_48027_begin_0, end = var_48027_end_0, end_mask = var_48027_end_mask_0, x = var_47825_cast_fp16)[name = tensor("op_48027_cast_fp16")]; tensor var_48034_begin_0 = const()[name = tensor("op_48034_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48034_end_0 = const()[name = tensor("op_48034_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48034_end_mask_0 = const()[name = tensor("op_48034_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48034_cast_fp16 = slice_by_index(begin = var_48034_begin_0, end = var_48034_end_0, end_mask = var_48034_end_mask_0, x = var_47829_cast_fp16)[name = tensor("op_48034_cast_fp16")]; tensor var_48041_begin_0 = const()[name = tensor("op_48041_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48041_end_0 = const()[name = tensor("op_48041_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48041_end_mask_0 = const()[name = tensor("op_48041_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48041_cast_fp16 = slice_by_index(begin = var_48041_begin_0, end = var_48041_end_0, end_mask = var_48041_end_mask_0, x = var_47829_cast_fp16)[name = tensor("op_48041_cast_fp16")]; tensor var_48048_begin_0 = const()[name = tensor("op_48048_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48048_end_0 = const()[name = tensor("op_48048_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48048_end_mask_0 = const()[name = tensor("op_48048_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48048_cast_fp16 = slice_by_index(begin = var_48048_begin_0, end = var_48048_end_0, end_mask = var_48048_end_mask_0, x = var_47829_cast_fp16)[name = tensor("op_48048_cast_fp16")]; tensor var_48055_begin_0 = const()[name = tensor("op_48055_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48055_end_0 = const()[name = tensor("op_48055_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48055_end_mask_0 = const()[name = tensor("op_48055_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48055_cast_fp16 = slice_by_index(begin = var_48055_begin_0, end = var_48055_end_0, end_mask = var_48055_end_mask_0, x = var_47829_cast_fp16)[name = tensor("op_48055_cast_fp16")]; tensor var_48062_begin_0 = const()[name = tensor("op_48062_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48062_end_0 = const()[name = tensor("op_48062_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48062_end_mask_0 = const()[name = tensor("op_48062_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48062_cast_fp16 = slice_by_index(begin = var_48062_begin_0, end = var_48062_end_0, end_mask = var_48062_end_mask_0, x = var_47833_cast_fp16)[name = tensor("op_48062_cast_fp16")]; tensor var_48069_begin_0 = const()[name = tensor("op_48069_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48069_end_0 = const()[name = tensor("op_48069_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48069_end_mask_0 = const()[name = tensor("op_48069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48069_cast_fp16 = slice_by_index(begin = var_48069_begin_0, end = var_48069_end_0, end_mask = var_48069_end_mask_0, x = var_47833_cast_fp16)[name = tensor("op_48069_cast_fp16")]; tensor var_48076_begin_0 = const()[name = tensor("op_48076_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48076_end_0 = const()[name = tensor("op_48076_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48076_end_mask_0 = const()[name = tensor("op_48076_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48076_cast_fp16 = slice_by_index(begin = var_48076_begin_0, end = var_48076_end_0, end_mask = var_48076_end_mask_0, x = var_47833_cast_fp16)[name = tensor("op_48076_cast_fp16")]; tensor var_48083_begin_0 = const()[name = tensor("op_48083_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48083_end_0 = const()[name = tensor("op_48083_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48083_end_mask_0 = const()[name = tensor("op_48083_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48083_cast_fp16 = slice_by_index(begin = var_48083_begin_0, end = var_48083_end_0, end_mask = var_48083_end_mask_0, x = var_47833_cast_fp16)[name = tensor("op_48083_cast_fp16")]; tensor var_48090_begin_0 = const()[name = tensor("op_48090_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48090_end_0 = const()[name = tensor("op_48090_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48090_end_mask_0 = const()[name = tensor("op_48090_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48090_cast_fp16 = slice_by_index(begin = var_48090_begin_0, end = var_48090_end_0, end_mask = var_48090_end_mask_0, x = var_47837_cast_fp16)[name = tensor("op_48090_cast_fp16")]; tensor var_48097_begin_0 = const()[name = tensor("op_48097_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48097_end_0 = const()[name = tensor("op_48097_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48097_end_mask_0 = const()[name = tensor("op_48097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48097_cast_fp16 = slice_by_index(begin = var_48097_begin_0, end = var_48097_end_0, end_mask = var_48097_end_mask_0, x = var_47837_cast_fp16)[name = tensor("op_48097_cast_fp16")]; tensor var_48104_begin_0 = const()[name = tensor("op_48104_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48104_end_0 = const()[name = tensor("op_48104_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48104_end_mask_0 = const()[name = tensor("op_48104_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48104_cast_fp16 = slice_by_index(begin = var_48104_begin_0, end = var_48104_end_0, end_mask = var_48104_end_mask_0, x = var_47837_cast_fp16)[name = tensor("op_48104_cast_fp16")]; tensor var_48111_begin_0 = const()[name = tensor("op_48111_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48111_end_0 = const()[name = tensor("op_48111_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48111_end_mask_0 = const()[name = tensor("op_48111_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48111_cast_fp16 = slice_by_index(begin = var_48111_begin_0, end = var_48111_end_0, end_mask = var_48111_end_mask_0, x = var_47837_cast_fp16)[name = tensor("op_48111_cast_fp16")]; tensor var_48118_begin_0 = const()[name = tensor("op_48118_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48118_end_0 = const()[name = tensor("op_48118_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48118_end_mask_0 = const()[name = tensor("op_48118_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48118_cast_fp16 = slice_by_index(begin = var_48118_begin_0, end = var_48118_end_0, end_mask = var_48118_end_mask_0, x = var_47841_cast_fp16)[name = tensor("op_48118_cast_fp16")]; tensor var_48125_begin_0 = const()[name = tensor("op_48125_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48125_end_0 = const()[name = tensor("op_48125_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48125_end_mask_0 = const()[name = tensor("op_48125_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48125_cast_fp16 = slice_by_index(begin = var_48125_begin_0, end = var_48125_end_0, end_mask = var_48125_end_mask_0, x = var_47841_cast_fp16)[name = tensor("op_48125_cast_fp16")]; tensor var_48132_begin_0 = const()[name = tensor("op_48132_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48132_end_0 = const()[name = tensor("op_48132_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48132_end_mask_0 = const()[name = tensor("op_48132_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48132_cast_fp16 = slice_by_index(begin = var_48132_begin_0, end = var_48132_end_0, end_mask = var_48132_end_mask_0, x = var_47841_cast_fp16)[name = tensor("op_48132_cast_fp16")]; tensor var_48139_begin_0 = const()[name = tensor("op_48139_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48139_end_0 = const()[name = tensor("op_48139_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48139_end_mask_0 = const()[name = tensor("op_48139_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48139_cast_fp16 = slice_by_index(begin = var_48139_begin_0, end = var_48139_end_0, end_mask = var_48139_end_mask_0, x = var_47841_cast_fp16)[name = tensor("op_48139_cast_fp16")]; tensor var_48146_begin_0 = const()[name = tensor("op_48146_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48146_end_0 = const()[name = tensor("op_48146_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48146_end_mask_0 = const()[name = tensor("op_48146_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48146_cast_fp16 = slice_by_index(begin = var_48146_begin_0, end = var_48146_end_0, end_mask = var_48146_end_mask_0, x = var_47845_cast_fp16)[name = tensor("op_48146_cast_fp16")]; tensor var_48153_begin_0 = const()[name = tensor("op_48153_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48153_end_0 = const()[name = tensor("op_48153_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48153_end_mask_0 = const()[name = tensor("op_48153_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48153_cast_fp16 = slice_by_index(begin = var_48153_begin_0, end = var_48153_end_0, end_mask = var_48153_end_mask_0, x = var_47845_cast_fp16)[name = tensor("op_48153_cast_fp16")]; tensor var_48160_begin_0 = const()[name = tensor("op_48160_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48160_end_0 = const()[name = tensor("op_48160_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48160_end_mask_0 = const()[name = tensor("op_48160_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48160_cast_fp16 = slice_by_index(begin = var_48160_begin_0, end = var_48160_end_0, end_mask = var_48160_end_mask_0, x = var_47845_cast_fp16)[name = tensor("op_48160_cast_fp16")]; tensor var_48167_begin_0 = const()[name = tensor("op_48167_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48167_end_0 = const()[name = tensor("op_48167_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48167_end_mask_0 = const()[name = tensor("op_48167_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48167_cast_fp16 = slice_by_index(begin = var_48167_begin_0, end = var_48167_end_0, end_mask = var_48167_end_mask_0, x = var_47845_cast_fp16)[name = tensor("op_48167_cast_fp16")]; tensor var_48174_begin_0 = const()[name = tensor("op_48174_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48174_end_0 = const()[name = tensor("op_48174_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48174_end_mask_0 = const()[name = tensor("op_48174_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48174_cast_fp16 = slice_by_index(begin = var_48174_begin_0, end = var_48174_end_0, end_mask = var_48174_end_mask_0, x = var_47849_cast_fp16)[name = tensor("op_48174_cast_fp16")]; tensor var_48181_begin_0 = const()[name = tensor("op_48181_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48181_end_0 = const()[name = tensor("op_48181_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48181_end_mask_0 = const()[name = tensor("op_48181_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48181_cast_fp16 = slice_by_index(begin = var_48181_begin_0, end = var_48181_end_0, end_mask = var_48181_end_mask_0, x = var_47849_cast_fp16)[name = tensor("op_48181_cast_fp16")]; tensor var_48188_begin_0 = const()[name = tensor("op_48188_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48188_end_0 = const()[name = tensor("op_48188_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48188_end_mask_0 = const()[name = tensor("op_48188_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48188_cast_fp16 = slice_by_index(begin = var_48188_begin_0, end = var_48188_end_0, end_mask = var_48188_end_mask_0, x = var_47849_cast_fp16)[name = tensor("op_48188_cast_fp16")]; tensor var_48195_begin_0 = const()[name = tensor("op_48195_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48195_end_0 = const()[name = tensor("op_48195_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48195_end_mask_0 = const()[name = tensor("op_48195_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48195_cast_fp16 = slice_by_index(begin = var_48195_begin_0, end = var_48195_end_0, end_mask = var_48195_end_mask_0, x = var_47849_cast_fp16)[name = tensor("op_48195_cast_fp16")]; tensor var_48202_begin_0 = const()[name = tensor("op_48202_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48202_end_0 = const()[name = tensor("op_48202_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48202_end_mask_0 = const()[name = tensor("op_48202_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48202_cast_fp16 = slice_by_index(begin = var_48202_begin_0, end = var_48202_end_0, end_mask = var_48202_end_mask_0, x = var_47853_cast_fp16)[name = tensor("op_48202_cast_fp16")]; tensor var_48209_begin_0 = const()[name = tensor("op_48209_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48209_end_0 = const()[name = tensor("op_48209_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48209_end_mask_0 = const()[name = tensor("op_48209_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48209_cast_fp16 = slice_by_index(begin = var_48209_begin_0, end = var_48209_end_0, end_mask = var_48209_end_mask_0, x = var_47853_cast_fp16)[name = tensor("op_48209_cast_fp16")]; tensor var_48216_begin_0 = const()[name = tensor("op_48216_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48216_end_0 = const()[name = tensor("op_48216_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48216_end_mask_0 = const()[name = tensor("op_48216_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48216_cast_fp16 = slice_by_index(begin = var_48216_begin_0, end = var_48216_end_0, end_mask = var_48216_end_mask_0, x = var_47853_cast_fp16)[name = tensor("op_48216_cast_fp16")]; tensor var_48223_begin_0 = const()[name = tensor("op_48223_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48223_end_0 = const()[name = tensor("op_48223_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48223_end_mask_0 = const()[name = tensor("op_48223_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48223_cast_fp16 = slice_by_index(begin = var_48223_begin_0, end = var_48223_end_0, end_mask = var_48223_end_mask_0, x = var_47853_cast_fp16)[name = tensor("op_48223_cast_fp16")]; tensor var_48230_begin_0 = const()[name = tensor("op_48230_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48230_end_0 = const()[name = tensor("op_48230_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48230_end_mask_0 = const()[name = tensor("op_48230_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48230_cast_fp16 = slice_by_index(begin = var_48230_begin_0, end = var_48230_end_0, end_mask = var_48230_end_mask_0, x = var_47857_cast_fp16)[name = tensor("op_48230_cast_fp16")]; tensor var_48237_begin_0 = const()[name = tensor("op_48237_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48237_end_0 = const()[name = tensor("op_48237_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48237_end_mask_0 = const()[name = tensor("op_48237_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48237_cast_fp16 = slice_by_index(begin = var_48237_begin_0, end = var_48237_end_0, end_mask = var_48237_end_mask_0, x = var_47857_cast_fp16)[name = tensor("op_48237_cast_fp16")]; tensor var_48244_begin_0 = const()[name = tensor("op_48244_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48244_end_0 = const()[name = tensor("op_48244_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48244_end_mask_0 = const()[name = tensor("op_48244_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48244_cast_fp16 = slice_by_index(begin = var_48244_begin_0, end = var_48244_end_0, end_mask = var_48244_end_mask_0, x = var_47857_cast_fp16)[name = tensor("op_48244_cast_fp16")]; tensor var_48251_begin_0 = const()[name = tensor("op_48251_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48251_end_0 = const()[name = tensor("op_48251_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48251_end_mask_0 = const()[name = tensor("op_48251_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48251_cast_fp16 = slice_by_index(begin = var_48251_begin_0, end = var_48251_end_0, end_mask = var_48251_end_mask_0, x = var_47857_cast_fp16)[name = tensor("op_48251_cast_fp16")]; tensor var_48258_begin_0 = const()[name = tensor("op_48258_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48258_end_0 = const()[name = tensor("op_48258_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48258_end_mask_0 = const()[name = tensor("op_48258_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48258_cast_fp16 = slice_by_index(begin = var_48258_begin_0, end = var_48258_end_0, end_mask = var_48258_end_mask_0, x = var_47861_cast_fp16)[name = tensor("op_48258_cast_fp16")]; tensor var_48265_begin_0 = const()[name = tensor("op_48265_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48265_end_0 = const()[name = tensor("op_48265_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48265_end_mask_0 = const()[name = tensor("op_48265_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48265_cast_fp16 = slice_by_index(begin = var_48265_begin_0, end = var_48265_end_0, end_mask = var_48265_end_mask_0, x = var_47861_cast_fp16)[name = tensor("op_48265_cast_fp16")]; tensor var_48272_begin_0 = const()[name = tensor("op_48272_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48272_end_0 = const()[name = tensor("op_48272_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48272_end_mask_0 = const()[name = tensor("op_48272_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48272_cast_fp16 = slice_by_index(begin = var_48272_begin_0, end = var_48272_end_0, end_mask = var_48272_end_mask_0, x = var_47861_cast_fp16)[name = tensor("op_48272_cast_fp16")]; tensor var_48279_begin_0 = const()[name = tensor("op_48279_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48279_end_0 = const()[name = tensor("op_48279_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48279_end_mask_0 = const()[name = tensor("op_48279_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48279_cast_fp16 = slice_by_index(begin = var_48279_begin_0, end = var_48279_end_0, end_mask = var_48279_end_mask_0, x = var_47861_cast_fp16)[name = tensor("op_48279_cast_fp16")]; tensor var_48286_begin_0 = const()[name = tensor("op_48286_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48286_end_0 = const()[name = tensor("op_48286_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48286_end_mask_0 = const()[name = tensor("op_48286_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48286_cast_fp16 = slice_by_index(begin = var_48286_begin_0, end = var_48286_end_0, end_mask = var_48286_end_mask_0, x = var_47865_cast_fp16)[name = tensor("op_48286_cast_fp16")]; tensor var_48293_begin_0 = const()[name = tensor("op_48293_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48293_end_0 = const()[name = tensor("op_48293_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48293_end_mask_0 = const()[name = tensor("op_48293_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48293_cast_fp16 = slice_by_index(begin = var_48293_begin_0, end = var_48293_end_0, end_mask = var_48293_end_mask_0, x = var_47865_cast_fp16)[name = tensor("op_48293_cast_fp16")]; tensor var_48300_begin_0 = const()[name = tensor("op_48300_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48300_end_0 = const()[name = tensor("op_48300_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48300_end_mask_0 = const()[name = tensor("op_48300_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48300_cast_fp16 = slice_by_index(begin = var_48300_begin_0, end = var_48300_end_0, end_mask = var_48300_end_mask_0, x = var_47865_cast_fp16)[name = tensor("op_48300_cast_fp16")]; tensor var_48307_begin_0 = const()[name = tensor("op_48307_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48307_end_0 = const()[name = tensor("op_48307_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48307_end_mask_0 = const()[name = tensor("op_48307_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48307_cast_fp16 = slice_by_index(begin = var_48307_begin_0, end = var_48307_end_0, end_mask = var_48307_end_mask_0, x = var_47865_cast_fp16)[name = tensor("op_48307_cast_fp16")]; tensor var_48314_begin_0 = const()[name = tensor("op_48314_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48314_end_0 = const()[name = tensor("op_48314_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48314_end_mask_0 = const()[name = tensor("op_48314_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48314_cast_fp16 = slice_by_index(begin = var_48314_begin_0, end = var_48314_end_0, end_mask = var_48314_end_mask_0, x = var_47869_cast_fp16)[name = tensor("op_48314_cast_fp16")]; tensor var_48321_begin_0 = const()[name = tensor("op_48321_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48321_end_0 = const()[name = tensor("op_48321_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48321_end_mask_0 = const()[name = tensor("op_48321_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48321_cast_fp16 = slice_by_index(begin = var_48321_begin_0, end = var_48321_end_0, end_mask = var_48321_end_mask_0, x = var_47869_cast_fp16)[name = tensor("op_48321_cast_fp16")]; tensor var_48328_begin_0 = const()[name = tensor("op_48328_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48328_end_0 = const()[name = tensor("op_48328_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48328_end_mask_0 = const()[name = tensor("op_48328_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48328_cast_fp16 = slice_by_index(begin = var_48328_begin_0, end = var_48328_end_0, end_mask = var_48328_end_mask_0, x = var_47869_cast_fp16)[name = tensor("op_48328_cast_fp16")]; tensor var_48335_begin_0 = const()[name = tensor("op_48335_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48335_end_0 = const()[name = tensor("op_48335_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48335_end_mask_0 = const()[name = tensor("op_48335_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48335_cast_fp16 = slice_by_index(begin = var_48335_begin_0, end = var_48335_end_0, end_mask = var_48335_end_mask_0, x = var_47869_cast_fp16)[name = tensor("op_48335_cast_fp16")]; tensor var_48342_begin_0 = const()[name = tensor("op_48342_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48342_end_0 = const()[name = tensor("op_48342_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48342_end_mask_0 = const()[name = tensor("op_48342_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48342_cast_fp16 = slice_by_index(begin = var_48342_begin_0, end = var_48342_end_0, end_mask = var_48342_end_mask_0, x = var_47873_cast_fp16)[name = tensor("op_48342_cast_fp16")]; tensor var_48349_begin_0 = const()[name = tensor("op_48349_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48349_end_0 = const()[name = tensor("op_48349_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48349_end_mask_0 = const()[name = tensor("op_48349_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48349_cast_fp16 = slice_by_index(begin = var_48349_begin_0, end = var_48349_end_0, end_mask = var_48349_end_mask_0, x = var_47873_cast_fp16)[name = tensor("op_48349_cast_fp16")]; tensor var_48356_begin_0 = const()[name = tensor("op_48356_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48356_end_0 = const()[name = tensor("op_48356_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48356_end_mask_0 = const()[name = tensor("op_48356_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48356_cast_fp16 = slice_by_index(begin = var_48356_begin_0, end = var_48356_end_0, end_mask = var_48356_end_mask_0, x = var_47873_cast_fp16)[name = tensor("op_48356_cast_fp16")]; tensor var_48363_begin_0 = const()[name = tensor("op_48363_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48363_end_0 = const()[name = tensor("op_48363_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48363_end_mask_0 = const()[name = tensor("op_48363_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48363_cast_fp16 = slice_by_index(begin = var_48363_begin_0, end = var_48363_end_0, end_mask = var_48363_end_mask_0, x = var_47873_cast_fp16)[name = tensor("op_48363_cast_fp16")]; tensor var_48370_begin_0 = const()[name = tensor("op_48370_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48370_end_0 = const()[name = tensor("op_48370_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48370_end_mask_0 = const()[name = tensor("op_48370_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48370_cast_fp16 = slice_by_index(begin = var_48370_begin_0, end = var_48370_end_0, end_mask = var_48370_end_mask_0, x = var_47877_cast_fp16)[name = tensor("op_48370_cast_fp16")]; tensor var_48377_begin_0 = const()[name = tensor("op_48377_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48377_end_0 = const()[name = tensor("op_48377_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48377_end_mask_0 = const()[name = tensor("op_48377_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48377_cast_fp16 = slice_by_index(begin = var_48377_begin_0, end = var_48377_end_0, end_mask = var_48377_end_mask_0, x = var_47877_cast_fp16)[name = tensor("op_48377_cast_fp16")]; tensor var_48384_begin_0 = const()[name = tensor("op_48384_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48384_end_0 = const()[name = tensor("op_48384_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48384_end_mask_0 = const()[name = tensor("op_48384_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48384_cast_fp16 = slice_by_index(begin = var_48384_begin_0, end = var_48384_end_0, end_mask = var_48384_end_mask_0, x = var_47877_cast_fp16)[name = tensor("op_48384_cast_fp16")]; tensor var_48391_begin_0 = const()[name = tensor("op_48391_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48391_end_0 = const()[name = tensor("op_48391_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48391_end_mask_0 = const()[name = tensor("op_48391_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48391_cast_fp16 = slice_by_index(begin = var_48391_begin_0, end = var_48391_end_0, end_mask = var_48391_end_mask_0, x = var_47877_cast_fp16)[name = tensor("op_48391_cast_fp16")]; tensor var_48398_begin_0 = const()[name = tensor("op_48398_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48398_end_0 = const()[name = tensor("op_48398_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48398_end_mask_0 = const()[name = tensor("op_48398_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48398_cast_fp16 = slice_by_index(begin = var_48398_begin_0, end = var_48398_end_0, end_mask = var_48398_end_mask_0, x = var_47881_cast_fp16)[name = tensor("op_48398_cast_fp16")]; tensor var_48405_begin_0 = const()[name = tensor("op_48405_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48405_end_0 = const()[name = tensor("op_48405_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48405_end_mask_0 = const()[name = tensor("op_48405_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48405_cast_fp16 = slice_by_index(begin = var_48405_begin_0, end = var_48405_end_0, end_mask = var_48405_end_mask_0, x = var_47881_cast_fp16)[name = tensor("op_48405_cast_fp16")]; tensor var_48412_begin_0 = const()[name = tensor("op_48412_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48412_end_0 = const()[name = tensor("op_48412_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48412_end_mask_0 = const()[name = tensor("op_48412_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48412_cast_fp16 = slice_by_index(begin = var_48412_begin_0, end = var_48412_end_0, end_mask = var_48412_end_mask_0, x = var_47881_cast_fp16)[name = tensor("op_48412_cast_fp16")]; tensor var_48419_begin_0 = const()[name = tensor("op_48419_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48419_end_0 = const()[name = tensor("op_48419_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48419_end_mask_0 = const()[name = tensor("op_48419_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48419_cast_fp16 = slice_by_index(begin = var_48419_begin_0, end = var_48419_end_0, end_mask = var_48419_end_mask_0, x = var_47881_cast_fp16)[name = tensor("op_48419_cast_fp16")]; tensor var_48426_begin_0 = const()[name = tensor("op_48426_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48426_end_0 = const()[name = tensor("op_48426_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_48426_end_mask_0 = const()[name = tensor("op_48426_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48426_cast_fp16 = slice_by_index(begin = var_48426_begin_0, end = var_48426_end_0, end_mask = var_48426_end_mask_0, x = var_47885_cast_fp16)[name = tensor("op_48426_cast_fp16")]; tensor var_48433_begin_0 = const()[name = tensor("op_48433_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_48433_end_0 = const()[name = tensor("op_48433_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_48433_end_mask_0 = const()[name = tensor("op_48433_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48433_cast_fp16 = slice_by_index(begin = var_48433_begin_0, end = var_48433_end_0, end_mask = var_48433_end_mask_0, x = var_47885_cast_fp16)[name = tensor("op_48433_cast_fp16")]; tensor var_48440_begin_0 = const()[name = tensor("op_48440_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_48440_end_0 = const()[name = tensor("op_48440_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_48440_end_mask_0 = const()[name = tensor("op_48440_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48440_cast_fp16 = slice_by_index(begin = var_48440_begin_0, end = var_48440_end_0, end_mask = var_48440_end_mask_0, x = var_47885_cast_fp16)[name = tensor("op_48440_cast_fp16")]; tensor var_48447_begin_0 = const()[name = tensor("op_48447_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_48447_end_0 = const()[name = tensor("op_48447_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48447_end_mask_0 = const()[name = tensor("op_48447_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48447_cast_fp16 = slice_by_index(begin = var_48447_begin_0, end = var_48447_end_0, end_mask = var_48447_end_mask_0, x = var_47885_cast_fp16)[name = tensor("op_48447_cast_fp16")]; tensor k_61_perm_0 = const()[name = tensor("k_61_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_48452_begin_0 = const()[name = tensor("op_48452_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48452_end_0 = const()[name = tensor("op_48452_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_48452_end_mask_0 = const()[name = tensor("op_48452_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_61_cast_fp16 = transpose(perm = k_61_perm_0, x = key_61_cast_fp16)[name = tensor("transpose_1")]; tensor var_48452_cast_fp16 = slice_by_index(begin = var_48452_begin_0, end = var_48452_end_0, end_mask = var_48452_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48452_cast_fp16")]; tensor var_48456_begin_0 = const()[name = tensor("op_48456_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_48456_end_0 = const()[name = tensor("op_48456_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_48456_end_mask_0 = const()[name = tensor("op_48456_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48456_cast_fp16 = slice_by_index(begin = var_48456_begin_0, end = var_48456_end_0, end_mask = var_48456_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48456_cast_fp16")]; tensor var_48460_begin_0 = const()[name = tensor("op_48460_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_48460_end_0 = const()[name = tensor("op_48460_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_48460_end_mask_0 = const()[name = tensor("op_48460_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48460_cast_fp16 = slice_by_index(begin = var_48460_begin_0, end = var_48460_end_0, end_mask = var_48460_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48460_cast_fp16")]; tensor var_48464_begin_0 = const()[name = tensor("op_48464_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_48464_end_0 = const()[name = tensor("op_48464_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_48464_end_mask_0 = const()[name = tensor("op_48464_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48464_cast_fp16 = slice_by_index(begin = var_48464_begin_0, end = var_48464_end_0, end_mask = var_48464_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48464_cast_fp16")]; tensor var_48468_begin_0 = const()[name = tensor("op_48468_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_48468_end_0 = const()[name = tensor("op_48468_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_48468_end_mask_0 = const()[name = tensor("op_48468_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48468_cast_fp16 = slice_by_index(begin = var_48468_begin_0, end = var_48468_end_0, end_mask = var_48468_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48468_cast_fp16")]; tensor var_48472_begin_0 = const()[name = tensor("op_48472_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_48472_end_0 = const()[name = tensor("op_48472_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_48472_end_mask_0 = const()[name = tensor("op_48472_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48472_cast_fp16 = slice_by_index(begin = var_48472_begin_0, end = var_48472_end_0, end_mask = var_48472_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48472_cast_fp16")]; tensor var_48476_begin_0 = const()[name = tensor("op_48476_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_48476_end_0 = const()[name = tensor("op_48476_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_48476_end_mask_0 = const()[name = tensor("op_48476_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48476_cast_fp16 = slice_by_index(begin = var_48476_begin_0, end = var_48476_end_0, end_mask = var_48476_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48476_cast_fp16")]; tensor var_48480_begin_0 = const()[name = tensor("op_48480_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_48480_end_0 = const()[name = tensor("op_48480_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_48480_end_mask_0 = const()[name = tensor("op_48480_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48480_cast_fp16 = slice_by_index(begin = var_48480_begin_0, end = var_48480_end_0, end_mask = var_48480_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48480_cast_fp16")]; tensor var_48484_begin_0 = const()[name = tensor("op_48484_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_48484_end_0 = const()[name = tensor("op_48484_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_48484_end_mask_0 = const()[name = tensor("op_48484_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48484_cast_fp16 = slice_by_index(begin = var_48484_begin_0, end = var_48484_end_0, end_mask = var_48484_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48484_cast_fp16")]; tensor var_48488_begin_0 = const()[name = tensor("op_48488_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_48488_end_0 = const()[name = tensor("op_48488_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_48488_end_mask_0 = const()[name = tensor("op_48488_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48488_cast_fp16 = slice_by_index(begin = var_48488_begin_0, end = var_48488_end_0, end_mask = var_48488_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48488_cast_fp16")]; tensor var_48492_begin_0 = const()[name = tensor("op_48492_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_48492_end_0 = const()[name = tensor("op_48492_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_48492_end_mask_0 = const()[name = tensor("op_48492_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48492_cast_fp16 = slice_by_index(begin = var_48492_begin_0, end = var_48492_end_0, end_mask = var_48492_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48492_cast_fp16")]; tensor var_48496_begin_0 = const()[name = tensor("op_48496_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_48496_end_0 = const()[name = tensor("op_48496_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_48496_end_mask_0 = const()[name = tensor("op_48496_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48496_cast_fp16 = slice_by_index(begin = var_48496_begin_0, end = var_48496_end_0, end_mask = var_48496_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48496_cast_fp16")]; tensor var_48500_begin_0 = const()[name = tensor("op_48500_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_48500_end_0 = const()[name = tensor("op_48500_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_48500_end_mask_0 = const()[name = tensor("op_48500_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48500_cast_fp16 = slice_by_index(begin = var_48500_begin_0, end = var_48500_end_0, end_mask = var_48500_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48500_cast_fp16")]; tensor var_48504_begin_0 = const()[name = tensor("op_48504_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_48504_end_0 = const()[name = tensor("op_48504_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_48504_end_mask_0 = const()[name = tensor("op_48504_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48504_cast_fp16 = slice_by_index(begin = var_48504_begin_0, end = var_48504_end_0, end_mask = var_48504_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48504_cast_fp16")]; tensor var_48508_begin_0 = const()[name = tensor("op_48508_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_48508_end_0 = const()[name = tensor("op_48508_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_48508_end_mask_0 = const()[name = tensor("op_48508_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48508_cast_fp16 = slice_by_index(begin = var_48508_begin_0, end = var_48508_end_0, end_mask = var_48508_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48508_cast_fp16")]; tensor var_48512_begin_0 = const()[name = tensor("op_48512_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_48512_end_0 = const()[name = tensor("op_48512_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_48512_end_mask_0 = const()[name = tensor("op_48512_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48512_cast_fp16 = slice_by_index(begin = var_48512_begin_0, end = var_48512_end_0, end_mask = var_48512_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48512_cast_fp16")]; tensor var_48516_begin_0 = const()[name = tensor("op_48516_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_48516_end_0 = const()[name = tensor("op_48516_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_48516_end_mask_0 = const()[name = tensor("op_48516_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48516_cast_fp16 = slice_by_index(begin = var_48516_begin_0, end = var_48516_end_0, end_mask = var_48516_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48516_cast_fp16")]; tensor var_48520_begin_0 = const()[name = tensor("op_48520_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_48520_end_0 = const()[name = tensor("op_48520_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_48520_end_mask_0 = const()[name = tensor("op_48520_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48520_cast_fp16 = slice_by_index(begin = var_48520_begin_0, end = var_48520_end_0, end_mask = var_48520_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48520_cast_fp16")]; tensor var_48524_begin_0 = const()[name = tensor("op_48524_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_48524_end_0 = const()[name = tensor("op_48524_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_48524_end_mask_0 = const()[name = tensor("op_48524_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48524_cast_fp16 = slice_by_index(begin = var_48524_begin_0, end = var_48524_end_0, end_mask = var_48524_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48524_cast_fp16")]; tensor var_48528_begin_0 = const()[name = tensor("op_48528_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_48528_end_0 = const()[name = tensor("op_48528_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_48528_end_mask_0 = const()[name = tensor("op_48528_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_48528_cast_fp16 = slice_by_index(begin = var_48528_begin_0, end = var_48528_end_0, end_mask = var_48528_end_mask_0, x = k_61_cast_fp16)[name = tensor("op_48528_cast_fp16")]; tensor var_48530_begin_0 = const()[name = tensor("op_48530_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_48530_end_0 = const()[name = tensor("op_48530_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_48530_end_mask_0 = const()[name = tensor("op_48530_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48530_cast_fp16 = slice_by_index(begin = var_48530_begin_0, end = var_48530_end_0, end_mask = var_48530_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48530_cast_fp16")]; tensor var_48534_begin_0 = const()[name = tensor("op_48534_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_48534_end_0 = const()[name = tensor("op_48534_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_48534_end_mask_0 = const()[name = tensor("op_48534_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48534_cast_fp16 = slice_by_index(begin = var_48534_begin_0, end = var_48534_end_0, end_mask = var_48534_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48534_cast_fp16")]; tensor var_48538_begin_0 = const()[name = tensor("op_48538_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_48538_end_0 = const()[name = tensor("op_48538_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_48538_end_mask_0 = const()[name = tensor("op_48538_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48538_cast_fp16 = slice_by_index(begin = var_48538_begin_0, end = var_48538_end_0, end_mask = var_48538_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48538_cast_fp16")]; tensor var_48542_begin_0 = const()[name = tensor("op_48542_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_48542_end_0 = const()[name = tensor("op_48542_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_48542_end_mask_0 = const()[name = tensor("op_48542_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48542_cast_fp16 = slice_by_index(begin = var_48542_begin_0, end = var_48542_end_0, end_mask = var_48542_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48542_cast_fp16")]; tensor var_48546_begin_0 = const()[name = tensor("op_48546_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_48546_end_0 = const()[name = tensor("op_48546_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_48546_end_mask_0 = const()[name = tensor("op_48546_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48546_cast_fp16 = slice_by_index(begin = var_48546_begin_0, end = var_48546_end_0, end_mask = var_48546_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48546_cast_fp16")]; tensor var_48550_begin_0 = const()[name = tensor("op_48550_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_48550_end_0 = const()[name = tensor("op_48550_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_48550_end_mask_0 = const()[name = tensor("op_48550_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48550_cast_fp16 = slice_by_index(begin = var_48550_begin_0, end = var_48550_end_0, end_mask = var_48550_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48550_cast_fp16")]; tensor var_48554_begin_0 = const()[name = tensor("op_48554_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_48554_end_0 = const()[name = tensor("op_48554_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_48554_end_mask_0 = const()[name = tensor("op_48554_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48554_cast_fp16 = slice_by_index(begin = var_48554_begin_0, end = var_48554_end_0, end_mask = var_48554_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48554_cast_fp16")]; tensor var_48558_begin_0 = const()[name = tensor("op_48558_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_48558_end_0 = const()[name = tensor("op_48558_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_48558_end_mask_0 = const()[name = tensor("op_48558_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48558_cast_fp16 = slice_by_index(begin = var_48558_begin_0, end = var_48558_end_0, end_mask = var_48558_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48558_cast_fp16")]; tensor var_48562_begin_0 = const()[name = tensor("op_48562_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_48562_end_0 = const()[name = tensor("op_48562_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_48562_end_mask_0 = const()[name = tensor("op_48562_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48562_cast_fp16 = slice_by_index(begin = var_48562_begin_0, end = var_48562_end_0, end_mask = var_48562_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48562_cast_fp16")]; tensor var_48566_begin_0 = const()[name = tensor("op_48566_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_48566_end_0 = const()[name = tensor("op_48566_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_48566_end_mask_0 = const()[name = tensor("op_48566_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48566_cast_fp16 = slice_by_index(begin = var_48566_begin_0, end = var_48566_end_0, end_mask = var_48566_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48566_cast_fp16")]; tensor var_48570_begin_0 = const()[name = tensor("op_48570_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_48570_end_0 = const()[name = tensor("op_48570_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_48570_end_mask_0 = const()[name = tensor("op_48570_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48570_cast_fp16 = slice_by_index(begin = var_48570_begin_0, end = var_48570_end_0, end_mask = var_48570_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48570_cast_fp16")]; tensor var_48574_begin_0 = const()[name = tensor("op_48574_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_48574_end_0 = const()[name = tensor("op_48574_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_48574_end_mask_0 = const()[name = tensor("op_48574_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48574_cast_fp16 = slice_by_index(begin = var_48574_begin_0, end = var_48574_end_0, end_mask = var_48574_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48574_cast_fp16")]; tensor var_48578_begin_0 = const()[name = tensor("op_48578_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_48578_end_0 = const()[name = tensor("op_48578_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_48578_end_mask_0 = const()[name = tensor("op_48578_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48578_cast_fp16 = slice_by_index(begin = var_48578_begin_0, end = var_48578_end_0, end_mask = var_48578_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48578_cast_fp16")]; tensor var_48582_begin_0 = const()[name = tensor("op_48582_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_48582_end_0 = const()[name = tensor("op_48582_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_48582_end_mask_0 = const()[name = tensor("op_48582_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48582_cast_fp16 = slice_by_index(begin = var_48582_begin_0, end = var_48582_end_0, end_mask = var_48582_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48582_cast_fp16")]; tensor var_48586_begin_0 = const()[name = tensor("op_48586_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_48586_end_0 = const()[name = tensor("op_48586_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_48586_end_mask_0 = const()[name = tensor("op_48586_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48586_cast_fp16 = slice_by_index(begin = var_48586_begin_0, end = var_48586_end_0, end_mask = var_48586_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48586_cast_fp16")]; tensor var_48590_begin_0 = const()[name = tensor("op_48590_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_48590_end_0 = const()[name = tensor("op_48590_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_48590_end_mask_0 = const()[name = tensor("op_48590_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48590_cast_fp16 = slice_by_index(begin = var_48590_begin_0, end = var_48590_end_0, end_mask = var_48590_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48590_cast_fp16")]; tensor var_48594_begin_0 = const()[name = tensor("op_48594_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_48594_end_0 = const()[name = tensor("op_48594_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_48594_end_mask_0 = const()[name = tensor("op_48594_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48594_cast_fp16 = slice_by_index(begin = var_48594_begin_0, end = var_48594_end_0, end_mask = var_48594_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48594_cast_fp16")]; tensor var_48598_begin_0 = const()[name = tensor("op_48598_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_48598_end_0 = const()[name = tensor("op_48598_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_48598_end_mask_0 = const()[name = tensor("op_48598_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48598_cast_fp16 = slice_by_index(begin = var_48598_begin_0, end = var_48598_end_0, end_mask = var_48598_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48598_cast_fp16")]; tensor var_48602_begin_0 = const()[name = tensor("op_48602_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_48602_end_0 = const()[name = tensor("op_48602_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_48602_end_mask_0 = const()[name = tensor("op_48602_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48602_cast_fp16 = slice_by_index(begin = var_48602_begin_0, end = var_48602_end_0, end_mask = var_48602_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48602_cast_fp16")]; tensor var_48606_begin_0 = const()[name = tensor("op_48606_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_48606_end_0 = const()[name = tensor("op_48606_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_48606_end_mask_0 = const()[name = tensor("op_48606_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_48606_cast_fp16 = slice_by_index(begin = var_48606_begin_0, end = var_48606_end_0, end_mask = var_48606_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_48606_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4801_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4801_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4801_equation_0, values = (var_48452_cast_fp16, var_47894_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4801_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4803_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4803_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4803_equation_0, values = (var_48452_cast_fp16, var_47901_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4803_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4805_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4805_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4805_equation_0, values = (var_48452_cast_fp16, var_47908_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4805_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4807_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4807_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4807_equation_0, values = (var_48452_cast_fp16, var_47915_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4807_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4809_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4809_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4809_equation_0, values = (var_48456_cast_fp16, var_47922_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4809_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4811_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4811_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4811_equation_0, values = (var_48456_cast_fp16, var_47929_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4811_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4813_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4813_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4813_equation_0, values = (var_48456_cast_fp16, var_47936_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4813_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4815_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4815_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4815_equation_0, values = (var_48456_cast_fp16, var_47943_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4815_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4817_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4817_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4817_equation_0, values = (var_48460_cast_fp16, var_47950_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4817_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4819_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4819_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4819_equation_0, values = (var_48460_cast_fp16, var_47957_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4819_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4821_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4821_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4821_equation_0, values = (var_48460_cast_fp16, var_47964_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4821_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4823_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4823_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4823_equation_0, values = (var_48460_cast_fp16, var_47971_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4823_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4825_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4825_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4825_equation_0, values = (var_48464_cast_fp16, var_47978_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4825_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4827_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4827_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4827_equation_0, values = (var_48464_cast_fp16, var_47985_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4827_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4829_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4829_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4829_equation_0, values = (var_48464_cast_fp16, var_47992_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4829_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4831_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4831_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4831_equation_0, values = (var_48464_cast_fp16, var_47999_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4831_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4833_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4833_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4833_equation_0, values = (var_48468_cast_fp16, var_48006_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4833_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4835_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4835_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4835_equation_0, values = (var_48468_cast_fp16, var_48013_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4835_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4837_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4837_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4837_equation_0, values = (var_48468_cast_fp16, var_48020_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4837_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4839_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4839_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4839_equation_0, values = (var_48468_cast_fp16, var_48027_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4839_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4841_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4841_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4841_equation_0, values = (var_48472_cast_fp16, var_48034_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4841_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4843_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4843_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4843_equation_0, values = (var_48472_cast_fp16, var_48041_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4843_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4845_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4845_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4845_equation_0, values = (var_48472_cast_fp16, var_48048_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4845_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4847_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4847_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4847_equation_0, values = (var_48472_cast_fp16, var_48055_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4847_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4849_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4849_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4849_equation_0, values = (var_48476_cast_fp16, var_48062_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4849_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4851_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4851_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4851_equation_0, values = (var_48476_cast_fp16, var_48069_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4851_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4853_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4853_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4853_equation_0, values = (var_48476_cast_fp16, var_48076_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4853_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4855_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4855_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4855_equation_0, values = (var_48476_cast_fp16, var_48083_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4855_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4857_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4857_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4857_equation_0, values = (var_48480_cast_fp16, var_48090_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4857_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4859_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4859_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4859_equation_0, values = (var_48480_cast_fp16, var_48097_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4859_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4861_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4861_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4861_equation_0, values = (var_48480_cast_fp16, var_48104_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4861_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4863_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4863_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4863_equation_0, values = (var_48480_cast_fp16, var_48111_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4863_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4865_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4865_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4865_equation_0, values = (var_48484_cast_fp16, var_48118_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4865_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4867_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4867_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4867_equation_0, values = (var_48484_cast_fp16, var_48125_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4867_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4869_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4869_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4869_equation_0, values = (var_48484_cast_fp16, var_48132_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4869_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4871_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4871_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4871_equation_0, values = (var_48484_cast_fp16, var_48139_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4871_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4873_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4873_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4873_equation_0, values = (var_48488_cast_fp16, var_48146_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4873_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4875_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4875_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4875_equation_0, values = (var_48488_cast_fp16, var_48153_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4875_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4877_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4877_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4877_equation_0, values = (var_48488_cast_fp16, var_48160_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4877_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4879_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4879_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4879_equation_0, values = (var_48488_cast_fp16, var_48167_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4879_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4881_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4881_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4881_equation_0, values = (var_48492_cast_fp16, var_48174_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4881_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4883_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4883_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4883_equation_0, values = (var_48492_cast_fp16, var_48181_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4883_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4885_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4885_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4885_equation_0, values = (var_48492_cast_fp16, var_48188_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4885_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4887_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4887_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4887_equation_0, values = (var_48492_cast_fp16, var_48195_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4887_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4889_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4889_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4889_equation_0, values = (var_48496_cast_fp16, var_48202_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4889_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4891_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4891_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4891_equation_0, values = (var_48496_cast_fp16, var_48209_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4891_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4893_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4893_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4893_equation_0, values = (var_48496_cast_fp16, var_48216_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4893_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4895_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4895_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4895_equation_0, values = (var_48496_cast_fp16, var_48223_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4895_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4897_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4897_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4897_equation_0, values = (var_48500_cast_fp16, var_48230_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4897_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4899_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4899_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4899_equation_0, values = (var_48500_cast_fp16, var_48237_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4899_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4901_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4901_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4901_equation_0, values = (var_48500_cast_fp16, var_48244_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4901_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4903_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4903_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4903_equation_0, values = (var_48500_cast_fp16, var_48251_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4903_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4905_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4905_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4905_equation_0, values = (var_48504_cast_fp16, var_48258_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4905_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4907_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4907_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4907_equation_0, values = (var_48504_cast_fp16, var_48265_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4907_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4909_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4909_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4909_equation_0, values = (var_48504_cast_fp16, var_48272_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4909_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4911_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4911_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4911_equation_0, values = (var_48504_cast_fp16, var_48279_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4911_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4913_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4913_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4913_equation_0, values = (var_48508_cast_fp16, var_48286_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4913_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4915_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4915_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4915_equation_0, values = (var_48508_cast_fp16, var_48293_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4915_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4917_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4917_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4917_equation_0, values = (var_48508_cast_fp16, var_48300_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4917_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4919_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4919_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4919_equation_0, values = (var_48508_cast_fp16, var_48307_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4919_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4921_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4921_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4921_equation_0, values = (var_48512_cast_fp16, var_48314_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4921_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4923_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4923_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4923_equation_0, values = (var_48512_cast_fp16, var_48321_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4923_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4925_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4925_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4925_equation_0, values = (var_48512_cast_fp16, var_48328_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4925_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4927_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4927_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4927_equation_0, values = (var_48512_cast_fp16, var_48335_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4927_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4929_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4929_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4929_equation_0, values = (var_48516_cast_fp16, var_48342_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4929_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4931_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4931_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4931_equation_0, values = (var_48516_cast_fp16, var_48349_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4931_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4933_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4933_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4933_equation_0, values = (var_48516_cast_fp16, var_48356_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4933_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4935_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4935_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4935_equation_0, values = (var_48516_cast_fp16, var_48363_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4935_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4937_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4937_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4937_equation_0, values = (var_48520_cast_fp16, var_48370_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4937_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4939_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4939_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4939_equation_0, values = (var_48520_cast_fp16, var_48377_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4939_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4941_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4941_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4941_equation_0, values = (var_48520_cast_fp16, var_48384_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4941_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4943_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4943_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4943_equation_0, values = (var_48520_cast_fp16, var_48391_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4943_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4945_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4945_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4945_equation_0, values = (var_48524_cast_fp16, var_48398_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4945_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4947_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4947_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4947_equation_0, values = (var_48524_cast_fp16, var_48405_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4947_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4949_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4949_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4949_equation_0, values = (var_48524_cast_fp16, var_48412_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4949_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4951_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4951_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4951_equation_0, values = (var_48524_cast_fp16, var_48419_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4951_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4953_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4953_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4953_equation_0, values = (var_48528_cast_fp16, var_48426_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4953_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4955_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4955_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4955_equation_0, values = (var_48528_cast_fp16, var_48433_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4955_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4957_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4957_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4957_equation_0, values = (var_48528_cast_fp16, var_48440_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4957_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4959_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4959_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4959_equation_0, values = (var_48528_cast_fp16, var_48447_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4959_cast_fp16")]; tensor var_48769_to_fp16 = const()[name = tensor("op_48769_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4801_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4801_cast_fp16, y = var_48769_to_fp16)[name = tensor("aw_chunk_4801_cast_fp16")]; tensor var_48771_to_fp16 = const()[name = tensor("op_48771_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4803_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4803_cast_fp16, y = var_48771_to_fp16)[name = tensor("aw_chunk_4803_cast_fp16")]; tensor var_48773_to_fp16 = const()[name = tensor("op_48773_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4805_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4805_cast_fp16, y = var_48773_to_fp16)[name = tensor("aw_chunk_4805_cast_fp16")]; tensor var_48775_to_fp16 = const()[name = tensor("op_48775_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4807_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4807_cast_fp16, y = var_48775_to_fp16)[name = tensor("aw_chunk_4807_cast_fp16")]; tensor var_48777_to_fp16 = const()[name = tensor("op_48777_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4809_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4809_cast_fp16, y = var_48777_to_fp16)[name = tensor("aw_chunk_4809_cast_fp16")]; tensor var_48779_to_fp16 = const()[name = tensor("op_48779_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4811_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4811_cast_fp16, y = var_48779_to_fp16)[name = tensor("aw_chunk_4811_cast_fp16")]; tensor var_48781_to_fp16 = const()[name = tensor("op_48781_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4813_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4813_cast_fp16, y = var_48781_to_fp16)[name = tensor("aw_chunk_4813_cast_fp16")]; tensor var_48783_to_fp16 = const()[name = tensor("op_48783_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4815_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4815_cast_fp16, y = var_48783_to_fp16)[name = tensor("aw_chunk_4815_cast_fp16")]; tensor var_48785_to_fp16 = const()[name = tensor("op_48785_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4817_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4817_cast_fp16, y = var_48785_to_fp16)[name = tensor("aw_chunk_4817_cast_fp16")]; tensor var_48787_to_fp16 = const()[name = tensor("op_48787_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4819_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4819_cast_fp16, y = var_48787_to_fp16)[name = tensor("aw_chunk_4819_cast_fp16")]; tensor var_48789_to_fp16 = const()[name = tensor("op_48789_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4821_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4821_cast_fp16, y = var_48789_to_fp16)[name = tensor("aw_chunk_4821_cast_fp16")]; tensor var_48791_to_fp16 = const()[name = tensor("op_48791_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4823_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4823_cast_fp16, y = var_48791_to_fp16)[name = tensor("aw_chunk_4823_cast_fp16")]; tensor var_48793_to_fp16 = const()[name = tensor("op_48793_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4825_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4825_cast_fp16, y = var_48793_to_fp16)[name = tensor("aw_chunk_4825_cast_fp16")]; tensor var_48795_to_fp16 = const()[name = tensor("op_48795_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4827_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4827_cast_fp16, y = var_48795_to_fp16)[name = tensor("aw_chunk_4827_cast_fp16")]; tensor var_48797_to_fp16 = const()[name = tensor("op_48797_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4829_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4829_cast_fp16, y = var_48797_to_fp16)[name = tensor("aw_chunk_4829_cast_fp16")]; tensor var_48799_to_fp16 = const()[name = tensor("op_48799_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4831_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4831_cast_fp16, y = var_48799_to_fp16)[name = tensor("aw_chunk_4831_cast_fp16")]; tensor var_48801_to_fp16 = const()[name = tensor("op_48801_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4833_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4833_cast_fp16, y = var_48801_to_fp16)[name = tensor("aw_chunk_4833_cast_fp16")]; tensor var_48803_to_fp16 = const()[name = tensor("op_48803_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4835_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4835_cast_fp16, y = var_48803_to_fp16)[name = tensor("aw_chunk_4835_cast_fp16")]; tensor var_48805_to_fp16 = const()[name = tensor("op_48805_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4837_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4837_cast_fp16, y = var_48805_to_fp16)[name = tensor("aw_chunk_4837_cast_fp16")]; tensor var_48807_to_fp16 = const()[name = tensor("op_48807_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4839_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4839_cast_fp16, y = var_48807_to_fp16)[name = tensor("aw_chunk_4839_cast_fp16")]; tensor var_48809_to_fp16 = const()[name = tensor("op_48809_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4841_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4841_cast_fp16, y = var_48809_to_fp16)[name = tensor("aw_chunk_4841_cast_fp16")]; tensor var_48811_to_fp16 = const()[name = tensor("op_48811_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4843_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4843_cast_fp16, y = var_48811_to_fp16)[name = tensor("aw_chunk_4843_cast_fp16")]; tensor var_48813_to_fp16 = const()[name = tensor("op_48813_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4845_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4845_cast_fp16, y = var_48813_to_fp16)[name = tensor("aw_chunk_4845_cast_fp16")]; tensor var_48815_to_fp16 = const()[name = tensor("op_48815_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4847_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4847_cast_fp16, y = var_48815_to_fp16)[name = tensor("aw_chunk_4847_cast_fp16")]; tensor var_48817_to_fp16 = const()[name = tensor("op_48817_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4849_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4849_cast_fp16, y = var_48817_to_fp16)[name = tensor("aw_chunk_4849_cast_fp16")]; tensor var_48819_to_fp16 = const()[name = tensor("op_48819_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4851_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4851_cast_fp16, y = var_48819_to_fp16)[name = tensor("aw_chunk_4851_cast_fp16")]; tensor var_48821_to_fp16 = const()[name = tensor("op_48821_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4853_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4853_cast_fp16, y = var_48821_to_fp16)[name = tensor("aw_chunk_4853_cast_fp16")]; tensor var_48823_to_fp16 = const()[name = tensor("op_48823_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4855_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4855_cast_fp16, y = var_48823_to_fp16)[name = tensor("aw_chunk_4855_cast_fp16")]; tensor var_48825_to_fp16 = const()[name = tensor("op_48825_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4857_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4857_cast_fp16, y = var_48825_to_fp16)[name = tensor("aw_chunk_4857_cast_fp16")]; tensor var_48827_to_fp16 = const()[name = tensor("op_48827_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4859_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4859_cast_fp16, y = var_48827_to_fp16)[name = tensor("aw_chunk_4859_cast_fp16")]; tensor var_48829_to_fp16 = const()[name = tensor("op_48829_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4861_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4861_cast_fp16, y = var_48829_to_fp16)[name = tensor("aw_chunk_4861_cast_fp16")]; tensor var_48831_to_fp16 = const()[name = tensor("op_48831_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4863_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4863_cast_fp16, y = var_48831_to_fp16)[name = tensor("aw_chunk_4863_cast_fp16")]; tensor var_48833_to_fp16 = const()[name = tensor("op_48833_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4865_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4865_cast_fp16, y = var_48833_to_fp16)[name = tensor("aw_chunk_4865_cast_fp16")]; tensor var_48835_to_fp16 = const()[name = tensor("op_48835_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4867_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4867_cast_fp16, y = var_48835_to_fp16)[name = tensor("aw_chunk_4867_cast_fp16")]; tensor var_48837_to_fp16 = const()[name = tensor("op_48837_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4869_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4869_cast_fp16, y = var_48837_to_fp16)[name = tensor("aw_chunk_4869_cast_fp16")]; tensor var_48839_to_fp16 = const()[name = tensor("op_48839_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4871_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4871_cast_fp16, y = var_48839_to_fp16)[name = tensor("aw_chunk_4871_cast_fp16")]; tensor var_48841_to_fp16 = const()[name = tensor("op_48841_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4873_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4873_cast_fp16, y = var_48841_to_fp16)[name = tensor("aw_chunk_4873_cast_fp16")]; tensor var_48843_to_fp16 = const()[name = tensor("op_48843_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4875_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4875_cast_fp16, y = var_48843_to_fp16)[name = tensor("aw_chunk_4875_cast_fp16")]; tensor var_48845_to_fp16 = const()[name = tensor("op_48845_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4877_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4877_cast_fp16, y = var_48845_to_fp16)[name = tensor("aw_chunk_4877_cast_fp16")]; tensor var_48847_to_fp16 = const()[name = tensor("op_48847_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4879_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4879_cast_fp16, y = var_48847_to_fp16)[name = tensor("aw_chunk_4879_cast_fp16")]; tensor var_48849_to_fp16 = const()[name = tensor("op_48849_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4881_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4881_cast_fp16, y = var_48849_to_fp16)[name = tensor("aw_chunk_4881_cast_fp16")]; tensor var_48851_to_fp16 = const()[name = tensor("op_48851_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4883_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4883_cast_fp16, y = var_48851_to_fp16)[name = tensor("aw_chunk_4883_cast_fp16")]; tensor var_48853_to_fp16 = const()[name = tensor("op_48853_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4885_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4885_cast_fp16, y = var_48853_to_fp16)[name = tensor("aw_chunk_4885_cast_fp16")]; tensor var_48855_to_fp16 = const()[name = tensor("op_48855_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4887_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4887_cast_fp16, y = var_48855_to_fp16)[name = tensor("aw_chunk_4887_cast_fp16")]; tensor var_48857_to_fp16 = const()[name = tensor("op_48857_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4889_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4889_cast_fp16, y = var_48857_to_fp16)[name = tensor("aw_chunk_4889_cast_fp16")]; tensor var_48859_to_fp16 = const()[name = tensor("op_48859_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4891_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4891_cast_fp16, y = var_48859_to_fp16)[name = tensor("aw_chunk_4891_cast_fp16")]; tensor var_48861_to_fp16 = const()[name = tensor("op_48861_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4893_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4893_cast_fp16, y = var_48861_to_fp16)[name = tensor("aw_chunk_4893_cast_fp16")]; tensor var_48863_to_fp16 = const()[name = tensor("op_48863_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4895_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4895_cast_fp16, y = var_48863_to_fp16)[name = tensor("aw_chunk_4895_cast_fp16")]; tensor var_48865_to_fp16 = const()[name = tensor("op_48865_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4897_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4897_cast_fp16, y = var_48865_to_fp16)[name = tensor("aw_chunk_4897_cast_fp16")]; tensor var_48867_to_fp16 = const()[name = tensor("op_48867_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4899_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4899_cast_fp16, y = var_48867_to_fp16)[name = tensor("aw_chunk_4899_cast_fp16")]; tensor var_48869_to_fp16 = const()[name = tensor("op_48869_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4901_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4901_cast_fp16, y = var_48869_to_fp16)[name = tensor("aw_chunk_4901_cast_fp16")]; tensor var_48871_to_fp16 = const()[name = tensor("op_48871_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4903_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4903_cast_fp16, y = var_48871_to_fp16)[name = tensor("aw_chunk_4903_cast_fp16")]; tensor var_48873_to_fp16 = const()[name = tensor("op_48873_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4905_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4905_cast_fp16, y = var_48873_to_fp16)[name = tensor("aw_chunk_4905_cast_fp16")]; tensor var_48875_to_fp16 = const()[name = tensor("op_48875_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4907_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4907_cast_fp16, y = var_48875_to_fp16)[name = tensor("aw_chunk_4907_cast_fp16")]; tensor var_48877_to_fp16 = const()[name = tensor("op_48877_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4909_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4909_cast_fp16, y = var_48877_to_fp16)[name = tensor("aw_chunk_4909_cast_fp16")]; tensor var_48879_to_fp16 = const()[name = tensor("op_48879_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4911_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4911_cast_fp16, y = var_48879_to_fp16)[name = tensor("aw_chunk_4911_cast_fp16")]; tensor var_48881_to_fp16 = const()[name = tensor("op_48881_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4913_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4913_cast_fp16, y = var_48881_to_fp16)[name = tensor("aw_chunk_4913_cast_fp16")]; tensor var_48883_to_fp16 = const()[name = tensor("op_48883_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4915_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4915_cast_fp16, y = var_48883_to_fp16)[name = tensor("aw_chunk_4915_cast_fp16")]; tensor var_48885_to_fp16 = const()[name = tensor("op_48885_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4917_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4917_cast_fp16, y = var_48885_to_fp16)[name = tensor("aw_chunk_4917_cast_fp16")]; tensor var_48887_to_fp16 = const()[name = tensor("op_48887_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4919_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4919_cast_fp16, y = var_48887_to_fp16)[name = tensor("aw_chunk_4919_cast_fp16")]; tensor var_48889_to_fp16 = const()[name = tensor("op_48889_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4921_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4921_cast_fp16, y = var_48889_to_fp16)[name = tensor("aw_chunk_4921_cast_fp16")]; tensor var_48891_to_fp16 = const()[name = tensor("op_48891_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4923_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4923_cast_fp16, y = var_48891_to_fp16)[name = tensor("aw_chunk_4923_cast_fp16")]; tensor var_48893_to_fp16 = const()[name = tensor("op_48893_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4925_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4925_cast_fp16, y = var_48893_to_fp16)[name = tensor("aw_chunk_4925_cast_fp16")]; tensor var_48895_to_fp16 = const()[name = tensor("op_48895_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4927_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4927_cast_fp16, y = var_48895_to_fp16)[name = tensor("aw_chunk_4927_cast_fp16")]; tensor var_48897_to_fp16 = const()[name = tensor("op_48897_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4929_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4929_cast_fp16, y = var_48897_to_fp16)[name = tensor("aw_chunk_4929_cast_fp16")]; tensor var_48899_to_fp16 = const()[name = tensor("op_48899_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4931_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4931_cast_fp16, y = var_48899_to_fp16)[name = tensor("aw_chunk_4931_cast_fp16")]; tensor var_48901_to_fp16 = const()[name = tensor("op_48901_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4933_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4933_cast_fp16, y = var_48901_to_fp16)[name = tensor("aw_chunk_4933_cast_fp16")]; tensor var_48903_to_fp16 = const()[name = tensor("op_48903_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4935_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4935_cast_fp16, y = var_48903_to_fp16)[name = tensor("aw_chunk_4935_cast_fp16")]; tensor var_48905_to_fp16 = const()[name = tensor("op_48905_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4937_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4937_cast_fp16, y = var_48905_to_fp16)[name = tensor("aw_chunk_4937_cast_fp16")]; tensor var_48907_to_fp16 = const()[name = tensor("op_48907_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4939_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4939_cast_fp16, y = var_48907_to_fp16)[name = tensor("aw_chunk_4939_cast_fp16")]; tensor var_48909_to_fp16 = const()[name = tensor("op_48909_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4941_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4941_cast_fp16, y = var_48909_to_fp16)[name = tensor("aw_chunk_4941_cast_fp16")]; tensor var_48911_to_fp16 = const()[name = tensor("op_48911_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4943_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4943_cast_fp16, y = var_48911_to_fp16)[name = tensor("aw_chunk_4943_cast_fp16")]; tensor var_48913_to_fp16 = const()[name = tensor("op_48913_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4945_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4945_cast_fp16, y = var_48913_to_fp16)[name = tensor("aw_chunk_4945_cast_fp16")]; tensor var_48915_to_fp16 = const()[name = tensor("op_48915_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4947_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4947_cast_fp16, y = var_48915_to_fp16)[name = tensor("aw_chunk_4947_cast_fp16")]; tensor var_48917_to_fp16 = const()[name = tensor("op_48917_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4949_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4949_cast_fp16, y = var_48917_to_fp16)[name = tensor("aw_chunk_4949_cast_fp16")]; tensor var_48919_to_fp16 = const()[name = tensor("op_48919_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4951_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4951_cast_fp16, y = var_48919_to_fp16)[name = tensor("aw_chunk_4951_cast_fp16")]; tensor var_48921_to_fp16 = const()[name = tensor("op_48921_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4953_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4953_cast_fp16, y = var_48921_to_fp16)[name = tensor("aw_chunk_4953_cast_fp16")]; tensor var_48923_to_fp16 = const()[name = tensor("op_48923_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4955_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4955_cast_fp16, y = var_48923_to_fp16)[name = tensor("aw_chunk_4955_cast_fp16")]; tensor var_48925_to_fp16 = const()[name = tensor("op_48925_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4957_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4957_cast_fp16, y = var_48925_to_fp16)[name = tensor("aw_chunk_4957_cast_fp16")]; tensor var_48927_to_fp16 = const()[name = tensor("op_48927_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4959_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4959_cast_fp16, y = var_48927_to_fp16)[name = tensor("aw_chunk_4959_cast_fp16")]; tensor var_48929_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4801_cast_fp16)[name = tensor("op_48929_cast_fp16")]; tensor var_48930_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4803_cast_fp16)[name = tensor("op_48930_cast_fp16")]; tensor var_48931_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4805_cast_fp16)[name = tensor("op_48931_cast_fp16")]; tensor var_48932_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4807_cast_fp16)[name = tensor("op_48932_cast_fp16")]; tensor var_48933_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4809_cast_fp16)[name = tensor("op_48933_cast_fp16")]; tensor var_48934_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4811_cast_fp16)[name = tensor("op_48934_cast_fp16")]; tensor var_48935_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4813_cast_fp16)[name = tensor("op_48935_cast_fp16")]; tensor var_48936_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4815_cast_fp16)[name = tensor("op_48936_cast_fp16")]; tensor var_48937_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4817_cast_fp16)[name = tensor("op_48937_cast_fp16")]; tensor var_48938_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4819_cast_fp16)[name = tensor("op_48938_cast_fp16")]; tensor var_48939_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4821_cast_fp16)[name = tensor("op_48939_cast_fp16")]; tensor var_48940_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4823_cast_fp16)[name = tensor("op_48940_cast_fp16")]; tensor var_48941_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4825_cast_fp16)[name = tensor("op_48941_cast_fp16")]; tensor var_48942_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4827_cast_fp16)[name = tensor("op_48942_cast_fp16")]; tensor var_48943_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4829_cast_fp16)[name = tensor("op_48943_cast_fp16")]; tensor var_48944_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4831_cast_fp16)[name = tensor("op_48944_cast_fp16")]; tensor var_48945_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4833_cast_fp16)[name = tensor("op_48945_cast_fp16")]; tensor var_48946_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4835_cast_fp16)[name = tensor("op_48946_cast_fp16")]; tensor var_48947_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4837_cast_fp16)[name = tensor("op_48947_cast_fp16")]; tensor var_48948_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4839_cast_fp16)[name = tensor("op_48948_cast_fp16")]; tensor var_48949_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4841_cast_fp16)[name = tensor("op_48949_cast_fp16")]; tensor var_48950_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4843_cast_fp16)[name = tensor("op_48950_cast_fp16")]; tensor var_48951_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4845_cast_fp16)[name = tensor("op_48951_cast_fp16")]; tensor var_48952_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4847_cast_fp16)[name = tensor("op_48952_cast_fp16")]; tensor var_48953_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4849_cast_fp16)[name = tensor("op_48953_cast_fp16")]; tensor var_48954_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4851_cast_fp16)[name = tensor("op_48954_cast_fp16")]; tensor var_48955_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4853_cast_fp16)[name = tensor("op_48955_cast_fp16")]; tensor var_48956_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4855_cast_fp16)[name = tensor("op_48956_cast_fp16")]; tensor var_48957_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4857_cast_fp16)[name = tensor("op_48957_cast_fp16")]; tensor var_48958_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4859_cast_fp16)[name = tensor("op_48958_cast_fp16")]; tensor var_48959_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4861_cast_fp16)[name = tensor("op_48959_cast_fp16")]; tensor var_48960_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4863_cast_fp16)[name = tensor("op_48960_cast_fp16")]; tensor var_48961_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4865_cast_fp16)[name = tensor("op_48961_cast_fp16")]; tensor var_48962_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4867_cast_fp16)[name = tensor("op_48962_cast_fp16")]; tensor var_48963_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4869_cast_fp16)[name = tensor("op_48963_cast_fp16")]; tensor var_48964_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4871_cast_fp16)[name = tensor("op_48964_cast_fp16")]; tensor var_48965_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4873_cast_fp16)[name = tensor("op_48965_cast_fp16")]; tensor var_48966_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4875_cast_fp16)[name = tensor("op_48966_cast_fp16")]; tensor var_48967_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4877_cast_fp16)[name = tensor("op_48967_cast_fp16")]; tensor var_48968_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4879_cast_fp16)[name = tensor("op_48968_cast_fp16")]; tensor var_48969_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4881_cast_fp16)[name = tensor("op_48969_cast_fp16")]; tensor var_48970_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4883_cast_fp16)[name = tensor("op_48970_cast_fp16")]; tensor var_48971_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4885_cast_fp16)[name = tensor("op_48971_cast_fp16")]; tensor var_48972_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4887_cast_fp16)[name = tensor("op_48972_cast_fp16")]; tensor var_48973_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4889_cast_fp16)[name = tensor("op_48973_cast_fp16")]; tensor var_48974_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4891_cast_fp16)[name = tensor("op_48974_cast_fp16")]; tensor var_48975_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4893_cast_fp16)[name = tensor("op_48975_cast_fp16")]; tensor var_48976_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4895_cast_fp16)[name = tensor("op_48976_cast_fp16")]; tensor var_48977_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4897_cast_fp16)[name = tensor("op_48977_cast_fp16")]; tensor var_48978_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4899_cast_fp16)[name = tensor("op_48978_cast_fp16")]; tensor var_48979_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4901_cast_fp16)[name = tensor("op_48979_cast_fp16")]; tensor var_48980_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4903_cast_fp16)[name = tensor("op_48980_cast_fp16")]; tensor var_48981_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4905_cast_fp16)[name = tensor("op_48981_cast_fp16")]; tensor var_48982_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4907_cast_fp16)[name = tensor("op_48982_cast_fp16")]; tensor var_48983_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4909_cast_fp16)[name = tensor("op_48983_cast_fp16")]; tensor var_48984_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4911_cast_fp16)[name = tensor("op_48984_cast_fp16")]; tensor var_48985_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4913_cast_fp16)[name = tensor("op_48985_cast_fp16")]; tensor var_48986_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4915_cast_fp16)[name = tensor("op_48986_cast_fp16")]; tensor var_48987_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4917_cast_fp16)[name = tensor("op_48987_cast_fp16")]; tensor var_48988_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4919_cast_fp16)[name = tensor("op_48988_cast_fp16")]; tensor var_48989_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4921_cast_fp16)[name = tensor("op_48989_cast_fp16")]; tensor var_48990_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4923_cast_fp16)[name = tensor("op_48990_cast_fp16")]; tensor var_48991_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4925_cast_fp16)[name = tensor("op_48991_cast_fp16")]; tensor var_48992_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4927_cast_fp16)[name = tensor("op_48992_cast_fp16")]; tensor var_48993_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4929_cast_fp16)[name = tensor("op_48993_cast_fp16")]; tensor var_48994_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4931_cast_fp16)[name = tensor("op_48994_cast_fp16")]; tensor var_48995_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4933_cast_fp16)[name = tensor("op_48995_cast_fp16")]; tensor var_48996_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4935_cast_fp16)[name = tensor("op_48996_cast_fp16")]; tensor var_48997_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4937_cast_fp16)[name = tensor("op_48997_cast_fp16")]; tensor var_48998_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4939_cast_fp16)[name = tensor("op_48998_cast_fp16")]; tensor var_48999_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4941_cast_fp16)[name = tensor("op_48999_cast_fp16")]; tensor var_49000_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4943_cast_fp16)[name = tensor("op_49000_cast_fp16")]; tensor var_49001_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4945_cast_fp16)[name = tensor("op_49001_cast_fp16")]; tensor var_49002_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4947_cast_fp16)[name = tensor("op_49002_cast_fp16")]; tensor var_49003_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4949_cast_fp16)[name = tensor("op_49003_cast_fp16")]; tensor var_49004_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4951_cast_fp16)[name = tensor("op_49004_cast_fp16")]; tensor var_49005_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4953_cast_fp16)[name = tensor("op_49005_cast_fp16")]; tensor var_49006_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4955_cast_fp16)[name = tensor("op_49006_cast_fp16")]; tensor var_49007_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4957_cast_fp16)[name = tensor("op_49007_cast_fp16")]; tensor var_49008_cast_fp16 = softmax(axis = var_47727, x = aw_chunk_4959_cast_fp16)[name = tensor("op_49008_cast_fp16")]; tensor var_49010_equation_0 = const()[name = tensor("op_49010_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49010_cast_fp16 = einsum(equation = var_49010_equation_0, values = (var_48530_cast_fp16, var_48929_cast_fp16))[name = tensor("op_49010_cast_fp16")]; tensor var_49012_equation_0 = const()[name = tensor("op_49012_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49012_cast_fp16 = einsum(equation = var_49012_equation_0, values = (var_48530_cast_fp16, var_48930_cast_fp16))[name = tensor("op_49012_cast_fp16")]; tensor var_49014_equation_0 = const()[name = tensor("op_49014_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49014_cast_fp16 = einsum(equation = var_49014_equation_0, values = (var_48530_cast_fp16, var_48931_cast_fp16))[name = tensor("op_49014_cast_fp16")]; tensor var_49016_equation_0 = const()[name = tensor("op_49016_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49016_cast_fp16 = einsum(equation = var_49016_equation_0, values = (var_48530_cast_fp16, var_48932_cast_fp16))[name = tensor("op_49016_cast_fp16")]; tensor var_49018_equation_0 = const()[name = tensor("op_49018_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49018_cast_fp16 = einsum(equation = var_49018_equation_0, values = (var_48534_cast_fp16, var_48933_cast_fp16))[name = tensor("op_49018_cast_fp16")]; tensor var_49020_equation_0 = const()[name = tensor("op_49020_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49020_cast_fp16 = einsum(equation = var_49020_equation_0, values = (var_48534_cast_fp16, var_48934_cast_fp16))[name = tensor("op_49020_cast_fp16")]; tensor var_49022_equation_0 = const()[name = tensor("op_49022_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49022_cast_fp16 = einsum(equation = var_49022_equation_0, values = (var_48534_cast_fp16, var_48935_cast_fp16))[name = tensor("op_49022_cast_fp16")]; tensor var_49024_equation_0 = const()[name = tensor("op_49024_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49024_cast_fp16 = einsum(equation = var_49024_equation_0, values = (var_48534_cast_fp16, var_48936_cast_fp16))[name = tensor("op_49024_cast_fp16")]; tensor var_49026_equation_0 = const()[name = tensor("op_49026_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49026_cast_fp16 = einsum(equation = var_49026_equation_0, values = (var_48538_cast_fp16, var_48937_cast_fp16))[name = tensor("op_49026_cast_fp16")]; tensor var_49028_equation_0 = const()[name = tensor("op_49028_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49028_cast_fp16 = einsum(equation = var_49028_equation_0, values = (var_48538_cast_fp16, var_48938_cast_fp16))[name = tensor("op_49028_cast_fp16")]; tensor var_49030_equation_0 = const()[name = tensor("op_49030_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49030_cast_fp16 = einsum(equation = var_49030_equation_0, values = (var_48538_cast_fp16, var_48939_cast_fp16))[name = tensor("op_49030_cast_fp16")]; tensor var_49032_equation_0 = const()[name = tensor("op_49032_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49032_cast_fp16 = einsum(equation = var_49032_equation_0, values = (var_48538_cast_fp16, var_48940_cast_fp16))[name = tensor("op_49032_cast_fp16")]; tensor var_49034_equation_0 = const()[name = tensor("op_49034_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49034_cast_fp16 = einsum(equation = var_49034_equation_0, values = (var_48542_cast_fp16, var_48941_cast_fp16))[name = tensor("op_49034_cast_fp16")]; tensor var_49036_equation_0 = const()[name = tensor("op_49036_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49036_cast_fp16 = einsum(equation = var_49036_equation_0, values = (var_48542_cast_fp16, var_48942_cast_fp16))[name = tensor("op_49036_cast_fp16")]; tensor var_49038_equation_0 = const()[name = tensor("op_49038_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49038_cast_fp16 = einsum(equation = var_49038_equation_0, values = (var_48542_cast_fp16, var_48943_cast_fp16))[name = tensor("op_49038_cast_fp16")]; tensor var_49040_equation_0 = const()[name = tensor("op_49040_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49040_cast_fp16 = einsum(equation = var_49040_equation_0, values = (var_48542_cast_fp16, var_48944_cast_fp16))[name = tensor("op_49040_cast_fp16")]; tensor var_49042_equation_0 = const()[name = tensor("op_49042_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49042_cast_fp16 = einsum(equation = var_49042_equation_0, values = (var_48546_cast_fp16, var_48945_cast_fp16))[name = tensor("op_49042_cast_fp16")]; tensor var_49044_equation_0 = const()[name = tensor("op_49044_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49044_cast_fp16 = einsum(equation = var_49044_equation_0, values = (var_48546_cast_fp16, var_48946_cast_fp16))[name = tensor("op_49044_cast_fp16")]; tensor var_49046_equation_0 = const()[name = tensor("op_49046_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49046_cast_fp16 = einsum(equation = var_49046_equation_0, values = (var_48546_cast_fp16, var_48947_cast_fp16))[name = tensor("op_49046_cast_fp16")]; tensor var_49048_equation_0 = const()[name = tensor("op_49048_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49048_cast_fp16 = einsum(equation = var_49048_equation_0, values = (var_48546_cast_fp16, var_48948_cast_fp16))[name = tensor("op_49048_cast_fp16")]; tensor var_49050_equation_0 = const()[name = tensor("op_49050_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49050_cast_fp16 = einsum(equation = var_49050_equation_0, values = (var_48550_cast_fp16, var_48949_cast_fp16))[name = tensor("op_49050_cast_fp16")]; tensor var_49052_equation_0 = const()[name = tensor("op_49052_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49052_cast_fp16 = einsum(equation = var_49052_equation_0, values = (var_48550_cast_fp16, var_48950_cast_fp16))[name = tensor("op_49052_cast_fp16")]; tensor var_49054_equation_0 = const()[name = tensor("op_49054_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49054_cast_fp16 = einsum(equation = var_49054_equation_0, values = (var_48550_cast_fp16, var_48951_cast_fp16))[name = tensor("op_49054_cast_fp16")]; tensor var_49056_equation_0 = const()[name = tensor("op_49056_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49056_cast_fp16 = einsum(equation = var_49056_equation_0, values = (var_48550_cast_fp16, var_48952_cast_fp16))[name = tensor("op_49056_cast_fp16")]; tensor var_49058_equation_0 = const()[name = tensor("op_49058_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49058_cast_fp16 = einsum(equation = var_49058_equation_0, values = (var_48554_cast_fp16, var_48953_cast_fp16))[name = tensor("op_49058_cast_fp16")]; tensor var_49060_equation_0 = const()[name = tensor("op_49060_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49060_cast_fp16 = einsum(equation = var_49060_equation_0, values = (var_48554_cast_fp16, var_48954_cast_fp16))[name = tensor("op_49060_cast_fp16")]; tensor var_49062_equation_0 = const()[name = tensor("op_49062_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49062_cast_fp16 = einsum(equation = var_49062_equation_0, values = (var_48554_cast_fp16, var_48955_cast_fp16))[name = tensor("op_49062_cast_fp16")]; tensor var_49064_equation_0 = const()[name = tensor("op_49064_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49064_cast_fp16 = einsum(equation = var_49064_equation_0, values = (var_48554_cast_fp16, var_48956_cast_fp16))[name = tensor("op_49064_cast_fp16")]; tensor var_49066_equation_0 = const()[name = tensor("op_49066_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49066_cast_fp16 = einsum(equation = var_49066_equation_0, values = (var_48558_cast_fp16, var_48957_cast_fp16))[name = tensor("op_49066_cast_fp16")]; tensor var_49068_equation_0 = const()[name = tensor("op_49068_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49068_cast_fp16 = einsum(equation = var_49068_equation_0, values = (var_48558_cast_fp16, var_48958_cast_fp16))[name = tensor("op_49068_cast_fp16")]; tensor var_49070_equation_0 = const()[name = tensor("op_49070_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49070_cast_fp16 = einsum(equation = var_49070_equation_0, values = (var_48558_cast_fp16, var_48959_cast_fp16))[name = tensor("op_49070_cast_fp16")]; tensor var_49072_equation_0 = const()[name = tensor("op_49072_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49072_cast_fp16 = einsum(equation = var_49072_equation_0, values = (var_48558_cast_fp16, var_48960_cast_fp16))[name = tensor("op_49072_cast_fp16")]; tensor var_49074_equation_0 = const()[name = tensor("op_49074_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49074_cast_fp16 = einsum(equation = var_49074_equation_0, values = (var_48562_cast_fp16, var_48961_cast_fp16))[name = tensor("op_49074_cast_fp16")]; tensor var_49076_equation_0 = const()[name = tensor("op_49076_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49076_cast_fp16 = einsum(equation = var_49076_equation_0, values = (var_48562_cast_fp16, var_48962_cast_fp16))[name = tensor("op_49076_cast_fp16")]; tensor var_49078_equation_0 = const()[name = tensor("op_49078_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49078_cast_fp16 = einsum(equation = var_49078_equation_0, values = (var_48562_cast_fp16, var_48963_cast_fp16))[name = tensor("op_49078_cast_fp16")]; tensor var_49080_equation_0 = const()[name = tensor("op_49080_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49080_cast_fp16 = einsum(equation = var_49080_equation_0, values = (var_48562_cast_fp16, var_48964_cast_fp16))[name = tensor("op_49080_cast_fp16")]; tensor var_49082_equation_0 = const()[name = tensor("op_49082_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49082_cast_fp16 = einsum(equation = var_49082_equation_0, values = (var_48566_cast_fp16, var_48965_cast_fp16))[name = tensor("op_49082_cast_fp16")]; tensor var_49084_equation_0 = const()[name = tensor("op_49084_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49084_cast_fp16 = einsum(equation = var_49084_equation_0, values = (var_48566_cast_fp16, var_48966_cast_fp16))[name = tensor("op_49084_cast_fp16")]; tensor var_49086_equation_0 = const()[name = tensor("op_49086_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49086_cast_fp16 = einsum(equation = var_49086_equation_0, values = (var_48566_cast_fp16, var_48967_cast_fp16))[name = tensor("op_49086_cast_fp16")]; tensor var_49088_equation_0 = const()[name = tensor("op_49088_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49088_cast_fp16 = einsum(equation = var_49088_equation_0, values = (var_48566_cast_fp16, var_48968_cast_fp16))[name = tensor("op_49088_cast_fp16")]; tensor var_49090_equation_0 = const()[name = tensor("op_49090_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49090_cast_fp16 = einsum(equation = var_49090_equation_0, values = (var_48570_cast_fp16, var_48969_cast_fp16))[name = tensor("op_49090_cast_fp16")]; tensor var_49092_equation_0 = const()[name = tensor("op_49092_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49092_cast_fp16 = einsum(equation = var_49092_equation_0, values = (var_48570_cast_fp16, var_48970_cast_fp16))[name = tensor("op_49092_cast_fp16")]; tensor var_49094_equation_0 = const()[name = tensor("op_49094_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49094_cast_fp16 = einsum(equation = var_49094_equation_0, values = (var_48570_cast_fp16, var_48971_cast_fp16))[name = tensor("op_49094_cast_fp16")]; tensor var_49096_equation_0 = const()[name = tensor("op_49096_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49096_cast_fp16 = einsum(equation = var_49096_equation_0, values = (var_48570_cast_fp16, var_48972_cast_fp16))[name = tensor("op_49096_cast_fp16")]; tensor var_49098_equation_0 = const()[name = tensor("op_49098_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49098_cast_fp16 = einsum(equation = var_49098_equation_0, values = (var_48574_cast_fp16, var_48973_cast_fp16))[name = tensor("op_49098_cast_fp16")]; tensor var_49100_equation_0 = const()[name = tensor("op_49100_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49100_cast_fp16 = einsum(equation = var_49100_equation_0, values = (var_48574_cast_fp16, var_48974_cast_fp16))[name = tensor("op_49100_cast_fp16")]; tensor var_49102_equation_0 = const()[name = tensor("op_49102_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49102_cast_fp16 = einsum(equation = var_49102_equation_0, values = (var_48574_cast_fp16, var_48975_cast_fp16))[name = tensor("op_49102_cast_fp16")]; tensor var_49104_equation_0 = const()[name = tensor("op_49104_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49104_cast_fp16 = einsum(equation = var_49104_equation_0, values = (var_48574_cast_fp16, var_48976_cast_fp16))[name = tensor("op_49104_cast_fp16")]; tensor var_49106_equation_0 = const()[name = tensor("op_49106_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49106_cast_fp16 = einsum(equation = var_49106_equation_0, values = (var_48578_cast_fp16, var_48977_cast_fp16))[name = tensor("op_49106_cast_fp16")]; tensor var_49108_equation_0 = const()[name = tensor("op_49108_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49108_cast_fp16 = einsum(equation = var_49108_equation_0, values = (var_48578_cast_fp16, var_48978_cast_fp16))[name = tensor("op_49108_cast_fp16")]; tensor var_49110_equation_0 = const()[name = tensor("op_49110_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49110_cast_fp16 = einsum(equation = var_49110_equation_0, values = (var_48578_cast_fp16, var_48979_cast_fp16))[name = tensor("op_49110_cast_fp16")]; tensor var_49112_equation_0 = const()[name = tensor("op_49112_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49112_cast_fp16 = einsum(equation = var_49112_equation_0, values = (var_48578_cast_fp16, var_48980_cast_fp16))[name = tensor("op_49112_cast_fp16")]; tensor var_49114_equation_0 = const()[name = tensor("op_49114_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49114_cast_fp16 = einsum(equation = var_49114_equation_0, values = (var_48582_cast_fp16, var_48981_cast_fp16))[name = tensor("op_49114_cast_fp16")]; tensor var_49116_equation_0 = const()[name = tensor("op_49116_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49116_cast_fp16 = einsum(equation = var_49116_equation_0, values = (var_48582_cast_fp16, var_48982_cast_fp16))[name = tensor("op_49116_cast_fp16")]; tensor var_49118_equation_0 = const()[name = tensor("op_49118_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49118_cast_fp16 = einsum(equation = var_49118_equation_0, values = (var_48582_cast_fp16, var_48983_cast_fp16))[name = tensor("op_49118_cast_fp16")]; tensor var_49120_equation_0 = const()[name = tensor("op_49120_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49120_cast_fp16 = einsum(equation = var_49120_equation_0, values = (var_48582_cast_fp16, var_48984_cast_fp16))[name = tensor("op_49120_cast_fp16")]; tensor var_49122_equation_0 = const()[name = tensor("op_49122_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49122_cast_fp16 = einsum(equation = var_49122_equation_0, values = (var_48586_cast_fp16, var_48985_cast_fp16))[name = tensor("op_49122_cast_fp16")]; tensor var_49124_equation_0 = const()[name = tensor("op_49124_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49124_cast_fp16 = einsum(equation = var_49124_equation_0, values = (var_48586_cast_fp16, var_48986_cast_fp16))[name = tensor("op_49124_cast_fp16")]; tensor var_49126_equation_0 = const()[name = tensor("op_49126_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49126_cast_fp16 = einsum(equation = var_49126_equation_0, values = (var_48586_cast_fp16, var_48987_cast_fp16))[name = tensor("op_49126_cast_fp16")]; tensor var_49128_equation_0 = const()[name = tensor("op_49128_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49128_cast_fp16 = einsum(equation = var_49128_equation_0, values = (var_48586_cast_fp16, var_48988_cast_fp16))[name = tensor("op_49128_cast_fp16")]; tensor var_49130_equation_0 = const()[name = tensor("op_49130_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49130_cast_fp16 = einsum(equation = var_49130_equation_0, values = (var_48590_cast_fp16, var_48989_cast_fp16))[name = tensor("op_49130_cast_fp16")]; tensor var_49132_equation_0 = const()[name = tensor("op_49132_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49132_cast_fp16 = einsum(equation = var_49132_equation_0, values = (var_48590_cast_fp16, var_48990_cast_fp16))[name = tensor("op_49132_cast_fp16")]; tensor var_49134_equation_0 = const()[name = tensor("op_49134_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49134_cast_fp16 = einsum(equation = var_49134_equation_0, values = (var_48590_cast_fp16, var_48991_cast_fp16))[name = tensor("op_49134_cast_fp16")]; tensor var_49136_equation_0 = const()[name = tensor("op_49136_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49136_cast_fp16 = einsum(equation = var_49136_equation_0, values = (var_48590_cast_fp16, var_48992_cast_fp16))[name = tensor("op_49136_cast_fp16")]; tensor var_49138_equation_0 = const()[name = tensor("op_49138_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49138_cast_fp16 = einsum(equation = var_49138_equation_0, values = (var_48594_cast_fp16, var_48993_cast_fp16))[name = tensor("op_49138_cast_fp16")]; tensor var_49140_equation_0 = const()[name = tensor("op_49140_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49140_cast_fp16 = einsum(equation = var_49140_equation_0, values = (var_48594_cast_fp16, var_48994_cast_fp16))[name = tensor("op_49140_cast_fp16")]; tensor var_49142_equation_0 = const()[name = tensor("op_49142_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49142_cast_fp16 = einsum(equation = var_49142_equation_0, values = (var_48594_cast_fp16, var_48995_cast_fp16))[name = tensor("op_49142_cast_fp16")]; tensor var_49144_equation_0 = const()[name = tensor("op_49144_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49144_cast_fp16 = einsum(equation = var_49144_equation_0, values = (var_48594_cast_fp16, var_48996_cast_fp16))[name = tensor("op_49144_cast_fp16")]; tensor var_49146_equation_0 = const()[name = tensor("op_49146_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49146_cast_fp16 = einsum(equation = var_49146_equation_0, values = (var_48598_cast_fp16, var_48997_cast_fp16))[name = tensor("op_49146_cast_fp16")]; tensor var_49148_equation_0 = const()[name = tensor("op_49148_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49148_cast_fp16 = einsum(equation = var_49148_equation_0, values = (var_48598_cast_fp16, var_48998_cast_fp16))[name = tensor("op_49148_cast_fp16")]; tensor var_49150_equation_0 = const()[name = tensor("op_49150_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49150_cast_fp16 = einsum(equation = var_49150_equation_0, values = (var_48598_cast_fp16, var_48999_cast_fp16))[name = tensor("op_49150_cast_fp16")]; tensor var_49152_equation_0 = const()[name = tensor("op_49152_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49152_cast_fp16 = einsum(equation = var_49152_equation_0, values = (var_48598_cast_fp16, var_49000_cast_fp16))[name = tensor("op_49152_cast_fp16")]; tensor var_49154_equation_0 = const()[name = tensor("op_49154_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49154_cast_fp16 = einsum(equation = var_49154_equation_0, values = (var_48602_cast_fp16, var_49001_cast_fp16))[name = tensor("op_49154_cast_fp16")]; tensor var_49156_equation_0 = const()[name = tensor("op_49156_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49156_cast_fp16 = einsum(equation = var_49156_equation_0, values = (var_48602_cast_fp16, var_49002_cast_fp16))[name = tensor("op_49156_cast_fp16")]; tensor var_49158_equation_0 = const()[name = tensor("op_49158_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49158_cast_fp16 = einsum(equation = var_49158_equation_0, values = (var_48602_cast_fp16, var_49003_cast_fp16))[name = tensor("op_49158_cast_fp16")]; tensor var_49160_equation_0 = const()[name = tensor("op_49160_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49160_cast_fp16 = einsum(equation = var_49160_equation_0, values = (var_48602_cast_fp16, var_49004_cast_fp16))[name = tensor("op_49160_cast_fp16")]; tensor var_49162_equation_0 = const()[name = tensor("op_49162_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49162_cast_fp16 = einsum(equation = var_49162_equation_0, values = (var_48606_cast_fp16, var_49005_cast_fp16))[name = tensor("op_49162_cast_fp16")]; tensor var_49164_equation_0 = const()[name = tensor("op_49164_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49164_cast_fp16 = einsum(equation = var_49164_equation_0, values = (var_48606_cast_fp16, var_49006_cast_fp16))[name = tensor("op_49164_cast_fp16")]; tensor var_49166_equation_0 = const()[name = tensor("op_49166_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49166_cast_fp16 = einsum(equation = var_49166_equation_0, values = (var_48606_cast_fp16, var_49007_cast_fp16))[name = tensor("op_49166_cast_fp16")]; tensor var_49168_equation_0 = const()[name = tensor("op_49168_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_49168_cast_fp16 = einsum(equation = var_49168_equation_0, values = (var_48606_cast_fp16, var_49008_cast_fp16))[name = tensor("op_49168_cast_fp16")]; tensor var_49170_interleave_0 = const()[name = tensor("op_49170_interleave_0"), val = tensor(false)]; tensor var_49170_cast_fp16 = concat(axis = var_47702, interleave = var_49170_interleave_0, values = (var_49010_cast_fp16, var_49012_cast_fp16, var_49014_cast_fp16, var_49016_cast_fp16))[name = tensor("op_49170_cast_fp16")]; tensor var_49172_interleave_0 = const()[name = tensor("op_49172_interleave_0"), val = tensor(false)]; tensor var_49172_cast_fp16 = concat(axis = var_47702, interleave = var_49172_interleave_0, values = (var_49018_cast_fp16, var_49020_cast_fp16, var_49022_cast_fp16, var_49024_cast_fp16))[name = tensor("op_49172_cast_fp16")]; tensor var_49174_interleave_0 = const()[name = tensor("op_49174_interleave_0"), val = tensor(false)]; tensor var_49174_cast_fp16 = concat(axis = var_47702, interleave = var_49174_interleave_0, values = (var_49026_cast_fp16, var_49028_cast_fp16, var_49030_cast_fp16, var_49032_cast_fp16))[name = tensor("op_49174_cast_fp16")]; tensor var_49176_interleave_0 = const()[name = tensor("op_49176_interleave_0"), val = tensor(false)]; tensor var_49176_cast_fp16 = concat(axis = var_47702, interleave = var_49176_interleave_0, values = (var_49034_cast_fp16, var_49036_cast_fp16, var_49038_cast_fp16, var_49040_cast_fp16))[name = tensor("op_49176_cast_fp16")]; tensor var_49178_interleave_0 = const()[name = tensor("op_49178_interleave_0"), val = tensor(false)]; tensor var_49178_cast_fp16 = concat(axis = var_47702, interleave = var_49178_interleave_0, values = (var_49042_cast_fp16, var_49044_cast_fp16, var_49046_cast_fp16, var_49048_cast_fp16))[name = tensor("op_49178_cast_fp16")]; tensor var_49180_interleave_0 = const()[name = tensor("op_49180_interleave_0"), val = tensor(false)]; tensor var_49180_cast_fp16 = concat(axis = var_47702, interleave = var_49180_interleave_0, values = (var_49050_cast_fp16, var_49052_cast_fp16, var_49054_cast_fp16, var_49056_cast_fp16))[name = tensor("op_49180_cast_fp16")]; tensor var_49182_interleave_0 = const()[name = tensor("op_49182_interleave_0"), val = tensor(false)]; tensor var_49182_cast_fp16 = concat(axis = var_47702, interleave = var_49182_interleave_0, values = (var_49058_cast_fp16, var_49060_cast_fp16, var_49062_cast_fp16, var_49064_cast_fp16))[name = tensor("op_49182_cast_fp16")]; tensor var_49184_interleave_0 = const()[name = tensor("op_49184_interleave_0"), val = tensor(false)]; tensor var_49184_cast_fp16 = concat(axis = var_47702, interleave = var_49184_interleave_0, values = (var_49066_cast_fp16, var_49068_cast_fp16, var_49070_cast_fp16, var_49072_cast_fp16))[name = tensor("op_49184_cast_fp16")]; tensor var_49186_interleave_0 = const()[name = tensor("op_49186_interleave_0"), val = tensor(false)]; tensor var_49186_cast_fp16 = concat(axis = var_47702, interleave = var_49186_interleave_0, values = (var_49074_cast_fp16, var_49076_cast_fp16, var_49078_cast_fp16, var_49080_cast_fp16))[name = tensor("op_49186_cast_fp16")]; tensor var_49188_interleave_0 = const()[name = tensor("op_49188_interleave_0"), val = tensor(false)]; tensor var_49188_cast_fp16 = concat(axis = var_47702, interleave = var_49188_interleave_0, values = (var_49082_cast_fp16, var_49084_cast_fp16, var_49086_cast_fp16, var_49088_cast_fp16))[name = tensor("op_49188_cast_fp16")]; tensor var_49190_interleave_0 = const()[name = tensor("op_49190_interleave_0"), val = tensor(false)]; tensor var_49190_cast_fp16 = concat(axis = var_47702, interleave = var_49190_interleave_0, values = (var_49090_cast_fp16, var_49092_cast_fp16, var_49094_cast_fp16, var_49096_cast_fp16))[name = tensor("op_49190_cast_fp16")]; tensor var_49192_interleave_0 = const()[name = tensor("op_49192_interleave_0"), val = tensor(false)]; tensor var_49192_cast_fp16 = concat(axis = var_47702, interleave = var_49192_interleave_0, values = (var_49098_cast_fp16, var_49100_cast_fp16, var_49102_cast_fp16, var_49104_cast_fp16))[name = tensor("op_49192_cast_fp16")]; tensor var_49194_interleave_0 = const()[name = tensor("op_49194_interleave_0"), val = tensor(false)]; tensor var_49194_cast_fp16 = concat(axis = var_47702, interleave = var_49194_interleave_0, values = (var_49106_cast_fp16, var_49108_cast_fp16, var_49110_cast_fp16, var_49112_cast_fp16))[name = tensor("op_49194_cast_fp16")]; tensor var_49196_interleave_0 = const()[name = tensor("op_49196_interleave_0"), val = tensor(false)]; tensor var_49196_cast_fp16 = concat(axis = var_47702, interleave = var_49196_interleave_0, values = (var_49114_cast_fp16, var_49116_cast_fp16, var_49118_cast_fp16, var_49120_cast_fp16))[name = tensor("op_49196_cast_fp16")]; tensor var_49198_interleave_0 = const()[name = tensor("op_49198_interleave_0"), val = tensor(false)]; tensor var_49198_cast_fp16 = concat(axis = var_47702, interleave = var_49198_interleave_0, values = (var_49122_cast_fp16, var_49124_cast_fp16, var_49126_cast_fp16, var_49128_cast_fp16))[name = tensor("op_49198_cast_fp16")]; tensor var_49200_interleave_0 = const()[name = tensor("op_49200_interleave_0"), val = tensor(false)]; tensor var_49200_cast_fp16 = concat(axis = var_47702, interleave = var_49200_interleave_0, values = (var_49130_cast_fp16, var_49132_cast_fp16, var_49134_cast_fp16, var_49136_cast_fp16))[name = tensor("op_49200_cast_fp16")]; tensor var_49202_interleave_0 = const()[name = tensor("op_49202_interleave_0"), val = tensor(false)]; tensor var_49202_cast_fp16 = concat(axis = var_47702, interleave = var_49202_interleave_0, values = (var_49138_cast_fp16, var_49140_cast_fp16, var_49142_cast_fp16, var_49144_cast_fp16))[name = tensor("op_49202_cast_fp16")]; tensor var_49204_interleave_0 = const()[name = tensor("op_49204_interleave_0"), val = tensor(false)]; tensor var_49204_cast_fp16 = concat(axis = var_47702, interleave = var_49204_interleave_0, values = (var_49146_cast_fp16, var_49148_cast_fp16, var_49150_cast_fp16, var_49152_cast_fp16))[name = tensor("op_49204_cast_fp16")]; tensor var_49206_interleave_0 = const()[name = tensor("op_49206_interleave_0"), val = tensor(false)]; tensor var_49206_cast_fp16 = concat(axis = var_47702, interleave = var_49206_interleave_0, values = (var_49154_cast_fp16, var_49156_cast_fp16, var_49158_cast_fp16, var_49160_cast_fp16))[name = tensor("op_49206_cast_fp16")]; tensor var_49208_interleave_0 = const()[name = tensor("op_49208_interleave_0"), val = tensor(false)]; tensor var_49208_cast_fp16 = concat(axis = var_47702, interleave = var_49208_interleave_0, values = (var_49162_cast_fp16, var_49164_cast_fp16, var_49166_cast_fp16, var_49168_cast_fp16))[name = tensor("op_49208_cast_fp16")]; tensor input_241_interleave_0 = const()[name = tensor("input_241_interleave_0"), val = tensor(false)]; tensor input_241_cast_fp16 = concat(axis = var_47727, interleave = input_241_interleave_0, values = (var_49170_cast_fp16, var_49172_cast_fp16, var_49174_cast_fp16, var_49176_cast_fp16, var_49178_cast_fp16, var_49180_cast_fp16, var_49182_cast_fp16, var_49184_cast_fp16, var_49186_cast_fp16, var_49188_cast_fp16, var_49190_cast_fp16, var_49192_cast_fp16, var_49194_cast_fp16, var_49196_cast_fp16, var_49198_cast_fp16, var_49200_cast_fp16, var_49202_cast_fp16, var_49204_cast_fp16, var_49206_cast_fp16, var_49208_cast_fp16))[name = tensor("input_241_cast_fp16")]; tensor var_49219_pad_type_0 = const()[name = tensor("op_49219_pad_type_0"), val = tensor("valid")]; tensor var_49219_strides_0 = const()[name = tensor("op_49219_strides_0"), val = tensor([1, 1])]; tensor var_49219_pad_0 = const()[name = tensor("op_49219_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49219_dilations_0 = const()[name = tensor("op_49219_dilations_0"), val = tensor([1, 1])]; tensor var_49219_groups_0 = const()[name = tensor("op_49219_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400074560))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400893824))), name = tensor("layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_30_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400893952)))]; tensor var_49219_cast_fp16 = conv(bias = layers_30_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_49219_dilations_0, groups = var_49219_groups_0, pad = var_49219_pad_0, pad_type = var_49219_pad_type_0, strides = var_49219_strides_0, weight = layers_30_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_241_cast_fp16)[name = tensor("op_49219_cast_fp16")]; tensor var_49225_pad_type_0 = const()[name = tensor("op_49225_pad_type_0"), val = tensor("valid")]; tensor var_49225_strides_0 = const()[name = tensor("op_49225_strides_0"), val = tensor([1, 1])]; tensor var_49225_pad_0 = const()[name = tensor("op_49225_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49225_dilations_0 = const()[name = tensor("op_49225_dilations_0"), val = tensor([1, 1])]; tensor var_49225_groups_0 = const()[name = tensor("op_49225_groups_0"), val = tensor(1)]; tensor layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400908032))), name = tensor("layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(400896576))), shape = tensor([1280, 1280, 1, 1])]; tensor var_49225_cast_fp16 = conv(dilations = var_49225_dilations_0, groups = var_49225_groups_0, pad = var_49225_pad_0, pad_type = var_49225_pad_type_0, strides = var_49225_strides_0, weight = layers_30_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_241_cast_fp16)[name = tensor("op_49225_cast_fp16")]; tensor obj_123_cast_fp16 = add(x = var_49219_cast_fp16, y = var_49225_cast_fp16)[name = tensor("obj_123_cast_fp16")]; tensor inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = obj_123_cast_fp16)[name = tensor("inputs_123_cast_fp16")]; tensor out_123_axes_0 = const()[name = tensor("out_123_axes_0"), val = tensor([1])]; tensor var_49236_to_fp16 = const()[name = tensor("op_49236_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_123_cast_fp16 = layer_norm(axes = out_123_axes_0, epsilon = var_49236_to_fp16, x = inputs_123_cast_fp16)[name = tensor("out_123_cast_fp16")]; tensor input_243_gamma_0_to_fp16 = const()[name = tensor("input_243_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401112896)))]; tensor input_243_beta_0_to_fp16 = const()[name = tensor("input_243_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401115520)))]; tensor input_243_epsilon_0_to_fp16 = const()[name = tensor("input_243_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_243_cast_fp16 = batch_norm(beta = input_243_beta_0_to_fp16, epsilon = input_243_epsilon_0_to_fp16, gamma = input_243_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_123_cast_fp16)[name = tensor("input_243_cast_fp16")]; tensor var_49254_pad_type_0 = const()[name = tensor("op_49254_pad_type_0"), val = tensor("valid")]; tensor var_49254_strides_0 = const()[name = tensor("op_49254_strides_0"), val = tensor([1, 1])]; tensor var_49254_pad_0 = const()[name = tensor("op_49254_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49254_dilations_0 = const()[name = tensor("op_49254_dilations_0"), val = tensor([1, 1])]; tensor var_49254_groups_0 = const()[name = tensor("op_49254_groups_0"), val = tensor(1)]; tensor layers_30_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(401118144))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404395008))), name = tensor("layers_30_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_30_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404395136)))]; tensor var_49254_cast_fp16 = conv(bias = layers_30_fc1_inlier_module_bias_to_fp16, dilations = var_49254_dilations_0, groups = var_49254_groups_0, pad = var_49254_pad_0, pad_type = var_49254_pad_type_0, strides = var_49254_strides_0, weight = layers_30_fc1_inlier_module_weight_to_fp16_palettized, x = input_243_cast_fp16)[name = tensor("op_49254_cast_fp16")]; tensor var_49260_pad_type_0 = const()[name = tensor("op_49260_pad_type_0"), val = tensor("valid")]; tensor var_49260_strides_0 = const()[name = tensor("op_49260_strides_0"), val = tensor([1, 1])]; tensor var_49260_pad_0 = const()[name = tensor("op_49260_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49260_dilations_0 = const()[name = tensor("op_49260_dilations_0"), val = tensor([1, 1])]; tensor var_49260_groups_0 = const()[name = tensor("op_49260_groups_0"), val = tensor(1)]; tensor layers_30_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404464960))), name = tensor("layers_30_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(404405440))), shape = tensor([5120, 1280, 1, 1])]; tensor var_49260_cast_fp16 = conv(dilations = var_49260_dilations_0, groups = var_49260_groups_0, pad = var_49260_pad_0, pad_type = var_49260_pad_type_0, strides = var_49260_strides_0, weight = layers_30_fc1_outlier_module_weight_to_fp16_sparsified, x = input_243_cast_fp16)[name = tensor("op_49260_cast_fp16")]; tensor input_245_cast_fp16 = add(x = var_49254_cast_fp16, y = var_49260_cast_fp16)[name = tensor("input_245_cast_fp16")]; tensor input_247_mode_0 = const()[name = tensor("input_247_mode_0"), val = tensor("EXACT")]; tensor input_247_cast_fp16 = gelu(mode = input_247_mode_0, x = input_245_cast_fp16)[name = tensor("input_247_cast_fp16")]; tensor var_49271_pad_type_0 = const()[name = tensor("op_49271_pad_type_0"), val = tensor("valid")]; tensor var_49271_strides_0 = const()[name = tensor("op_49271_strides_0"), val = tensor([1, 1])]; tensor var_49271_pad_0 = const()[name = tensor("op_49271_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49271_dilations_0 = const()[name = tensor("op_49271_dilations_0"), val = tensor([1, 1])]; tensor var_49271_groups_0 = const()[name = tensor("op_49271_groups_0"), val = tensor(1)]; tensor layers_30_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(405284224))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408561088))), name = tensor("layers_30_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_30_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_30_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408561216)))]; tensor var_49271_cast_fp16 = conv(bias = layers_30_fc2_inlier_module_bias_to_fp16, dilations = var_49271_dilations_0, groups = var_49271_groups_0, pad = var_49271_pad_0, pad_type = var_49271_pad_type_0, strides = var_49271_strides_0, weight = layers_30_fc2_inlier_module_weight_to_fp16_palettized, x = input_247_cast_fp16)[name = tensor("op_49271_cast_fp16")]; tensor var_49277_pad_type_0 = const()[name = tensor("op_49277_pad_type_0"), val = tensor("valid")]; tensor var_49277_strides_0 = const()[name = tensor("op_49277_strides_0"), val = tensor([1, 1])]; tensor var_49277_pad_0 = const()[name = tensor("op_49277_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49277_dilations_0 = const()[name = tensor("op_49277_dilations_0"), val = tensor([1, 1])]; tensor var_49277_groups_0 = const()[name = tensor("op_49277_groups_0"), val = tensor(1)]; tensor layers_30_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408624832))), name = tensor("layers_30_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(408563840))), shape = tensor([1280, 5120, 1, 1])]; tensor var_49277_cast_fp16 = conv(dilations = var_49277_dilations_0, groups = var_49277_groups_0, pad = var_49277_pad_0, pad_type = var_49277_pad_type_0, strides = var_49277_strides_0, weight = layers_30_fc2_outlier_module_weight_to_fp16_sparsified, x = input_247_cast_fp16)[name = tensor("op_49277_cast_fp16")]; tensor hidden_states_65_cast_fp16 = add(x = var_49271_cast_fp16, y = var_49277_cast_fp16)[name = tensor("hidden_states_65_cast_fp16")]; tensor inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = hidden_states_65_cast_fp16)[name = tensor("inputs_125_cast_fp16")]; tensor var_49283 = const()[name = tensor("op_49283"), val = tensor(3)]; tensor var_49308 = const()[name = tensor("op_49308"), val = tensor(1)]; tensor out_125_axes_0 = const()[name = tensor("out_125_axes_0"), val = tensor([1])]; tensor var_49325_to_fp16 = const()[name = tensor("op_49325_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_125_cast_fp16 = layer_norm(axes = out_125_axes_0, epsilon = var_49325_to_fp16, x = inputs_125_cast_fp16)[name = tensor("out_125_cast_fp16")]; tensor obj_125_gamma_0_to_fp16 = const()[name = tensor("obj_125_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409444096)))]; tensor obj_125_beta_0_to_fp16 = const()[name = tensor("obj_125_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409446720)))]; tensor obj_125_epsilon_0_to_fp16 = const()[name = tensor("obj_125_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor obj_125_cast_fp16 = batch_norm(beta = obj_125_beta_0_to_fp16, epsilon = obj_125_epsilon_0_to_fp16, gamma = obj_125_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_125_cast_fp16)[name = tensor("obj_125_cast_fp16")]; tensor var_49347_pad_type_0 = const()[name = tensor("op_49347_pad_type_0"), val = tensor("valid")]; tensor var_49347_strides_0 = const()[name = tensor("op_49347_strides_0"), val = tensor([1, 1])]; tensor var_49347_pad_0 = const()[name = tensor("op_49347_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49347_dilations_0 = const()[name = tensor("op_49347_dilations_0"), val = tensor([1, 1])]; tensor var_49347_groups_0 = const()[name = tensor("op_49347_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(409449344))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410268608))), name = tensor("layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_31_self_attn_q_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_q_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410268736)))]; tensor var_49347_cast_fp16 = conv(bias = layers_31_self_attn_q_proj_inlier_module_bias_to_fp16, dilations = var_49347_dilations_0, groups = var_49347_groups_0, pad = var_49347_pad_0, pad_type = var_49347_pad_type_0, strides = var_49347_strides_0, weight = layers_31_self_attn_q_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = tensor("op_49347_cast_fp16")]; tensor var_49353_pad_type_0 = const()[name = tensor("op_49353_pad_type_0"), val = tensor("valid")]; tensor var_49353_strides_0 = const()[name = tensor("op_49353_strides_0"), val = tensor([1, 1])]; tensor var_49353_pad_0 = const()[name = tensor("op_49353_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49353_dilations_0 = const()[name = tensor("op_49353_dilations_0"), val = tensor([1, 1])]; tensor var_49353_groups_0 = const()[name = tensor("op_49353_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410290240))), name = tensor("layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410271360))), shape = tensor([1280, 1280, 1, 1])]; tensor var_49353_cast_fp16 = conv(dilations = var_49353_dilations_0, groups = var_49353_groups_0, pad = var_49353_pad_0, pad_type = var_49353_pad_type_0, strides = var_49353_strides_0, weight = layers_31_self_attn_q_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = tensor("op_49353_cast_fp16")]; tensor query_cast_fp16 = add(x = var_49347_cast_fp16, y = var_49353_cast_fp16)[name = tensor("query_cast_fp16")]; tensor var_49362_pad_type_0 = const()[name = tensor("op_49362_pad_type_0"), val = tensor("valid")]; tensor var_49362_strides_0 = const()[name = tensor("op_49362_strides_0"), val = tensor([1, 1])]; tensor var_49362_pad_0 = const()[name = tensor("op_49362_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49362_dilations_0 = const()[name = tensor("op_49362_dilations_0"), val = tensor([1, 1])]; tensor var_49362_groups_0 = const()[name = tensor("op_49362_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(410495104))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411314368))), name = tensor("layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor var_49362_cast_fp16 = conv(dilations = var_49362_dilations_0, groups = var_49362_groups_0, pad = var_49362_pad_0, pad_type = var_49362_pad_type_0, strides = var_49362_strides_0, weight = layers_31_self_attn_k_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = tensor("op_49362_cast_fp16")]; tensor var_49368_pad_type_0 = const()[name = tensor("op_49368_pad_type_0"), val = tensor("valid")]; tensor var_49368_strides_0 = const()[name = tensor("op_49368_strides_0"), val = tensor([1, 1])]; tensor var_49368_pad_0 = const()[name = tensor("op_49368_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49368_dilations_0 = const()[name = tensor("op_49368_dilations_0"), val = tensor([1, 1])]; tensor var_49368_groups_0 = const()[name = tensor("op_49368_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411336768))), name = tensor("layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411314496))), shape = tensor([1280, 1280, 1, 1])]; tensor var_49368_cast_fp16 = conv(dilations = var_49368_dilations_0, groups = var_49368_groups_0, pad = var_49368_pad_0, pad_type = var_49368_pad_type_0, strides = var_49368_strides_0, weight = layers_31_self_attn_k_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = tensor("op_49368_cast_fp16")]; tensor key_cast_fp16 = add(x = var_49362_cast_fp16, y = var_49368_cast_fp16)[name = tensor("key_cast_fp16")]; tensor var_49378_pad_type_0 = const()[name = tensor("op_49378_pad_type_0"), val = tensor("valid")]; tensor var_49378_strides_0 = const()[name = tensor("op_49378_strides_0"), val = tensor([1, 1])]; tensor var_49378_pad_0 = const()[name = tensor("op_49378_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49378_dilations_0 = const()[name = tensor("op_49378_dilations_0"), val = tensor([1, 1])]; tensor var_49378_groups_0 = const()[name = tensor("op_49378_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411541632))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412360896))), name = tensor("layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_31_self_attn_v_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_v_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412361024)))]; tensor var_49378_cast_fp16 = conv(bias = layers_31_self_attn_v_proj_inlier_module_bias_to_fp16, dilations = var_49378_dilations_0, groups = var_49378_groups_0, pad = var_49378_pad_0, pad_type = var_49378_pad_type_0, strides = var_49378_strides_0, weight = layers_31_self_attn_v_proj_inlier_module_weight_to_fp16_palettized, x = obj_125_cast_fp16)[name = tensor("op_49378_cast_fp16")]; tensor var_49384_pad_type_0 = const()[name = tensor("op_49384_pad_type_0"), val = tensor("valid")]; tensor var_49384_strides_0 = const()[name = tensor("op_49384_strides_0"), val = tensor([1, 1])]; tensor var_49384_pad_0 = const()[name = tensor("op_49384_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_49384_dilations_0 = const()[name = tensor("op_49384_dilations_0"), val = tensor([1, 1])]; tensor var_49384_groups_0 = const()[name = tensor("op_49384_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412376512))), name = tensor("layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412363648))), shape = tensor([1280, 1280, 1, 1])]; tensor var_49384_cast_fp16 = conv(dilations = var_49384_dilations_0, groups = var_49384_groups_0, pad = var_49384_pad_0, pad_type = var_49384_pad_type_0, strides = var_49384_strides_0, weight = layers_31_self_attn_v_proj_outlier_module_weight_to_fp16_sparsified, x = obj_125_cast_fp16)[name = tensor("op_49384_cast_fp16")]; tensor value_cast_fp16 = add(x = var_49378_cast_fp16, y = var_49384_cast_fp16)[name = tensor("value_cast_fp16")]; tensor var_49390_begin_0 = const()[name = tensor("op_49390_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49390_end_0 = const()[name = tensor("op_49390_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49390_end_mask_0 = const()[name = tensor("op_49390_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49390_cast_fp16 = slice_by_index(begin = var_49390_begin_0, end = var_49390_end_0, end_mask = var_49390_end_mask_0, x = query_cast_fp16)[name = tensor("op_49390_cast_fp16")]; tensor var_49394_begin_0 = const()[name = tensor("op_49394_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_49394_end_0 = const()[name = tensor("op_49394_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_49394_end_mask_0 = const()[name = tensor("op_49394_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49394_cast_fp16 = slice_by_index(begin = var_49394_begin_0, end = var_49394_end_0, end_mask = var_49394_end_mask_0, x = query_cast_fp16)[name = tensor("op_49394_cast_fp16")]; tensor var_49398_begin_0 = const()[name = tensor("op_49398_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_49398_end_0 = const()[name = tensor("op_49398_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_49398_end_mask_0 = const()[name = tensor("op_49398_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49398_cast_fp16 = slice_by_index(begin = var_49398_begin_0, end = var_49398_end_0, end_mask = var_49398_end_mask_0, x = query_cast_fp16)[name = tensor("op_49398_cast_fp16")]; tensor var_49402_begin_0 = const()[name = tensor("op_49402_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_49402_end_0 = const()[name = tensor("op_49402_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_49402_end_mask_0 = const()[name = tensor("op_49402_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49402_cast_fp16 = slice_by_index(begin = var_49402_begin_0, end = var_49402_end_0, end_mask = var_49402_end_mask_0, x = query_cast_fp16)[name = tensor("op_49402_cast_fp16")]; tensor var_49406_begin_0 = const()[name = tensor("op_49406_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_49406_end_0 = const()[name = tensor("op_49406_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_49406_end_mask_0 = const()[name = tensor("op_49406_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49406_cast_fp16 = slice_by_index(begin = var_49406_begin_0, end = var_49406_end_0, end_mask = var_49406_end_mask_0, x = query_cast_fp16)[name = tensor("op_49406_cast_fp16")]; tensor var_49410_begin_0 = const()[name = tensor("op_49410_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_49410_end_0 = const()[name = tensor("op_49410_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_49410_end_mask_0 = const()[name = tensor("op_49410_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49410_cast_fp16 = slice_by_index(begin = var_49410_begin_0, end = var_49410_end_0, end_mask = var_49410_end_mask_0, x = query_cast_fp16)[name = tensor("op_49410_cast_fp16")]; tensor var_49414_begin_0 = const()[name = tensor("op_49414_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_49414_end_0 = const()[name = tensor("op_49414_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_49414_end_mask_0 = const()[name = tensor("op_49414_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49414_cast_fp16 = slice_by_index(begin = var_49414_begin_0, end = var_49414_end_0, end_mask = var_49414_end_mask_0, x = query_cast_fp16)[name = tensor("op_49414_cast_fp16")]; tensor var_49418_begin_0 = const()[name = tensor("op_49418_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_49418_end_0 = const()[name = tensor("op_49418_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_49418_end_mask_0 = const()[name = tensor("op_49418_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49418_cast_fp16 = slice_by_index(begin = var_49418_begin_0, end = var_49418_end_0, end_mask = var_49418_end_mask_0, x = query_cast_fp16)[name = tensor("op_49418_cast_fp16")]; tensor var_49422_begin_0 = const()[name = tensor("op_49422_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_49422_end_0 = const()[name = tensor("op_49422_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_49422_end_mask_0 = const()[name = tensor("op_49422_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49422_cast_fp16 = slice_by_index(begin = var_49422_begin_0, end = var_49422_end_0, end_mask = var_49422_end_mask_0, x = query_cast_fp16)[name = tensor("op_49422_cast_fp16")]; tensor var_49426_begin_0 = const()[name = tensor("op_49426_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_49426_end_0 = const()[name = tensor("op_49426_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_49426_end_mask_0 = const()[name = tensor("op_49426_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49426_cast_fp16 = slice_by_index(begin = var_49426_begin_0, end = var_49426_end_0, end_mask = var_49426_end_mask_0, x = query_cast_fp16)[name = tensor("op_49426_cast_fp16")]; tensor var_49430_begin_0 = const()[name = tensor("op_49430_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_49430_end_0 = const()[name = tensor("op_49430_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_49430_end_mask_0 = const()[name = tensor("op_49430_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49430_cast_fp16 = slice_by_index(begin = var_49430_begin_0, end = var_49430_end_0, end_mask = var_49430_end_mask_0, x = query_cast_fp16)[name = tensor("op_49430_cast_fp16")]; tensor var_49434_begin_0 = const()[name = tensor("op_49434_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_49434_end_0 = const()[name = tensor("op_49434_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_49434_end_mask_0 = const()[name = tensor("op_49434_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49434_cast_fp16 = slice_by_index(begin = var_49434_begin_0, end = var_49434_end_0, end_mask = var_49434_end_mask_0, x = query_cast_fp16)[name = tensor("op_49434_cast_fp16")]; tensor var_49438_begin_0 = const()[name = tensor("op_49438_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_49438_end_0 = const()[name = tensor("op_49438_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_49438_end_mask_0 = const()[name = tensor("op_49438_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49438_cast_fp16 = slice_by_index(begin = var_49438_begin_0, end = var_49438_end_0, end_mask = var_49438_end_mask_0, x = query_cast_fp16)[name = tensor("op_49438_cast_fp16")]; tensor var_49442_begin_0 = const()[name = tensor("op_49442_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_49442_end_0 = const()[name = tensor("op_49442_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_49442_end_mask_0 = const()[name = tensor("op_49442_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49442_cast_fp16 = slice_by_index(begin = var_49442_begin_0, end = var_49442_end_0, end_mask = var_49442_end_mask_0, x = query_cast_fp16)[name = tensor("op_49442_cast_fp16")]; tensor var_49446_begin_0 = const()[name = tensor("op_49446_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_49446_end_0 = const()[name = tensor("op_49446_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_49446_end_mask_0 = const()[name = tensor("op_49446_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49446_cast_fp16 = slice_by_index(begin = var_49446_begin_0, end = var_49446_end_0, end_mask = var_49446_end_mask_0, x = query_cast_fp16)[name = tensor("op_49446_cast_fp16")]; tensor var_49450_begin_0 = const()[name = tensor("op_49450_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_49450_end_0 = const()[name = tensor("op_49450_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_49450_end_mask_0 = const()[name = tensor("op_49450_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49450_cast_fp16 = slice_by_index(begin = var_49450_begin_0, end = var_49450_end_0, end_mask = var_49450_end_mask_0, x = query_cast_fp16)[name = tensor("op_49450_cast_fp16")]; tensor var_49454_begin_0 = const()[name = tensor("op_49454_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_49454_end_0 = const()[name = tensor("op_49454_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_49454_end_mask_0 = const()[name = tensor("op_49454_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49454_cast_fp16 = slice_by_index(begin = var_49454_begin_0, end = var_49454_end_0, end_mask = var_49454_end_mask_0, x = query_cast_fp16)[name = tensor("op_49454_cast_fp16")]; tensor var_49458_begin_0 = const()[name = tensor("op_49458_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_49458_end_0 = const()[name = tensor("op_49458_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_49458_end_mask_0 = const()[name = tensor("op_49458_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49458_cast_fp16 = slice_by_index(begin = var_49458_begin_0, end = var_49458_end_0, end_mask = var_49458_end_mask_0, x = query_cast_fp16)[name = tensor("op_49458_cast_fp16")]; tensor var_49462_begin_0 = const()[name = tensor("op_49462_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_49462_end_0 = const()[name = tensor("op_49462_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_49462_end_mask_0 = const()[name = tensor("op_49462_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49462_cast_fp16 = slice_by_index(begin = var_49462_begin_0, end = var_49462_end_0, end_mask = var_49462_end_mask_0, x = query_cast_fp16)[name = tensor("op_49462_cast_fp16")]; tensor var_49466_begin_0 = const()[name = tensor("op_49466_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_49466_end_0 = const()[name = tensor("op_49466_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_49466_end_mask_0 = const()[name = tensor("op_49466_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_49466_cast_fp16 = slice_by_index(begin = var_49466_begin_0, end = var_49466_end_0, end_mask = var_49466_end_mask_0, x = query_cast_fp16)[name = tensor("op_49466_cast_fp16")]; tensor var_49475_begin_0 = const()[name = tensor("op_49475_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49475_end_0 = const()[name = tensor("op_49475_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49475_end_mask_0 = const()[name = tensor("op_49475_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49475_cast_fp16 = slice_by_index(begin = var_49475_begin_0, end = var_49475_end_0, end_mask = var_49475_end_mask_0, x = var_49390_cast_fp16)[name = tensor("op_49475_cast_fp16")]; tensor var_49482_begin_0 = const()[name = tensor("op_49482_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49482_end_0 = const()[name = tensor("op_49482_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49482_end_mask_0 = const()[name = tensor("op_49482_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49482_cast_fp16 = slice_by_index(begin = var_49482_begin_0, end = var_49482_end_0, end_mask = var_49482_end_mask_0, x = var_49390_cast_fp16)[name = tensor("op_49482_cast_fp16")]; tensor var_49489_begin_0 = const()[name = tensor("op_49489_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49489_end_0 = const()[name = tensor("op_49489_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49489_end_mask_0 = const()[name = tensor("op_49489_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49489_cast_fp16 = slice_by_index(begin = var_49489_begin_0, end = var_49489_end_0, end_mask = var_49489_end_mask_0, x = var_49390_cast_fp16)[name = tensor("op_49489_cast_fp16")]; tensor var_49496_begin_0 = const()[name = tensor("op_49496_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49496_end_0 = const()[name = tensor("op_49496_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49496_end_mask_0 = const()[name = tensor("op_49496_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49496_cast_fp16 = slice_by_index(begin = var_49496_begin_0, end = var_49496_end_0, end_mask = var_49496_end_mask_0, x = var_49390_cast_fp16)[name = tensor("op_49496_cast_fp16")]; tensor var_49503_begin_0 = const()[name = tensor("op_49503_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49503_end_0 = const()[name = tensor("op_49503_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49503_end_mask_0 = const()[name = tensor("op_49503_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49503_cast_fp16 = slice_by_index(begin = var_49503_begin_0, end = var_49503_end_0, end_mask = var_49503_end_mask_0, x = var_49394_cast_fp16)[name = tensor("op_49503_cast_fp16")]; tensor var_49510_begin_0 = const()[name = tensor("op_49510_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49510_end_0 = const()[name = tensor("op_49510_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49510_end_mask_0 = const()[name = tensor("op_49510_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49510_cast_fp16 = slice_by_index(begin = var_49510_begin_0, end = var_49510_end_0, end_mask = var_49510_end_mask_0, x = var_49394_cast_fp16)[name = tensor("op_49510_cast_fp16")]; tensor var_49517_begin_0 = const()[name = tensor("op_49517_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49517_end_0 = const()[name = tensor("op_49517_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49517_end_mask_0 = const()[name = tensor("op_49517_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49517_cast_fp16 = slice_by_index(begin = var_49517_begin_0, end = var_49517_end_0, end_mask = var_49517_end_mask_0, x = var_49394_cast_fp16)[name = tensor("op_49517_cast_fp16")]; tensor var_49524_begin_0 = const()[name = tensor("op_49524_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49524_end_0 = const()[name = tensor("op_49524_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49524_end_mask_0 = const()[name = tensor("op_49524_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49524_cast_fp16 = slice_by_index(begin = var_49524_begin_0, end = var_49524_end_0, end_mask = var_49524_end_mask_0, x = var_49394_cast_fp16)[name = tensor("op_49524_cast_fp16")]; tensor var_49531_begin_0 = const()[name = tensor("op_49531_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49531_end_0 = const()[name = tensor("op_49531_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49531_end_mask_0 = const()[name = tensor("op_49531_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49531_cast_fp16 = slice_by_index(begin = var_49531_begin_0, end = var_49531_end_0, end_mask = var_49531_end_mask_0, x = var_49398_cast_fp16)[name = tensor("op_49531_cast_fp16")]; tensor var_49538_begin_0 = const()[name = tensor("op_49538_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49538_end_0 = const()[name = tensor("op_49538_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49538_end_mask_0 = const()[name = tensor("op_49538_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49538_cast_fp16 = slice_by_index(begin = var_49538_begin_0, end = var_49538_end_0, end_mask = var_49538_end_mask_0, x = var_49398_cast_fp16)[name = tensor("op_49538_cast_fp16")]; tensor var_49545_begin_0 = const()[name = tensor("op_49545_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49545_end_0 = const()[name = tensor("op_49545_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49545_end_mask_0 = const()[name = tensor("op_49545_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49545_cast_fp16 = slice_by_index(begin = var_49545_begin_0, end = var_49545_end_0, end_mask = var_49545_end_mask_0, x = var_49398_cast_fp16)[name = tensor("op_49545_cast_fp16")]; tensor var_49552_begin_0 = const()[name = tensor("op_49552_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49552_end_0 = const()[name = tensor("op_49552_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49552_end_mask_0 = const()[name = tensor("op_49552_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49552_cast_fp16 = slice_by_index(begin = var_49552_begin_0, end = var_49552_end_0, end_mask = var_49552_end_mask_0, x = var_49398_cast_fp16)[name = tensor("op_49552_cast_fp16")]; tensor var_49559_begin_0 = const()[name = tensor("op_49559_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49559_end_0 = const()[name = tensor("op_49559_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49559_end_mask_0 = const()[name = tensor("op_49559_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49559_cast_fp16 = slice_by_index(begin = var_49559_begin_0, end = var_49559_end_0, end_mask = var_49559_end_mask_0, x = var_49402_cast_fp16)[name = tensor("op_49559_cast_fp16")]; tensor var_49566_begin_0 = const()[name = tensor("op_49566_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49566_end_0 = const()[name = tensor("op_49566_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49566_end_mask_0 = const()[name = tensor("op_49566_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49566_cast_fp16 = slice_by_index(begin = var_49566_begin_0, end = var_49566_end_0, end_mask = var_49566_end_mask_0, x = var_49402_cast_fp16)[name = tensor("op_49566_cast_fp16")]; tensor var_49573_begin_0 = const()[name = tensor("op_49573_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49573_end_0 = const()[name = tensor("op_49573_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49573_end_mask_0 = const()[name = tensor("op_49573_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49573_cast_fp16 = slice_by_index(begin = var_49573_begin_0, end = var_49573_end_0, end_mask = var_49573_end_mask_0, x = var_49402_cast_fp16)[name = tensor("op_49573_cast_fp16")]; tensor var_49580_begin_0 = const()[name = tensor("op_49580_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49580_end_0 = const()[name = tensor("op_49580_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49580_end_mask_0 = const()[name = tensor("op_49580_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49580_cast_fp16 = slice_by_index(begin = var_49580_begin_0, end = var_49580_end_0, end_mask = var_49580_end_mask_0, x = var_49402_cast_fp16)[name = tensor("op_49580_cast_fp16")]; tensor var_49587_begin_0 = const()[name = tensor("op_49587_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49587_end_0 = const()[name = tensor("op_49587_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49587_end_mask_0 = const()[name = tensor("op_49587_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49587_cast_fp16 = slice_by_index(begin = var_49587_begin_0, end = var_49587_end_0, end_mask = var_49587_end_mask_0, x = var_49406_cast_fp16)[name = tensor("op_49587_cast_fp16")]; tensor var_49594_begin_0 = const()[name = tensor("op_49594_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49594_end_0 = const()[name = tensor("op_49594_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49594_end_mask_0 = const()[name = tensor("op_49594_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49594_cast_fp16 = slice_by_index(begin = var_49594_begin_0, end = var_49594_end_0, end_mask = var_49594_end_mask_0, x = var_49406_cast_fp16)[name = tensor("op_49594_cast_fp16")]; tensor var_49601_begin_0 = const()[name = tensor("op_49601_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49601_end_0 = const()[name = tensor("op_49601_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49601_end_mask_0 = const()[name = tensor("op_49601_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49601_cast_fp16 = slice_by_index(begin = var_49601_begin_0, end = var_49601_end_0, end_mask = var_49601_end_mask_0, x = var_49406_cast_fp16)[name = tensor("op_49601_cast_fp16")]; tensor var_49608_begin_0 = const()[name = tensor("op_49608_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49608_end_0 = const()[name = tensor("op_49608_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49608_end_mask_0 = const()[name = tensor("op_49608_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49608_cast_fp16 = slice_by_index(begin = var_49608_begin_0, end = var_49608_end_0, end_mask = var_49608_end_mask_0, x = var_49406_cast_fp16)[name = tensor("op_49608_cast_fp16")]; tensor var_49615_begin_0 = const()[name = tensor("op_49615_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49615_end_0 = const()[name = tensor("op_49615_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49615_end_mask_0 = const()[name = tensor("op_49615_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49615_cast_fp16 = slice_by_index(begin = var_49615_begin_0, end = var_49615_end_0, end_mask = var_49615_end_mask_0, x = var_49410_cast_fp16)[name = tensor("op_49615_cast_fp16")]; tensor var_49622_begin_0 = const()[name = tensor("op_49622_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49622_end_0 = const()[name = tensor("op_49622_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49622_end_mask_0 = const()[name = tensor("op_49622_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49622_cast_fp16 = slice_by_index(begin = var_49622_begin_0, end = var_49622_end_0, end_mask = var_49622_end_mask_0, x = var_49410_cast_fp16)[name = tensor("op_49622_cast_fp16")]; tensor var_49629_begin_0 = const()[name = tensor("op_49629_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49629_end_0 = const()[name = tensor("op_49629_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49629_end_mask_0 = const()[name = tensor("op_49629_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49629_cast_fp16 = slice_by_index(begin = var_49629_begin_0, end = var_49629_end_0, end_mask = var_49629_end_mask_0, x = var_49410_cast_fp16)[name = tensor("op_49629_cast_fp16")]; tensor var_49636_begin_0 = const()[name = tensor("op_49636_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49636_end_0 = const()[name = tensor("op_49636_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49636_end_mask_0 = const()[name = tensor("op_49636_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49636_cast_fp16 = slice_by_index(begin = var_49636_begin_0, end = var_49636_end_0, end_mask = var_49636_end_mask_0, x = var_49410_cast_fp16)[name = tensor("op_49636_cast_fp16")]; tensor var_49643_begin_0 = const()[name = tensor("op_49643_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49643_end_0 = const()[name = tensor("op_49643_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49643_end_mask_0 = const()[name = tensor("op_49643_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49643_cast_fp16 = slice_by_index(begin = var_49643_begin_0, end = var_49643_end_0, end_mask = var_49643_end_mask_0, x = var_49414_cast_fp16)[name = tensor("op_49643_cast_fp16")]; tensor var_49650_begin_0 = const()[name = tensor("op_49650_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49650_end_0 = const()[name = tensor("op_49650_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49650_end_mask_0 = const()[name = tensor("op_49650_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49650_cast_fp16 = slice_by_index(begin = var_49650_begin_0, end = var_49650_end_0, end_mask = var_49650_end_mask_0, x = var_49414_cast_fp16)[name = tensor("op_49650_cast_fp16")]; tensor var_49657_begin_0 = const()[name = tensor("op_49657_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49657_end_0 = const()[name = tensor("op_49657_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49657_end_mask_0 = const()[name = tensor("op_49657_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49657_cast_fp16 = slice_by_index(begin = var_49657_begin_0, end = var_49657_end_0, end_mask = var_49657_end_mask_0, x = var_49414_cast_fp16)[name = tensor("op_49657_cast_fp16")]; tensor var_49664_begin_0 = const()[name = tensor("op_49664_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49664_end_0 = const()[name = tensor("op_49664_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49664_end_mask_0 = const()[name = tensor("op_49664_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49664_cast_fp16 = slice_by_index(begin = var_49664_begin_0, end = var_49664_end_0, end_mask = var_49664_end_mask_0, x = var_49414_cast_fp16)[name = tensor("op_49664_cast_fp16")]; tensor var_49671_begin_0 = const()[name = tensor("op_49671_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49671_end_0 = const()[name = tensor("op_49671_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49671_end_mask_0 = const()[name = tensor("op_49671_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49671_cast_fp16 = slice_by_index(begin = var_49671_begin_0, end = var_49671_end_0, end_mask = var_49671_end_mask_0, x = var_49418_cast_fp16)[name = tensor("op_49671_cast_fp16")]; tensor var_49678_begin_0 = const()[name = tensor("op_49678_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49678_end_0 = const()[name = tensor("op_49678_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49678_end_mask_0 = const()[name = tensor("op_49678_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49678_cast_fp16 = slice_by_index(begin = var_49678_begin_0, end = var_49678_end_0, end_mask = var_49678_end_mask_0, x = var_49418_cast_fp16)[name = tensor("op_49678_cast_fp16")]; tensor var_49685_begin_0 = const()[name = tensor("op_49685_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49685_end_0 = const()[name = tensor("op_49685_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49685_end_mask_0 = const()[name = tensor("op_49685_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49685_cast_fp16 = slice_by_index(begin = var_49685_begin_0, end = var_49685_end_0, end_mask = var_49685_end_mask_0, x = var_49418_cast_fp16)[name = tensor("op_49685_cast_fp16")]; tensor var_49692_begin_0 = const()[name = tensor("op_49692_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49692_end_0 = const()[name = tensor("op_49692_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49692_end_mask_0 = const()[name = tensor("op_49692_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49692_cast_fp16 = slice_by_index(begin = var_49692_begin_0, end = var_49692_end_0, end_mask = var_49692_end_mask_0, x = var_49418_cast_fp16)[name = tensor("op_49692_cast_fp16")]; tensor var_49699_begin_0 = const()[name = tensor("op_49699_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49699_end_0 = const()[name = tensor("op_49699_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49699_end_mask_0 = const()[name = tensor("op_49699_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49699_cast_fp16 = slice_by_index(begin = var_49699_begin_0, end = var_49699_end_0, end_mask = var_49699_end_mask_0, x = var_49422_cast_fp16)[name = tensor("op_49699_cast_fp16")]; tensor var_49706_begin_0 = const()[name = tensor("op_49706_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49706_end_0 = const()[name = tensor("op_49706_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49706_end_mask_0 = const()[name = tensor("op_49706_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49706_cast_fp16 = slice_by_index(begin = var_49706_begin_0, end = var_49706_end_0, end_mask = var_49706_end_mask_0, x = var_49422_cast_fp16)[name = tensor("op_49706_cast_fp16")]; tensor var_49713_begin_0 = const()[name = tensor("op_49713_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49713_end_0 = const()[name = tensor("op_49713_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49713_end_mask_0 = const()[name = tensor("op_49713_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49713_cast_fp16 = slice_by_index(begin = var_49713_begin_0, end = var_49713_end_0, end_mask = var_49713_end_mask_0, x = var_49422_cast_fp16)[name = tensor("op_49713_cast_fp16")]; tensor var_49720_begin_0 = const()[name = tensor("op_49720_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49720_end_0 = const()[name = tensor("op_49720_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49720_end_mask_0 = const()[name = tensor("op_49720_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49720_cast_fp16 = slice_by_index(begin = var_49720_begin_0, end = var_49720_end_0, end_mask = var_49720_end_mask_0, x = var_49422_cast_fp16)[name = tensor("op_49720_cast_fp16")]; tensor var_49727_begin_0 = const()[name = tensor("op_49727_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49727_end_0 = const()[name = tensor("op_49727_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49727_end_mask_0 = const()[name = tensor("op_49727_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49727_cast_fp16 = slice_by_index(begin = var_49727_begin_0, end = var_49727_end_0, end_mask = var_49727_end_mask_0, x = var_49426_cast_fp16)[name = tensor("op_49727_cast_fp16")]; tensor var_49734_begin_0 = const()[name = tensor("op_49734_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49734_end_0 = const()[name = tensor("op_49734_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49734_end_mask_0 = const()[name = tensor("op_49734_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49734_cast_fp16 = slice_by_index(begin = var_49734_begin_0, end = var_49734_end_0, end_mask = var_49734_end_mask_0, x = var_49426_cast_fp16)[name = tensor("op_49734_cast_fp16")]; tensor var_49741_begin_0 = const()[name = tensor("op_49741_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49741_end_0 = const()[name = tensor("op_49741_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49741_end_mask_0 = const()[name = tensor("op_49741_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49741_cast_fp16 = slice_by_index(begin = var_49741_begin_0, end = var_49741_end_0, end_mask = var_49741_end_mask_0, x = var_49426_cast_fp16)[name = tensor("op_49741_cast_fp16")]; tensor var_49748_begin_0 = const()[name = tensor("op_49748_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49748_end_0 = const()[name = tensor("op_49748_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49748_end_mask_0 = const()[name = tensor("op_49748_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49748_cast_fp16 = slice_by_index(begin = var_49748_begin_0, end = var_49748_end_0, end_mask = var_49748_end_mask_0, x = var_49426_cast_fp16)[name = tensor("op_49748_cast_fp16")]; tensor var_49755_begin_0 = const()[name = tensor("op_49755_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49755_end_0 = const()[name = tensor("op_49755_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49755_end_mask_0 = const()[name = tensor("op_49755_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49755_cast_fp16 = slice_by_index(begin = var_49755_begin_0, end = var_49755_end_0, end_mask = var_49755_end_mask_0, x = var_49430_cast_fp16)[name = tensor("op_49755_cast_fp16")]; tensor var_49762_begin_0 = const()[name = tensor("op_49762_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49762_end_0 = const()[name = tensor("op_49762_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49762_end_mask_0 = const()[name = tensor("op_49762_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49762_cast_fp16 = slice_by_index(begin = var_49762_begin_0, end = var_49762_end_0, end_mask = var_49762_end_mask_0, x = var_49430_cast_fp16)[name = tensor("op_49762_cast_fp16")]; tensor var_49769_begin_0 = const()[name = tensor("op_49769_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49769_end_0 = const()[name = tensor("op_49769_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49769_end_mask_0 = const()[name = tensor("op_49769_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49769_cast_fp16 = slice_by_index(begin = var_49769_begin_0, end = var_49769_end_0, end_mask = var_49769_end_mask_0, x = var_49430_cast_fp16)[name = tensor("op_49769_cast_fp16")]; tensor var_49776_begin_0 = const()[name = tensor("op_49776_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49776_end_0 = const()[name = tensor("op_49776_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49776_end_mask_0 = const()[name = tensor("op_49776_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49776_cast_fp16 = slice_by_index(begin = var_49776_begin_0, end = var_49776_end_0, end_mask = var_49776_end_mask_0, x = var_49430_cast_fp16)[name = tensor("op_49776_cast_fp16")]; tensor var_49783_begin_0 = const()[name = tensor("op_49783_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49783_end_0 = const()[name = tensor("op_49783_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49783_end_mask_0 = const()[name = tensor("op_49783_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49783_cast_fp16 = slice_by_index(begin = var_49783_begin_0, end = var_49783_end_0, end_mask = var_49783_end_mask_0, x = var_49434_cast_fp16)[name = tensor("op_49783_cast_fp16")]; tensor var_49790_begin_0 = const()[name = tensor("op_49790_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49790_end_0 = const()[name = tensor("op_49790_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49790_end_mask_0 = const()[name = tensor("op_49790_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49790_cast_fp16 = slice_by_index(begin = var_49790_begin_0, end = var_49790_end_0, end_mask = var_49790_end_mask_0, x = var_49434_cast_fp16)[name = tensor("op_49790_cast_fp16")]; tensor var_49797_begin_0 = const()[name = tensor("op_49797_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49797_end_0 = const()[name = tensor("op_49797_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49797_end_mask_0 = const()[name = tensor("op_49797_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49797_cast_fp16 = slice_by_index(begin = var_49797_begin_0, end = var_49797_end_0, end_mask = var_49797_end_mask_0, x = var_49434_cast_fp16)[name = tensor("op_49797_cast_fp16")]; tensor var_49804_begin_0 = const()[name = tensor("op_49804_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49804_end_0 = const()[name = tensor("op_49804_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49804_end_mask_0 = const()[name = tensor("op_49804_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49804_cast_fp16 = slice_by_index(begin = var_49804_begin_0, end = var_49804_end_0, end_mask = var_49804_end_mask_0, x = var_49434_cast_fp16)[name = tensor("op_49804_cast_fp16")]; tensor var_49811_begin_0 = const()[name = tensor("op_49811_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49811_end_0 = const()[name = tensor("op_49811_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49811_end_mask_0 = const()[name = tensor("op_49811_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49811_cast_fp16 = slice_by_index(begin = var_49811_begin_0, end = var_49811_end_0, end_mask = var_49811_end_mask_0, x = var_49438_cast_fp16)[name = tensor("op_49811_cast_fp16")]; tensor var_49818_begin_0 = const()[name = tensor("op_49818_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49818_end_0 = const()[name = tensor("op_49818_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49818_end_mask_0 = const()[name = tensor("op_49818_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49818_cast_fp16 = slice_by_index(begin = var_49818_begin_0, end = var_49818_end_0, end_mask = var_49818_end_mask_0, x = var_49438_cast_fp16)[name = tensor("op_49818_cast_fp16")]; tensor var_49825_begin_0 = const()[name = tensor("op_49825_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49825_end_0 = const()[name = tensor("op_49825_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49825_end_mask_0 = const()[name = tensor("op_49825_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49825_cast_fp16 = slice_by_index(begin = var_49825_begin_0, end = var_49825_end_0, end_mask = var_49825_end_mask_0, x = var_49438_cast_fp16)[name = tensor("op_49825_cast_fp16")]; tensor var_49832_begin_0 = const()[name = tensor("op_49832_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49832_end_0 = const()[name = tensor("op_49832_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49832_end_mask_0 = const()[name = tensor("op_49832_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49832_cast_fp16 = slice_by_index(begin = var_49832_begin_0, end = var_49832_end_0, end_mask = var_49832_end_mask_0, x = var_49438_cast_fp16)[name = tensor("op_49832_cast_fp16")]; tensor var_49839_begin_0 = const()[name = tensor("op_49839_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49839_end_0 = const()[name = tensor("op_49839_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49839_end_mask_0 = const()[name = tensor("op_49839_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49839_cast_fp16 = slice_by_index(begin = var_49839_begin_0, end = var_49839_end_0, end_mask = var_49839_end_mask_0, x = var_49442_cast_fp16)[name = tensor("op_49839_cast_fp16")]; tensor var_49846_begin_0 = const()[name = tensor("op_49846_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49846_end_0 = const()[name = tensor("op_49846_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49846_end_mask_0 = const()[name = tensor("op_49846_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49846_cast_fp16 = slice_by_index(begin = var_49846_begin_0, end = var_49846_end_0, end_mask = var_49846_end_mask_0, x = var_49442_cast_fp16)[name = tensor("op_49846_cast_fp16")]; tensor var_49853_begin_0 = const()[name = tensor("op_49853_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49853_end_0 = const()[name = tensor("op_49853_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49853_end_mask_0 = const()[name = tensor("op_49853_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49853_cast_fp16 = slice_by_index(begin = var_49853_begin_0, end = var_49853_end_0, end_mask = var_49853_end_mask_0, x = var_49442_cast_fp16)[name = tensor("op_49853_cast_fp16")]; tensor var_49860_begin_0 = const()[name = tensor("op_49860_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49860_end_0 = const()[name = tensor("op_49860_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49860_end_mask_0 = const()[name = tensor("op_49860_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49860_cast_fp16 = slice_by_index(begin = var_49860_begin_0, end = var_49860_end_0, end_mask = var_49860_end_mask_0, x = var_49442_cast_fp16)[name = tensor("op_49860_cast_fp16")]; tensor var_49867_begin_0 = const()[name = tensor("op_49867_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49867_end_0 = const()[name = tensor("op_49867_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49867_end_mask_0 = const()[name = tensor("op_49867_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49867_cast_fp16 = slice_by_index(begin = var_49867_begin_0, end = var_49867_end_0, end_mask = var_49867_end_mask_0, x = var_49446_cast_fp16)[name = tensor("op_49867_cast_fp16")]; tensor var_49874_begin_0 = const()[name = tensor("op_49874_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49874_end_0 = const()[name = tensor("op_49874_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49874_end_mask_0 = const()[name = tensor("op_49874_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49874_cast_fp16 = slice_by_index(begin = var_49874_begin_0, end = var_49874_end_0, end_mask = var_49874_end_mask_0, x = var_49446_cast_fp16)[name = tensor("op_49874_cast_fp16")]; tensor var_49881_begin_0 = const()[name = tensor("op_49881_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49881_end_0 = const()[name = tensor("op_49881_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49881_end_mask_0 = const()[name = tensor("op_49881_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49881_cast_fp16 = slice_by_index(begin = var_49881_begin_0, end = var_49881_end_0, end_mask = var_49881_end_mask_0, x = var_49446_cast_fp16)[name = tensor("op_49881_cast_fp16")]; tensor var_49888_begin_0 = const()[name = tensor("op_49888_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49888_end_0 = const()[name = tensor("op_49888_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49888_end_mask_0 = const()[name = tensor("op_49888_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49888_cast_fp16 = slice_by_index(begin = var_49888_begin_0, end = var_49888_end_0, end_mask = var_49888_end_mask_0, x = var_49446_cast_fp16)[name = tensor("op_49888_cast_fp16")]; tensor var_49895_begin_0 = const()[name = tensor("op_49895_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49895_end_0 = const()[name = tensor("op_49895_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49895_end_mask_0 = const()[name = tensor("op_49895_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49895_cast_fp16 = slice_by_index(begin = var_49895_begin_0, end = var_49895_end_0, end_mask = var_49895_end_mask_0, x = var_49450_cast_fp16)[name = tensor("op_49895_cast_fp16")]; tensor var_49902_begin_0 = const()[name = tensor("op_49902_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49902_end_0 = const()[name = tensor("op_49902_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49902_end_mask_0 = const()[name = tensor("op_49902_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49902_cast_fp16 = slice_by_index(begin = var_49902_begin_0, end = var_49902_end_0, end_mask = var_49902_end_mask_0, x = var_49450_cast_fp16)[name = tensor("op_49902_cast_fp16")]; tensor var_49909_begin_0 = const()[name = tensor("op_49909_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49909_end_0 = const()[name = tensor("op_49909_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49909_end_mask_0 = const()[name = tensor("op_49909_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49909_cast_fp16 = slice_by_index(begin = var_49909_begin_0, end = var_49909_end_0, end_mask = var_49909_end_mask_0, x = var_49450_cast_fp16)[name = tensor("op_49909_cast_fp16")]; tensor var_49916_begin_0 = const()[name = tensor("op_49916_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49916_end_0 = const()[name = tensor("op_49916_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49916_end_mask_0 = const()[name = tensor("op_49916_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49916_cast_fp16 = slice_by_index(begin = var_49916_begin_0, end = var_49916_end_0, end_mask = var_49916_end_mask_0, x = var_49450_cast_fp16)[name = tensor("op_49916_cast_fp16")]; tensor var_49923_begin_0 = const()[name = tensor("op_49923_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49923_end_0 = const()[name = tensor("op_49923_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49923_end_mask_0 = const()[name = tensor("op_49923_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49923_cast_fp16 = slice_by_index(begin = var_49923_begin_0, end = var_49923_end_0, end_mask = var_49923_end_mask_0, x = var_49454_cast_fp16)[name = tensor("op_49923_cast_fp16")]; tensor var_49930_begin_0 = const()[name = tensor("op_49930_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49930_end_0 = const()[name = tensor("op_49930_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49930_end_mask_0 = const()[name = tensor("op_49930_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49930_cast_fp16 = slice_by_index(begin = var_49930_begin_0, end = var_49930_end_0, end_mask = var_49930_end_mask_0, x = var_49454_cast_fp16)[name = tensor("op_49930_cast_fp16")]; tensor var_49937_begin_0 = const()[name = tensor("op_49937_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49937_end_0 = const()[name = tensor("op_49937_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49937_end_mask_0 = const()[name = tensor("op_49937_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49937_cast_fp16 = slice_by_index(begin = var_49937_begin_0, end = var_49937_end_0, end_mask = var_49937_end_mask_0, x = var_49454_cast_fp16)[name = tensor("op_49937_cast_fp16")]; tensor var_49944_begin_0 = const()[name = tensor("op_49944_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49944_end_0 = const()[name = tensor("op_49944_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49944_end_mask_0 = const()[name = tensor("op_49944_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49944_cast_fp16 = slice_by_index(begin = var_49944_begin_0, end = var_49944_end_0, end_mask = var_49944_end_mask_0, x = var_49454_cast_fp16)[name = tensor("op_49944_cast_fp16")]; tensor var_49951_begin_0 = const()[name = tensor("op_49951_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49951_end_0 = const()[name = tensor("op_49951_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49951_end_mask_0 = const()[name = tensor("op_49951_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49951_cast_fp16 = slice_by_index(begin = var_49951_begin_0, end = var_49951_end_0, end_mask = var_49951_end_mask_0, x = var_49458_cast_fp16)[name = tensor("op_49951_cast_fp16")]; tensor var_49958_begin_0 = const()[name = tensor("op_49958_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49958_end_0 = const()[name = tensor("op_49958_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49958_end_mask_0 = const()[name = tensor("op_49958_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49958_cast_fp16 = slice_by_index(begin = var_49958_begin_0, end = var_49958_end_0, end_mask = var_49958_end_mask_0, x = var_49458_cast_fp16)[name = tensor("op_49958_cast_fp16")]; tensor var_49965_begin_0 = const()[name = tensor("op_49965_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49965_end_0 = const()[name = tensor("op_49965_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49965_end_mask_0 = const()[name = tensor("op_49965_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49965_cast_fp16 = slice_by_index(begin = var_49965_begin_0, end = var_49965_end_0, end_mask = var_49965_end_mask_0, x = var_49458_cast_fp16)[name = tensor("op_49965_cast_fp16")]; tensor var_49972_begin_0 = const()[name = tensor("op_49972_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_49972_end_0 = const()[name = tensor("op_49972_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_49972_end_mask_0 = const()[name = tensor("op_49972_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49972_cast_fp16 = slice_by_index(begin = var_49972_begin_0, end = var_49972_end_0, end_mask = var_49972_end_mask_0, x = var_49458_cast_fp16)[name = tensor("op_49972_cast_fp16")]; tensor var_49979_begin_0 = const()[name = tensor("op_49979_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_49979_end_0 = const()[name = tensor("op_49979_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_49979_end_mask_0 = const()[name = tensor("op_49979_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49979_cast_fp16 = slice_by_index(begin = var_49979_begin_0, end = var_49979_end_0, end_mask = var_49979_end_mask_0, x = var_49462_cast_fp16)[name = tensor("op_49979_cast_fp16")]; tensor var_49986_begin_0 = const()[name = tensor("op_49986_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_49986_end_0 = const()[name = tensor("op_49986_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_49986_end_mask_0 = const()[name = tensor("op_49986_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49986_cast_fp16 = slice_by_index(begin = var_49986_begin_0, end = var_49986_end_0, end_mask = var_49986_end_mask_0, x = var_49462_cast_fp16)[name = tensor("op_49986_cast_fp16")]; tensor var_49993_begin_0 = const()[name = tensor("op_49993_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_49993_end_0 = const()[name = tensor("op_49993_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_49993_end_mask_0 = const()[name = tensor("op_49993_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_49993_cast_fp16 = slice_by_index(begin = var_49993_begin_0, end = var_49993_end_0, end_mask = var_49993_end_mask_0, x = var_49462_cast_fp16)[name = tensor("op_49993_cast_fp16")]; tensor var_50000_begin_0 = const()[name = tensor("op_50000_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_50000_end_0 = const()[name = tensor("op_50000_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_50000_end_mask_0 = const()[name = tensor("op_50000_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50000_cast_fp16 = slice_by_index(begin = var_50000_begin_0, end = var_50000_end_0, end_mask = var_50000_end_mask_0, x = var_49462_cast_fp16)[name = tensor("op_50000_cast_fp16")]; tensor var_50007_begin_0 = const()[name = tensor("op_50007_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_50007_end_0 = const()[name = tensor("op_50007_end_0"), val = tensor([1, 64, 1, 375])]; tensor var_50007_end_mask_0 = const()[name = tensor("op_50007_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50007_cast_fp16 = slice_by_index(begin = var_50007_begin_0, end = var_50007_end_0, end_mask = var_50007_end_mask_0, x = var_49466_cast_fp16)[name = tensor("op_50007_cast_fp16")]; tensor var_50014_begin_0 = const()[name = tensor("op_50014_begin_0"), val = tensor([0, 0, 0, 375])]; tensor var_50014_end_0 = const()[name = tensor("op_50014_end_0"), val = tensor([1, 64, 1, 750])]; tensor var_50014_end_mask_0 = const()[name = tensor("op_50014_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50014_cast_fp16 = slice_by_index(begin = var_50014_begin_0, end = var_50014_end_0, end_mask = var_50014_end_mask_0, x = var_49466_cast_fp16)[name = tensor("op_50014_cast_fp16")]; tensor var_50021_begin_0 = const()[name = tensor("op_50021_begin_0"), val = tensor([0, 0, 0, 750])]; tensor var_50021_end_0 = const()[name = tensor("op_50021_end_0"), val = tensor([1, 64, 1, 1125])]; tensor var_50021_end_mask_0 = const()[name = tensor("op_50021_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50021_cast_fp16 = slice_by_index(begin = var_50021_begin_0, end = var_50021_end_0, end_mask = var_50021_end_mask_0, x = var_49466_cast_fp16)[name = tensor("op_50021_cast_fp16")]; tensor var_50028_begin_0 = const()[name = tensor("op_50028_begin_0"), val = tensor([0, 0, 0, 1125])]; tensor var_50028_end_0 = const()[name = tensor("op_50028_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_50028_end_mask_0 = const()[name = tensor("op_50028_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50028_cast_fp16 = slice_by_index(begin = var_50028_begin_0, end = var_50028_end_0, end_mask = var_50028_end_mask_0, x = var_49466_cast_fp16)[name = tensor("op_50028_cast_fp16")]; tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; tensor var_50033_begin_0 = const()[name = tensor("op_50033_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_50033_end_0 = const()[name = tensor("op_50033_end_0"), val = tensor([1, 1500, 1, 64])]; tensor var_50033_end_mask_0 = const()[name = tensor("op_50033_end_mask_0"), val = tensor([true, true, true, false])]; tensor k_cast_fp16 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor("transpose_0")]; tensor var_50033_cast_fp16 = slice_by_index(begin = var_50033_begin_0, end = var_50033_end_0, end_mask = var_50033_end_mask_0, x = k_cast_fp16)[name = tensor("op_50033_cast_fp16")]; tensor var_50037_begin_0 = const()[name = tensor("op_50037_begin_0"), val = tensor([0, 0, 0, 64])]; tensor var_50037_end_0 = const()[name = tensor("op_50037_end_0"), val = tensor([1, 1500, 1, 128])]; tensor var_50037_end_mask_0 = const()[name = tensor("op_50037_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50037_cast_fp16 = slice_by_index(begin = var_50037_begin_0, end = var_50037_end_0, end_mask = var_50037_end_mask_0, x = k_cast_fp16)[name = tensor("op_50037_cast_fp16")]; tensor var_50041_begin_0 = const()[name = tensor("op_50041_begin_0"), val = tensor([0, 0, 0, 128])]; tensor var_50041_end_0 = const()[name = tensor("op_50041_end_0"), val = tensor([1, 1500, 1, 192])]; tensor var_50041_end_mask_0 = const()[name = tensor("op_50041_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50041_cast_fp16 = slice_by_index(begin = var_50041_begin_0, end = var_50041_end_0, end_mask = var_50041_end_mask_0, x = k_cast_fp16)[name = tensor("op_50041_cast_fp16")]; tensor var_50045_begin_0 = const()[name = tensor("op_50045_begin_0"), val = tensor([0, 0, 0, 192])]; tensor var_50045_end_0 = const()[name = tensor("op_50045_end_0"), val = tensor([1, 1500, 1, 256])]; tensor var_50045_end_mask_0 = const()[name = tensor("op_50045_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50045_cast_fp16 = slice_by_index(begin = var_50045_begin_0, end = var_50045_end_0, end_mask = var_50045_end_mask_0, x = k_cast_fp16)[name = tensor("op_50045_cast_fp16")]; tensor var_50049_begin_0 = const()[name = tensor("op_50049_begin_0"), val = tensor([0, 0, 0, 256])]; tensor var_50049_end_0 = const()[name = tensor("op_50049_end_0"), val = tensor([1, 1500, 1, 320])]; tensor var_50049_end_mask_0 = const()[name = tensor("op_50049_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50049_cast_fp16 = slice_by_index(begin = var_50049_begin_0, end = var_50049_end_0, end_mask = var_50049_end_mask_0, x = k_cast_fp16)[name = tensor("op_50049_cast_fp16")]; tensor var_50053_begin_0 = const()[name = tensor("op_50053_begin_0"), val = tensor([0, 0, 0, 320])]; tensor var_50053_end_0 = const()[name = tensor("op_50053_end_0"), val = tensor([1, 1500, 1, 384])]; tensor var_50053_end_mask_0 = const()[name = tensor("op_50053_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50053_cast_fp16 = slice_by_index(begin = var_50053_begin_0, end = var_50053_end_0, end_mask = var_50053_end_mask_0, x = k_cast_fp16)[name = tensor("op_50053_cast_fp16")]; tensor var_50057_begin_0 = const()[name = tensor("op_50057_begin_0"), val = tensor([0, 0, 0, 384])]; tensor var_50057_end_0 = const()[name = tensor("op_50057_end_0"), val = tensor([1, 1500, 1, 448])]; tensor var_50057_end_mask_0 = const()[name = tensor("op_50057_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50057_cast_fp16 = slice_by_index(begin = var_50057_begin_0, end = var_50057_end_0, end_mask = var_50057_end_mask_0, x = k_cast_fp16)[name = tensor("op_50057_cast_fp16")]; tensor var_50061_begin_0 = const()[name = tensor("op_50061_begin_0"), val = tensor([0, 0, 0, 448])]; tensor var_50061_end_0 = const()[name = tensor("op_50061_end_0"), val = tensor([1, 1500, 1, 512])]; tensor var_50061_end_mask_0 = const()[name = tensor("op_50061_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50061_cast_fp16 = slice_by_index(begin = var_50061_begin_0, end = var_50061_end_0, end_mask = var_50061_end_mask_0, x = k_cast_fp16)[name = tensor("op_50061_cast_fp16")]; tensor var_50065_begin_0 = const()[name = tensor("op_50065_begin_0"), val = tensor([0, 0, 0, 512])]; tensor var_50065_end_0 = const()[name = tensor("op_50065_end_0"), val = tensor([1, 1500, 1, 576])]; tensor var_50065_end_mask_0 = const()[name = tensor("op_50065_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50065_cast_fp16 = slice_by_index(begin = var_50065_begin_0, end = var_50065_end_0, end_mask = var_50065_end_mask_0, x = k_cast_fp16)[name = tensor("op_50065_cast_fp16")]; tensor var_50069_begin_0 = const()[name = tensor("op_50069_begin_0"), val = tensor([0, 0, 0, 576])]; tensor var_50069_end_0 = const()[name = tensor("op_50069_end_0"), val = tensor([1, 1500, 1, 640])]; tensor var_50069_end_mask_0 = const()[name = tensor("op_50069_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50069_cast_fp16 = slice_by_index(begin = var_50069_begin_0, end = var_50069_end_0, end_mask = var_50069_end_mask_0, x = k_cast_fp16)[name = tensor("op_50069_cast_fp16")]; tensor var_50073_begin_0 = const()[name = tensor("op_50073_begin_0"), val = tensor([0, 0, 0, 640])]; tensor var_50073_end_0 = const()[name = tensor("op_50073_end_0"), val = tensor([1, 1500, 1, 704])]; tensor var_50073_end_mask_0 = const()[name = tensor("op_50073_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50073_cast_fp16 = slice_by_index(begin = var_50073_begin_0, end = var_50073_end_0, end_mask = var_50073_end_mask_0, x = k_cast_fp16)[name = tensor("op_50073_cast_fp16")]; tensor var_50077_begin_0 = const()[name = tensor("op_50077_begin_0"), val = tensor([0, 0, 0, 704])]; tensor var_50077_end_0 = const()[name = tensor("op_50077_end_0"), val = tensor([1, 1500, 1, 768])]; tensor var_50077_end_mask_0 = const()[name = tensor("op_50077_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50077_cast_fp16 = slice_by_index(begin = var_50077_begin_0, end = var_50077_end_0, end_mask = var_50077_end_mask_0, x = k_cast_fp16)[name = tensor("op_50077_cast_fp16")]; tensor var_50081_begin_0 = const()[name = tensor("op_50081_begin_0"), val = tensor([0, 0, 0, 768])]; tensor var_50081_end_0 = const()[name = tensor("op_50081_end_0"), val = tensor([1, 1500, 1, 832])]; tensor var_50081_end_mask_0 = const()[name = tensor("op_50081_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50081_cast_fp16 = slice_by_index(begin = var_50081_begin_0, end = var_50081_end_0, end_mask = var_50081_end_mask_0, x = k_cast_fp16)[name = tensor("op_50081_cast_fp16")]; tensor var_50085_begin_0 = const()[name = tensor("op_50085_begin_0"), val = tensor([0, 0, 0, 832])]; tensor var_50085_end_0 = const()[name = tensor("op_50085_end_0"), val = tensor([1, 1500, 1, 896])]; tensor var_50085_end_mask_0 = const()[name = tensor("op_50085_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50085_cast_fp16 = slice_by_index(begin = var_50085_begin_0, end = var_50085_end_0, end_mask = var_50085_end_mask_0, x = k_cast_fp16)[name = tensor("op_50085_cast_fp16")]; tensor var_50089_begin_0 = const()[name = tensor("op_50089_begin_0"), val = tensor([0, 0, 0, 896])]; tensor var_50089_end_0 = const()[name = tensor("op_50089_end_0"), val = tensor([1, 1500, 1, 960])]; tensor var_50089_end_mask_0 = const()[name = tensor("op_50089_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50089_cast_fp16 = slice_by_index(begin = var_50089_begin_0, end = var_50089_end_0, end_mask = var_50089_end_mask_0, x = k_cast_fp16)[name = tensor("op_50089_cast_fp16")]; tensor var_50093_begin_0 = const()[name = tensor("op_50093_begin_0"), val = tensor([0, 0, 0, 960])]; tensor var_50093_end_0 = const()[name = tensor("op_50093_end_0"), val = tensor([1, 1500, 1, 1024])]; tensor var_50093_end_mask_0 = const()[name = tensor("op_50093_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50093_cast_fp16 = slice_by_index(begin = var_50093_begin_0, end = var_50093_end_0, end_mask = var_50093_end_mask_0, x = k_cast_fp16)[name = tensor("op_50093_cast_fp16")]; tensor var_50097_begin_0 = const()[name = tensor("op_50097_begin_0"), val = tensor([0, 0, 0, 1024])]; tensor var_50097_end_0 = const()[name = tensor("op_50097_end_0"), val = tensor([1, 1500, 1, 1088])]; tensor var_50097_end_mask_0 = const()[name = tensor("op_50097_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50097_cast_fp16 = slice_by_index(begin = var_50097_begin_0, end = var_50097_end_0, end_mask = var_50097_end_mask_0, x = k_cast_fp16)[name = tensor("op_50097_cast_fp16")]; tensor var_50101_begin_0 = const()[name = tensor("op_50101_begin_0"), val = tensor([0, 0, 0, 1088])]; tensor var_50101_end_0 = const()[name = tensor("op_50101_end_0"), val = tensor([1, 1500, 1, 1152])]; tensor var_50101_end_mask_0 = const()[name = tensor("op_50101_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50101_cast_fp16 = slice_by_index(begin = var_50101_begin_0, end = var_50101_end_0, end_mask = var_50101_end_mask_0, x = k_cast_fp16)[name = tensor("op_50101_cast_fp16")]; tensor var_50105_begin_0 = const()[name = tensor("op_50105_begin_0"), val = tensor([0, 0, 0, 1152])]; tensor var_50105_end_0 = const()[name = tensor("op_50105_end_0"), val = tensor([1, 1500, 1, 1216])]; tensor var_50105_end_mask_0 = const()[name = tensor("op_50105_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50105_cast_fp16 = slice_by_index(begin = var_50105_begin_0, end = var_50105_end_0, end_mask = var_50105_end_mask_0, x = k_cast_fp16)[name = tensor("op_50105_cast_fp16")]; tensor var_50109_begin_0 = const()[name = tensor("op_50109_begin_0"), val = tensor([0, 0, 0, 1216])]; tensor var_50109_end_0 = const()[name = tensor("op_50109_end_0"), val = tensor([1, 1500, 1, 1280])]; tensor var_50109_end_mask_0 = const()[name = tensor("op_50109_end_mask_0"), val = tensor([true, true, true, false])]; tensor var_50109_cast_fp16 = slice_by_index(begin = var_50109_begin_0, end = var_50109_end_0, end_mask = var_50109_end_mask_0, x = k_cast_fp16)[name = tensor("op_50109_cast_fp16")]; tensor var_50111_begin_0 = const()[name = tensor("op_50111_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_50111_end_0 = const()[name = tensor("op_50111_end_0"), val = tensor([1, 64, 1, 1500])]; tensor var_50111_end_mask_0 = const()[name = tensor("op_50111_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50111_cast_fp16 = slice_by_index(begin = var_50111_begin_0, end = var_50111_end_0, end_mask = var_50111_end_mask_0, x = value_cast_fp16)[name = tensor("op_50111_cast_fp16")]; tensor var_50115_begin_0 = const()[name = tensor("op_50115_begin_0"), val = tensor([0, 64, 0, 0])]; tensor var_50115_end_0 = const()[name = tensor("op_50115_end_0"), val = tensor([1, 128, 1, 1500])]; tensor var_50115_end_mask_0 = const()[name = tensor("op_50115_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50115_cast_fp16 = slice_by_index(begin = var_50115_begin_0, end = var_50115_end_0, end_mask = var_50115_end_mask_0, x = value_cast_fp16)[name = tensor("op_50115_cast_fp16")]; tensor var_50119_begin_0 = const()[name = tensor("op_50119_begin_0"), val = tensor([0, 128, 0, 0])]; tensor var_50119_end_0 = const()[name = tensor("op_50119_end_0"), val = tensor([1, 192, 1, 1500])]; tensor var_50119_end_mask_0 = const()[name = tensor("op_50119_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50119_cast_fp16 = slice_by_index(begin = var_50119_begin_0, end = var_50119_end_0, end_mask = var_50119_end_mask_0, x = value_cast_fp16)[name = tensor("op_50119_cast_fp16")]; tensor var_50123_begin_0 = const()[name = tensor("op_50123_begin_0"), val = tensor([0, 192, 0, 0])]; tensor var_50123_end_0 = const()[name = tensor("op_50123_end_0"), val = tensor([1, 256, 1, 1500])]; tensor var_50123_end_mask_0 = const()[name = tensor("op_50123_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50123_cast_fp16 = slice_by_index(begin = var_50123_begin_0, end = var_50123_end_0, end_mask = var_50123_end_mask_0, x = value_cast_fp16)[name = tensor("op_50123_cast_fp16")]; tensor var_50127_begin_0 = const()[name = tensor("op_50127_begin_0"), val = tensor([0, 256, 0, 0])]; tensor var_50127_end_0 = const()[name = tensor("op_50127_end_0"), val = tensor([1, 320, 1, 1500])]; tensor var_50127_end_mask_0 = const()[name = tensor("op_50127_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50127_cast_fp16 = slice_by_index(begin = var_50127_begin_0, end = var_50127_end_0, end_mask = var_50127_end_mask_0, x = value_cast_fp16)[name = tensor("op_50127_cast_fp16")]; tensor var_50131_begin_0 = const()[name = tensor("op_50131_begin_0"), val = tensor([0, 320, 0, 0])]; tensor var_50131_end_0 = const()[name = tensor("op_50131_end_0"), val = tensor([1, 384, 1, 1500])]; tensor var_50131_end_mask_0 = const()[name = tensor("op_50131_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50131_cast_fp16 = slice_by_index(begin = var_50131_begin_0, end = var_50131_end_0, end_mask = var_50131_end_mask_0, x = value_cast_fp16)[name = tensor("op_50131_cast_fp16")]; tensor var_50135_begin_0 = const()[name = tensor("op_50135_begin_0"), val = tensor([0, 384, 0, 0])]; tensor var_50135_end_0 = const()[name = tensor("op_50135_end_0"), val = tensor([1, 448, 1, 1500])]; tensor var_50135_end_mask_0 = const()[name = tensor("op_50135_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50135_cast_fp16 = slice_by_index(begin = var_50135_begin_0, end = var_50135_end_0, end_mask = var_50135_end_mask_0, x = value_cast_fp16)[name = tensor("op_50135_cast_fp16")]; tensor var_50139_begin_0 = const()[name = tensor("op_50139_begin_0"), val = tensor([0, 448, 0, 0])]; tensor var_50139_end_0 = const()[name = tensor("op_50139_end_0"), val = tensor([1, 512, 1, 1500])]; tensor var_50139_end_mask_0 = const()[name = tensor("op_50139_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50139_cast_fp16 = slice_by_index(begin = var_50139_begin_0, end = var_50139_end_0, end_mask = var_50139_end_mask_0, x = value_cast_fp16)[name = tensor("op_50139_cast_fp16")]; tensor var_50143_begin_0 = const()[name = tensor("op_50143_begin_0"), val = tensor([0, 512, 0, 0])]; tensor var_50143_end_0 = const()[name = tensor("op_50143_end_0"), val = tensor([1, 576, 1, 1500])]; tensor var_50143_end_mask_0 = const()[name = tensor("op_50143_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50143_cast_fp16 = slice_by_index(begin = var_50143_begin_0, end = var_50143_end_0, end_mask = var_50143_end_mask_0, x = value_cast_fp16)[name = tensor("op_50143_cast_fp16")]; tensor var_50147_begin_0 = const()[name = tensor("op_50147_begin_0"), val = tensor([0, 576, 0, 0])]; tensor var_50147_end_0 = const()[name = tensor("op_50147_end_0"), val = tensor([1, 640, 1, 1500])]; tensor var_50147_end_mask_0 = const()[name = tensor("op_50147_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50147_cast_fp16 = slice_by_index(begin = var_50147_begin_0, end = var_50147_end_0, end_mask = var_50147_end_mask_0, x = value_cast_fp16)[name = tensor("op_50147_cast_fp16")]; tensor var_50151_begin_0 = const()[name = tensor("op_50151_begin_0"), val = tensor([0, 640, 0, 0])]; tensor var_50151_end_0 = const()[name = tensor("op_50151_end_0"), val = tensor([1, 704, 1, 1500])]; tensor var_50151_end_mask_0 = const()[name = tensor("op_50151_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50151_cast_fp16 = slice_by_index(begin = var_50151_begin_0, end = var_50151_end_0, end_mask = var_50151_end_mask_0, x = value_cast_fp16)[name = tensor("op_50151_cast_fp16")]; tensor var_50155_begin_0 = const()[name = tensor("op_50155_begin_0"), val = tensor([0, 704, 0, 0])]; tensor var_50155_end_0 = const()[name = tensor("op_50155_end_0"), val = tensor([1, 768, 1, 1500])]; tensor var_50155_end_mask_0 = const()[name = tensor("op_50155_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50155_cast_fp16 = slice_by_index(begin = var_50155_begin_0, end = var_50155_end_0, end_mask = var_50155_end_mask_0, x = value_cast_fp16)[name = tensor("op_50155_cast_fp16")]; tensor var_50159_begin_0 = const()[name = tensor("op_50159_begin_0"), val = tensor([0, 768, 0, 0])]; tensor var_50159_end_0 = const()[name = tensor("op_50159_end_0"), val = tensor([1, 832, 1, 1500])]; tensor var_50159_end_mask_0 = const()[name = tensor("op_50159_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50159_cast_fp16 = slice_by_index(begin = var_50159_begin_0, end = var_50159_end_0, end_mask = var_50159_end_mask_0, x = value_cast_fp16)[name = tensor("op_50159_cast_fp16")]; tensor var_50163_begin_0 = const()[name = tensor("op_50163_begin_0"), val = tensor([0, 832, 0, 0])]; tensor var_50163_end_0 = const()[name = tensor("op_50163_end_0"), val = tensor([1, 896, 1, 1500])]; tensor var_50163_end_mask_0 = const()[name = tensor("op_50163_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50163_cast_fp16 = slice_by_index(begin = var_50163_begin_0, end = var_50163_end_0, end_mask = var_50163_end_mask_0, x = value_cast_fp16)[name = tensor("op_50163_cast_fp16")]; tensor var_50167_begin_0 = const()[name = tensor("op_50167_begin_0"), val = tensor([0, 896, 0, 0])]; tensor var_50167_end_0 = const()[name = tensor("op_50167_end_0"), val = tensor([1, 960, 1, 1500])]; tensor var_50167_end_mask_0 = const()[name = tensor("op_50167_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50167_cast_fp16 = slice_by_index(begin = var_50167_begin_0, end = var_50167_end_0, end_mask = var_50167_end_mask_0, x = value_cast_fp16)[name = tensor("op_50167_cast_fp16")]; tensor var_50171_begin_0 = const()[name = tensor("op_50171_begin_0"), val = tensor([0, 960, 0, 0])]; tensor var_50171_end_0 = const()[name = tensor("op_50171_end_0"), val = tensor([1, 1024, 1, 1500])]; tensor var_50171_end_mask_0 = const()[name = tensor("op_50171_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50171_cast_fp16 = slice_by_index(begin = var_50171_begin_0, end = var_50171_end_0, end_mask = var_50171_end_mask_0, x = value_cast_fp16)[name = tensor("op_50171_cast_fp16")]; tensor var_50175_begin_0 = const()[name = tensor("op_50175_begin_0"), val = tensor([0, 1024, 0, 0])]; tensor var_50175_end_0 = const()[name = tensor("op_50175_end_0"), val = tensor([1, 1088, 1, 1500])]; tensor var_50175_end_mask_0 = const()[name = tensor("op_50175_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50175_cast_fp16 = slice_by_index(begin = var_50175_begin_0, end = var_50175_end_0, end_mask = var_50175_end_mask_0, x = value_cast_fp16)[name = tensor("op_50175_cast_fp16")]; tensor var_50179_begin_0 = const()[name = tensor("op_50179_begin_0"), val = tensor([0, 1088, 0, 0])]; tensor var_50179_end_0 = const()[name = tensor("op_50179_end_0"), val = tensor([1, 1152, 1, 1500])]; tensor var_50179_end_mask_0 = const()[name = tensor("op_50179_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50179_cast_fp16 = slice_by_index(begin = var_50179_begin_0, end = var_50179_end_0, end_mask = var_50179_end_mask_0, x = value_cast_fp16)[name = tensor("op_50179_cast_fp16")]; tensor var_50183_begin_0 = const()[name = tensor("op_50183_begin_0"), val = tensor([0, 1152, 0, 0])]; tensor var_50183_end_0 = const()[name = tensor("op_50183_end_0"), val = tensor([1, 1216, 1, 1500])]; tensor var_50183_end_mask_0 = const()[name = tensor("op_50183_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50183_cast_fp16 = slice_by_index(begin = var_50183_begin_0, end = var_50183_end_0, end_mask = var_50183_end_mask_0, x = value_cast_fp16)[name = tensor("op_50183_cast_fp16")]; tensor var_50187_begin_0 = const()[name = tensor("op_50187_begin_0"), val = tensor([0, 1216, 0, 0])]; tensor var_50187_end_0 = const()[name = tensor("op_50187_end_0"), val = tensor([1, 1280, 1, 1500])]; tensor var_50187_end_mask_0 = const()[name = tensor("op_50187_end_mask_0"), val = tensor([true, false, true, true])]; tensor var_50187_cast_fp16 = slice_by_index(begin = var_50187_begin_0, end = var_50187_end_0, end_mask = var_50187_end_mask_0, x = value_cast_fp16)[name = tensor("op_50187_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4961_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4961_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4961_equation_0, values = (var_50033_cast_fp16, var_49475_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4961_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4963_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4963_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4963_equation_0, values = (var_50033_cast_fp16, var_49482_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4963_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4965_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4965_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4965_equation_0, values = (var_50033_cast_fp16, var_49489_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4965_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4967_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4967_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4967_equation_0, values = (var_50033_cast_fp16, var_49496_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4967_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4969_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4969_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4969_equation_0, values = (var_50037_cast_fp16, var_49503_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4969_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4971_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4971_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4971_equation_0, values = (var_50037_cast_fp16, var_49510_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4971_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4973_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4973_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4973_equation_0, values = (var_50037_cast_fp16, var_49517_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4973_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4975_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4975_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4975_equation_0, values = (var_50037_cast_fp16, var_49524_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4975_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4977_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4977_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4977_equation_0, values = (var_50041_cast_fp16, var_49531_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4977_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4979_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4979_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4979_equation_0, values = (var_50041_cast_fp16, var_49538_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4979_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4981_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4981_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4981_equation_0, values = (var_50041_cast_fp16, var_49545_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4981_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4983_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4983_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4983_equation_0, values = (var_50041_cast_fp16, var_49552_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4983_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4985_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4985_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4985_equation_0, values = (var_50045_cast_fp16, var_49559_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4985_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4987_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4987_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4987_equation_0, values = (var_50045_cast_fp16, var_49566_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4987_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4989_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4989_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4989_equation_0, values = (var_50045_cast_fp16, var_49573_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4989_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4991_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4991_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4991_equation_0, values = (var_50045_cast_fp16, var_49580_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4991_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4993_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4993_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4993_equation_0, values = (var_50049_cast_fp16, var_49587_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4993_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4995_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4995_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4995_equation_0, values = (var_50049_cast_fp16, var_49594_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4995_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4997_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4997_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4997_equation_0, values = (var_50049_cast_fp16, var_49601_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4997_cast_fp16")]; tensor _SplitHeadsQ__mh_w_4999_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_4999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_4999_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_4999_equation_0, values = (var_50049_cast_fp16, var_49608_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_4999_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5001_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5001_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5001_equation_0, values = (var_50053_cast_fp16, var_49615_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5001_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5003_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5003_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5003_equation_0, values = (var_50053_cast_fp16, var_49622_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5003_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5005_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5005_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5005_equation_0, values = (var_50053_cast_fp16, var_49629_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5005_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5007_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5007_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5007_equation_0, values = (var_50053_cast_fp16, var_49636_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5007_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5009_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5009_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5009_equation_0, values = (var_50057_cast_fp16, var_49643_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5009_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5011_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5011_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5011_equation_0, values = (var_50057_cast_fp16, var_49650_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5011_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5013_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5013_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5013_equation_0, values = (var_50057_cast_fp16, var_49657_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5013_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5015_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5015_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5015_equation_0, values = (var_50057_cast_fp16, var_49664_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5015_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5017_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5017_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5017_equation_0, values = (var_50061_cast_fp16, var_49671_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5017_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5019_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5019_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5019_equation_0, values = (var_50061_cast_fp16, var_49678_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5019_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5021_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5021_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5021_equation_0, values = (var_50061_cast_fp16, var_49685_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5021_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5023_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5023_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5023_equation_0, values = (var_50061_cast_fp16, var_49692_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5023_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5025_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5025_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5025_equation_0, values = (var_50065_cast_fp16, var_49699_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5025_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5027_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5027_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5027_equation_0, values = (var_50065_cast_fp16, var_49706_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5027_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5029_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5029_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5029_equation_0, values = (var_50065_cast_fp16, var_49713_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5029_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5031_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5031_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5031_equation_0, values = (var_50065_cast_fp16, var_49720_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5031_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5033_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5033_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5033_equation_0, values = (var_50069_cast_fp16, var_49727_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5033_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5035_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5035_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5035_equation_0, values = (var_50069_cast_fp16, var_49734_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5035_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5037_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5037_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5037_equation_0, values = (var_50069_cast_fp16, var_49741_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5037_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5039_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5039_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5039_equation_0, values = (var_50069_cast_fp16, var_49748_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5039_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5041_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5041_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5041_equation_0, values = (var_50073_cast_fp16, var_49755_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5041_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5043_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5043_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5043_equation_0, values = (var_50073_cast_fp16, var_49762_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5043_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5045_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5045_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5045_equation_0, values = (var_50073_cast_fp16, var_49769_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5045_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5047_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5047_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5047_equation_0, values = (var_50073_cast_fp16, var_49776_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5047_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5049_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5049_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5049_equation_0, values = (var_50077_cast_fp16, var_49783_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5049_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5051_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5051_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5051_equation_0, values = (var_50077_cast_fp16, var_49790_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5051_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5053_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5053_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5053_equation_0, values = (var_50077_cast_fp16, var_49797_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5053_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5055_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5055_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5055_equation_0, values = (var_50077_cast_fp16, var_49804_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5055_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5057_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5057_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5057_equation_0, values = (var_50081_cast_fp16, var_49811_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5057_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5059_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5059_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5059_equation_0, values = (var_50081_cast_fp16, var_49818_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5059_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5061_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5061_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5061_equation_0, values = (var_50081_cast_fp16, var_49825_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5061_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5063_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5063_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5063_equation_0, values = (var_50081_cast_fp16, var_49832_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5063_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5065_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5065_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5065_equation_0, values = (var_50085_cast_fp16, var_49839_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5065_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5067_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5067_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5067_equation_0, values = (var_50085_cast_fp16, var_49846_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5067_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5069_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5069_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5069_equation_0, values = (var_50085_cast_fp16, var_49853_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5069_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5071_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5071_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5071_equation_0, values = (var_50085_cast_fp16, var_49860_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5071_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5073_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5073_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5073_equation_0, values = (var_50089_cast_fp16, var_49867_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5073_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5075_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5075_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5075_equation_0, values = (var_50089_cast_fp16, var_49874_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5075_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5077_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5077_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5077_equation_0, values = (var_50089_cast_fp16, var_49881_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5077_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5079_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5079_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5079_equation_0, values = (var_50089_cast_fp16, var_49888_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5079_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5081_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5081_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5081_equation_0, values = (var_50093_cast_fp16, var_49895_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5081_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5083_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5083_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5083_equation_0, values = (var_50093_cast_fp16, var_49902_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5083_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5085_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5085_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5085_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5085_equation_0, values = (var_50093_cast_fp16, var_49909_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5085_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5087_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5087_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5087_equation_0, values = (var_50093_cast_fp16, var_49916_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5087_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5089_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5089_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5089_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5089_equation_0, values = (var_50097_cast_fp16, var_49923_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5089_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5091_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5091_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5091_equation_0, values = (var_50097_cast_fp16, var_49930_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5091_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5093_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5093_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5093_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5093_equation_0, values = (var_50097_cast_fp16, var_49937_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5093_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5095_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5095_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5095_equation_0, values = (var_50097_cast_fp16, var_49944_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5095_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5097_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5097_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5097_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5097_equation_0, values = (var_50101_cast_fp16, var_49951_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5097_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5099_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5099_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5099_equation_0, values = (var_50101_cast_fp16, var_49958_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5099_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5101_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5101_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5101_equation_0, values = (var_50101_cast_fp16, var_49965_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5101_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5103_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5103_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5103_equation_0, values = (var_50101_cast_fp16, var_49972_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5103_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5105_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5105_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5105_equation_0, values = (var_50105_cast_fp16, var_49979_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5105_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5107_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5107_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5107_equation_0, values = (var_50105_cast_fp16, var_49986_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5107_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5109_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5109_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5109_equation_0, values = (var_50105_cast_fp16, var_49993_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5109_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5111_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5111_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5111_equation_0, values = (var_50105_cast_fp16, var_50000_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5111_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5113_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5113_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5113_equation_0, values = (var_50109_cast_fp16, var_50007_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5113_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5115_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5115_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5115_equation_0, values = (var_50109_cast_fp16, var_50014_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5115_cast_fp16")]; tensor _SplitHeadsQ__mh_w_5117_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_5117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_5117_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_5117_equation_0, values = (var_50109_cast_fp16, var_50021_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_5117_cast_fp16")]; tensor _SplitHeadsQ__mh_w_equation_0 = const()[name = tensor("_SplitHeadsQ__mh_w_equation_0"), val = tensor("bkhc,bchq->bkhq")]; tensor _SplitHeadsQ__mh_w_cast_fp16 = einsum(equation = _SplitHeadsQ__mh_w_equation_0, values = (var_50109_cast_fp16, var_50028_cast_fp16))[name = tensor("_SplitHeadsQ__mh_w_cast_fp16")]; tensor var_50350_to_fp16 = const()[name = tensor("op_50350_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4961_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4961_cast_fp16, y = var_50350_to_fp16)[name = tensor("aw_chunk_4961_cast_fp16")]; tensor var_50352_to_fp16 = const()[name = tensor("op_50352_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4963_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4963_cast_fp16, y = var_50352_to_fp16)[name = tensor("aw_chunk_4963_cast_fp16")]; tensor var_50354_to_fp16 = const()[name = tensor("op_50354_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4965_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4965_cast_fp16, y = var_50354_to_fp16)[name = tensor("aw_chunk_4965_cast_fp16")]; tensor var_50356_to_fp16 = const()[name = tensor("op_50356_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4967_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4967_cast_fp16, y = var_50356_to_fp16)[name = tensor("aw_chunk_4967_cast_fp16")]; tensor var_50358_to_fp16 = const()[name = tensor("op_50358_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4969_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4969_cast_fp16, y = var_50358_to_fp16)[name = tensor("aw_chunk_4969_cast_fp16")]; tensor var_50360_to_fp16 = const()[name = tensor("op_50360_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4971_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4971_cast_fp16, y = var_50360_to_fp16)[name = tensor("aw_chunk_4971_cast_fp16")]; tensor var_50362_to_fp16 = const()[name = tensor("op_50362_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4973_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4973_cast_fp16, y = var_50362_to_fp16)[name = tensor("aw_chunk_4973_cast_fp16")]; tensor var_50364_to_fp16 = const()[name = tensor("op_50364_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4975_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4975_cast_fp16, y = var_50364_to_fp16)[name = tensor("aw_chunk_4975_cast_fp16")]; tensor var_50366_to_fp16 = const()[name = tensor("op_50366_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4977_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4977_cast_fp16, y = var_50366_to_fp16)[name = tensor("aw_chunk_4977_cast_fp16")]; tensor var_50368_to_fp16 = const()[name = tensor("op_50368_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4979_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4979_cast_fp16, y = var_50368_to_fp16)[name = tensor("aw_chunk_4979_cast_fp16")]; tensor var_50370_to_fp16 = const()[name = tensor("op_50370_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4981_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4981_cast_fp16, y = var_50370_to_fp16)[name = tensor("aw_chunk_4981_cast_fp16")]; tensor var_50372_to_fp16 = const()[name = tensor("op_50372_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4983_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4983_cast_fp16, y = var_50372_to_fp16)[name = tensor("aw_chunk_4983_cast_fp16")]; tensor var_50374_to_fp16 = const()[name = tensor("op_50374_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4985_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4985_cast_fp16, y = var_50374_to_fp16)[name = tensor("aw_chunk_4985_cast_fp16")]; tensor var_50376_to_fp16 = const()[name = tensor("op_50376_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4987_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4987_cast_fp16, y = var_50376_to_fp16)[name = tensor("aw_chunk_4987_cast_fp16")]; tensor var_50378_to_fp16 = const()[name = tensor("op_50378_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4989_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4989_cast_fp16, y = var_50378_to_fp16)[name = tensor("aw_chunk_4989_cast_fp16")]; tensor var_50380_to_fp16 = const()[name = tensor("op_50380_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4991_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4991_cast_fp16, y = var_50380_to_fp16)[name = tensor("aw_chunk_4991_cast_fp16")]; tensor var_50382_to_fp16 = const()[name = tensor("op_50382_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4993_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4993_cast_fp16, y = var_50382_to_fp16)[name = tensor("aw_chunk_4993_cast_fp16")]; tensor var_50384_to_fp16 = const()[name = tensor("op_50384_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4995_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4995_cast_fp16, y = var_50384_to_fp16)[name = tensor("aw_chunk_4995_cast_fp16")]; tensor var_50386_to_fp16 = const()[name = tensor("op_50386_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4997_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4997_cast_fp16, y = var_50386_to_fp16)[name = tensor("aw_chunk_4997_cast_fp16")]; tensor var_50388_to_fp16 = const()[name = tensor("op_50388_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_4999_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_4999_cast_fp16, y = var_50388_to_fp16)[name = tensor("aw_chunk_4999_cast_fp16")]; tensor var_50390_to_fp16 = const()[name = tensor("op_50390_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5001_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5001_cast_fp16, y = var_50390_to_fp16)[name = tensor("aw_chunk_5001_cast_fp16")]; tensor var_50392_to_fp16 = const()[name = tensor("op_50392_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5003_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5003_cast_fp16, y = var_50392_to_fp16)[name = tensor("aw_chunk_5003_cast_fp16")]; tensor var_50394_to_fp16 = const()[name = tensor("op_50394_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5005_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5005_cast_fp16, y = var_50394_to_fp16)[name = tensor("aw_chunk_5005_cast_fp16")]; tensor var_50396_to_fp16 = const()[name = tensor("op_50396_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5007_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5007_cast_fp16, y = var_50396_to_fp16)[name = tensor("aw_chunk_5007_cast_fp16")]; tensor var_50398_to_fp16 = const()[name = tensor("op_50398_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5009_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5009_cast_fp16, y = var_50398_to_fp16)[name = tensor("aw_chunk_5009_cast_fp16")]; tensor var_50400_to_fp16 = const()[name = tensor("op_50400_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5011_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5011_cast_fp16, y = var_50400_to_fp16)[name = tensor("aw_chunk_5011_cast_fp16")]; tensor var_50402_to_fp16 = const()[name = tensor("op_50402_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5013_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5013_cast_fp16, y = var_50402_to_fp16)[name = tensor("aw_chunk_5013_cast_fp16")]; tensor var_50404_to_fp16 = const()[name = tensor("op_50404_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5015_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5015_cast_fp16, y = var_50404_to_fp16)[name = tensor("aw_chunk_5015_cast_fp16")]; tensor var_50406_to_fp16 = const()[name = tensor("op_50406_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5017_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5017_cast_fp16, y = var_50406_to_fp16)[name = tensor("aw_chunk_5017_cast_fp16")]; tensor var_50408_to_fp16 = const()[name = tensor("op_50408_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5019_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5019_cast_fp16, y = var_50408_to_fp16)[name = tensor("aw_chunk_5019_cast_fp16")]; tensor var_50410_to_fp16 = const()[name = tensor("op_50410_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5021_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5021_cast_fp16, y = var_50410_to_fp16)[name = tensor("aw_chunk_5021_cast_fp16")]; tensor var_50412_to_fp16 = const()[name = tensor("op_50412_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5023_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5023_cast_fp16, y = var_50412_to_fp16)[name = tensor("aw_chunk_5023_cast_fp16")]; tensor var_50414_to_fp16 = const()[name = tensor("op_50414_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5025_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5025_cast_fp16, y = var_50414_to_fp16)[name = tensor("aw_chunk_5025_cast_fp16")]; tensor var_50416_to_fp16 = const()[name = tensor("op_50416_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5027_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5027_cast_fp16, y = var_50416_to_fp16)[name = tensor("aw_chunk_5027_cast_fp16")]; tensor var_50418_to_fp16 = const()[name = tensor("op_50418_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5029_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5029_cast_fp16, y = var_50418_to_fp16)[name = tensor("aw_chunk_5029_cast_fp16")]; tensor var_50420_to_fp16 = const()[name = tensor("op_50420_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5031_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5031_cast_fp16, y = var_50420_to_fp16)[name = tensor("aw_chunk_5031_cast_fp16")]; tensor var_50422_to_fp16 = const()[name = tensor("op_50422_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5033_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5033_cast_fp16, y = var_50422_to_fp16)[name = tensor("aw_chunk_5033_cast_fp16")]; tensor var_50424_to_fp16 = const()[name = tensor("op_50424_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5035_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5035_cast_fp16, y = var_50424_to_fp16)[name = tensor("aw_chunk_5035_cast_fp16")]; tensor var_50426_to_fp16 = const()[name = tensor("op_50426_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5037_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5037_cast_fp16, y = var_50426_to_fp16)[name = tensor("aw_chunk_5037_cast_fp16")]; tensor var_50428_to_fp16 = const()[name = tensor("op_50428_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5039_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5039_cast_fp16, y = var_50428_to_fp16)[name = tensor("aw_chunk_5039_cast_fp16")]; tensor var_50430_to_fp16 = const()[name = tensor("op_50430_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5041_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5041_cast_fp16, y = var_50430_to_fp16)[name = tensor("aw_chunk_5041_cast_fp16")]; tensor var_50432_to_fp16 = const()[name = tensor("op_50432_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5043_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5043_cast_fp16, y = var_50432_to_fp16)[name = tensor("aw_chunk_5043_cast_fp16")]; tensor var_50434_to_fp16 = const()[name = tensor("op_50434_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5045_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5045_cast_fp16, y = var_50434_to_fp16)[name = tensor("aw_chunk_5045_cast_fp16")]; tensor var_50436_to_fp16 = const()[name = tensor("op_50436_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5047_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5047_cast_fp16, y = var_50436_to_fp16)[name = tensor("aw_chunk_5047_cast_fp16")]; tensor var_50438_to_fp16 = const()[name = tensor("op_50438_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5049_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5049_cast_fp16, y = var_50438_to_fp16)[name = tensor("aw_chunk_5049_cast_fp16")]; tensor var_50440_to_fp16 = const()[name = tensor("op_50440_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5051_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5051_cast_fp16, y = var_50440_to_fp16)[name = tensor("aw_chunk_5051_cast_fp16")]; tensor var_50442_to_fp16 = const()[name = tensor("op_50442_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5053_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5053_cast_fp16, y = var_50442_to_fp16)[name = tensor("aw_chunk_5053_cast_fp16")]; tensor var_50444_to_fp16 = const()[name = tensor("op_50444_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5055_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5055_cast_fp16, y = var_50444_to_fp16)[name = tensor("aw_chunk_5055_cast_fp16")]; tensor var_50446_to_fp16 = const()[name = tensor("op_50446_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5057_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5057_cast_fp16, y = var_50446_to_fp16)[name = tensor("aw_chunk_5057_cast_fp16")]; tensor var_50448_to_fp16 = const()[name = tensor("op_50448_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5059_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5059_cast_fp16, y = var_50448_to_fp16)[name = tensor("aw_chunk_5059_cast_fp16")]; tensor var_50450_to_fp16 = const()[name = tensor("op_50450_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5061_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5061_cast_fp16, y = var_50450_to_fp16)[name = tensor("aw_chunk_5061_cast_fp16")]; tensor var_50452_to_fp16 = const()[name = tensor("op_50452_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5063_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5063_cast_fp16, y = var_50452_to_fp16)[name = tensor("aw_chunk_5063_cast_fp16")]; tensor var_50454_to_fp16 = const()[name = tensor("op_50454_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5065_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5065_cast_fp16, y = var_50454_to_fp16)[name = tensor("aw_chunk_5065_cast_fp16")]; tensor var_50456_to_fp16 = const()[name = tensor("op_50456_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5067_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5067_cast_fp16, y = var_50456_to_fp16)[name = tensor("aw_chunk_5067_cast_fp16")]; tensor var_50458_to_fp16 = const()[name = tensor("op_50458_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5069_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5069_cast_fp16, y = var_50458_to_fp16)[name = tensor("aw_chunk_5069_cast_fp16")]; tensor var_50460_to_fp16 = const()[name = tensor("op_50460_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5071_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5071_cast_fp16, y = var_50460_to_fp16)[name = tensor("aw_chunk_5071_cast_fp16")]; tensor var_50462_to_fp16 = const()[name = tensor("op_50462_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5073_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5073_cast_fp16, y = var_50462_to_fp16)[name = tensor("aw_chunk_5073_cast_fp16")]; tensor var_50464_to_fp16 = const()[name = tensor("op_50464_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5075_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5075_cast_fp16, y = var_50464_to_fp16)[name = tensor("aw_chunk_5075_cast_fp16")]; tensor var_50466_to_fp16 = const()[name = tensor("op_50466_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5077_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5077_cast_fp16, y = var_50466_to_fp16)[name = tensor("aw_chunk_5077_cast_fp16")]; tensor var_50468_to_fp16 = const()[name = tensor("op_50468_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5079_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5079_cast_fp16, y = var_50468_to_fp16)[name = tensor("aw_chunk_5079_cast_fp16")]; tensor var_50470_to_fp16 = const()[name = tensor("op_50470_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5081_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5081_cast_fp16, y = var_50470_to_fp16)[name = tensor("aw_chunk_5081_cast_fp16")]; tensor var_50472_to_fp16 = const()[name = tensor("op_50472_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5083_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5083_cast_fp16, y = var_50472_to_fp16)[name = tensor("aw_chunk_5083_cast_fp16")]; tensor var_50474_to_fp16 = const()[name = tensor("op_50474_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5085_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5085_cast_fp16, y = var_50474_to_fp16)[name = tensor("aw_chunk_5085_cast_fp16")]; tensor var_50476_to_fp16 = const()[name = tensor("op_50476_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5087_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5087_cast_fp16, y = var_50476_to_fp16)[name = tensor("aw_chunk_5087_cast_fp16")]; tensor var_50478_to_fp16 = const()[name = tensor("op_50478_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5089_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5089_cast_fp16, y = var_50478_to_fp16)[name = tensor("aw_chunk_5089_cast_fp16")]; tensor var_50480_to_fp16 = const()[name = tensor("op_50480_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5091_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5091_cast_fp16, y = var_50480_to_fp16)[name = tensor("aw_chunk_5091_cast_fp16")]; tensor var_50482_to_fp16 = const()[name = tensor("op_50482_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5093_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5093_cast_fp16, y = var_50482_to_fp16)[name = tensor("aw_chunk_5093_cast_fp16")]; tensor var_50484_to_fp16 = const()[name = tensor("op_50484_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5095_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5095_cast_fp16, y = var_50484_to_fp16)[name = tensor("aw_chunk_5095_cast_fp16")]; tensor var_50486_to_fp16 = const()[name = tensor("op_50486_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5097_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5097_cast_fp16, y = var_50486_to_fp16)[name = tensor("aw_chunk_5097_cast_fp16")]; tensor var_50488_to_fp16 = const()[name = tensor("op_50488_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5099_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5099_cast_fp16, y = var_50488_to_fp16)[name = tensor("aw_chunk_5099_cast_fp16")]; tensor var_50490_to_fp16 = const()[name = tensor("op_50490_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5101_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5101_cast_fp16, y = var_50490_to_fp16)[name = tensor("aw_chunk_5101_cast_fp16")]; tensor var_50492_to_fp16 = const()[name = tensor("op_50492_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5103_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5103_cast_fp16, y = var_50492_to_fp16)[name = tensor("aw_chunk_5103_cast_fp16")]; tensor var_50494_to_fp16 = const()[name = tensor("op_50494_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5105_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5105_cast_fp16, y = var_50494_to_fp16)[name = tensor("aw_chunk_5105_cast_fp16")]; tensor var_50496_to_fp16 = const()[name = tensor("op_50496_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5107_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5107_cast_fp16, y = var_50496_to_fp16)[name = tensor("aw_chunk_5107_cast_fp16")]; tensor var_50498_to_fp16 = const()[name = tensor("op_50498_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5109_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5109_cast_fp16, y = var_50498_to_fp16)[name = tensor("aw_chunk_5109_cast_fp16")]; tensor var_50500_to_fp16 = const()[name = tensor("op_50500_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5111_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5111_cast_fp16, y = var_50500_to_fp16)[name = tensor("aw_chunk_5111_cast_fp16")]; tensor var_50502_to_fp16 = const()[name = tensor("op_50502_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5113_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5113_cast_fp16, y = var_50502_to_fp16)[name = tensor("aw_chunk_5113_cast_fp16")]; tensor var_50504_to_fp16 = const()[name = tensor("op_50504_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5115_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5115_cast_fp16, y = var_50504_to_fp16)[name = tensor("aw_chunk_5115_cast_fp16")]; tensor var_50506_to_fp16 = const()[name = tensor("op_50506_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_5117_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_5117_cast_fp16, y = var_50506_to_fp16)[name = tensor("aw_chunk_5117_cast_fp16")]; tensor var_50508_to_fp16 = const()[name = tensor("op_50508_to_fp16"), val = tensor(0x1p-3)]; tensor aw_chunk_cast_fp16 = mul(x = _SplitHeadsQ__mh_w_cast_fp16, y = var_50508_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; tensor var_50510_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4961_cast_fp16)[name = tensor("op_50510_cast_fp16")]; tensor var_50511_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4963_cast_fp16)[name = tensor("op_50511_cast_fp16")]; tensor var_50512_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4965_cast_fp16)[name = tensor("op_50512_cast_fp16")]; tensor var_50513_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4967_cast_fp16)[name = tensor("op_50513_cast_fp16")]; tensor var_50514_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4969_cast_fp16)[name = tensor("op_50514_cast_fp16")]; tensor var_50515_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4971_cast_fp16)[name = tensor("op_50515_cast_fp16")]; tensor var_50516_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4973_cast_fp16)[name = tensor("op_50516_cast_fp16")]; tensor var_50517_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4975_cast_fp16)[name = tensor("op_50517_cast_fp16")]; tensor var_50518_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4977_cast_fp16)[name = tensor("op_50518_cast_fp16")]; tensor var_50519_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4979_cast_fp16)[name = tensor("op_50519_cast_fp16")]; tensor var_50520_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4981_cast_fp16)[name = tensor("op_50520_cast_fp16")]; tensor var_50521_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4983_cast_fp16)[name = tensor("op_50521_cast_fp16")]; tensor var_50522_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4985_cast_fp16)[name = tensor("op_50522_cast_fp16")]; tensor var_50523_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4987_cast_fp16)[name = tensor("op_50523_cast_fp16")]; tensor var_50524_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4989_cast_fp16)[name = tensor("op_50524_cast_fp16")]; tensor var_50525_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4991_cast_fp16)[name = tensor("op_50525_cast_fp16")]; tensor var_50526_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4993_cast_fp16)[name = tensor("op_50526_cast_fp16")]; tensor var_50527_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4995_cast_fp16)[name = tensor("op_50527_cast_fp16")]; tensor var_50528_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4997_cast_fp16)[name = tensor("op_50528_cast_fp16")]; tensor var_50529_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_4999_cast_fp16)[name = tensor("op_50529_cast_fp16")]; tensor var_50530_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5001_cast_fp16)[name = tensor("op_50530_cast_fp16")]; tensor var_50531_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5003_cast_fp16)[name = tensor("op_50531_cast_fp16")]; tensor var_50532_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5005_cast_fp16)[name = tensor("op_50532_cast_fp16")]; tensor var_50533_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5007_cast_fp16)[name = tensor("op_50533_cast_fp16")]; tensor var_50534_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5009_cast_fp16)[name = tensor("op_50534_cast_fp16")]; tensor var_50535_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5011_cast_fp16)[name = tensor("op_50535_cast_fp16")]; tensor var_50536_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5013_cast_fp16)[name = tensor("op_50536_cast_fp16")]; tensor var_50537_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5015_cast_fp16)[name = tensor("op_50537_cast_fp16")]; tensor var_50538_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5017_cast_fp16)[name = tensor("op_50538_cast_fp16")]; tensor var_50539_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5019_cast_fp16)[name = tensor("op_50539_cast_fp16")]; tensor var_50540_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5021_cast_fp16)[name = tensor("op_50540_cast_fp16")]; tensor var_50541_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5023_cast_fp16)[name = tensor("op_50541_cast_fp16")]; tensor var_50542_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5025_cast_fp16)[name = tensor("op_50542_cast_fp16")]; tensor var_50543_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5027_cast_fp16)[name = tensor("op_50543_cast_fp16")]; tensor var_50544_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5029_cast_fp16)[name = tensor("op_50544_cast_fp16")]; tensor var_50545_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5031_cast_fp16)[name = tensor("op_50545_cast_fp16")]; tensor var_50546_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5033_cast_fp16)[name = tensor("op_50546_cast_fp16")]; tensor var_50547_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5035_cast_fp16)[name = tensor("op_50547_cast_fp16")]; tensor var_50548_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5037_cast_fp16)[name = tensor("op_50548_cast_fp16")]; tensor var_50549_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5039_cast_fp16)[name = tensor("op_50549_cast_fp16")]; tensor var_50550_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5041_cast_fp16)[name = tensor("op_50550_cast_fp16")]; tensor var_50551_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5043_cast_fp16)[name = tensor("op_50551_cast_fp16")]; tensor var_50552_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5045_cast_fp16)[name = tensor("op_50552_cast_fp16")]; tensor var_50553_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5047_cast_fp16)[name = tensor("op_50553_cast_fp16")]; tensor var_50554_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5049_cast_fp16)[name = tensor("op_50554_cast_fp16")]; tensor var_50555_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5051_cast_fp16)[name = tensor("op_50555_cast_fp16")]; tensor var_50556_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5053_cast_fp16)[name = tensor("op_50556_cast_fp16")]; tensor var_50557_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5055_cast_fp16)[name = tensor("op_50557_cast_fp16")]; tensor var_50558_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5057_cast_fp16)[name = tensor("op_50558_cast_fp16")]; tensor var_50559_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5059_cast_fp16)[name = tensor("op_50559_cast_fp16")]; tensor var_50560_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5061_cast_fp16)[name = tensor("op_50560_cast_fp16")]; tensor var_50561_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5063_cast_fp16)[name = tensor("op_50561_cast_fp16")]; tensor var_50562_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5065_cast_fp16)[name = tensor("op_50562_cast_fp16")]; tensor var_50563_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5067_cast_fp16)[name = tensor("op_50563_cast_fp16")]; tensor var_50564_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5069_cast_fp16)[name = tensor("op_50564_cast_fp16")]; tensor var_50565_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5071_cast_fp16)[name = tensor("op_50565_cast_fp16")]; tensor var_50566_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5073_cast_fp16)[name = tensor("op_50566_cast_fp16")]; tensor var_50567_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5075_cast_fp16)[name = tensor("op_50567_cast_fp16")]; tensor var_50568_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5077_cast_fp16)[name = tensor("op_50568_cast_fp16")]; tensor var_50569_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5079_cast_fp16)[name = tensor("op_50569_cast_fp16")]; tensor var_50570_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5081_cast_fp16)[name = tensor("op_50570_cast_fp16")]; tensor var_50571_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5083_cast_fp16)[name = tensor("op_50571_cast_fp16")]; tensor var_50572_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5085_cast_fp16)[name = tensor("op_50572_cast_fp16")]; tensor var_50573_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5087_cast_fp16)[name = tensor("op_50573_cast_fp16")]; tensor var_50574_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5089_cast_fp16)[name = tensor("op_50574_cast_fp16")]; tensor var_50575_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5091_cast_fp16)[name = tensor("op_50575_cast_fp16")]; tensor var_50576_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5093_cast_fp16)[name = tensor("op_50576_cast_fp16")]; tensor var_50577_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5095_cast_fp16)[name = tensor("op_50577_cast_fp16")]; tensor var_50578_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5097_cast_fp16)[name = tensor("op_50578_cast_fp16")]; tensor var_50579_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5099_cast_fp16)[name = tensor("op_50579_cast_fp16")]; tensor var_50580_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5101_cast_fp16)[name = tensor("op_50580_cast_fp16")]; tensor var_50581_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5103_cast_fp16)[name = tensor("op_50581_cast_fp16")]; tensor var_50582_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5105_cast_fp16)[name = tensor("op_50582_cast_fp16")]; tensor var_50583_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5107_cast_fp16)[name = tensor("op_50583_cast_fp16")]; tensor var_50584_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5109_cast_fp16)[name = tensor("op_50584_cast_fp16")]; tensor var_50585_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5111_cast_fp16)[name = tensor("op_50585_cast_fp16")]; tensor var_50586_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5113_cast_fp16)[name = tensor("op_50586_cast_fp16")]; tensor var_50587_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5115_cast_fp16)[name = tensor("op_50587_cast_fp16")]; tensor var_50588_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_5117_cast_fp16)[name = tensor("op_50588_cast_fp16")]; tensor var_50589_cast_fp16 = softmax(axis = var_49308, x = aw_chunk_cast_fp16)[name = tensor("op_50589_cast_fp16")]; tensor var_50591_equation_0 = const()[name = tensor("op_50591_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50591_cast_fp16 = einsum(equation = var_50591_equation_0, values = (var_50111_cast_fp16, var_50510_cast_fp16))[name = tensor("op_50591_cast_fp16")]; tensor var_50593_equation_0 = const()[name = tensor("op_50593_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50593_cast_fp16 = einsum(equation = var_50593_equation_0, values = (var_50111_cast_fp16, var_50511_cast_fp16))[name = tensor("op_50593_cast_fp16")]; tensor var_50595_equation_0 = const()[name = tensor("op_50595_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50595_cast_fp16 = einsum(equation = var_50595_equation_0, values = (var_50111_cast_fp16, var_50512_cast_fp16))[name = tensor("op_50595_cast_fp16")]; tensor var_50597_equation_0 = const()[name = tensor("op_50597_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50597_cast_fp16 = einsum(equation = var_50597_equation_0, values = (var_50111_cast_fp16, var_50513_cast_fp16))[name = tensor("op_50597_cast_fp16")]; tensor var_50599_equation_0 = const()[name = tensor("op_50599_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50599_cast_fp16 = einsum(equation = var_50599_equation_0, values = (var_50115_cast_fp16, var_50514_cast_fp16))[name = tensor("op_50599_cast_fp16")]; tensor var_50601_equation_0 = const()[name = tensor("op_50601_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50601_cast_fp16 = einsum(equation = var_50601_equation_0, values = (var_50115_cast_fp16, var_50515_cast_fp16))[name = tensor("op_50601_cast_fp16")]; tensor var_50603_equation_0 = const()[name = tensor("op_50603_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50603_cast_fp16 = einsum(equation = var_50603_equation_0, values = (var_50115_cast_fp16, var_50516_cast_fp16))[name = tensor("op_50603_cast_fp16")]; tensor var_50605_equation_0 = const()[name = tensor("op_50605_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50605_cast_fp16 = einsum(equation = var_50605_equation_0, values = (var_50115_cast_fp16, var_50517_cast_fp16))[name = tensor("op_50605_cast_fp16")]; tensor var_50607_equation_0 = const()[name = tensor("op_50607_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50607_cast_fp16 = einsum(equation = var_50607_equation_0, values = (var_50119_cast_fp16, var_50518_cast_fp16))[name = tensor("op_50607_cast_fp16")]; tensor var_50609_equation_0 = const()[name = tensor("op_50609_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50609_cast_fp16 = einsum(equation = var_50609_equation_0, values = (var_50119_cast_fp16, var_50519_cast_fp16))[name = tensor("op_50609_cast_fp16")]; tensor var_50611_equation_0 = const()[name = tensor("op_50611_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50611_cast_fp16 = einsum(equation = var_50611_equation_0, values = (var_50119_cast_fp16, var_50520_cast_fp16))[name = tensor("op_50611_cast_fp16")]; tensor var_50613_equation_0 = const()[name = tensor("op_50613_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50613_cast_fp16 = einsum(equation = var_50613_equation_0, values = (var_50119_cast_fp16, var_50521_cast_fp16))[name = tensor("op_50613_cast_fp16")]; tensor var_50615_equation_0 = const()[name = tensor("op_50615_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50615_cast_fp16 = einsum(equation = var_50615_equation_0, values = (var_50123_cast_fp16, var_50522_cast_fp16))[name = tensor("op_50615_cast_fp16")]; tensor var_50617_equation_0 = const()[name = tensor("op_50617_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50617_cast_fp16 = einsum(equation = var_50617_equation_0, values = (var_50123_cast_fp16, var_50523_cast_fp16))[name = tensor("op_50617_cast_fp16")]; tensor var_50619_equation_0 = const()[name = tensor("op_50619_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50619_cast_fp16 = einsum(equation = var_50619_equation_0, values = (var_50123_cast_fp16, var_50524_cast_fp16))[name = tensor("op_50619_cast_fp16")]; tensor var_50621_equation_0 = const()[name = tensor("op_50621_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50621_cast_fp16 = einsum(equation = var_50621_equation_0, values = (var_50123_cast_fp16, var_50525_cast_fp16))[name = tensor("op_50621_cast_fp16")]; tensor var_50623_equation_0 = const()[name = tensor("op_50623_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50623_cast_fp16 = einsum(equation = var_50623_equation_0, values = (var_50127_cast_fp16, var_50526_cast_fp16))[name = tensor("op_50623_cast_fp16")]; tensor var_50625_equation_0 = const()[name = tensor("op_50625_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50625_cast_fp16 = einsum(equation = var_50625_equation_0, values = (var_50127_cast_fp16, var_50527_cast_fp16))[name = tensor("op_50625_cast_fp16")]; tensor var_50627_equation_0 = const()[name = tensor("op_50627_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50627_cast_fp16 = einsum(equation = var_50627_equation_0, values = (var_50127_cast_fp16, var_50528_cast_fp16))[name = tensor("op_50627_cast_fp16")]; tensor var_50629_equation_0 = const()[name = tensor("op_50629_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50629_cast_fp16 = einsum(equation = var_50629_equation_0, values = (var_50127_cast_fp16, var_50529_cast_fp16))[name = tensor("op_50629_cast_fp16")]; tensor var_50631_equation_0 = const()[name = tensor("op_50631_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50631_cast_fp16 = einsum(equation = var_50631_equation_0, values = (var_50131_cast_fp16, var_50530_cast_fp16))[name = tensor("op_50631_cast_fp16")]; tensor var_50633_equation_0 = const()[name = tensor("op_50633_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50633_cast_fp16 = einsum(equation = var_50633_equation_0, values = (var_50131_cast_fp16, var_50531_cast_fp16))[name = tensor("op_50633_cast_fp16")]; tensor var_50635_equation_0 = const()[name = tensor("op_50635_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50635_cast_fp16 = einsum(equation = var_50635_equation_0, values = (var_50131_cast_fp16, var_50532_cast_fp16))[name = tensor("op_50635_cast_fp16")]; tensor var_50637_equation_0 = const()[name = tensor("op_50637_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50637_cast_fp16 = einsum(equation = var_50637_equation_0, values = (var_50131_cast_fp16, var_50533_cast_fp16))[name = tensor("op_50637_cast_fp16")]; tensor var_50639_equation_0 = const()[name = tensor("op_50639_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50639_cast_fp16 = einsum(equation = var_50639_equation_0, values = (var_50135_cast_fp16, var_50534_cast_fp16))[name = tensor("op_50639_cast_fp16")]; tensor var_50641_equation_0 = const()[name = tensor("op_50641_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50641_cast_fp16 = einsum(equation = var_50641_equation_0, values = (var_50135_cast_fp16, var_50535_cast_fp16))[name = tensor("op_50641_cast_fp16")]; tensor var_50643_equation_0 = const()[name = tensor("op_50643_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50643_cast_fp16 = einsum(equation = var_50643_equation_0, values = (var_50135_cast_fp16, var_50536_cast_fp16))[name = tensor("op_50643_cast_fp16")]; tensor var_50645_equation_0 = const()[name = tensor("op_50645_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50645_cast_fp16 = einsum(equation = var_50645_equation_0, values = (var_50135_cast_fp16, var_50537_cast_fp16))[name = tensor("op_50645_cast_fp16")]; tensor var_50647_equation_0 = const()[name = tensor("op_50647_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50647_cast_fp16 = einsum(equation = var_50647_equation_0, values = (var_50139_cast_fp16, var_50538_cast_fp16))[name = tensor("op_50647_cast_fp16")]; tensor var_50649_equation_0 = const()[name = tensor("op_50649_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50649_cast_fp16 = einsum(equation = var_50649_equation_0, values = (var_50139_cast_fp16, var_50539_cast_fp16))[name = tensor("op_50649_cast_fp16")]; tensor var_50651_equation_0 = const()[name = tensor("op_50651_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50651_cast_fp16 = einsum(equation = var_50651_equation_0, values = (var_50139_cast_fp16, var_50540_cast_fp16))[name = tensor("op_50651_cast_fp16")]; tensor var_50653_equation_0 = const()[name = tensor("op_50653_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50653_cast_fp16 = einsum(equation = var_50653_equation_0, values = (var_50139_cast_fp16, var_50541_cast_fp16))[name = tensor("op_50653_cast_fp16")]; tensor var_50655_equation_0 = const()[name = tensor("op_50655_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50655_cast_fp16 = einsum(equation = var_50655_equation_0, values = (var_50143_cast_fp16, var_50542_cast_fp16))[name = tensor("op_50655_cast_fp16")]; tensor var_50657_equation_0 = const()[name = tensor("op_50657_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50657_cast_fp16 = einsum(equation = var_50657_equation_0, values = (var_50143_cast_fp16, var_50543_cast_fp16))[name = tensor("op_50657_cast_fp16")]; tensor var_50659_equation_0 = const()[name = tensor("op_50659_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50659_cast_fp16 = einsum(equation = var_50659_equation_0, values = (var_50143_cast_fp16, var_50544_cast_fp16))[name = tensor("op_50659_cast_fp16")]; tensor var_50661_equation_0 = const()[name = tensor("op_50661_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50661_cast_fp16 = einsum(equation = var_50661_equation_0, values = (var_50143_cast_fp16, var_50545_cast_fp16))[name = tensor("op_50661_cast_fp16")]; tensor var_50663_equation_0 = const()[name = tensor("op_50663_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50663_cast_fp16 = einsum(equation = var_50663_equation_0, values = (var_50147_cast_fp16, var_50546_cast_fp16))[name = tensor("op_50663_cast_fp16")]; tensor var_50665_equation_0 = const()[name = tensor("op_50665_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50665_cast_fp16 = einsum(equation = var_50665_equation_0, values = (var_50147_cast_fp16, var_50547_cast_fp16))[name = tensor("op_50665_cast_fp16")]; tensor var_50667_equation_0 = const()[name = tensor("op_50667_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50667_cast_fp16 = einsum(equation = var_50667_equation_0, values = (var_50147_cast_fp16, var_50548_cast_fp16))[name = tensor("op_50667_cast_fp16")]; tensor var_50669_equation_0 = const()[name = tensor("op_50669_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50669_cast_fp16 = einsum(equation = var_50669_equation_0, values = (var_50147_cast_fp16, var_50549_cast_fp16))[name = tensor("op_50669_cast_fp16")]; tensor var_50671_equation_0 = const()[name = tensor("op_50671_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50671_cast_fp16 = einsum(equation = var_50671_equation_0, values = (var_50151_cast_fp16, var_50550_cast_fp16))[name = tensor("op_50671_cast_fp16")]; tensor var_50673_equation_0 = const()[name = tensor("op_50673_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50673_cast_fp16 = einsum(equation = var_50673_equation_0, values = (var_50151_cast_fp16, var_50551_cast_fp16))[name = tensor("op_50673_cast_fp16")]; tensor var_50675_equation_0 = const()[name = tensor("op_50675_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50675_cast_fp16 = einsum(equation = var_50675_equation_0, values = (var_50151_cast_fp16, var_50552_cast_fp16))[name = tensor("op_50675_cast_fp16")]; tensor var_50677_equation_0 = const()[name = tensor("op_50677_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50677_cast_fp16 = einsum(equation = var_50677_equation_0, values = (var_50151_cast_fp16, var_50553_cast_fp16))[name = tensor("op_50677_cast_fp16")]; tensor var_50679_equation_0 = const()[name = tensor("op_50679_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50679_cast_fp16 = einsum(equation = var_50679_equation_0, values = (var_50155_cast_fp16, var_50554_cast_fp16))[name = tensor("op_50679_cast_fp16")]; tensor var_50681_equation_0 = const()[name = tensor("op_50681_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50681_cast_fp16 = einsum(equation = var_50681_equation_0, values = (var_50155_cast_fp16, var_50555_cast_fp16))[name = tensor("op_50681_cast_fp16")]; tensor var_50683_equation_0 = const()[name = tensor("op_50683_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50683_cast_fp16 = einsum(equation = var_50683_equation_0, values = (var_50155_cast_fp16, var_50556_cast_fp16))[name = tensor("op_50683_cast_fp16")]; tensor var_50685_equation_0 = const()[name = tensor("op_50685_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50685_cast_fp16 = einsum(equation = var_50685_equation_0, values = (var_50155_cast_fp16, var_50557_cast_fp16))[name = tensor("op_50685_cast_fp16")]; tensor var_50687_equation_0 = const()[name = tensor("op_50687_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50687_cast_fp16 = einsum(equation = var_50687_equation_0, values = (var_50159_cast_fp16, var_50558_cast_fp16))[name = tensor("op_50687_cast_fp16")]; tensor var_50689_equation_0 = const()[name = tensor("op_50689_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50689_cast_fp16 = einsum(equation = var_50689_equation_0, values = (var_50159_cast_fp16, var_50559_cast_fp16))[name = tensor("op_50689_cast_fp16")]; tensor var_50691_equation_0 = const()[name = tensor("op_50691_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50691_cast_fp16 = einsum(equation = var_50691_equation_0, values = (var_50159_cast_fp16, var_50560_cast_fp16))[name = tensor("op_50691_cast_fp16")]; tensor var_50693_equation_0 = const()[name = tensor("op_50693_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50693_cast_fp16 = einsum(equation = var_50693_equation_0, values = (var_50159_cast_fp16, var_50561_cast_fp16))[name = tensor("op_50693_cast_fp16")]; tensor var_50695_equation_0 = const()[name = tensor("op_50695_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50695_cast_fp16 = einsum(equation = var_50695_equation_0, values = (var_50163_cast_fp16, var_50562_cast_fp16))[name = tensor("op_50695_cast_fp16")]; tensor var_50697_equation_0 = const()[name = tensor("op_50697_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50697_cast_fp16 = einsum(equation = var_50697_equation_0, values = (var_50163_cast_fp16, var_50563_cast_fp16))[name = tensor("op_50697_cast_fp16")]; tensor var_50699_equation_0 = const()[name = tensor("op_50699_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50699_cast_fp16 = einsum(equation = var_50699_equation_0, values = (var_50163_cast_fp16, var_50564_cast_fp16))[name = tensor("op_50699_cast_fp16")]; tensor var_50701_equation_0 = const()[name = tensor("op_50701_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50701_cast_fp16 = einsum(equation = var_50701_equation_0, values = (var_50163_cast_fp16, var_50565_cast_fp16))[name = tensor("op_50701_cast_fp16")]; tensor var_50703_equation_0 = const()[name = tensor("op_50703_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50703_cast_fp16 = einsum(equation = var_50703_equation_0, values = (var_50167_cast_fp16, var_50566_cast_fp16))[name = tensor("op_50703_cast_fp16")]; tensor var_50705_equation_0 = const()[name = tensor("op_50705_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50705_cast_fp16 = einsum(equation = var_50705_equation_0, values = (var_50167_cast_fp16, var_50567_cast_fp16))[name = tensor("op_50705_cast_fp16")]; tensor var_50707_equation_0 = const()[name = tensor("op_50707_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50707_cast_fp16 = einsum(equation = var_50707_equation_0, values = (var_50167_cast_fp16, var_50568_cast_fp16))[name = tensor("op_50707_cast_fp16")]; tensor var_50709_equation_0 = const()[name = tensor("op_50709_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50709_cast_fp16 = einsum(equation = var_50709_equation_0, values = (var_50167_cast_fp16, var_50569_cast_fp16))[name = tensor("op_50709_cast_fp16")]; tensor var_50711_equation_0 = const()[name = tensor("op_50711_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50711_cast_fp16 = einsum(equation = var_50711_equation_0, values = (var_50171_cast_fp16, var_50570_cast_fp16))[name = tensor("op_50711_cast_fp16")]; tensor var_50713_equation_0 = const()[name = tensor("op_50713_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50713_cast_fp16 = einsum(equation = var_50713_equation_0, values = (var_50171_cast_fp16, var_50571_cast_fp16))[name = tensor("op_50713_cast_fp16")]; tensor var_50715_equation_0 = const()[name = tensor("op_50715_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50715_cast_fp16 = einsum(equation = var_50715_equation_0, values = (var_50171_cast_fp16, var_50572_cast_fp16))[name = tensor("op_50715_cast_fp16")]; tensor var_50717_equation_0 = const()[name = tensor("op_50717_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50717_cast_fp16 = einsum(equation = var_50717_equation_0, values = (var_50171_cast_fp16, var_50573_cast_fp16))[name = tensor("op_50717_cast_fp16")]; tensor var_50719_equation_0 = const()[name = tensor("op_50719_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50719_cast_fp16 = einsum(equation = var_50719_equation_0, values = (var_50175_cast_fp16, var_50574_cast_fp16))[name = tensor("op_50719_cast_fp16")]; tensor var_50721_equation_0 = const()[name = tensor("op_50721_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50721_cast_fp16 = einsum(equation = var_50721_equation_0, values = (var_50175_cast_fp16, var_50575_cast_fp16))[name = tensor("op_50721_cast_fp16")]; tensor var_50723_equation_0 = const()[name = tensor("op_50723_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50723_cast_fp16 = einsum(equation = var_50723_equation_0, values = (var_50175_cast_fp16, var_50576_cast_fp16))[name = tensor("op_50723_cast_fp16")]; tensor var_50725_equation_0 = const()[name = tensor("op_50725_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50725_cast_fp16 = einsum(equation = var_50725_equation_0, values = (var_50175_cast_fp16, var_50577_cast_fp16))[name = tensor("op_50725_cast_fp16")]; tensor var_50727_equation_0 = const()[name = tensor("op_50727_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50727_cast_fp16 = einsum(equation = var_50727_equation_0, values = (var_50179_cast_fp16, var_50578_cast_fp16))[name = tensor("op_50727_cast_fp16")]; tensor var_50729_equation_0 = const()[name = tensor("op_50729_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50729_cast_fp16 = einsum(equation = var_50729_equation_0, values = (var_50179_cast_fp16, var_50579_cast_fp16))[name = tensor("op_50729_cast_fp16")]; tensor var_50731_equation_0 = const()[name = tensor("op_50731_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50731_cast_fp16 = einsum(equation = var_50731_equation_0, values = (var_50179_cast_fp16, var_50580_cast_fp16))[name = tensor("op_50731_cast_fp16")]; tensor var_50733_equation_0 = const()[name = tensor("op_50733_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50733_cast_fp16 = einsum(equation = var_50733_equation_0, values = (var_50179_cast_fp16, var_50581_cast_fp16))[name = tensor("op_50733_cast_fp16")]; tensor var_50735_equation_0 = const()[name = tensor("op_50735_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50735_cast_fp16 = einsum(equation = var_50735_equation_0, values = (var_50183_cast_fp16, var_50582_cast_fp16))[name = tensor("op_50735_cast_fp16")]; tensor var_50737_equation_0 = const()[name = tensor("op_50737_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50737_cast_fp16 = einsum(equation = var_50737_equation_0, values = (var_50183_cast_fp16, var_50583_cast_fp16))[name = tensor("op_50737_cast_fp16")]; tensor var_50739_equation_0 = const()[name = tensor("op_50739_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50739_cast_fp16 = einsum(equation = var_50739_equation_0, values = (var_50183_cast_fp16, var_50584_cast_fp16))[name = tensor("op_50739_cast_fp16")]; tensor var_50741_equation_0 = const()[name = tensor("op_50741_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50741_cast_fp16 = einsum(equation = var_50741_equation_0, values = (var_50183_cast_fp16, var_50585_cast_fp16))[name = tensor("op_50741_cast_fp16")]; tensor var_50743_equation_0 = const()[name = tensor("op_50743_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50743_cast_fp16 = einsum(equation = var_50743_equation_0, values = (var_50187_cast_fp16, var_50586_cast_fp16))[name = tensor("op_50743_cast_fp16")]; tensor var_50745_equation_0 = const()[name = tensor("op_50745_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50745_cast_fp16 = einsum(equation = var_50745_equation_0, values = (var_50187_cast_fp16, var_50587_cast_fp16))[name = tensor("op_50745_cast_fp16")]; tensor var_50747_equation_0 = const()[name = tensor("op_50747_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50747_cast_fp16 = einsum(equation = var_50747_equation_0, values = (var_50187_cast_fp16, var_50588_cast_fp16))[name = tensor("op_50747_cast_fp16")]; tensor var_50749_equation_0 = const()[name = tensor("op_50749_equation_0"), val = tensor("bchk,bkhq->bchq")]; tensor var_50749_cast_fp16 = einsum(equation = var_50749_equation_0, values = (var_50187_cast_fp16, var_50589_cast_fp16))[name = tensor("op_50749_cast_fp16")]; tensor var_50751_interleave_0 = const()[name = tensor("op_50751_interleave_0"), val = tensor(false)]; tensor var_50751_cast_fp16 = concat(axis = var_49283, interleave = var_50751_interleave_0, values = (var_50591_cast_fp16, var_50593_cast_fp16, var_50595_cast_fp16, var_50597_cast_fp16))[name = tensor("op_50751_cast_fp16")]; tensor var_50753_interleave_0 = const()[name = tensor("op_50753_interleave_0"), val = tensor(false)]; tensor var_50753_cast_fp16 = concat(axis = var_49283, interleave = var_50753_interleave_0, values = (var_50599_cast_fp16, var_50601_cast_fp16, var_50603_cast_fp16, var_50605_cast_fp16))[name = tensor("op_50753_cast_fp16")]; tensor var_50755_interleave_0 = const()[name = tensor("op_50755_interleave_0"), val = tensor(false)]; tensor var_50755_cast_fp16 = concat(axis = var_49283, interleave = var_50755_interleave_0, values = (var_50607_cast_fp16, var_50609_cast_fp16, var_50611_cast_fp16, var_50613_cast_fp16))[name = tensor("op_50755_cast_fp16")]; tensor var_50757_interleave_0 = const()[name = tensor("op_50757_interleave_0"), val = tensor(false)]; tensor var_50757_cast_fp16 = concat(axis = var_49283, interleave = var_50757_interleave_0, values = (var_50615_cast_fp16, var_50617_cast_fp16, var_50619_cast_fp16, var_50621_cast_fp16))[name = tensor("op_50757_cast_fp16")]; tensor var_50759_interleave_0 = const()[name = tensor("op_50759_interleave_0"), val = tensor(false)]; tensor var_50759_cast_fp16 = concat(axis = var_49283, interleave = var_50759_interleave_0, values = (var_50623_cast_fp16, var_50625_cast_fp16, var_50627_cast_fp16, var_50629_cast_fp16))[name = tensor("op_50759_cast_fp16")]; tensor var_50761_interleave_0 = const()[name = tensor("op_50761_interleave_0"), val = tensor(false)]; tensor var_50761_cast_fp16 = concat(axis = var_49283, interleave = var_50761_interleave_0, values = (var_50631_cast_fp16, var_50633_cast_fp16, var_50635_cast_fp16, var_50637_cast_fp16))[name = tensor("op_50761_cast_fp16")]; tensor var_50763_interleave_0 = const()[name = tensor("op_50763_interleave_0"), val = tensor(false)]; tensor var_50763_cast_fp16 = concat(axis = var_49283, interleave = var_50763_interleave_0, values = (var_50639_cast_fp16, var_50641_cast_fp16, var_50643_cast_fp16, var_50645_cast_fp16))[name = tensor("op_50763_cast_fp16")]; tensor var_50765_interleave_0 = const()[name = tensor("op_50765_interleave_0"), val = tensor(false)]; tensor var_50765_cast_fp16 = concat(axis = var_49283, interleave = var_50765_interleave_0, values = (var_50647_cast_fp16, var_50649_cast_fp16, var_50651_cast_fp16, var_50653_cast_fp16))[name = tensor("op_50765_cast_fp16")]; tensor var_50767_interleave_0 = const()[name = tensor("op_50767_interleave_0"), val = tensor(false)]; tensor var_50767_cast_fp16 = concat(axis = var_49283, interleave = var_50767_interleave_0, values = (var_50655_cast_fp16, var_50657_cast_fp16, var_50659_cast_fp16, var_50661_cast_fp16))[name = tensor("op_50767_cast_fp16")]; tensor var_50769_interleave_0 = const()[name = tensor("op_50769_interleave_0"), val = tensor(false)]; tensor var_50769_cast_fp16 = concat(axis = var_49283, interleave = var_50769_interleave_0, values = (var_50663_cast_fp16, var_50665_cast_fp16, var_50667_cast_fp16, var_50669_cast_fp16))[name = tensor("op_50769_cast_fp16")]; tensor var_50771_interleave_0 = const()[name = tensor("op_50771_interleave_0"), val = tensor(false)]; tensor var_50771_cast_fp16 = concat(axis = var_49283, interleave = var_50771_interleave_0, values = (var_50671_cast_fp16, var_50673_cast_fp16, var_50675_cast_fp16, var_50677_cast_fp16))[name = tensor("op_50771_cast_fp16")]; tensor var_50773_interleave_0 = const()[name = tensor("op_50773_interleave_0"), val = tensor(false)]; tensor var_50773_cast_fp16 = concat(axis = var_49283, interleave = var_50773_interleave_0, values = (var_50679_cast_fp16, var_50681_cast_fp16, var_50683_cast_fp16, var_50685_cast_fp16))[name = tensor("op_50773_cast_fp16")]; tensor var_50775_interleave_0 = const()[name = tensor("op_50775_interleave_0"), val = tensor(false)]; tensor var_50775_cast_fp16 = concat(axis = var_49283, interleave = var_50775_interleave_0, values = (var_50687_cast_fp16, var_50689_cast_fp16, var_50691_cast_fp16, var_50693_cast_fp16))[name = tensor("op_50775_cast_fp16")]; tensor var_50777_interleave_0 = const()[name = tensor("op_50777_interleave_0"), val = tensor(false)]; tensor var_50777_cast_fp16 = concat(axis = var_49283, interleave = var_50777_interleave_0, values = (var_50695_cast_fp16, var_50697_cast_fp16, var_50699_cast_fp16, var_50701_cast_fp16))[name = tensor("op_50777_cast_fp16")]; tensor var_50779_interleave_0 = const()[name = tensor("op_50779_interleave_0"), val = tensor(false)]; tensor var_50779_cast_fp16 = concat(axis = var_49283, interleave = var_50779_interleave_0, values = (var_50703_cast_fp16, var_50705_cast_fp16, var_50707_cast_fp16, var_50709_cast_fp16))[name = tensor("op_50779_cast_fp16")]; tensor var_50781_interleave_0 = const()[name = tensor("op_50781_interleave_0"), val = tensor(false)]; tensor var_50781_cast_fp16 = concat(axis = var_49283, interleave = var_50781_interleave_0, values = (var_50711_cast_fp16, var_50713_cast_fp16, var_50715_cast_fp16, var_50717_cast_fp16))[name = tensor("op_50781_cast_fp16")]; tensor var_50783_interleave_0 = const()[name = tensor("op_50783_interleave_0"), val = tensor(false)]; tensor var_50783_cast_fp16 = concat(axis = var_49283, interleave = var_50783_interleave_0, values = (var_50719_cast_fp16, var_50721_cast_fp16, var_50723_cast_fp16, var_50725_cast_fp16))[name = tensor("op_50783_cast_fp16")]; tensor var_50785_interleave_0 = const()[name = tensor("op_50785_interleave_0"), val = tensor(false)]; tensor var_50785_cast_fp16 = concat(axis = var_49283, interleave = var_50785_interleave_0, values = (var_50727_cast_fp16, var_50729_cast_fp16, var_50731_cast_fp16, var_50733_cast_fp16))[name = tensor("op_50785_cast_fp16")]; tensor var_50787_interleave_0 = const()[name = tensor("op_50787_interleave_0"), val = tensor(false)]; tensor var_50787_cast_fp16 = concat(axis = var_49283, interleave = var_50787_interleave_0, values = (var_50735_cast_fp16, var_50737_cast_fp16, var_50739_cast_fp16, var_50741_cast_fp16))[name = tensor("op_50787_cast_fp16")]; tensor var_50789_interleave_0 = const()[name = tensor("op_50789_interleave_0"), val = tensor(false)]; tensor var_50789_cast_fp16 = concat(axis = var_49283, interleave = var_50789_interleave_0, values = (var_50743_cast_fp16, var_50745_cast_fp16, var_50747_cast_fp16, var_50749_cast_fp16))[name = tensor("op_50789_cast_fp16")]; tensor input_249_interleave_0 = const()[name = tensor("input_249_interleave_0"), val = tensor(false)]; tensor input_249_cast_fp16 = concat(axis = var_49308, interleave = input_249_interleave_0, values = (var_50751_cast_fp16, var_50753_cast_fp16, var_50755_cast_fp16, var_50757_cast_fp16, var_50759_cast_fp16, var_50761_cast_fp16, var_50763_cast_fp16, var_50765_cast_fp16, var_50767_cast_fp16, var_50769_cast_fp16, var_50771_cast_fp16, var_50773_cast_fp16, var_50775_cast_fp16, var_50777_cast_fp16, var_50779_cast_fp16, var_50781_cast_fp16, var_50783_cast_fp16, var_50785_cast_fp16, var_50787_cast_fp16, var_50789_cast_fp16))[name = tensor("input_249_cast_fp16")]; tensor var_50800_pad_type_0 = const()[name = tensor("op_50800_pad_type_0"), val = tensor("valid")]; tensor var_50800_strides_0 = const()[name = tensor("op_50800_strides_0"), val = tensor([1, 1])]; tensor var_50800_pad_0 = const()[name = tensor("op_50800_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_50800_dilations_0 = const()[name = tensor("op_50800_dilations_0"), val = tensor([1, 1])]; tensor var_50800_groups_0 = const()[name = tensor("op_50800_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(412581376))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413400640))), name = tensor("layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 1280, 1, 1])]; tensor layers_31_self_attn_o_proj_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_o_proj_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413400768)))]; tensor var_50800_cast_fp16 = conv(bias = layers_31_self_attn_o_proj_inlier_module_bias_to_fp16, dilations = var_50800_dilations_0, groups = var_50800_groups_0, pad = var_50800_pad_0, pad_type = var_50800_pad_type_0, strides = var_50800_strides_0, weight = layers_31_self_attn_o_proj_inlier_module_weight_to_fp16_palettized, x = input_249_cast_fp16)[name = tensor("op_50800_cast_fp16")]; tensor var_50806_pad_type_0 = const()[name = tensor("op_50806_pad_type_0"), val = tensor("valid")]; tensor var_50806_strides_0 = const()[name = tensor("op_50806_strides_0"), val = tensor([1, 1])]; tensor var_50806_pad_0 = const()[name = tensor("op_50806_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_50806_dilations_0 = const()[name = tensor("op_50806_dilations_0"), val = tensor([1, 1])]; tensor var_50806_groups_0 = const()[name = tensor("op_50806_groups_0"), val = tensor(1)]; tensor layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413416000))), name = tensor("layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413403392))), shape = tensor([1280, 1280, 1, 1])]; tensor var_50806_cast_fp16 = conv(dilations = var_50806_dilations_0, groups = var_50806_groups_0, pad = var_50806_pad_0, pad_type = var_50806_pad_type_0, strides = var_50806_strides_0, weight = layers_31_self_attn_o_proj_outlier_module_weight_to_fp16_sparsified, x = input_249_cast_fp16)[name = tensor("op_50806_cast_fp16")]; tensor obj_cast_fp16 = add(x = var_50800_cast_fp16, y = var_50806_cast_fp16)[name = tensor("obj_cast_fp16")]; tensor inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_127_cast_fp16")]; tensor out_127_axes_0 = const()[name = tensor("out_127_axes_0"), val = tensor([1])]; tensor var_50817_to_fp16 = const()[name = tensor("op_50817_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_127_cast_fp16 = layer_norm(axes = out_127_axes_0, epsilon = var_50817_to_fp16, x = inputs_127_cast_fp16)[name = tensor("out_127_cast_fp16")]; tensor input_251_gamma_0_to_fp16 = const()[name = tensor("input_251_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413620864)))]; tensor input_251_beta_0_to_fp16 = const()[name = tensor("input_251_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413623488)))]; tensor input_251_epsilon_0_to_fp16 = const()[name = tensor("input_251_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor input_251_cast_fp16 = batch_norm(beta = input_251_beta_0_to_fp16, epsilon = input_251_epsilon_0_to_fp16, gamma = input_251_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_127_cast_fp16)[name = tensor("input_251_cast_fp16")]; tensor var_50835_pad_type_0 = const()[name = tensor("op_50835_pad_type_0"), val = tensor("valid")]; tensor var_50835_strides_0 = const()[name = tensor("op_50835_strides_0"), val = tensor([1, 1])]; tensor var_50835_pad_0 = const()[name = tensor("op_50835_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_50835_dilations_0 = const()[name = tensor("op_50835_dilations_0"), val = tensor([1, 1])]; tensor var_50835_groups_0 = const()[name = tensor("op_50835_groups_0"), val = tensor(1)]; tensor layers_31_fc1_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(413626112))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416902976))), name = tensor("layers_31_fc1_inlier_module_weight_to_fp16_palettized"), shape = tensor([5120, 1280, 1, 1])]; tensor layers_31_fc1_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_fc1_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416903104)))]; tensor var_50835_cast_fp16 = conv(bias = layers_31_fc1_inlier_module_bias_to_fp16, dilations = var_50835_dilations_0, groups = var_50835_groups_0, pad = var_50835_pad_0, pad_type = var_50835_pad_type_0, strides = var_50835_strides_0, weight = layers_31_fc1_inlier_module_weight_to_fp16_palettized, x = input_251_cast_fp16)[name = tensor("op_50835_cast_fp16")]; tensor var_50841_pad_type_0 = const()[name = tensor("op_50841_pad_type_0"), val = tensor("valid")]; tensor var_50841_strides_0 = const()[name = tensor("op_50841_strides_0"), val = tensor([1, 1])]; tensor var_50841_pad_0 = const()[name = tensor("op_50841_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_50841_dilations_0 = const()[name = tensor("op_50841_dilations_0"), val = tensor([1, 1])]; tensor var_50841_groups_0 = const()[name = tensor("op_50841_groups_0"), val = tensor(1)]; tensor layers_31_fc1_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416975104))), name = tensor("layers_31_fc1_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(416913408))), shape = tensor([5120, 1280, 1, 1])]; tensor var_50841_cast_fp16 = conv(dilations = var_50841_dilations_0, groups = var_50841_groups_0, pad = var_50841_pad_0, pad_type = var_50841_pad_type_0, strides = var_50841_strides_0, weight = layers_31_fc1_outlier_module_weight_to_fp16_sparsified, x = input_251_cast_fp16)[name = tensor("op_50841_cast_fp16")]; tensor input_253_cast_fp16 = add(x = var_50835_cast_fp16, y = var_50841_cast_fp16)[name = tensor("input_253_cast_fp16")]; tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_253_cast_fp16)[name = tensor("input_cast_fp16")]; tensor var_50852_pad_type_0 = const()[name = tensor("op_50852_pad_type_0"), val = tensor("valid")]; tensor var_50852_strides_0 = const()[name = tensor("op_50852_strides_0"), val = tensor([1, 1])]; tensor var_50852_pad_0 = const()[name = tensor("op_50852_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_50852_dilations_0 = const()[name = tensor("op_50852_dilations_0"), val = tensor([1, 1])]; tensor var_50852_groups_0 = const()[name = tensor("op_50852_groups_0"), val = tensor(1)]; tensor layers_31_fc2_inlier_module_weight_to_fp16_palettized = constexpr_lut_to_dense()[indices = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417794368))), lut = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421071232))), name = tensor("layers_31_fc2_inlier_module_weight_to_fp16_palettized"), shape = tensor([1280, 5120, 1, 1])]; tensor layers_31_fc2_inlier_module_bias_to_fp16 = const()[name = tensor("layers_31_fc2_inlier_module_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421071360)))]; tensor var_50852_cast_fp16 = conv(bias = layers_31_fc2_inlier_module_bias_to_fp16, dilations = var_50852_dilations_0, groups = var_50852_groups_0, pad = var_50852_pad_0, pad_type = var_50852_pad_type_0, strides = var_50852_strides_0, weight = layers_31_fc2_inlier_module_weight_to_fp16_palettized, x = input_cast_fp16)[name = tensor("op_50852_cast_fp16")]; tensor var_50858_pad_type_0 = const()[name = tensor("op_50858_pad_type_0"), val = tensor("valid")]; tensor var_50858_strides_0 = const()[name = tensor("op_50858_strides_0"), val = tensor([1, 1])]; tensor var_50858_pad_0 = const()[name = tensor("op_50858_pad_0"), val = tensor([0, 0, 0, 0])]; tensor var_50858_dilations_0 = const()[name = tensor("op_50858_dilations_0"), val = tensor([1, 1])]; tensor var_50858_groups_0 = const()[name = tensor("op_50858_groups_0"), val = tensor(1)]; tensor layers_31_fc2_outlier_module_weight_to_fp16_sparsified = constexpr_sparse_to_dense()[mask = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421144256))), name = tensor("layers_31_fc2_outlier_module_weight_to_fp16_sparsified"), nonzero_data = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421073984))), shape = tensor([1280, 5120, 1, 1])]; tensor var_50858_cast_fp16 = conv(dilations = var_50858_dilations_0, groups = var_50858_groups_0, pad = var_50858_pad_0, pad_type = var_50858_pad_type_0, strides = var_50858_strides_0, weight = layers_31_fc2_outlier_module_weight_to_fp16_sparsified, x = input_cast_fp16)[name = tensor("op_50858_cast_fp16")]; tensor hidden_states_cast_fp16 = add(x = var_50852_cast_fp16, y = var_50858_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; tensor inputs_cast_fp16 = add(x = inputs_127_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; tensor out_axes_0 = const()[name = tensor("out_axes_0"), val = tensor([1])]; tensor var_50873_to_fp16 = const()[name = tensor("op_50873_to_fp16"), val = tensor(0x1.5p-17)]; tensor out_cast_fp16 = layer_norm(axes = out_axes_0, epsilon = var_50873_to_fp16, x = inputs_cast_fp16)[name = tensor("out_cast_fp16")]; tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421963520)))]; tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(421966144)))]; tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = var_97_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; } -> (encoder_output_embeds); }