diff --git "a/ggml-small-encoder.mlmodelc/model.mil" "b/ggml-small-encoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/ggml-small-encoder.mlmodelc/model.mil" @@ -0,0 +1,732 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +{ + func main(tensor logmel_data) { + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_40 = const()[name = tensor("op_40"), val = tensor([1])]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; + tensor var_44_pad_type_0 = const()[name = tensor("op_44_pad_type_0"), val = tensor("custom")]; + tensor var_44_pad_0 = const()[name = tensor("op_44_pad_0"), val = tensor([1, 1])]; + tensor weight_3_to_fp16 = const()[name = tensor("weight_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor bias_3_to_fp16 = const()[name = tensor("bias_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368768)))]; + tensor var_44_cast_fp16 = conv(bias = bias_3_to_fp16, dilations = var_42, groups = var_32, pad = var_44_pad_0, pad_type = var_44_pad_type_0, strides = var_40, weight = weight_3_to_fp16, x = logmel_data)[name = tensor("op_44_cast_fp16")]; + tensor input_1_mode_0 = const()[name = tensor("input_1_mode_0"), val = tensor("EXACT")]; + tensor input_1_cast_fp16 = gelu(mode = input_1_mode_0, x = var_44_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_49 = const()[name = tensor("op_49"), val = tensor(1)]; + tensor var_58 = const()[name = tensor("op_58"), val = tensor([2])]; + tensor var_60 = const()[name = tensor("op_60"), val = tensor([1])]; + tensor var_62_pad_type_0 = const()[name = tensor("op_62_pad_type_0"), val = tensor("custom")]; + tensor var_62_pad_0 = const()[name = tensor("op_62_pad_0"), val = tensor([1, 1])]; + tensor weight_7_to_fp16 = const()[name = tensor("weight_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370368)))]; + tensor bias_7_to_fp16 = const()[name = tensor("bias_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3909376)))]; + tensor var_62_cast_fp16 = conv(bias = bias_7_to_fp16, dilations = var_60, groups = var_49, pad = var_62_pad_0, pad_type = var_62_pad_type_0, strides = var_58, weight = weight_7_to_fp16, x = input_1_cast_fp16)[name = tensor("op_62_cast_fp16")]; + tensor x_3_mode_0 = const()[name = tensor("x_3_mode_0"), val = tensor("EXACT")]; + tensor x_3_cast_fp16 = gelu(mode = x_3_mode_0, x = var_62_cast_fp16)[name = tensor("x_3_cast_fp16")]; + tensor var_68 = const()[name = tensor("op_68"), val = tensor([0, 2, 1])]; + tensor positional_embedding_to_fp16 = const()[name = tensor("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3910976)))]; + tensor transpose_120 = transpose(perm = var_68, x = x_3_cast_fp16)[name = tensor("transpose_120")]; + tensor var_71_cast_fp16 = add(x = transpose_120, y = positional_embedding_to_fp16)[name = tensor("op_71_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor(-1)]; + tensor var_100_axes_0 = const()[name = tensor("op_100_axes_0"), val = tensor([-1])]; + tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_0_attn_ln_weight_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6215040)))]; + tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6216640)))]; + tensor var_90_to_fp16 = const()[name = tensor("op_90_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_100_cast_fp16 = layer_norm(axes = var_100_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_90_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_71_cast_fp16)[name = tensor("op_100_cast_fp16")]; + tensor var_111_to_fp16 = const()[name = tensor("op_111_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6218240)))]; + tensor var_112_to_fp16 = const()[name = tensor("op_112_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7397952)))]; + tensor linear_0_cast_fp16 = linear(bias = var_112_to_fp16, weight = var_111_to_fp16, x = var_100_cast_fp16)[name = tensor("linear_0_cast_fp16")]; + tensor var_115_to_fp16 = const()[name = tensor("op_115_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7399552)))]; + tensor linear_1_bias_0_to_fp16 = const()[name = tensor("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8579264)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_115_to_fp16, x = var_100_cast_fp16)[name = tensor("linear_1_cast_fp16")]; + tensor var_119_to_fp16 = const()[name = tensor("op_119_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8580864)))]; + tensor var_120_to_fp16 = const()[name = tensor("op_120_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9760576)))]; + tensor linear_2_cast_fp16 = linear(bias = var_120_to_fp16, weight = var_119_to_fp16, x = var_100_cast_fp16)[name = tensor("linear_2_cast_fp16")]; + tensor var_128 = const()[name = tensor("op_128"), val = tensor([1, 1500, 12, -1])]; + tensor var_129_cast_fp16 = reshape(shape = var_128, x = linear_0_cast_fp16)[name = tensor("op_129_cast_fp16")]; + tensor const_84_to_fp16 = const()[name = tensor("const_84_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_3_cast_fp16 = mul(x = var_129_cast_fp16, y = const_84_to_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_135 = const()[name = tensor("op_135"), val = tensor([1, 1500, 12, -1])]; + tensor var_136_cast_fp16 = reshape(shape = var_135, x = linear_1_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor const_85_to_fp16 = const()[name = tensor("const_85_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_3_cast_fp16 = mul(x = var_136_cast_fp16, y = const_85_to_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_142 = const()[name = tensor("op_142"), val = tensor([1, 1500, 12, -1])]; + tensor var_143_cast_fp16 = reshape(shape = var_142, x = linear_2_cast_fp16)[name = tensor("op_143_cast_fp16")]; + tensor var_144 = const()[name = tensor("op_144"), val = tensor([0, 2, 1, 3])]; + tensor qk_1_transpose_x_0 = const()[name = tensor("qk_1_transpose_x_0"), val = tensor(false)]; + tensor qk_1_transpose_y_0 = const()[name = tensor("qk_1_transpose_y_0"), val = tensor(false)]; + tensor transpose_48_perm_0 = const()[name = tensor("transpose_48_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_49_perm_0 = const()[name = 
tensor("transpose_49_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_117 = transpose(perm = transpose_49_perm_0, x = k_3_cast_fp16)[name = tensor("transpose_117")]; + tensor transpose_118 = transpose(perm = transpose_48_perm_0, x = q_3_cast_fp16)[name = tensor("transpose_118")]; + tensor qk_1_cast_fp16 = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_118, y = transpose_117)[name = tensor("qk_1_cast_fp16")]; + tensor var_148_cast_fp16 = softmax(axis = var_84, x = qk_1_cast_fp16)[name = tensor("op_148_cast_fp16")]; + tensor var_150_transpose_x_0 = const()[name = tensor("op_150_transpose_x_0"), val = tensor(false)]; + tensor var_150_transpose_y_0 = const()[name = tensor("op_150_transpose_y_0"), val = tensor(false)]; + tensor transpose_119 = transpose(perm = var_144, x = var_143_cast_fp16)[name = tensor("transpose_119")]; + tensor var_150_cast_fp16 = matmul(transpose_x = var_150_transpose_x_0, transpose_y = var_150_transpose_y_0, x = var_148_cast_fp16, y = transpose_119)[name = tensor("op_150_cast_fp16")]; + tensor var_151 = const()[name = tensor("op_151"), val = tensor([0, 2, 1, 3])]; + tensor concat_0 = const()[name = tensor("concat_0"), val = tensor([1, 1500, 768])]; + tensor transpose_116 = transpose(perm = var_151, x = var_150_cast_fp16)[name = tensor("transpose_116")]; + tensor x_11_cast_fp16 = reshape(shape = concat_0, x = transpose_116)[name = tensor("x_11_cast_fp16")]; + tensor var_156_to_fp16 = const()[name = tensor("op_156_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9762176)))]; + tensor var_157_to_fp16 = const()[name = tensor("op_157_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10941888)))]; + tensor linear_3_cast_fp16 = linear(bias = var_157_to_fp16, weight = var_156_to_fp16, x = x_11_cast_fp16)[name = tensor("linear_3_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = var_71_cast_fp16, y = linear_3_cast_fp16)[name = tensor("x_13_cast_fp16")]; + tensor var_164_axes_0 = const()[name = tensor("op_164_axes_0"), val = tensor([-1])]; + tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10943488)))]; + tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10945088)))]; + tensor var_164_cast_fp16 = layer_norm(axes = var_164_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_90_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast_fp16)[name = tensor("op_164_cast_fp16")]; + tensor var_173_to_fp16 = const()[name = tensor("op_173_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10946688)))]; + tensor var_174_to_fp16 = const()[name = tensor("op_174_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15665344)))]; + tensor linear_4_cast_fp16 = linear(bias = var_174_to_fp16, weight = var_173_to_fp16, x = var_164_cast_fp16)[name = tensor("linear_4_cast_fp16")]; + tensor x_17_mode_0 = const()[name = tensor("x_17_mode_0"), val = tensor("EXACT")]; + tensor x_17_cast_fp16 = gelu(mode = x_17_mode_0, x = linear_4_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_179_to_fp16 = const()[name = tensor("op_179_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(15671552)))]; + tensor var_180_to_fp16 = const()[name = tensor("op_180_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20390208)))]; + tensor linear_5_cast_fp16 = linear(bias = var_180_to_fp16, weight = var_179_to_fp16, x = x_17_cast_fp16)[name = tensor("linear_5_cast_fp16")]; + tensor x_19_cast_fp16 = add(x = x_13_cast_fp16, y = linear_5_cast_fp16)[name = tensor("x_19_cast_fp16")]; + tensor var_190 = const()[name = tensor("op_190"), val = tensor(-1)]; + tensor var_206_axes_0 = const()[name = tensor("op_206_axes_0"), val = tensor([-1])]; + tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20391808)))]; + tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20393408)))]; + tensor var_196_to_fp16 = const()[name = tensor("op_196_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_206_cast_fp16 = layer_norm(axes = var_206_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_196_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast_fp16)[name = tensor("op_206_cast_fp16")]; + tensor var_217_to_fp16 = const()[name = tensor("op_217_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20395008)))]; + tensor var_218_to_fp16 = const()[name = tensor("op_218_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21574720)))]; + tensor linear_6_cast_fp16 = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_206_cast_fp16)[name = tensor("linear_6_cast_fp16")]; + tensor var_221_to_fp16 = const()[name = tensor("op_221_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21576320)))]; + tensor linear_7_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_221_to_fp16, x = var_206_cast_fp16)[name = tensor("linear_7_cast_fp16")]; + tensor var_225_to_fp16 = const()[name = tensor("op_225_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22756032)))]; + tensor var_226_to_fp16 = const()[name = tensor("op_226_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23935744)))]; + tensor linear_8_cast_fp16 = linear(bias = var_226_to_fp16, weight = var_225_to_fp16, x = var_206_cast_fp16)[name = tensor("linear_8_cast_fp16")]; + tensor var_234 = const()[name = tensor("op_234"), val = tensor([1, 1500, 12, -1])]; + tensor var_235_cast_fp16 = reshape(shape = var_234, x = linear_6_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor const_86_to_fp16 = const()[name = tensor("const_86_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_7_cast_fp16 = mul(x = var_235_cast_fp16, y = const_86_to_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_241 = const()[name = tensor("op_241"), val = tensor([1, 1500, 12, -1])]; + tensor var_242_cast_fp16 = reshape(shape = var_241, x = linear_7_cast_fp16)[name = tensor("op_242_cast_fp16")]; + tensor const_87_to_fp16 = const()[name = tensor("const_87_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_7_cast_fp16 = mul(x = var_242_cast_fp16, y = const_87_to_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_248 = const()[name = tensor("op_248"), val = 
tensor([1, 1500, 12, -1])]; + tensor var_249_cast_fp16 = reshape(shape = var_248, x = linear_8_cast_fp16)[name = tensor("op_249_cast_fp16")]; + tensor var_250 = const()[name = tensor("op_250"), val = tensor([0, 2, 1, 3])]; + tensor qk_3_transpose_x_0 = const()[name = tensor("qk_3_transpose_x_0"), val = tensor(false)]; + tensor qk_3_transpose_y_0 = const()[name = tensor("qk_3_transpose_y_0"), val = tensor(false)]; + tensor transpose_50_perm_0 = const()[name = tensor("transpose_50_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_51_perm_0 = const()[name = tensor("transpose_51_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_113 = transpose(perm = transpose_51_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_113")]; + tensor transpose_114 = transpose(perm = transpose_50_perm_0, x = q_7_cast_fp16)[name = tensor("transpose_114")]; + tensor qk_3_cast_fp16 = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_114, y = transpose_113)[name = tensor("qk_3_cast_fp16")]; + tensor var_254_cast_fp16 = softmax(axis = var_190, x = qk_3_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_256_transpose_x_0 = const()[name = tensor("op_256_transpose_x_0"), val = tensor(false)]; + tensor var_256_transpose_y_0 = const()[name = tensor("op_256_transpose_y_0"), val = tensor(false)]; + tensor transpose_115 = transpose(perm = var_250, x = var_249_cast_fp16)[name = tensor("transpose_115")]; + tensor var_256_cast_fp16 = matmul(transpose_x = var_256_transpose_x_0, transpose_y = var_256_transpose_y_0, x = var_254_cast_fp16, y = transpose_115)[name = tensor("op_256_cast_fp16")]; + tensor var_257 = const()[name = tensor("op_257"), val = tensor([0, 2, 1, 3])]; + tensor concat_1 = const()[name = tensor("concat_1"), val = tensor([1, 1500, 768])]; + tensor transpose_112 = transpose(perm = var_257, x = var_256_cast_fp16)[name = tensor("transpose_112")]; + tensor x_23_cast_fp16 = reshape(shape = concat_1, x = transpose_112)[name = tensor("x_23_cast_fp16")]; + tensor var_262_to_fp16 = const()[name = tensor("op_262_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23937344)))]; + tensor var_263_to_fp16 = const()[name = tensor("op_263_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25117056)))]; + tensor linear_9_cast_fp16 = linear(bias = var_263_to_fp16, weight = var_262_to_fp16, x = x_23_cast_fp16)[name = tensor("linear_9_cast_fp16")]; + tensor x_25_cast_fp16 = add(x = x_19_cast_fp16, y = linear_9_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor var_270_axes_0 = const()[name = tensor("op_270_axes_0"), val = tensor([-1])]; + tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25118656)))]; + tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25120256)))]; + tensor var_270_cast_fp16 = layer_norm(axes = var_270_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_196_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_279_to_fp16 = const()[name = tensor("op_279_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25121856)))]; + tensor var_280_to_fp16 = const()[name = 
tensor("op_280_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29840512)))]; + tensor linear_10_cast_fp16 = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = var_270_cast_fp16)[name = tensor("linear_10_cast_fp16")]; + tensor x_29_mode_0 = const()[name = tensor("x_29_mode_0"), val = tensor("EXACT")]; + tensor x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = linear_10_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor var_285_to_fp16 = const()[name = tensor("op_285_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29846720)))]; + tensor var_286_to_fp16 = const()[name = tensor("op_286_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34565376)))]; + tensor linear_11_cast_fp16 = linear(bias = var_286_to_fp16, weight = var_285_to_fp16, x = x_29_cast_fp16)[name = tensor("linear_11_cast_fp16")]; + tensor x_31_cast_fp16 = add(x = x_25_cast_fp16, y = linear_11_cast_fp16)[name = tensor("x_31_cast_fp16")]; + tensor var_296 = const()[name = tensor("op_296"), val = tensor(-1)]; + tensor var_312_axes_0 = const()[name = tensor("op_312_axes_0"), val = tensor([-1])]; + tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34566976)))]; + tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34568576)))]; + tensor var_302_to_fp16 = const()[name = tensor("op_302_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_312_cast_fp16 = layer_norm(axes = var_312_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast_fp16)[name = tensor("op_312_cast_fp16")]; + tensor var_323_to_fp16 = const()[name = tensor("op_323_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34570176)))]; + tensor var_324_to_fp16 = const()[name = tensor("op_324_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35749888)))]; + tensor linear_12_cast_fp16 = linear(bias = var_324_to_fp16, weight = var_323_to_fp16, x = var_312_cast_fp16)[name = tensor("linear_12_cast_fp16")]; + tensor var_327_to_fp16 = const()[name = tensor("op_327_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35751488)))]; + tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_327_to_fp16, x = var_312_cast_fp16)[name = tensor("linear_13_cast_fp16")]; + tensor var_331_to_fp16 = const()[name = tensor("op_331_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36931200)))]; + tensor var_332_to_fp16 = const()[name = tensor("op_332_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38110912)))]; + tensor linear_14_cast_fp16 = linear(bias = var_332_to_fp16, weight = var_331_to_fp16, x = var_312_cast_fp16)[name = tensor("linear_14_cast_fp16")]; + tensor var_340 = const()[name = tensor("op_340"), val = tensor([1, 1500, 12, -1])]; + tensor var_341_cast_fp16 = reshape(shape = var_340, x = linear_12_cast_fp16)[name = tensor("op_341_cast_fp16")]; + tensor const_88_to_fp16 = const()[name = tensor("const_88_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + 
tensor q_11_cast_fp16 = mul(x = var_341_cast_fp16, y = const_88_to_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_347 = const()[name = tensor("op_347"), val = tensor([1, 1500, 12, -1])]; + tensor var_348_cast_fp16 = reshape(shape = var_347, x = linear_13_cast_fp16)[name = tensor("op_348_cast_fp16")]; + tensor const_89_to_fp16 = const()[name = tensor("const_89_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_11_cast_fp16 = mul(x = var_348_cast_fp16, y = const_89_to_fp16)[name = tensor("k_11_cast_fp16")]; + tensor var_354 = const()[name = tensor("op_354"), val = tensor([1, 1500, 12, -1])]; + tensor var_355_cast_fp16 = reshape(shape = var_354, x = linear_14_cast_fp16)[name = tensor("op_355_cast_fp16")]; + tensor var_356 = const()[name = tensor("op_356"), val = tensor([0, 2, 1, 3])]; + tensor qk_5_transpose_x_0 = const()[name = tensor("qk_5_transpose_x_0"), val = tensor(false)]; + tensor qk_5_transpose_y_0 = const()[name = tensor("qk_5_transpose_y_0"), val = tensor(false)]; + tensor transpose_52_perm_0 = const()[name = tensor("transpose_52_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_53_perm_0 = const()[name = tensor("transpose_53_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_109 = transpose(perm = transpose_53_perm_0, x = k_11_cast_fp16)[name = tensor("transpose_109")]; + tensor transpose_110 = transpose(perm = transpose_52_perm_0, x = q_11_cast_fp16)[name = tensor("transpose_110")]; + tensor qk_5_cast_fp16 = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_110, y = transpose_109)[name = tensor("qk_5_cast_fp16")]; + tensor var_360_cast_fp16 = softmax(axis = var_296, x = qk_5_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_362_transpose_x_0 = const()[name = tensor("op_362_transpose_x_0"), val = tensor(false)]; + tensor var_362_transpose_y_0 = const()[name = tensor("op_362_transpose_y_0"), val = tensor(false)]; + tensor transpose_111 = transpose(perm = var_356, x = var_355_cast_fp16)[name = tensor("transpose_111")]; + tensor var_362_cast_fp16 = matmul(transpose_x = var_362_transpose_x_0, transpose_y = var_362_transpose_y_0, x = var_360_cast_fp16, y = transpose_111)[name = tensor("op_362_cast_fp16")]; + tensor var_363 = const()[name = tensor("op_363"), val = tensor([0, 2, 1, 3])]; + tensor concat_2 = const()[name = tensor("concat_2"), val = tensor([1, 1500, 768])]; + tensor transpose_108 = transpose(perm = var_363, x = var_362_cast_fp16)[name = tensor("transpose_108")]; + tensor x_35_cast_fp16 = reshape(shape = concat_2, x = transpose_108)[name = tensor("x_35_cast_fp16")]; + tensor var_368_to_fp16 = const()[name = tensor("op_368_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38112512)))]; + tensor var_369_to_fp16 = const()[name = tensor("op_369_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39292224)))]; + tensor linear_15_cast_fp16 = linear(bias = var_369_to_fp16, weight = var_368_to_fp16, x = x_35_cast_fp16)[name = tensor("linear_15_cast_fp16")]; + tensor x_37_cast_fp16 = add(x = x_31_cast_fp16, y = linear_15_cast_fp16)[name = tensor("x_37_cast_fp16")]; + tensor var_376_axes_0 = const()[name = tensor("op_376_axes_0"), val = tensor([-1])]; + tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39293824)))]; + tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = 
tensor("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39295424)))]; + tensor var_376_cast_fp16 = layer_norm(axes = var_376_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast_fp16)[name = tensor("op_376_cast_fp16")]; + tensor var_385_to_fp16 = const()[name = tensor("op_385_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39297024)))]; + tensor var_386_to_fp16 = const()[name = tensor("op_386_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44015680)))]; + tensor linear_16_cast_fp16 = linear(bias = var_386_to_fp16, weight = var_385_to_fp16, x = var_376_cast_fp16)[name = tensor("linear_16_cast_fp16")]; + tensor x_41_mode_0 = const()[name = tensor("x_41_mode_0"), val = tensor("EXACT")]; + tensor x_41_cast_fp16 = gelu(mode = x_41_mode_0, x = linear_16_cast_fp16)[name = tensor("x_41_cast_fp16")]; + tensor var_391_to_fp16 = const()[name = tensor("op_391_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44021888)))]; + tensor var_392_to_fp16 = const()[name = tensor("op_392_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48740544)))]; + tensor linear_17_cast_fp16 = linear(bias = var_392_to_fp16, weight = var_391_to_fp16, x = x_41_cast_fp16)[name = tensor("linear_17_cast_fp16")]; + tensor x_43_cast_fp16 = add(x = x_37_cast_fp16, y = linear_17_cast_fp16)[name = tensor("x_43_cast_fp16")]; + tensor var_402 = const()[name = tensor("op_402"), val = tensor(-1)]; + tensor var_418_axes_0 = const()[name = tensor("op_418_axes_0"), val = tensor([-1])]; + tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48742144)))]; + tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48743744)))]; + tensor var_408_to_fp16 = const()[name = tensor("op_408_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_418_cast_fp16 = layer_norm(axes = var_418_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_408_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast_fp16)[name = tensor("op_418_cast_fp16")]; + tensor var_429_to_fp16 = const()[name = tensor("op_429_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48745344)))]; + tensor var_430_to_fp16 = const()[name = tensor("op_430_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49925056)))]; + tensor linear_18_cast_fp16 = linear(bias = var_430_to_fp16, weight = var_429_to_fp16, x = var_418_cast_fp16)[name = tensor("linear_18_cast_fp16")]; + tensor var_433_to_fp16 = const()[name = tensor("op_433_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49926656)))]; + tensor linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_433_to_fp16, x = var_418_cast_fp16)[name = tensor("linear_19_cast_fp16")]; + tensor var_437_to_fp16 = const()[name = tensor("op_437_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51106368)))]; + tensor var_438_to_fp16 = const()[name = 
tensor("op_438_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52286080)))]; + tensor linear_20_cast_fp16 = linear(bias = var_438_to_fp16, weight = var_437_to_fp16, x = var_418_cast_fp16)[name = tensor("linear_20_cast_fp16")]; + tensor var_446 = const()[name = tensor("op_446"), val = tensor([1, 1500, 12, -1])]; + tensor var_447_cast_fp16 = reshape(shape = var_446, x = linear_18_cast_fp16)[name = tensor("op_447_cast_fp16")]; + tensor const_90_to_fp16 = const()[name = tensor("const_90_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_15_cast_fp16 = mul(x = var_447_cast_fp16, y = const_90_to_fp16)[name = tensor("q_15_cast_fp16")]; + tensor var_453 = const()[name = tensor("op_453"), val = tensor([1, 1500, 12, -1])]; + tensor var_454_cast_fp16 = reshape(shape = var_453, x = linear_19_cast_fp16)[name = tensor("op_454_cast_fp16")]; + tensor const_91_to_fp16 = const()[name = tensor("const_91_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_15_cast_fp16 = mul(x = var_454_cast_fp16, y = const_91_to_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_460 = const()[name = tensor("op_460"), val = tensor([1, 1500, 12, -1])]; + tensor var_461_cast_fp16 = reshape(shape = var_460, x = linear_20_cast_fp16)[name = tensor("op_461_cast_fp16")]; + tensor var_462 = const()[name = tensor("op_462"), val = tensor([0, 2, 1, 3])]; + tensor qk_7_transpose_x_0 = const()[name = tensor("qk_7_transpose_x_0"), val = tensor(false)]; + tensor qk_7_transpose_y_0 = const()[name = tensor("qk_7_transpose_y_0"), val = tensor(false)]; + tensor transpose_54_perm_0 = const()[name = tensor("transpose_54_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_55_perm_0 = const()[name = tensor("transpose_55_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_105 = transpose(perm = transpose_55_perm_0, x = k_15_cast_fp16)[name = tensor("transpose_105")]; + tensor transpose_106 = transpose(perm = transpose_54_perm_0, x = q_15_cast_fp16)[name = tensor("transpose_106")]; + tensor qk_7_cast_fp16 = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_106, y = transpose_105)[name = tensor("qk_7_cast_fp16")]; + tensor var_466_cast_fp16 = softmax(axis = var_402, x = qk_7_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor var_468_transpose_x_0 = const()[name = tensor("op_468_transpose_x_0"), val = tensor(false)]; + tensor var_468_transpose_y_0 = const()[name = tensor("op_468_transpose_y_0"), val = tensor(false)]; + tensor transpose_107 = transpose(perm = var_462, x = var_461_cast_fp16)[name = tensor("transpose_107")]; + tensor var_468_cast_fp16 = matmul(transpose_x = var_468_transpose_x_0, transpose_y = var_468_transpose_y_0, x = var_466_cast_fp16, y = transpose_107)[name = tensor("op_468_cast_fp16")]; + tensor var_469 = const()[name = tensor("op_469"), val = tensor([0, 2, 1, 3])]; + tensor concat_3 = const()[name = tensor("concat_3"), val = tensor([1, 1500, 768])]; + tensor transpose_104 = transpose(perm = var_469, x = var_468_cast_fp16)[name = tensor("transpose_104")]; + tensor x_47_cast_fp16 = reshape(shape = concat_3, x = transpose_104)[name = tensor("x_47_cast_fp16")]; + tensor var_474_to_fp16 = const()[name = tensor("op_474_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52287680)))]; + tensor var_475_to_fp16 = const()[name = tensor("op_475_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53467392)))]; + tensor 
linear_21_cast_fp16 = linear(bias = var_475_to_fp16, weight = var_474_to_fp16, x = x_47_cast_fp16)[name = tensor("linear_21_cast_fp16")]; + tensor x_49_cast_fp16 = add(x = x_43_cast_fp16, y = linear_21_cast_fp16)[name = tensor("x_49_cast_fp16")]; + tensor var_482_axes_0 = const()[name = tensor("op_482_axes_0"), val = tensor([-1])]; + tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53468992)))]; + tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53470592)))]; + tensor var_482_cast_fp16 = layer_norm(axes = var_482_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_408_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor var_491_to_fp16 = const()[name = tensor("op_491_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53472192)))]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58190848)))]; + tensor linear_22_cast_fp16 = linear(bias = var_492_to_fp16, weight = var_491_to_fp16, x = var_482_cast_fp16)[name = tensor("linear_22_cast_fp16")]; + tensor x_53_mode_0 = const()[name = tensor("x_53_mode_0"), val = tensor("EXACT")]; + tensor x_53_cast_fp16 = gelu(mode = x_53_mode_0, x = linear_22_cast_fp16)[name = tensor("x_53_cast_fp16")]; + tensor var_497_to_fp16 = const()[name = tensor("op_497_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58197056)))]; + tensor var_498_to_fp16 = const()[name = tensor("op_498_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62915712)))]; + tensor linear_23_cast_fp16 = linear(bias = var_498_to_fp16, weight = var_497_to_fp16, x = x_53_cast_fp16)[name = tensor("linear_23_cast_fp16")]; + tensor x_55_cast_fp16 = add(x = x_49_cast_fp16, y = linear_23_cast_fp16)[name = tensor("x_55_cast_fp16")]; + tensor var_508 = const()[name = tensor("op_508"), val = tensor(-1)]; + tensor var_524_axes_0 = const()[name = tensor("op_524_axes_0"), val = tensor([-1])]; + tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62917312)))]; + tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62918912)))]; + tensor var_514_to_fp16 = const()[name = tensor("op_514_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_524_cast_fp16 = layer_norm(axes = var_524_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_514_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast_fp16)[name = tensor("op_524_cast_fp16")]; + tensor var_535_to_fp16 = const()[name = tensor("op_535_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62920512)))]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64100224)))]; + tensor linear_24_cast_fp16 = linear(bias = var_536_to_fp16, weight = var_535_to_fp16, x = 
var_524_cast_fp16)[name = tensor("linear_24_cast_fp16")]; + tensor var_539_to_fp16 = const()[name = tensor("op_539_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64101824)))]; + tensor linear_25_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_539_to_fp16, x = var_524_cast_fp16)[name = tensor("linear_25_cast_fp16")]; + tensor var_543_to_fp16 = const()[name = tensor("op_543_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65281536)))]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66461248)))]; + tensor linear_26_cast_fp16 = linear(bias = var_544_to_fp16, weight = var_543_to_fp16, x = var_524_cast_fp16)[name = tensor("linear_26_cast_fp16")]; + tensor var_552 = const()[name = tensor("op_552"), val = tensor([1, 1500, 12, -1])]; + tensor var_553_cast_fp16 = reshape(shape = var_552, x = linear_24_cast_fp16)[name = tensor("op_553_cast_fp16")]; + tensor const_92_to_fp16 = const()[name = tensor("const_92_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_19_cast_fp16 = mul(x = var_553_cast_fp16, y = const_92_to_fp16)[name = tensor("q_19_cast_fp16")]; + tensor var_559 = const()[name = tensor("op_559"), val = tensor([1, 1500, 12, -1])]; + tensor var_560_cast_fp16 = reshape(shape = var_559, x = linear_25_cast_fp16)[name = tensor("op_560_cast_fp16")]; + tensor const_93_to_fp16 = const()[name = tensor("const_93_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_19_cast_fp16 = mul(x = var_560_cast_fp16, y = const_93_to_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_566 = const()[name = tensor("op_566"), val = tensor([1, 1500, 12, -1])]; + tensor var_567_cast_fp16 = reshape(shape = var_566, x = linear_26_cast_fp16)[name = tensor("op_567_cast_fp16")]; + tensor var_568 = const()[name = tensor("op_568"), val = tensor([0, 2, 1, 3])]; + tensor qk_9_transpose_x_0 = const()[name = tensor("qk_9_transpose_x_0"), val = tensor(false)]; + tensor qk_9_transpose_y_0 = const()[name = tensor("qk_9_transpose_y_0"), val = tensor(false)]; + tensor transpose_56_perm_0 = const()[name = tensor("transpose_56_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_57_perm_0 = const()[name = tensor("transpose_57_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_101 = transpose(perm = transpose_57_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_101")]; + tensor transpose_102 = transpose(perm = transpose_56_perm_0, x = q_19_cast_fp16)[name = tensor("transpose_102")]; + tensor qk_9_cast_fp16 = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_102, y = transpose_101)[name = tensor("qk_9_cast_fp16")]; + tensor var_572_cast_fp16 = softmax(axis = var_508, x = qk_9_cast_fp16)[name = tensor("op_572_cast_fp16")]; + tensor var_574_transpose_x_0 = const()[name = tensor("op_574_transpose_x_0"), val = tensor(false)]; + tensor var_574_transpose_y_0 = const()[name = tensor("op_574_transpose_y_0"), val = tensor(false)]; + tensor transpose_103 = transpose(perm = var_568, x = var_567_cast_fp16)[name = tensor("transpose_103")]; + tensor var_574_cast_fp16 = matmul(transpose_x = var_574_transpose_x_0, transpose_y = var_574_transpose_y_0, x = var_572_cast_fp16, y = transpose_103)[name = tensor("op_574_cast_fp16")]; + tensor var_575 = const()[name = tensor("op_575"), val = tensor([0, 2, 1, 3])]; + tensor concat_4 = const()[name = tensor("concat_4"), val = 
tensor([1, 1500, 768])]; + tensor transpose_100 = transpose(perm = var_575, x = var_574_cast_fp16)[name = tensor("transpose_100")]; + tensor x_59_cast_fp16 = reshape(shape = concat_4, x = transpose_100)[name = tensor("x_59_cast_fp16")]; + tensor var_580_to_fp16 = const()[name = tensor("op_580_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66462848)))]; + tensor var_581_to_fp16 = const()[name = tensor("op_581_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67642560)))]; + tensor linear_27_cast_fp16 = linear(bias = var_581_to_fp16, weight = var_580_to_fp16, x = x_59_cast_fp16)[name = tensor("linear_27_cast_fp16")]; + tensor x_61_cast_fp16 = add(x = x_55_cast_fp16, y = linear_27_cast_fp16)[name = tensor("x_61_cast_fp16")]; + tensor var_588_axes_0 = const()[name = tensor("op_588_axes_0"), val = tensor([-1])]; + tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67644160)))]; + tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67645760)))]; + tensor var_588_cast_fp16 = layer_norm(axes = var_588_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_514_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast_fp16)[name = tensor("op_588_cast_fp16")]; + tensor var_597_to_fp16 = const()[name = tensor("op_597_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67647360)))]; + tensor var_598_to_fp16 = const()[name = tensor("op_598_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72366016)))]; + tensor linear_28_cast_fp16 = linear(bias = var_598_to_fp16, weight = var_597_to_fp16, x = var_588_cast_fp16)[name = tensor("linear_28_cast_fp16")]; + tensor x_65_mode_0 = const()[name = tensor("x_65_mode_0"), val = tensor("EXACT")]; + tensor x_65_cast_fp16 = gelu(mode = x_65_mode_0, x = linear_28_cast_fp16)[name = tensor("x_65_cast_fp16")]; + tensor var_603_to_fp16 = const()[name = tensor("op_603_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72372224)))]; + tensor var_604_to_fp16 = const()[name = tensor("op_604_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77090880)))]; + tensor linear_29_cast_fp16 = linear(bias = var_604_to_fp16, weight = var_603_to_fp16, x = x_65_cast_fp16)[name = tensor("linear_29_cast_fp16")]; + tensor x_67_cast_fp16 = add(x = x_61_cast_fp16, y = linear_29_cast_fp16)[name = tensor("x_67_cast_fp16")]; + tensor var_614 = const()[name = tensor("op_614"), val = tensor(-1)]; + tensor var_630_axes_0 = const()[name = tensor("op_630_axes_0"), val = tensor([-1])]; + tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77092480)))]; + tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77094080)))]; + tensor var_620_to_fp16 = const()[name = tensor("op_620_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_630_cast_fp16 = layer_norm(axes = var_630_axes_0, beta = 
blocks_5_attn_ln_bias_to_fp16, epsilon = var_620_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast_fp16)[name = tensor("op_630_cast_fp16")]; + tensor var_641_to_fp16 = const()[name = tensor("op_641_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77095680)))]; + tensor var_642_to_fp16 = const()[name = tensor("op_642_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78275392)))]; + tensor linear_30_cast_fp16 = linear(bias = var_642_to_fp16, weight = var_641_to_fp16, x = var_630_cast_fp16)[name = tensor("linear_30_cast_fp16")]; + tensor var_645_to_fp16 = const()[name = tensor("op_645_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78276992)))]; + tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_645_to_fp16, x = var_630_cast_fp16)[name = tensor("linear_31_cast_fp16")]; + tensor var_649_to_fp16 = const()[name = tensor("op_649_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79456704)))]; + tensor var_650_to_fp16 = const()[name = tensor("op_650_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80636416)))]; + tensor linear_32_cast_fp16 = linear(bias = var_650_to_fp16, weight = var_649_to_fp16, x = var_630_cast_fp16)[name = tensor("linear_32_cast_fp16")]; + tensor var_658 = const()[name = tensor("op_658"), val = tensor([1, 1500, 12, -1])]; + tensor var_659_cast_fp16 = reshape(shape = var_658, x = linear_30_cast_fp16)[name = tensor("op_659_cast_fp16")]; + tensor const_94_to_fp16 = const()[name = tensor("const_94_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_23_cast_fp16 = mul(x = var_659_cast_fp16, y = const_94_to_fp16)[name = tensor("q_23_cast_fp16")]; + tensor var_665 = const()[name = tensor("op_665"), val = tensor([1, 1500, 12, -1])]; + tensor var_666_cast_fp16 = reshape(shape = var_665, x = linear_31_cast_fp16)[name = tensor("op_666_cast_fp16")]; + tensor const_95_to_fp16 = const()[name = tensor("const_95_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_23_cast_fp16 = mul(x = var_666_cast_fp16, y = const_95_to_fp16)[name = tensor("k_23_cast_fp16")]; + tensor var_672 = const()[name = tensor("op_672"), val = tensor([1, 1500, 12, -1])]; + tensor var_673_cast_fp16 = reshape(shape = var_672, x = linear_32_cast_fp16)[name = tensor("op_673_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([0, 2, 1, 3])]; + tensor qk_11_transpose_x_0 = const()[name = tensor("qk_11_transpose_x_0"), val = tensor(false)]; + tensor qk_11_transpose_y_0 = const()[name = tensor("qk_11_transpose_y_0"), val = tensor(false)]; + tensor transpose_58_perm_0 = const()[name = tensor("transpose_58_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_59_perm_0 = const()[name = tensor("transpose_59_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_97 = transpose(perm = transpose_59_perm_0, x = k_23_cast_fp16)[name = tensor("transpose_97")]; + tensor transpose_98 = transpose(perm = transpose_58_perm_0, x = q_23_cast_fp16)[name = tensor("transpose_98")]; + tensor qk_11_cast_fp16 = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_98, y = transpose_97)[name = tensor("qk_11_cast_fp16")]; + tensor var_678_cast_fp16 = softmax(axis = var_614, x = qk_11_cast_fp16)[name = tensor("op_678_cast_fp16")]; + tensor var_680_transpose_x_0 = const()[name = 
tensor("op_680_transpose_x_0"), val = tensor(false)]; + tensor var_680_transpose_y_0 = const()[name = tensor("op_680_transpose_y_0"), val = tensor(false)]; + tensor transpose_99 = transpose(perm = var_674, x = var_673_cast_fp16)[name = tensor("transpose_99")]; + tensor var_680_cast_fp16 = matmul(transpose_x = var_680_transpose_x_0, transpose_y = var_680_transpose_y_0, x = var_678_cast_fp16, y = transpose_99)[name = tensor("op_680_cast_fp16")]; + tensor var_681 = const()[name = tensor("op_681"), val = tensor([0, 2, 1, 3])]; + tensor concat_5 = const()[name = tensor("concat_5"), val = tensor([1, 1500, 768])]; + tensor transpose_96 = transpose(perm = var_681, x = var_680_cast_fp16)[name = tensor("transpose_96")]; + tensor x_71_cast_fp16 = reshape(shape = concat_5, x = transpose_96)[name = tensor("x_71_cast_fp16")]; + tensor var_686_to_fp16 = const()[name = tensor("op_686_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80638016)))]; + tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81817728)))]; + tensor linear_33_cast_fp16 = linear(bias = var_687_to_fp16, weight = var_686_to_fp16, x = x_71_cast_fp16)[name = tensor("linear_33_cast_fp16")]; + tensor x_73_cast_fp16 = add(x = x_67_cast_fp16, y = linear_33_cast_fp16)[name = tensor("x_73_cast_fp16")]; + tensor var_694_axes_0 = const()[name = tensor("op_694_axes_0"), val = tensor([-1])]; + tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81819328)))]; + tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81820928)))]; + tensor var_694_cast_fp16 = layer_norm(axes = var_694_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_620_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast_fp16)[name = tensor("op_694_cast_fp16")]; + tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81822528)))]; + tensor var_704_to_fp16 = const()[name = tensor("op_704_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86541184)))]; + tensor linear_34_cast_fp16 = linear(bias = var_704_to_fp16, weight = var_703_to_fp16, x = var_694_cast_fp16)[name = tensor("linear_34_cast_fp16")]; + tensor x_77_mode_0 = const()[name = tensor("x_77_mode_0"), val = tensor("EXACT")]; + tensor x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = linear_34_cast_fp16)[name = tensor("x_77_cast_fp16")]; + tensor var_709_to_fp16 = const()[name = tensor("op_709_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86547392)))]; + tensor var_710_to_fp16 = const()[name = tensor("op_710_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91266048)))]; + tensor linear_35_cast_fp16 = linear(bias = var_710_to_fp16, weight = var_709_to_fp16, x = x_77_cast_fp16)[name = tensor("linear_35_cast_fp16")]; + tensor x_79_cast_fp16 = add(x = x_73_cast_fp16, y = linear_35_cast_fp16)[name = tensor("x_79_cast_fp16")]; + tensor var_720 = const()[name = tensor("op_720"), val = tensor(-1)]; + tensor var_736_axes_0 = const()[name = tensor("op_736_axes_0"), val = 
tensor([-1])]; + tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91267648)))]; + tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91269248)))]; + tensor var_726_to_fp16 = const()[name = tensor("op_726_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_736_cast_fp16 = layer_norm(axes = var_736_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_726_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast_fp16)[name = tensor("op_736_cast_fp16")]; + tensor var_747_to_fp16 = const()[name = tensor("op_747_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91270848)))]; + tensor var_748_to_fp16 = const()[name = tensor("op_748_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92450560)))]; + tensor linear_36_cast_fp16 = linear(bias = var_748_to_fp16, weight = var_747_to_fp16, x = var_736_cast_fp16)[name = tensor("linear_36_cast_fp16")]; + tensor var_751_to_fp16 = const()[name = tensor("op_751_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92452160)))]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_751_to_fp16, x = var_736_cast_fp16)[name = tensor("linear_37_cast_fp16")]; + tensor var_755_to_fp16 = const()[name = tensor("op_755_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93631872)))]; + tensor var_756_to_fp16 = const()[name = tensor("op_756_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94811584)))]; + tensor linear_38_cast_fp16 = linear(bias = var_756_to_fp16, weight = var_755_to_fp16, x = var_736_cast_fp16)[name = tensor("linear_38_cast_fp16")]; + tensor var_764 = const()[name = tensor("op_764"), val = tensor([1, 1500, 12, -1])]; + tensor var_765_cast_fp16 = reshape(shape = var_764, x = linear_36_cast_fp16)[name = tensor("op_765_cast_fp16")]; + tensor const_96_to_fp16 = const()[name = tensor("const_96_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_27_cast_fp16 = mul(x = var_765_cast_fp16, y = const_96_to_fp16)[name = tensor("q_27_cast_fp16")]; + tensor var_771 = const()[name = tensor("op_771"), val = tensor([1, 1500, 12, -1])]; + tensor var_772_cast_fp16 = reshape(shape = var_771, x = linear_37_cast_fp16)[name = tensor("op_772_cast_fp16")]; + tensor const_97_to_fp16 = const()[name = tensor("const_97_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_27_cast_fp16 = mul(x = var_772_cast_fp16, y = const_97_to_fp16)[name = tensor("k_27_cast_fp16")]; + tensor var_778 = const()[name = tensor("op_778"), val = tensor([1, 1500, 12, -1])]; + tensor var_779_cast_fp16 = reshape(shape = var_778, x = linear_38_cast_fp16)[name = tensor("op_779_cast_fp16")]; + tensor var_780 = const()[name = tensor("op_780"), val = tensor([0, 2, 1, 3])]; + tensor qk_13_transpose_x_0 = const()[name = tensor("qk_13_transpose_x_0"), val = tensor(false)]; + tensor qk_13_transpose_y_0 = const()[name = tensor("qk_13_transpose_y_0"), val = tensor(false)]; + tensor transpose_60_perm_0 = const()[name = tensor("transpose_60_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_61_perm_0 = const()[name = tensor("transpose_61_perm_0"), val = tensor([0, 2, 
-1, -3])]; + tensor transpose_93 = transpose(perm = transpose_61_perm_0, x = k_27_cast_fp16)[name = tensor("transpose_93")]; + tensor transpose_94 = transpose(perm = transpose_60_perm_0, x = q_27_cast_fp16)[name = tensor("transpose_94")]; + tensor qk_13_cast_fp16 = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_94, y = transpose_93)[name = tensor("qk_13_cast_fp16")]; + tensor var_784_cast_fp16 = softmax(axis = var_720, x = qk_13_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_transpose_x_0 = const()[name = tensor("op_786_transpose_x_0"), val = tensor(false)]; + tensor var_786_transpose_y_0 = const()[name = tensor("op_786_transpose_y_0"), val = tensor(false)]; + tensor transpose_95 = transpose(perm = var_780, x = var_779_cast_fp16)[name = tensor("transpose_95")]; + tensor var_786_cast_fp16 = matmul(transpose_x = var_786_transpose_x_0, transpose_y = var_786_transpose_y_0, x = var_784_cast_fp16, y = transpose_95)[name = tensor("op_786_cast_fp16")]; + tensor var_787 = const()[name = tensor("op_787"), val = tensor([0, 2, 1, 3])]; + tensor concat_6 = const()[name = tensor("concat_6"), val = tensor([1, 1500, 768])]; + tensor transpose_92 = transpose(perm = var_787, x = var_786_cast_fp16)[name = tensor("transpose_92")]; + tensor x_83_cast_fp16 = reshape(shape = concat_6, x = transpose_92)[name = tensor("x_83_cast_fp16")]; + tensor var_792_to_fp16 = const()[name = tensor("op_792_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94813184)))]; + tensor var_793_to_fp16 = const()[name = tensor("op_793_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95992896)))]; + tensor linear_39_cast_fp16 = linear(bias = var_793_to_fp16, weight = var_792_to_fp16, x = x_83_cast_fp16)[name = tensor("linear_39_cast_fp16")]; + tensor x_85_cast_fp16 = add(x = x_79_cast_fp16, y = linear_39_cast_fp16)[name = tensor("x_85_cast_fp16")]; + tensor var_800_axes_0 = const()[name = tensor("op_800_axes_0"), val = tensor([-1])]; + tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95994496)))]; + tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95996096)))]; + tensor var_800_cast_fp16 = layer_norm(axes = var_800_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_726_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast_fp16)[name = tensor("op_800_cast_fp16")]; + tensor var_809_to_fp16 = const()[name = tensor("op_809_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(95997696)))]; + tensor var_810_to_fp16 = const()[name = tensor("op_810_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100716352)))]; + tensor linear_40_cast_fp16 = linear(bias = var_810_to_fp16, weight = var_809_to_fp16, x = var_800_cast_fp16)[name = tensor("linear_40_cast_fp16")]; + tensor x_89_mode_0 = const()[name = tensor("x_89_mode_0"), val = tensor("EXACT")]; + tensor x_89_cast_fp16 = gelu(mode = x_89_mode_0, x = linear_40_cast_fp16)[name = tensor("x_89_cast_fp16")]; + tensor var_815_to_fp16 = const()[name = tensor("op_815_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100722560)))]; + 
tensor var_816_to_fp16 = const()[name = tensor("op_816_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105441216)))]; + tensor linear_41_cast_fp16 = linear(bias = var_816_to_fp16, weight = var_815_to_fp16, x = x_89_cast_fp16)[name = tensor("linear_41_cast_fp16")]; + tensor x_91_cast_fp16 = add(x = x_85_cast_fp16, y = linear_41_cast_fp16)[name = tensor("x_91_cast_fp16")]; + tensor var_826 = const()[name = tensor("op_826"), val = tensor(-1)]; + tensor var_842_axes_0 = const()[name = tensor("op_842_axes_0"), val = tensor([-1])]; + tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105442816)))]; + tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105444416)))]; + tensor var_832_to_fp16 = const()[name = tensor("op_832_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_842_cast_fp16 = layer_norm(axes = var_842_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_832_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast_fp16)[name = tensor("op_842_cast_fp16")]; + tensor var_853_to_fp16 = const()[name = tensor("op_853_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105446016)))]; + tensor var_854_to_fp16 = const()[name = tensor("op_854_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106625728)))]; + tensor linear_42_cast_fp16 = linear(bias = var_854_to_fp16, weight = var_853_to_fp16, x = var_842_cast_fp16)[name = tensor("linear_42_cast_fp16")]; + tensor var_857_to_fp16 = const()[name = tensor("op_857_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106627328)))]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_857_to_fp16, x = var_842_cast_fp16)[name = tensor("linear_43_cast_fp16")]; + tensor var_861_to_fp16 = const()[name = tensor("op_861_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107807040)))]; + tensor var_862_to_fp16 = const()[name = tensor("op_862_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108986752)))]; + tensor linear_44_cast_fp16 = linear(bias = var_862_to_fp16, weight = var_861_to_fp16, x = var_842_cast_fp16)[name = tensor("linear_44_cast_fp16")]; + tensor var_870 = const()[name = tensor("op_870"), val = tensor([1, 1500, 12, -1])]; + tensor var_871_cast_fp16 = reshape(shape = var_870, x = linear_42_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_98_to_fp16 = const()[name = tensor("const_98_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_31_cast_fp16 = mul(x = var_871_cast_fp16, y = const_98_to_fp16)[name = tensor("q_31_cast_fp16")]; + tensor var_877 = const()[name = tensor("op_877"), val = tensor([1, 1500, 12, -1])]; + tensor var_878_cast_fp16 = reshape(shape = var_877, x = linear_43_cast_fp16)[name = tensor("op_878_cast_fp16")]; + tensor const_99_to_fp16 = const()[name = tensor("const_99_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_31_cast_fp16 = mul(x = var_878_cast_fp16, y = const_99_to_fp16)[name = tensor("k_31_cast_fp16")]; + tensor var_884 = const()[name = tensor("op_884"), val = tensor([1, 1500, 12, -1])]; + tensor var_885_cast_fp16 
= reshape(shape = var_884, x = linear_44_cast_fp16)[name = tensor("op_885_cast_fp16")]; + tensor var_886 = const()[name = tensor("op_886"), val = tensor([0, 2, 1, 3])]; + tensor qk_15_transpose_x_0 = const()[name = tensor("qk_15_transpose_x_0"), val = tensor(false)]; + tensor qk_15_transpose_y_0 = const()[name = tensor("qk_15_transpose_y_0"), val = tensor(false)]; + tensor transpose_62_perm_0 = const()[name = tensor("transpose_62_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_63_perm_0 = const()[name = tensor("transpose_63_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_89 = transpose(perm = transpose_63_perm_0, x = k_31_cast_fp16)[name = tensor("transpose_89")]; + tensor transpose_90 = transpose(perm = transpose_62_perm_0, x = q_31_cast_fp16)[name = tensor("transpose_90")]; + tensor qk_15_cast_fp16 = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_90, y = transpose_89)[name = tensor("qk_15_cast_fp16")]; + tensor var_890_cast_fp16 = softmax(axis = var_826, x = qk_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_892_transpose_x_0 = const()[name = tensor("op_892_transpose_x_0"), val = tensor(false)]; + tensor var_892_transpose_y_0 = const()[name = tensor("op_892_transpose_y_0"), val = tensor(false)]; + tensor transpose_91 = transpose(perm = var_886, x = var_885_cast_fp16)[name = tensor("transpose_91")]; + tensor var_892_cast_fp16 = matmul(transpose_x = var_892_transpose_x_0, transpose_y = var_892_transpose_y_0, x = var_890_cast_fp16, y = transpose_91)[name = tensor("op_892_cast_fp16")]; + tensor var_893 = const()[name = tensor("op_893"), val = tensor([0, 2, 1, 3])]; + tensor concat_7 = const()[name = tensor("concat_7"), val = tensor([1, 1500, 768])]; + tensor transpose_88 = transpose(perm = var_893, x = var_892_cast_fp16)[name = tensor("transpose_88")]; + tensor x_95_cast_fp16 = reshape(shape = concat_7, x = transpose_88)[name = tensor("x_95_cast_fp16")]; + tensor var_898_to_fp16 = const()[name = tensor("op_898_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108988352)))]; + tensor var_899_to_fp16 = const()[name = tensor("op_899_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110168064)))]; + tensor linear_45_cast_fp16 = linear(bias = var_899_to_fp16, weight = var_898_to_fp16, x = x_95_cast_fp16)[name = tensor("linear_45_cast_fp16")]; + tensor x_97_cast_fp16 = add(x = x_91_cast_fp16, y = linear_45_cast_fp16)[name = tensor("x_97_cast_fp16")]; + tensor var_906_axes_0 = const()[name = tensor("op_906_axes_0"), val = tensor([-1])]; + tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110169664)))]; + tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110171264)))]; + tensor var_906_cast_fp16 = layer_norm(axes = var_906_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_832_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast_fp16)[name = tensor("op_906_cast_fp16")]; + tensor var_915_to_fp16 = const()[name = tensor("op_915_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110172864)))]; + tensor var_916_to_fp16 = const()[name = tensor("op_916_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(114891520)))]; + tensor linear_46_cast_fp16 = linear(bias = var_916_to_fp16, weight = var_915_to_fp16, x = var_906_cast_fp16)[name = tensor("linear_46_cast_fp16")]; + tensor x_101_mode_0 = const()[name = tensor("x_101_mode_0"), val = tensor("EXACT")]; + tensor x_101_cast_fp16 = gelu(mode = x_101_mode_0, x = linear_46_cast_fp16)[name = tensor("x_101_cast_fp16")]; + tensor var_921_to_fp16 = const()[name = tensor("op_921_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114897728)))]; + tensor var_922_to_fp16 = const()[name = tensor("op_922_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119616384)))]; + tensor linear_47_cast_fp16 = linear(bias = var_922_to_fp16, weight = var_921_to_fp16, x = x_101_cast_fp16)[name = tensor("linear_47_cast_fp16")]; + tensor x_103_cast_fp16 = add(x = x_97_cast_fp16, y = linear_47_cast_fp16)[name = tensor("x_103_cast_fp16")]; + tensor var_932 = const()[name = tensor("op_932"), val = tensor(-1)]; + tensor var_948_axes_0 = const()[name = tensor("op_948_axes_0"), val = tensor([-1])]; + tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119617984)))]; + tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119619584)))]; + tensor var_938_to_fp16 = const()[name = tensor("op_938_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_948_cast_fp16 = layer_norm(axes = var_948_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_938_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_959_to_fp16 = const()[name = tensor("op_959_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119621184)))]; + tensor var_960_to_fp16 = const()[name = tensor("op_960_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120800896)))]; + tensor linear_48_cast_fp16 = linear(bias = var_960_to_fp16, weight = var_959_to_fp16, x = var_948_cast_fp16)[name = tensor("linear_48_cast_fp16")]; + tensor var_963_to_fp16 = const()[name = tensor("op_963_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120802496)))]; + tensor linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_963_to_fp16, x = var_948_cast_fp16)[name = tensor("linear_49_cast_fp16")]; + tensor var_967_to_fp16 = const()[name = tensor("op_967_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121982208)))]; + tensor var_968_to_fp16 = const()[name = tensor("op_968_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123161920)))]; + tensor linear_50_cast_fp16 = linear(bias = var_968_to_fp16, weight = var_967_to_fp16, x = var_948_cast_fp16)[name = tensor("linear_50_cast_fp16")]; + tensor var_976 = const()[name = tensor("op_976"), val = tensor([1, 1500, 12, -1])]; + tensor var_977_cast_fp16 = reshape(shape = var_976, x = linear_48_cast_fp16)[name = tensor("op_977_cast_fp16")]; + tensor const_100_to_fp16 = const()[name = tensor("const_100_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_35_cast_fp16 = mul(x = 
var_977_cast_fp16, y = const_100_to_fp16)[name = tensor("q_35_cast_fp16")]; + tensor var_983 = const()[name = tensor("op_983"), val = tensor([1, 1500, 12, -1])]; + tensor var_984_cast_fp16 = reshape(shape = var_983, x = linear_49_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor const_101_to_fp16 = const()[name = tensor("const_101_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_35_cast_fp16 = mul(x = var_984_cast_fp16, y = const_101_to_fp16)[name = tensor("k_35_cast_fp16")]; + tensor var_990 = const()[name = tensor("op_990"), val = tensor([1, 1500, 12, -1])]; + tensor var_991_cast_fp16 = reshape(shape = var_990, x = linear_50_cast_fp16)[name = tensor("op_991_cast_fp16")]; + tensor var_992 = const()[name = tensor("op_992"), val = tensor([0, 2, 1, 3])]; + tensor qk_17_transpose_x_0 = const()[name = tensor("qk_17_transpose_x_0"), val = tensor(false)]; + tensor qk_17_transpose_y_0 = const()[name = tensor("qk_17_transpose_y_0"), val = tensor(false)]; + tensor transpose_64_perm_0 = const()[name = tensor("transpose_64_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_65_perm_0 = const()[name = tensor("transpose_65_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_85 = transpose(perm = transpose_65_perm_0, x = k_35_cast_fp16)[name = tensor("transpose_85")]; + tensor transpose_86 = transpose(perm = transpose_64_perm_0, x = q_35_cast_fp16)[name = tensor("transpose_86")]; + tensor qk_17_cast_fp16 = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_86, y = transpose_85)[name = tensor("qk_17_cast_fp16")]; + tensor var_996_cast_fp16 = softmax(axis = var_932, x = qk_17_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_998_transpose_x_0 = const()[name = tensor("op_998_transpose_x_0"), val = tensor(false)]; + tensor var_998_transpose_y_0 = const()[name = tensor("op_998_transpose_y_0"), val = tensor(false)]; + tensor transpose_87 = transpose(perm = var_992, x = var_991_cast_fp16)[name = tensor("transpose_87")]; + tensor var_998_cast_fp16 = matmul(transpose_x = var_998_transpose_x_0, transpose_y = var_998_transpose_y_0, x = var_996_cast_fp16, y = transpose_87)[name = tensor("op_998_cast_fp16")]; + tensor var_999 = const()[name = tensor("op_999"), val = tensor([0, 2, 1, 3])]; + tensor concat_8 = const()[name = tensor("concat_8"), val = tensor([1, 1500, 768])]; + tensor transpose_84 = transpose(perm = var_999, x = var_998_cast_fp16)[name = tensor("transpose_84")]; + tensor x_107_cast_fp16 = reshape(shape = concat_8, x = transpose_84)[name = tensor("x_107_cast_fp16")]; + tensor var_1004_to_fp16 = const()[name = tensor("op_1004_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123163520)))]; + tensor var_1005_to_fp16 = const()[name = tensor("op_1005_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124343232)))]; + tensor linear_51_cast_fp16 = linear(bias = var_1005_to_fp16, weight = var_1004_to_fp16, x = x_107_cast_fp16)[name = tensor("linear_51_cast_fp16")]; + tensor x_109_cast_fp16 = add(x = x_103_cast_fp16, y = linear_51_cast_fp16)[name = tensor("x_109_cast_fp16")]; + tensor var_1012_axes_0 = const()[name = tensor("op_1012_axes_0"), val = tensor([-1])]; + tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124344832)))]; + tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = 
tensor("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124346432)))]; + tensor var_1012_cast_fp16 = layer_norm(axes = var_1012_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_938_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1021_to_fp16 = const()[name = tensor("op_1021_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124348032)))]; + tensor var_1022_to_fp16 = const()[name = tensor("op_1022_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129066688)))]; + tensor linear_52_cast_fp16 = linear(bias = var_1022_to_fp16, weight = var_1021_to_fp16, x = var_1012_cast_fp16)[name = tensor("linear_52_cast_fp16")]; + tensor x_113_mode_0 = const()[name = tensor("x_113_mode_0"), val = tensor("EXACT")]; + tensor x_113_cast_fp16 = gelu(mode = x_113_mode_0, x = linear_52_cast_fp16)[name = tensor("x_113_cast_fp16")]; + tensor var_1027_to_fp16 = const()[name = tensor("op_1027_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129072896)))]; + tensor var_1028_to_fp16 = const()[name = tensor("op_1028_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133791552)))]; + tensor linear_53_cast_fp16 = linear(bias = var_1028_to_fp16, weight = var_1027_to_fp16, x = x_113_cast_fp16)[name = tensor("linear_53_cast_fp16")]; + tensor x_115_cast_fp16 = add(x = x_109_cast_fp16, y = linear_53_cast_fp16)[name = tensor("x_115_cast_fp16")]; + tensor var_1038 = const()[name = tensor("op_1038"), val = tensor(-1)]; + tensor var_1054_axes_0 = const()[name = tensor("op_1054_axes_0"), val = tensor([-1])]; + tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133793152)))]; + tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133794752)))]; + tensor var_1044_to_fp16 = const()[name = tensor("op_1044_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1054_cast_fp16 = layer_norm(axes = var_1054_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1044_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1065_to_fp16 = const()[name = tensor("op_1065_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133796352)))]; + tensor var_1066_to_fp16 = const()[name = tensor("op_1066_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134976064)))]; + tensor linear_54_cast_fp16 = linear(bias = var_1066_to_fp16, weight = var_1065_to_fp16, x = var_1054_cast_fp16)[name = tensor("linear_54_cast_fp16")]; + tensor var_1069_to_fp16 = const()[name = tensor("op_1069_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134977664)))]; + tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1069_to_fp16, x = var_1054_cast_fp16)[name = tensor("linear_55_cast_fp16")]; + tensor var_1073_to_fp16 = const()[name = tensor("op_1073_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(136157376)))]; + tensor var_1074_to_fp16 = const()[name = tensor("op_1074_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137337088)))]; + tensor linear_56_cast_fp16 = linear(bias = var_1074_to_fp16, weight = var_1073_to_fp16, x = var_1054_cast_fp16)[name = tensor("linear_56_cast_fp16")]; + tensor var_1082 = const()[name = tensor("op_1082"), val = tensor([1, 1500, 12, -1])]; + tensor var_1083_cast_fp16 = reshape(shape = var_1082, x = linear_54_cast_fp16)[name = tensor("op_1083_cast_fp16")]; + tensor const_102_to_fp16 = const()[name = tensor("const_102_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_39_cast_fp16 = mul(x = var_1083_cast_fp16, y = const_102_to_fp16)[name = tensor("q_39_cast_fp16")]; + tensor var_1089 = const()[name = tensor("op_1089"), val = tensor([1, 1500, 12, -1])]; + tensor var_1090_cast_fp16 = reshape(shape = var_1089, x = linear_55_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor const_103_to_fp16 = const()[name = tensor("const_103_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_39_cast_fp16 = mul(x = var_1090_cast_fp16, y = const_103_to_fp16)[name = tensor("k_39_cast_fp16")]; + tensor var_1096 = const()[name = tensor("op_1096"), val = tensor([1, 1500, 12, -1])]; + tensor var_1097_cast_fp16 = reshape(shape = var_1096, x = linear_56_cast_fp16)[name = tensor("op_1097_cast_fp16")]; + tensor var_1098 = const()[name = tensor("op_1098"), val = tensor([0, 2, 1, 3])]; + tensor qk_19_transpose_x_0 = const()[name = tensor("qk_19_transpose_x_0"), val = tensor(false)]; + tensor qk_19_transpose_y_0 = const()[name = tensor("qk_19_transpose_y_0"), val = tensor(false)]; + tensor transpose_66_perm_0 = const()[name = tensor("transpose_66_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_67_perm_0 = const()[name = tensor("transpose_67_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_81 = transpose(perm = transpose_67_perm_0, x = k_39_cast_fp16)[name = tensor("transpose_81")]; + tensor transpose_82 = transpose(perm = transpose_66_perm_0, x = q_39_cast_fp16)[name = tensor("transpose_82")]; + tensor qk_19_cast_fp16 = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_82, y = transpose_81)[name = tensor("qk_19_cast_fp16")]; + tensor var_1102_cast_fp16 = softmax(axis = var_1038, x = qk_19_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1104_transpose_x_0 = const()[name = tensor("op_1104_transpose_x_0"), val = tensor(false)]; + tensor var_1104_transpose_y_0 = const()[name = tensor("op_1104_transpose_y_0"), val = tensor(false)]; + tensor transpose_83 = transpose(perm = var_1098, x = var_1097_cast_fp16)[name = tensor("transpose_83")]; + tensor var_1104_cast_fp16 = matmul(transpose_x = var_1104_transpose_x_0, transpose_y = var_1104_transpose_y_0, x = var_1102_cast_fp16, y = transpose_83)[name = tensor("op_1104_cast_fp16")]; + tensor var_1105 = const()[name = tensor("op_1105"), val = tensor([0, 2, 1, 3])]; + tensor concat_9 = const()[name = tensor("concat_9"), val = tensor([1, 1500, 768])]; + tensor transpose_80 = transpose(perm = var_1105, x = var_1104_cast_fp16)[name = tensor("transpose_80")]; + tensor x_119_cast_fp16 = reshape(shape = concat_9, x = transpose_80)[name = tensor("x_119_cast_fp16")]; + tensor var_1110_to_fp16 = const()[name = tensor("op_1110_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137338688)))]; + tensor var_1111_to_fp16 = const()[name = tensor("op_1111_to_fp16"), val = 
tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138518400)))]; + tensor linear_57_cast_fp16 = linear(bias = var_1111_to_fp16, weight = var_1110_to_fp16, x = x_119_cast_fp16)[name = tensor("linear_57_cast_fp16")]; + tensor x_121_cast_fp16 = add(x = x_115_cast_fp16, y = linear_57_cast_fp16)[name = tensor("x_121_cast_fp16")]; + tensor var_1118_axes_0 = const()[name = tensor("op_1118_axes_0"), val = tensor([-1])]; + tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138520000)))]; + tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138521600)))]; + tensor var_1118_cast_fp16 = layer_norm(axes = var_1118_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1044_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast_fp16)[name = tensor("op_1118_cast_fp16")]; + tensor var_1127_to_fp16 = const()[name = tensor("op_1127_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138523200)))]; + tensor var_1128_to_fp16 = const()[name = tensor("op_1128_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143241856)))]; + tensor linear_58_cast_fp16 = linear(bias = var_1128_to_fp16, weight = var_1127_to_fp16, x = var_1118_cast_fp16)[name = tensor("linear_58_cast_fp16")]; + tensor x_125_mode_0 = const()[name = tensor("x_125_mode_0"), val = tensor("EXACT")]; + tensor x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = linear_58_cast_fp16)[name = tensor("x_125_cast_fp16")]; + tensor var_1133_to_fp16 = const()[name = tensor("op_1133_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143248064)))]; + tensor var_1134_to_fp16 = const()[name = tensor("op_1134_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147966720)))]; + tensor linear_59_cast_fp16 = linear(bias = var_1134_to_fp16, weight = var_1133_to_fp16, x = x_125_cast_fp16)[name = tensor("linear_59_cast_fp16")]; + tensor x_127_cast_fp16 = add(x = x_121_cast_fp16, y = linear_59_cast_fp16)[name = tensor("x_127_cast_fp16")]; + tensor var_1144 = const()[name = tensor("op_1144"), val = tensor(-1)]; + tensor var_1160_axes_0 = const()[name = tensor("op_1160_axes_0"), val = tensor([-1])]; + tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147968320)))]; + tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147969920)))]; + tensor var_1150_to_fp16 = const()[name = tensor("op_1150_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1160_cast_fp16 = layer_norm(axes = var_1160_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1150_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1171_to_fp16 = const()[name = tensor("op_1171_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147971520)))]; + tensor var_1172_to_fp16 = const()[name = tensor("op_1172_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(149151232)))]; + tensor linear_60_cast_fp16 = linear(bias = var_1172_to_fp16, weight = var_1171_to_fp16, x = var_1160_cast_fp16)[name = tensor("linear_60_cast_fp16")]; + tensor var_1175_to_fp16 = const()[name = tensor("op_1175_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149152832)))]; + tensor linear_61_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1175_to_fp16, x = var_1160_cast_fp16)[name = tensor("linear_61_cast_fp16")]; + tensor var_1179_to_fp16 = const()[name = tensor("op_1179_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150332544)))]; + tensor var_1180_to_fp16 = const()[name = tensor("op_1180_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151512256)))]; + tensor linear_62_cast_fp16 = linear(bias = var_1180_to_fp16, weight = var_1179_to_fp16, x = var_1160_cast_fp16)[name = tensor("linear_62_cast_fp16")]; + tensor var_1188 = const()[name = tensor("op_1188"), val = tensor([1, 1500, 12, -1])]; + tensor var_1189_cast_fp16 = reshape(shape = var_1188, x = linear_60_cast_fp16)[name = tensor("op_1189_cast_fp16")]; + tensor const_104_to_fp16 = const()[name = tensor("const_104_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_43_cast_fp16 = mul(x = var_1189_cast_fp16, y = const_104_to_fp16)[name = tensor("q_43_cast_fp16")]; + tensor var_1195 = const()[name = tensor("op_1195"), val = tensor([1, 1500, 12, -1])]; + tensor var_1196_cast_fp16 = reshape(shape = var_1195, x = linear_61_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor const_105_to_fp16 = const()[name = tensor("const_105_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_43_cast_fp16 = mul(x = var_1196_cast_fp16, y = const_105_to_fp16)[name = tensor("k_43_cast_fp16")]; + tensor var_1202 = const()[name = tensor("op_1202"), val = tensor([1, 1500, 12, -1])]; + tensor var_1203_cast_fp16 = reshape(shape = var_1202, x = linear_62_cast_fp16)[name = tensor("op_1203_cast_fp16")]; + tensor var_1204 = const()[name = tensor("op_1204"), val = tensor([0, 2, 1, 3])]; + tensor qk_21_transpose_x_0 = const()[name = tensor("qk_21_transpose_x_0"), val = tensor(false)]; + tensor qk_21_transpose_y_0 = const()[name = tensor("qk_21_transpose_y_0"), val = tensor(false)]; + tensor transpose_68_perm_0 = const()[name = tensor("transpose_68_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_69_perm_0 = const()[name = tensor("transpose_69_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_77 = transpose(perm = transpose_69_perm_0, x = k_43_cast_fp16)[name = tensor("transpose_77")]; + tensor transpose_78 = transpose(perm = transpose_68_perm_0, x = q_43_cast_fp16)[name = tensor("transpose_78")]; + tensor qk_21_cast_fp16 = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_78, y = transpose_77)[name = tensor("qk_21_cast_fp16")]; + tensor var_1208_cast_fp16 = softmax(axis = var_1144, x = qk_21_cast_fp16)[name = tensor("op_1208_cast_fp16")]; + tensor var_1210_transpose_x_0 = const()[name = tensor("op_1210_transpose_x_0"), val = tensor(false)]; + tensor var_1210_transpose_y_0 = const()[name = tensor("op_1210_transpose_y_0"), val = tensor(false)]; + tensor transpose_79 = transpose(perm = var_1204, x = var_1203_cast_fp16)[name = tensor("transpose_79")]; + tensor var_1210_cast_fp16 = matmul(transpose_x = var_1210_transpose_x_0, transpose_y = var_1210_transpose_y_0, x = 
var_1208_cast_fp16, y = transpose_79)[name = tensor("op_1210_cast_fp16")]; + tensor var_1211 = const()[name = tensor("op_1211"), val = tensor([0, 2, 1, 3])]; + tensor concat_10 = const()[name = tensor("concat_10"), val = tensor([1, 1500, 768])]; + tensor transpose_76 = transpose(perm = var_1211, x = var_1210_cast_fp16)[name = tensor("transpose_76")]; + tensor x_131_cast_fp16 = reshape(shape = concat_10, x = transpose_76)[name = tensor("x_131_cast_fp16")]; + tensor var_1216_to_fp16 = const()[name = tensor("op_1216_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151513856)))]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152693568)))]; + tensor linear_63_cast_fp16 = linear(bias = var_1217_to_fp16, weight = var_1216_to_fp16, x = x_131_cast_fp16)[name = tensor("linear_63_cast_fp16")]; + tensor x_133_cast_fp16 = add(x = x_127_cast_fp16, y = linear_63_cast_fp16)[name = tensor("x_133_cast_fp16")]; + tensor var_1224_axes_0 = const()[name = tensor("op_1224_axes_0"), val = tensor([-1])]; + tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152695168)))]; + tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152696768)))]; + tensor var_1224_cast_fp16 = layer_norm(axes = var_1224_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1150_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast_fp16)[name = tensor("op_1224_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152698368)))]; + tensor var_1234_to_fp16 = const()[name = tensor("op_1234_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157417024)))]; + tensor linear_64_cast_fp16 = linear(bias = var_1234_to_fp16, weight = var_1233_to_fp16, x = var_1224_cast_fp16)[name = tensor("linear_64_cast_fp16")]; + tensor x_137_mode_0 = const()[name = tensor("x_137_mode_0"), val = tensor("EXACT")]; + tensor x_137_cast_fp16 = gelu(mode = x_137_mode_0, x = linear_64_cast_fp16)[name = tensor("x_137_cast_fp16")]; + tensor var_1239_to_fp16 = const()[name = tensor("op_1239_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157423232)))]; + tensor var_1240_to_fp16 = const()[name = tensor("op_1240_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162141888)))]; + tensor linear_65_cast_fp16 = linear(bias = var_1240_to_fp16, weight = var_1239_to_fp16, x = x_137_cast_fp16)[name = tensor("linear_65_cast_fp16")]; + tensor x_139_cast_fp16 = add(x = x_133_cast_fp16, y = linear_65_cast_fp16)[name = tensor("x_139_cast_fp16")]; + tensor var_1250 = const()[name = tensor("op_1250"), val = tensor(-1)]; + tensor var_1266_axes_0 = const()[name = tensor("op_1266_axes_0"), val = tensor([-1])]; + tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162143488)))]; + tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = 
tensor("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162145088)))]; + tensor var_1256_to_fp16 = const()[name = tensor("op_1256_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1266_cast_fp16 = layer_norm(axes = var_1266_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1256_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162146688)))]; + tensor var_1278_to_fp16 = const()[name = tensor("op_1278_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163326400)))]; + tensor linear_66_cast_fp16 = linear(bias = var_1278_to_fp16, weight = var_1277_to_fp16, x = var_1266_cast_fp16)[name = tensor("linear_66_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163328000)))]; + tensor linear_67_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = var_1281_to_fp16, x = var_1266_cast_fp16)[name = tensor("linear_67_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164507712)))]; + tensor var_1286_to_fp16 = const()[name = tensor("op_1286_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165687424)))]; + tensor linear_68_cast_fp16 = linear(bias = var_1286_to_fp16, weight = var_1285_to_fp16, x = var_1266_cast_fp16)[name = tensor("linear_68_cast_fp16")]; + tensor var_1294 = const()[name = tensor("op_1294"), val = tensor([1, 1500, 12, -1])]; + tensor var_1295_cast_fp16 = reshape(shape = var_1294, x = linear_66_cast_fp16)[name = tensor("op_1295_cast_fp16")]; + tensor const_106_to_fp16 = const()[name = tensor("const_106_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor q_cast_fp16 = mul(x = var_1295_cast_fp16, y = const_106_to_fp16)[name = tensor("q_cast_fp16")]; + tensor var_1301 = const()[name = tensor("op_1301"), val = tensor([1, 1500, 12, -1])]; + tensor var_1302_cast_fp16 = reshape(shape = var_1301, x = linear_67_cast_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor const_107_to_fp16 = const()[name = tensor("const_107_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; + tensor k_cast_fp16 = mul(x = var_1302_cast_fp16, y = const_107_to_fp16)[name = tensor("k_cast_fp16")]; + tensor var_1308 = const()[name = tensor("op_1308"), val = tensor([1, 1500, 12, -1])]; + tensor var_1309_cast_fp16 = reshape(shape = var_1308, x = linear_68_cast_fp16)[name = tensor("op_1309_cast_fp16")]; + tensor var_1310 = const()[name = tensor("op_1310"), val = tensor([0, 2, 1, 3])]; + tensor qk_transpose_x_0 = const()[name = tensor("qk_transpose_x_0"), val = tensor(false)]; + tensor qk_transpose_y_0 = const()[name = tensor("qk_transpose_y_0"), val = tensor(false)]; + tensor transpose_70_perm_0 = const()[name = tensor("transpose_70_perm_0"), val = tensor([0, 2, -3, -1])]; + tensor transpose_71_perm_0 = const()[name = tensor("transpose_71_perm_0"), val = tensor([0, 2, -1, -3])]; + tensor transpose_73 = transpose(perm = transpose_71_perm_0, x = k_cast_fp16)[name = tensor("transpose_73")]; + tensor transpose_74 = transpose(perm = transpose_70_perm_0, x = q_cast_fp16)[name = tensor("transpose_74")]; + 
tensor qk_cast_fp16 = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_74, y = transpose_73)[name = tensor("qk_cast_fp16")]; + tensor var_1314_cast_fp16 = softmax(axis = var_1250, x = qk_cast_fp16)[name = tensor("op_1314_cast_fp16")]; + tensor var_1316_transpose_x_0 = const()[name = tensor("op_1316_transpose_x_0"), val = tensor(false)]; + tensor var_1316_transpose_y_0 = const()[name = tensor("op_1316_transpose_y_0"), val = tensor(false)]; + tensor transpose_75 = transpose(perm = var_1310, x = var_1309_cast_fp16)[name = tensor("transpose_75")]; + tensor var_1316_cast_fp16 = matmul(transpose_x = var_1316_transpose_x_0, transpose_y = var_1316_transpose_y_0, x = var_1314_cast_fp16, y = transpose_75)[name = tensor("op_1316_cast_fp16")]; + tensor var_1317 = const()[name = tensor("op_1317"), val = tensor([0, 2, 1, 3])]; + tensor concat_11 = const()[name = tensor("concat_11"), val = tensor([1, 1500, 768])]; + tensor transpose_72 = transpose(perm = var_1317, x = var_1316_cast_fp16)[name = tensor("transpose_72")]; + tensor x_143_cast_fp16 = reshape(shape = concat_11, x = transpose_72)[name = tensor("x_143_cast_fp16")]; + tensor var_1322_to_fp16 = const()[name = tensor("op_1322_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165689024)))]; + tensor var_1323_to_fp16 = const()[name = tensor("op_1323_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166868736)))]; + tensor linear_69_cast_fp16 = linear(bias = var_1323_to_fp16, weight = var_1322_to_fp16, x = x_143_cast_fp16)[name = tensor("linear_69_cast_fp16")]; + tensor x_145_cast_fp16 = add(x = x_139_cast_fp16, y = linear_69_cast_fp16)[name = tensor("x_145_cast_fp16")]; + tensor var_1330_axes_0 = const()[name = tensor("op_1330_axes_0"), val = tensor([-1])]; + tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166870336)))]; + tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166871936)))]; + tensor var_1330_cast_fp16 = layer_norm(axes = var_1330_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1256_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1339_to_fp16 = const()[name = tensor("op_1339_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166873536)))]; + tensor var_1340_to_fp16 = const()[name = tensor("op_1340_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171592192)))]; + tensor linear_70_cast_fp16 = linear(bias = var_1340_to_fp16, weight = var_1339_to_fp16, x = var_1330_cast_fp16)[name = tensor("linear_70_cast_fp16")]; + tensor x_149_mode_0 = const()[name = tensor("x_149_mode_0"), val = tensor("EXACT")]; + tensor x_149_cast_fp16 = gelu(mode = x_149_mode_0, x = linear_70_cast_fp16)[name = tensor("x_149_cast_fp16")]; + tensor var_1345_to_fp16 = const()[name = tensor("op_1345_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171598400)))]; + tensor var_1346_to_fp16 = const()[name = tensor("op_1346_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176317056)))]; + tensor 
linear_71_cast_fp16 = linear(bias = var_1346_to_fp16, weight = var_1345_to_fp16, x = x_149_cast_fp16)[name = tensor("linear_71_cast_fp16")]; + tensor x_cast_fp16 = add(x = x_145_cast_fp16, y = linear_71_cast_fp16)[name = tensor("x_cast_fp16")]; + tensor var_1359_axes_0 = const()[name = tensor("op_1359_axes_0"), val = tensor([-1])]; + tensor ln_post_weight_to_fp16 = const()[name = tensor("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176318656)))]; + tensor ln_post_bias_to_fp16 = const()[name = tensor("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176320256)))]; + tensor var_1350_to_fp16 = const()[name = tensor("op_1350_to_fp16"), val = tensor(0x1.5p-17)]; + tensor output = layer_norm(axes = var_1359_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_1350_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast_fp16)[name = tensor("op_1359_cast_fp16")]; + } -> (output); +} \ No newline at end of file