diff --git "a/SmolLM2-135M-Instruct-4bit.mlmodelc/model.mil" "b/SmolLM2-135M-Instruct-4bit.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/SmolLM2-135M-Instruct-4bit.mlmodelc/model.mil" @@ -0,0 +1,6234 @@ +program(1.3) +[buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] +{ + func main(tensor causal_mask, tensor input_ids, state> key_cache, state> value_cache) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1, 1]}, {"input_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 1], [1, 2048], [1, 2048]]}, {"input_ids", [[1, 1], [1, 2048]]}})))] { + tensor var_7_shape_cast_fp16 = shape(x = causal_mask)[name = string("op_7_shape_cast_fp16")]; + int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; + int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; + bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; + string var_7_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_7_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; + uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(3)]; + tensor var_7_shape_cast_fp16_to_int16 = cast(dtype = var_7_shape_cast_fp16_to_int16_dtype_0, x = var_7_shape_cast_fp16)[name = string("cast_248")]; + int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_7_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; + string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor var_10_shape = shape(x = input_ids)[name = string("op_10_shape")]; + int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; + int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; + bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; + string var_10_shape_to_uint16_dtype_0 = const()[name = string("op_10_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; + tensor var_10_shape_to_uint16 = cast(dtype = var_10_shape_to_uint16_dtype_0, x = var_10_shape)[name = string("cast_247")]; + uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_10_shape_to_uint16)[name = string("gather_1_cast_uint16")]; + string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_245")]; + int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_246")]; + int32 past_seen_tokens = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("past_seen_tokens")]; + int32 var_81 = const()[name = string("op_81"), val = int32(-1)]; + int32 var_85 = const()[name = string("op_85"), val = int32(3)]; + int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)]; + int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)]; + bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)]; + tensor model_model_embed_tokens_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(14155904))))[name = string("model_model_embed_tokens_weight_to_fp16_quantized")]; + tensor inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_to_fp16_quantized)[name = string("inputs_embeds_cast_fp16")]; + tensor var_183_shape_cast_fp16 = shape(x = inputs_embeds_cast_fp16)[name = string("op_183_shape_cast_fp16")]; + int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; + int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; + bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; + string var_183_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_183_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; + tensor var_183_shape_cast_fp16_to_uint16 = cast(dtype = var_183_shape_cast_fp16_to_uint16_dtype_0, x = var_183_shape_cast_fp16)[name = string("cast_244")]; + uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_183_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; + string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_243")]; + int32 var_185 = add(x = past_seen_tokens, y = gather_2_cast_uint16_to_int32)[name = string("op_185")]; + int32 const_0 = const()[name = string("const_0"), val = int32(1)]; + tensor cache_position = range_1d(end = var_185, start = past_seen_tokens, step = const_0)[name = string("cache_position")]; + tensor position_ids_axes_0 = const()[name = string("position_ids_axes_0"), val = tensor([0])]; + tensor position_ids = expand_dims(axes = position_ids_axes_0, x = cache_position)[name = string("position_ids")]; + tensor var_198_axes_0 = const()[name = string("op_198_axes_0"), val = tensor([1])]; + tensor var_198 = expand_dims(axes = var_198_axes_0, x = position_ids)[name = string("op_198")]; + bool var_203_transpose_x_0 = const()[name = string("op_203_transpose_x_0"), val = bool(false)]; + bool var_203_transpose_y_0 = const()[name = string("op_203_transpose_y_0"), val = bool(false)]; + tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15925440)))]; + string cast_2_to_fp16_dtype_0 = const()[name = string("cast_2_to_fp16_dtype_0"), val = string("fp16")]; + tensor var_198_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = var_198)[name = string("cast_242")]; + tensor var_203_cast_fp16 = matmul(transpose_x = var_203_transpose_x_0, transpose_y = var_203_transpose_y_0, x = const_2_to_fp16, y = var_198_to_fp16)[name = string("op_203_cast_fp16")]; + tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; + bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; + tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_203_cast_fp16)[name = string("transpose_120")]; + tensor emb_cast_fp16 = concat(axis = var_81, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; + tensor cos_1_cast_fp16 = cos(x = emb_cast_fp16)[name = string("cos_1_cast_fp16")]; + tensor sin_1_cast_fp16 = sin(x = emb_cast_fp16)[name = string("sin_1_cast_fp16")]; + fp16 var_76_promoted_to_fp16 = const()[name = string("op_76_promoted_to_fp16"), val = fp16(0x1p+1)]; + tensor var_224_cast_fp16 = pow(x = inputs_embeds_cast_fp16, y = var_76_promoted_to_fp16)[name = string("op_224_cast_fp16")]; + tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; + bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; + tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_224_cast_fp16)[name = string("variance_1_cast_fp16")]; + fp16 var_227_to_fp16 = const()[name = string("op_227_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_228_cast_fp16 = add(x = variance_1_cast_fp16, y = var_227_to_fp16)[name = string("op_228_cast_fp16")]; + fp32 var_229_epsilon_0 = const()[name = string("op_229_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_229_cast_fp16 = rsqrt(epsilon = var_229_epsilon_0, x = var_228_cast_fp16)[name = string("op_229_cast_fp16")]; + tensor hidden_states_3_cast_fp16 = mul(x = inputs_embeds_cast_fp16, y = var_229_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; + tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15925568)))]; + tensor hidden_states_7_cast_fp16 = mul(x = model_model_layers_0_input_layernorm_weight_to_fp16, y = hidden_states_3_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; + tensor var_240_shape_cast_fp16 = shape(x = hidden_states_7_cast_fp16)[name = string("op_240_shape_cast_fp16")]; + int32 gather_4 = const()[name = string("gather_4"), val = int32(1)]; + int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; + int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; + bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; + string var_240_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_240_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; + tensor var_240_shape_cast_fp16_to_uint16 = cast(dtype = var_240_shape_cast_fp16_to_uint16_dtype_0, x = var_240_shape_cast_fp16)[name = string("cast_241")]; + uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_240_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; + string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(15926784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16092736))))[name = string("model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16113536)))]; + tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_0_cast_fp16")]; + tensor model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16114752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16170112))))[name = string("model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16177088)))]; + tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16177536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16232896))))[name = string("model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_2_cast_fp16")]; + tensor concat_0x = const()[name = string("concat_0x"), val = tensor([1, -1, 9, 64])]; + tensor var_249_cast_fp16 = reshape(shape = concat_0x, x = linear_0_cast_fp16)[name = string("op_249_cast_fp16")]; + tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_1x = const()[name = string("concat_1x"), val = tensor([1, -1, 3, 64])]; + tensor var_252_cast_fp16 = reshape(shape = concat_1x, x = linear_1_cast_fp16)[name = string("op_252_cast_fp16")]; + tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 3, 64])]; + tensor var_255_cast_fp16 = reshape(shape = concat_2x, x = linear_2_cast_fp16)[name = string("op_255_cast_fp16")]; + tensor v_state_1_perm_0 = const()[name = string("v_state_1_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor cos_7_axes_0 = const()[name = string("cos_7_axes_0"), val = tensor([1])]; + tensor cos_7_cast_fp16 = expand_dims(axes = cos_7_axes_0, x = cos_1_cast_fp16)[name = string("cos_7_cast_fp16")]; + tensor sin_7_axes_0 = const()[name = string("sin_7_axes_0"), val = tensor([1])]; + tensor sin_7_cast_fp16 = expand_dims(axes = sin_7_axes_0, x = sin_1_cast_fp16)[name = string("sin_7_cast_fp16")]; + tensor q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_249_cast_fp16)[name = string("transpose_119")]; + tensor var_259_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_259_cast_fp16")]; + tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; + tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; + fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_270_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_270_cast_fp16")]; + bool var_272_interleave_0 = const()[name = string("op_272_interleave_0"), val = bool(false)]; + tensor var_272_cast_fp16 = concat(axis = var_81, interleave = var_272_interleave_0, values = (var_270_cast_fp16, x1_1_cast_fp16))[name = string("op_272_cast_fp16")]; + tensor var_273_cast_fp16 = mul(x = var_272_cast_fp16, y = sin_7_cast_fp16)[name = string("op_273_cast_fp16")]; + tensor query_states_3_cast_fp16 = add(x = var_259_cast_fp16, y = var_273_cast_fp16)[name = string("query_states_3_cast_fp16")]; + tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_252_cast_fp16)[name = string("transpose_118")]; + tensor var_275_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_275_cast_fp16")]; + tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; + tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; + fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_286_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_286_cast_fp16")]; + bool var_288_interleave_0 = const()[name = string("op_288_interleave_0"), val = bool(false)]; + tensor var_288_cast_fp16 = concat(axis = var_81, interleave = var_288_interleave_0, values = (var_286_cast_fp16, x1_3_cast_fp16))[name = string("op_288_cast_fp16")]; + tensor var_289_cast_fp16 = mul(x = var_288_cast_fp16, y = sin_7_cast_fp16)[name = string("op_289_cast_fp16")]; + tensor k_state_1_cast_fp16 = add(x = var_275_cast_fp16, y = var_289_cast_fp16)[name = string("k_state_1_cast_fp16")]; + tensor var_291_shape = shape(x = cache_position)[name = string("op_291_shape")]; + int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; + int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; + bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; + string var_291_shape_to_uint16_dtype_0 = const()[name = string("op_291_shape_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(0)]; + tensor var_291_shape_to_uint16 = cast(dtype = var_291_shape_to_uint16_dtype_0, x = var_291_shape)[name = string("cast_240")]; + uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_291_shape_to_uint16)[name = string("gather_10_cast_uint16")]; + string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_239")]; + int32 end_1 = add(x = past_seen_tokens, y = gather_10_cast_uint16_to_int32)[name = string("end_1")]; + tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; + tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; + tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; + tensor expand_dims_2_axes_0 = const()[name = string("expand_dims_2_axes_0"), val = tensor([0])]; + tensor expand_dims_2 = expand_dims(axes = expand_dims_2_axes_0, x = past_seen_tokens)[name = string("expand_dims_2")]; + tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; + tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([3])]; + tensor expand_dims_5_axes_0 = const()[name = string("expand_dims_5_axes_0"), val = tensor([0])]; + tensor expand_dims_5 = expand_dims(axes = expand_dims_5_axes_0, x = end_1)[name = string("expand_dims_5")]; + tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; + int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; + bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; + tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, expand_dims_0, expand_dims_1, expand_dims_2, expand_dims_3))[name = string("concat_5")]; + tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; + tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; + tensor concat_6_values4_0 = const()[name = string("concat_6_values4_0"), val = tensor([0])]; + int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; + bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; + tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_4, expand_dims_5, concat_6_values4_0))[name = string("concat_6")]; + tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_60_write_state")]; + tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_60")]; + tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; + tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_1_cast_fp16 = transpose(perm = v_state_1_perm_0, x = var_255_cast_fp16)[name = string("transpose_117")]; + tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_61_write_state")]; + tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_61")]; + tensor var_312_begin_0 = const()[name = string("op_312_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_312_end_0 = const()[name = string("op_312_end_0"), val = tensor([1, 1, 3, 2048, 64])]; + tensor var_312_end_mask_0 = const()[name = string("op_312_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_312_squeeze_mask_0 = const()[name = string("op_312_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_312_cast_fp16 = slice_by_index(begin = var_312_begin_0, end = var_312_end_0, end_mask = var_312_end_mask_0, squeeze_mask = var_312_squeeze_mask_0, x = coreml_update_state_60)[name = string("op_312_cast_fp16")]; + int32 concat_11_values0_0 = const()[name = string("concat_11_values0_0"), val = int32(1)]; + int32 concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = int32(3)]; + int32 concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = int32(64)]; + int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; + bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; + tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (concat_11_values0_0, concat_11_values1_0, end_1, concat_11_values3_0))[name = string("concat_11")]; + tensor var_315_begin_0 = const()[name = string("op_315_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_315_end_mask_0 = const()[name = string("op_315_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_315_cast_fp16 = slice_by_index(begin = var_315_begin_0, end = concat_11, end_mask = var_315_end_mask_0, x = var_312_cast_fp16)[name = string("op_315_cast_fp16")]; + tensor var_317_begin_0 = const()[name = string("op_317_begin_0"), val = tensor([0, 0, 0, 0, 0])]; + tensor var_317_end_0 = const()[name = string("op_317_end_0"), val = tensor([1, 1, 3, 2048, 64])]; + tensor var_317_end_mask_0 = const()[name = string("op_317_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_317_squeeze_mask_0 = const()[name = string("op_317_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_317_cast_fp16 = slice_by_index(begin = var_317_begin_0, end = var_317_end_0, end_mask = var_317_end_mask_0, squeeze_mask = var_317_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_317_cast_fp16")]; + tensor var_320_begin_0 = const()[name = string("op_320_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_320_end_mask_0 = const()[name = string("op_320_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_320_cast_fp16 = slice_by_index(begin = var_320_begin_0, end = concat_11, end_mask = var_320_end_mask_0, x = var_317_cast_fp16)[name = string("op_320_cast_fp16")]; + tensor var_322_shape_cast_fp16 = shape(x = var_315_cast_fp16)[name = string("op_322_shape_cast_fp16")]; + int32 gather_13 = const()[name = string("gather_13"), val = int32(1)]; + int32 gather_14 = const()[name = string("gather_14"), val = int32(3)]; + int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; + int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; + bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; + string var_322_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_322_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(2)]; + tensor var_322_shape_cast_fp16_to_uint16 = cast(dtype = var_322_shape_cast_fp16_to_uint16_dtype_0, x = var_322_shape_cast_fp16)[name = string("cast_238")]; + uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_322_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; + string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_16 = const()[name = string("gather_16"), val = int32(64)]; + tensor var_329_axes_0 = const()[name = string("op_329_axes_0"), val = tensor([2])]; + tensor var_329_cast_fp16 = expand_dims(axes = var_329_axes_0, x = var_315_cast_fp16)[name = string("op_329_cast_fp16")]; + tensor shape_17_cast_fp16 = shape(x = var_329_cast_fp16)[name = string("shape_17_cast_fp16")]; + int32 concat_13_axis_0 = const()[name = string("concat_13_axis_0"), val = int32(0)]; + bool concat_13_interleave_0 = const()[name = string("concat_13_interleave_0"), val = bool(false)]; + int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_237")]; + tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (gather_13, gather_14, var_85, gather_15_cast_uint16_to_int32, gather_16))[name = string("concat_13")]; + tensor real_div_0 = real_div(x = concat_13, y = shape_17_cast_fp16)[name = string("real_div_0")]; + tensor hidden_states_11_cast_fp16 = tile(reps = real_div_0, x = var_329_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; + tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_3_cast_fp16 = reshape(shape = concat_14x, x = hidden_states_11_cast_fp16)[name = string("key_states_3_cast_fp16")]; + tensor var_339_shape_cast_fp16 = shape(x = var_320_cast_fp16)[name = string("op_339_shape_cast_fp16")]; + int32 gather_17 = const()[name = string("gather_17"), val = int32(1)]; + int32 gather_18 = const()[name = string("gather_18"), val = int32(3)]; + int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; + int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; + bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; + string var_339_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_339_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(2)]; + tensor var_339_shape_cast_fp16_to_uint16 = cast(dtype = var_339_shape_cast_fp16_to_uint16_dtype_0, x = var_339_shape_cast_fp16)[name = string("cast_236")]; + uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_339_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; + string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_20 = const()[name = string("gather_20"), val = int32(64)]; + tensor var_346_axes_0 = const()[name = string("op_346_axes_0"), val = tensor([2])]; + tensor var_346_cast_fp16 = expand_dims(axes = var_346_axes_0, x = var_320_cast_fp16)[name = string("op_346_cast_fp16")]; + tensor shape_22_cast_fp16 = shape(x = var_346_cast_fp16)[name = string("shape_22_cast_fp16")]; + int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; + bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; + int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_235")]; + tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_17, gather_18, var_85, gather_19_cast_uint16_to_int32, gather_20))[name = string("concat_15")]; + tensor real_div_1 = real_div(x = concat_15, y = shape_22_cast_fp16)[name = string("real_div_1")]; + tensor hidden_states_15_cast_fp16 = tile(reps = real_div_1, x = var_346_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; + tensor concat_16x = const()[name = string("concat_16x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_3_cast_fp16 = reshape(shape = concat_16x, x = hidden_states_15_cast_fp16)[name = string("value_states_3_cast_fp16")]; + tensor var_356_shape_cast_fp16 = shape(x = key_states_3_cast_fp16)[name = string("op_356_shape_cast_fp16")]; + int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; + int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; + bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; + string var_356_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_356_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(2)]; + tensor var_356_shape_cast_fp16_to_uint16 = cast(dtype = var_356_shape_cast_fp16_to_uint16_dtype_0, x = var_356_shape_cast_fp16)[name = string("cast_234")]; + uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_356_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; + string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_17_values0_0 = const()[name = string("concat_17_values0_0"), val = int32(1)]; + int32 concat_17_values1_0 = const()[name = string("concat_17_values1_0"), val = int32(1)]; + int32 concat_17_values2_0 = const()[name = string("concat_17_values2_0"), val = int32(0)]; + int32 concat_17_axis_0 = const()[name = string("concat_17_axis_0"), val = int32(0)]; + bool concat_17_interleave_0 = const()[name = string("concat_17_interleave_0"), val = bool(false)]; + int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_233")]; + tensor concat_17 = concat(axis = concat_17_axis_0, interleave = concat_17_interleave_0, values = (concat_17_values0_0, concat_17_values1_0, concat_17_values2_0, gather_21_cast_uint16_to_int32))[name = string("concat_17")]; + tensor causal_mask_3_begin_0 = const()[name = string("causal_mask_3_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_3_end_mask_0 = const()[name = string("causal_mask_3_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_3_cast_fp16 = slice_by_index(begin = causal_mask_3_begin_0, end = concat_17, end_mask = causal_mask_3_end_mask_0, x = causal_mask)[name = string("causal_mask_3_cast_fp16")]; + tensor attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_3_cast_fp16, key = key_states_3_cast_fp16, query = query_states_3_cast_fp16, value = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; + tensor var_362_perm_0 = const()[name = string("op_362_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; + bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; + int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_232")]; + tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (gather_4, gather_5_cast_uint16_to_int32, var_81))[name = string("concat_18")]; + tensor var_362_cast_fp16 = transpose(perm = var_362_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_116")]; + tensor input_1_cast_fp16 = reshape(shape = concat_18, x = var_362_cast_fp16)[name = string("input_1_cast_fp16")]; + tensor model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16239872))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16405824))))[name = string("model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_3_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized, x = input_1_cast_fp16)[name = string("linear_3_cast_fp16")]; + tensor hidden_states_19_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; + fp16 var_76_promoted_1_to_fp16 = const()[name = string("op_76_promoted_1_to_fp16"), val = fp16(0x1p+1)]; + tensor var_371_cast_fp16 = pow(x = hidden_states_19_cast_fp16, y = var_76_promoted_1_to_fp16)[name = string("op_371_cast_fp16")]; + tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; + bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; + tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_371_cast_fp16)[name = string("variance_3_cast_fp16")]; + fp16 var_374_to_fp16 = const()[name = string("op_374_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_375_cast_fp16 = add(x = variance_3_cast_fp16, y = var_374_to_fp16)[name = string("op_375_cast_fp16")]; + fp32 var_376_epsilon_0 = const()[name = string("op_376_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_376_cast_fp16 = rsqrt(epsilon = var_376_epsilon_0, x = var_375_cast_fp16)[name = string("op_376_cast_fp16")]; + tensor hidden_states_23_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = var_376_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; + tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16426624)))]; + tensor input_3_cast_fp16 = mul(x = model_model_layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_23_cast_fp16)[name = string("input_3_cast_fp16")]; + tensor model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16427840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16870272))))[name = string("model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16925632)))]; + tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_4_cast_fp16")]; + tensor var_388_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_388_cast_fp16")]; + tensor model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(16928768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17371200))))[name = string("model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_5_cast_fp16")]; + tensor input_7_cast_fp16 = mul(x = var_388_cast_fp16, y = linear_5_cast_fp16)[name = string("input_7_cast_fp16")]; + tensor model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17426560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17868992))))[name = string("model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_6_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_6_cast_fp16")]; + tensor hidden_states_29_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; + fp16 var_76_promoted_2_to_fp16 = const()[name = string("op_76_promoted_2_to_fp16"), val = fp16(0x1p+1)]; + tensor var_401_cast_fp16 = pow(x = hidden_states_29_cast_fp16, y = var_76_promoted_2_to_fp16)[name = string("op_401_cast_fp16")]; + tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; + bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; + tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_401_cast_fp16)[name = string("variance_5_cast_fp16")]; + fp16 var_404_to_fp16 = const()[name = string("op_404_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_405_cast_fp16 = add(x = variance_5_cast_fp16, y = var_404_to_fp16)[name = string("op_405_cast_fp16")]; + fp32 var_406_epsilon_0 = const()[name = string("op_406_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_406_cast_fp16 = rsqrt(epsilon = var_406_epsilon_0, x = var_405_cast_fp16)[name = string("op_406_cast_fp16")]; + tensor hidden_states_33_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = var_406_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; + tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17924352)))]; + tensor hidden_states_37_cast_fp16 = mul(x = model_model_layers_1_input_layernorm_weight_to_fp16, y = hidden_states_33_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; + tensor var_417_shape_cast_fp16 = shape(x = hidden_states_37_cast_fp16)[name = string("op_417_shape_cast_fp16")]; + int32 gather_22 = const()[name = string("gather_22"), val = int32(1)]; + int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; + int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; + bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; + string var_417_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_417_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; + tensor var_417_shape_cast_fp16_to_uint16 = cast(dtype = var_417_shape_cast_fp16_to_uint16_dtype_0, x = var_417_shape_cast_fp16)[name = string("cast_231")]; + uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_417_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; + string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(17925568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18091520))))[name = string("model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_7_cast_fp16")]; + tensor model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18112320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18167680))))[name = string("model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_8_cast_fp16")]; + tensor model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18174656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18230016))))[name = string("model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_9_cast_fp16")]; + tensor concat_19x = const()[name = string("concat_19x"), val = tensor([1, -1, 9, 64])]; + tensor var_426_cast_fp16 = reshape(shape = concat_19x, x = linear_7_cast_fp16)[name = string("op_426_cast_fp16")]; + tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_20x = const()[name = string("concat_20x"), val = tensor([1, -1, 3, 64])]; + tensor var_429_cast_fp16 = reshape(shape = concat_20x, x = linear_8_cast_fp16)[name = string("op_429_cast_fp16")]; + tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_21x = const()[name = string("concat_21x"), val = tensor([1, -1, 3, 64])]; + tensor var_432_cast_fp16 = reshape(shape = concat_21x, x = linear_9_cast_fp16)[name = string("op_432_cast_fp16")]; + tensor v_state_3_perm_0 = const()[name = string("v_state_3_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_3_cast_fp16 = transpose(perm = q_3_perm_0, x = var_426_cast_fp16)[name = string("transpose_115")]; + tensor var_436_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_436_cast_fp16")]; + tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; + tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; + fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_447_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_447_cast_fp16")]; + bool var_449_interleave_0 = const()[name = string("op_449_interleave_0"), val = bool(false)]; + tensor var_449_cast_fp16 = concat(axis = var_81, interleave = var_449_interleave_0, values = (var_447_cast_fp16, x1_5_cast_fp16))[name = string("op_449_cast_fp16")]; + tensor var_450_cast_fp16 = mul(x = var_449_cast_fp16, y = sin_7_cast_fp16)[name = string("op_450_cast_fp16")]; + tensor query_states_7_cast_fp16 = add(x = var_436_cast_fp16, y = var_450_cast_fp16)[name = string("query_states_7_cast_fp16")]; + tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = var_429_cast_fp16)[name = string("transpose_114")]; + tensor var_452_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_452_cast_fp16")]; + tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; + tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; + fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_463_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_463_cast_fp16")]; + bool var_465_interleave_0 = const()[name = string("op_465_interleave_0"), val = bool(false)]; + tensor var_465_cast_fp16 = concat(axis = var_81, interleave = var_465_interleave_0, values = (var_463_cast_fp16, x1_7_cast_fp16))[name = string("op_465_cast_fp16")]; + tensor var_466_cast_fp16 = mul(x = var_465_cast_fp16, y = sin_7_cast_fp16)[name = string("op_466_cast_fp16")]; + tensor k_state_3_cast_fp16 = add(x = var_452_cast_fp16, y = var_466_cast_fp16)[name = string("k_state_3_cast_fp16")]; + tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; + tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; + tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; + tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([1])]; + int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; + bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; + tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, expand_dims_12, expand_dims_13, expand_dims_2, expand_dims_15))[name = string("concat_24")]; + tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_62_write_state")]; + tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_62")]; + tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_3_cast_fp16 = transpose(perm = v_state_3_perm_0, x = var_432_cast_fp16)[name = string("transpose_113")]; + tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_63_write_state")]; + tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_63")]; + tensor var_489_begin_0 = const()[name = string("op_489_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_489_end_0 = const()[name = string("op_489_end_0"), val = tensor([2, 1, 3, 2048, 64])]; + tensor var_489_end_mask_0 = const()[name = string("op_489_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_489_squeeze_mask_0 = const()[name = string("op_489_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_489_cast_fp16 = slice_by_index(begin = var_489_begin_0, end = var_489_end_0, end_mask = var_489_end_mask_0, squeeze_mask = var_489_squeeze_mask_0, x = coreml_update_state_62)[name = string("op_489_cast_fp16")]; + tensor var_492_begin_0 = const()[name = string("op_492_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_492_end_mask_0 = const()[name = string("op_492_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_492_cast_fp16 = slice_by_index(begin = var_492_begin_0, end = concat_11, end_mask = var_492_end_mask_0, x = var_489_cast_fp16)[name = string("op_492_cast_fp16")]; + tensor var_494_begin_0 = const()[name = string("op_494_begin_0"), val = tensor([1, 0, 0, 0, 0])]; + tensor var_494_end_0 = const()[name = string("op_494_end_0"), val = tensor([2, 1, 3, 2048, 64])]; + tensor var_494_end_mask_0 = const()[name = string("op_494_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_494_squeeze_mask_0 = const()[name = string("op_494_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_494_cast_fp16 = slice_by_index(begin = var_494_begin_0, end = var_494_end_0, end_mask = var_494_end_mask_0, squeeze_mask = var_494_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_494_cast_fp16")]; + tensor var_497_begin_0 = const()[name = string("op_497_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_497_end_mask_0 = const()[name = string("op_497_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_497_cast_fp16 = slice_by_index(begin = var_497_begin_0, end = concat_11, end_mask = var_497_end_mask_0, x = var_494_cast_fp16)[name = string("op_497_cast_fp16")]; + tensor var_499_shape_cast_fp16 = shape(x = var_492_cast_fp16)[name = string("op_499_shape_cast_fp16")]; + int32 gather_31 = const()[name = string("gather_31"), val = int32(1)]; + int32 gather_32 = const()[name = string("gather_32"), val = int32(3)]; + int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; + int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; + bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; + string var_499_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_499_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(2)]; + tensor var_499_shape_cast_fp16_to_uint16 = cast(dtype = var_499_shape_cast_fp16_to_uint16_dtype_0, x = var_499_shape_cast_fp16)[name = string("cast_230")]; + uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_499_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; + string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_34 = const()[name = string("gather_34"), val = int32(64)]; + tensor var_506_axes_0 = const()[name = string("op_506_axes_0"), val = tensor([2])]; + tensor var_506_cast_fp16 = expand_dims(axes = var_506_axes_0, x = var_492_cast_fp16)[name = string("op_506_cast_fp16")]; + tensor shape_37_cast_fp16 = shape(x = var_506_cast_fp16)[name = string("shape_37_cast_fp16")]; + int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; + bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; + int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_229")]; + tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (gather_31, gather_32, var_85, gather_33_cast_uint16_to_int32, gather_34))[name = string("concat_32")]; + tensor real_div_2 = real_div(x = concat_32, y = shape_37_cast_fp16)[name = string("real_div_2")]; + tensor hidden_states_41_cast_fp16 = tile(reps = real_div_2, x = var_506_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; + tensor concat_33x = const()[name = string("concat_33x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_7_cast_fp16 = reshape(shape = concat_33x, x = hidden_states_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; + tensor var_516_shape_cast_fp16 = shape(x = var_497_cast_fp16)[name = string("op_516_shape_cast_fp16")]; + int32 gather_35 = const()[name = string("gather_35"), val = int32(1)]; + int32 gather_36 = const()[name = string("gather_36"), val = int32(3)]; + int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; + int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; + bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; + string var_516_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_516_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(2)]; + tensor var_516_shape_cast_fp16_to_uint16 = cast(dtype = var_516_shape_cast_fp16_to_uint16_dtype_0, x = var_516_shape_cast_fp16)[name = string("cast_228")]; + uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_516_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; + string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_38 = const()[name = string("gather_38"), val = int32(64)]; + tensor var_523_axes_0 = const()[name = string("op_523_axes_0"), val = tensor([2])]; + tensor var_523_cast_fp16 = expand_dims(axes = var_523_axes_0, x = var_497_cast_fp16)[name = string("op_523_cast_fp16")]; + tensor shape_42_cast_fp16 = shape(x = var_523_cast_fp16)[name = string("shape_42_cast_fp16")]; + int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; + bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; + int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_227")]; + tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (gather_35, gather_36, var_85, gather_37_cast_uint16_to_int32, gather_38))[name = string("concat_34")]; + tensor real_div_3 = real_div(x = concat_34, y = shape_42_cast_fp16)[name = string("real_div_3")]; + tensor hidden_states_45_cast_fp16 = tile(reps = real_div_3, x = var_523_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; + tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_7_cast_fp16 = reshape(shape = concat_35x, x = hidden_states_45_cast_fp16)[name = string("value_states_7_cast_fp16")]; + tensor var_533_shape_cast_fp16 = shape(x = key_states_7_cast_fp16)[name = string("op_533_shape_cast_fp16")]; + int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; + int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; + bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; + string var_533_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_533_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(2)]; + tensor var_533_shape_cast_fp16_to_uint16 = cast(dtype = var_533_shape_cast_fp16_to_uint16_dtype_0, x = var_533_shape_cast_fp16)[name = string("cast_226")]; + uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_533_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; + string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = int32(1)]; + int32 concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = int32(1)]; + int32 concat_36_values2_0 = const()[name = string("concat_36_values2_0"), val = int32(0)]; + int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; + bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; + int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_225")]; + tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, concat_36_values2_0, gather_39_cast_uint16_to_int32))[name = string("concat_36")]; + tensor causal_mask_5_begin_0 = const()[name = string("causal_mask_5_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_5_end_mask_0 = const()[name = string("causal_mask_5_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_5_cast_fp16 = slice_by_index(begin = causal_mask_5_begin_0, end = concat_36, end_mask = causal_mask_5_end_mask_0, x = causal_mask)[name = string("causal_mask_5_cast_fp16")]; + tensor attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_5_cast_fp16, key = key_states_7_cast_fp16, query = query_states_7_cast_fp16, value = value_states_7_cast_fp16)[name = string("attn_output_5_cast_fp16")]; + tensor var_539_perm_0 = const()[name = string("op_539_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; + bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; + int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_224")]; + tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (gather_22, gather_23_cast_uint16_to_int32, var_81))[name = string("concat_37")]; + tensor var_539_cast_fp16 = transpose(perm = var_539_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_112")]; + tensor input_9_cast_fp16 = reshape(shape = concat_37, x = var_539_cast_fp16)[name = string("input_9_cast_fp16")]; + tensor model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18236992))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18402944))))[name = string("model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = string("linear_10_cast_fp16")]; + tensor hidden_states_49_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; + fp16 var_76_promoted_3_to_fp16 = const()[name = string("op_76_promoted_3_to_fp16"), val = fp16(0x1p+1)]; + tensor var_548_cast_fp16 = pow(x = hidden_states_49_cast_fp16, y = var_76_promoted_3_to_fp16)[name = string("op_548_cast_fp16")]; + tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; + bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; + tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_548_cast_fp16)[name = string("variance_7_cast_fp16")]; + fp16 var_551_to_fp16 = const()[name = string("op_551_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_552_cast_fp16 = add(x = variance_7_cast_fp16, y = var_551_to_fp16)[name = string("op_552_cast_fp16")]; + fp32 var_553_epsilon_0 = const()[name = string("op_553_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_553_cast_fp16 = rsqrt(epsilon = var_553_epsilon_0, x = var_552_cast_fp16)[name = string("op_553_cast_fp16")]; + tensor hidden_states_53_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = var_553_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; + tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18423744)))]; + tensor input_11_cast_fp16 = mul(x = model_model_layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_53_cast_fp16)[name = string("input_11_cast_fp16")]; + tensor model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18424960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18867392))))[name = string("model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_11_cast_fp16")]; + tensor var_565_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_565_cast_fp16")]; + tensor model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(18922752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19365184))))[name = string("model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_12_cast_fp16")]; + tensor input_15_cast_fp16 = mul(x = var_565_cast_fp16, y = linear_12_cast_fp16)[name = string("input_15_cast_fp16")]; + tensor model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19420544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19862976))))[name = string("model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_13_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = string("linear_13_cast_fp16")]; + tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; + fp16 var_76_promoted_4_to_fp16 = const()[name = string("op_76_promoted_4_to_fp16"), val = fp16(0x1p+1)]; + tensor var_578_cast_fp16 = pow(x = hidden_states_59_cast_fp16, y = var_76_promoted_4_to_fp16)[name = string("op_578_cast_fp16")]; + tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; + bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; + tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_578_cast_fp16)[name = string("variance_9_cast_fp16")]; + fp16 var_581_to_fp16 = const()[name = string("op_581_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_582_cast_fp16 = add(x = variance_9_cast_fp16, y = var_581_to_fp16)[name = string("op_582_cast_fp16")]; + fp32 var_583_epsilon_0 = const()[name = string("op_583_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_583_cast_fp16 = rsqrt(epsilon = var_583_epsilon_0, x = var_582_cast_fp16)[name = string("op_583_cast_fp16")]; + tensor hidden_states_63_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = var_583_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; + tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19918336)))]; + tensor hidden_states_67_cast_fp16 = mul(x = model_model_layers_2_input_layernorm_weight_to_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; + tensor var_594_shape_cast_fp16 = shape(x = hidden_states_67_cast_fp16)[name = string("op_594_shape_cast_fp16")]; + int32 gather_40 = const()[name = string("gather_40"), val = int32(1)]; + int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)]; + int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)]; + bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)]; + string var_594_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_594_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)]; + tensor var_594_shape_cast_fp16_to_uint16 = cast(dtype = var_594_shape_cast_fp16_to_uint16_dtype_0, x = var_594_shape_cast_fp16)[name = string("cast_223")]; + uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_594_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")]; + string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(19919552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20085504))))[name = string("model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_14_cast_fp16")]; + tensor model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20106304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20161664))))[name = string("model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_15_cast_fp16")]; + tensor model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20168640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20224000))))[name = string("model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_16_cast_fp16")]; + tensor concat_38x = const()[name = string("concat_38x"), val = tensor([1, -1, 9, 64])]; + tensor var_603_cast_fp16 = reshape(shape = concat_38x, x = linear_14_cast_fp16)[name = string("op_603_cast_fp16")]; + tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 3, 64])]; + tensor var_606_cast_fp16 = reshape(shape = concat_39x, x = linear_15_cast_fp16)[name = string("op_606_cast_fp16")]; + tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_40x = const()[name = string("concat_40x"), val = tensor([1, -1, 3, 64])]; + tensor var_609_cast_fp16 = reshape(shape = concat_40x, x = linear_16_cast_fp16)[name = string("op_609_cast_fp16")]; + tensor v_state_5_perm_0 = const()[name = string("v_state_5_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = var_603_cast_fp16)[name = string("transpose_111")]; + tensor var_613_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_613_cast_fp16")]; + tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; + tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; + fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_624_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_624_cast_fp16")]; + bool var_626_interleave_0 = const()[name = string("op_626_interleave_0"), val = bool(false)]; + tensor var_626_cast_fp16 = concat(axis = var_81, interleave = var_626_interleave_0, values = (var_624_cast_fp16, x1_9_cast_fp16))[name = string("op_626_cast_fp16")]; + tensor var_627_cast_fp16 = mul(x = var_626_cast_fp16, y = sin_7_cast_fp16)[name = string("op_627_cast_fp16")]; + tensor query_states_11_cast_fp16 = add(x = var_613_cast_fp16, y = var_627_cast_fp16)[name = string("query_states_11_cast_fp16")]; + tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = var_606_cast_fp16)[name = string("transpose_110")]; + tensor var_629_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_629_cast_fp16")]; + tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; + tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; + fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_640_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_640_cast_fp16")]; + bool var_642_interleave_0 = const()[name = string("op_642_interleave_0"), val = bool(false)]; + tensor var_642_cast_fp16 = concat(axis = var_81, interleave = var_642_interleave_0, values = (var_640_cast_fp16, x1_11_cast_fp16))[name = string("op_642_cast_fp16")]; + tensor var_643_cast_fp16 = mul(x = var_642_cast_fp16, y = sin_7_cast_fp16)[name = string("op_643_cast_fp16")]; + tensor k_state_5_cast_fp16 = add(x = var_629_cast_fp16, y = var_643_cast_fp16)[name = string("k_state_5_cast_fp16")]; + tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([0])]; + tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; + tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; + tensor concat_43_values0_0 = const()[name = string("concat_43_values0_0"), val = tensor([2])]; + int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; + bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; + tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (concat_43_values0_0, expand_dims_24, expand_dims_25, expand_dims_2, expand_dims_27))[name = string("concat_43")]; + tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_64_write_state")]; + tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_64")]; + tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_5_cast_fp16 = transpose(perm = v_state_5_perm_0, x = var_609_cast_fp16)[name = string("transpose_109")]; + tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_65_write_state")]; + tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_65")]; + tensor var_666_begin_0 = const()[name = string("op_666_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_666_end_0 = const()[name = string("op_666_end_0"), val = tensor([3, 1, 3, 2048, 64])]; + tensor var_666_end_mask_0 = const()[name = string("op_666_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_666_squeeze_mask_0 = const()[name = string("op_666_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_666_cast_fp16 = slice_by_index(begin = var_666_begin_0, end = var_666_end_0, end_mask = var_666_end_mask_0, squeeze_mask = var_666_squeeze_mask_0, x = coreml_update_state_64)[name = string("op_666_cast_fp16")]; + tensor var_669_begin_0 = const()[name = string("op_669_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_669_end_mask_0 = const()[name = string("op_669_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_669_cast_fp16 = slice_by_index(begin = var_669_begin_0, end = concat_11, end_mask = var_669_end_mask_0, x = var_666_cast_fp16)[name = string("op_669_cast_fp16")]; + tensor var_671_begin_0 = const()[name = string("op_671_begin_0"), val = tensor([2, 0, 0, 0, 0])]; + tensor var_671_end_0 = const()[name = string("op_671_end_0"), val = tensor([3, 1, 3, 2048, 64])]; + tensor var_671_end_mask_0 = const()[name = string("op_671_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_671_squeeze_mask_0 = const()[name = string("op_671_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_671_cast_fp16 = slice_by_index(begin = var_671_begin_0, end = var_671_end_0, end_mask = var_671_end_mask_0, squeeze_mask = var_671_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_671_cast_fp16")]; + tensor var_674_begin_0 = const()[name = string("op_674_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_674_end_mask_0 = const()[name = string("op_674_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_674_cast_fp16 = slice_by_index(begin = var_674_begin_0, end = concat_11, end_mask = var_674_end_mask_0, x = var_671_cast_fp16)[name = string("op_674_cast_fp16")]; + tensor var_676_shape_cast_fp16 = shape(x = var_669_cast_fp16)[name = string("op_676_shape_cast_fp16")]; + int32 gather_49 = const()[name = string("gather_49"), val = int32(1)]; + int32 gather_50 = const()[name = string("gather_50"), val = int32(3)]; + int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; + int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; + bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; + string var_676_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_676_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(2)]; + tensor var_676_shape_cast_fp16_to_uint16 = cast(dtype = var_676_shape_cast_fp16_to_uint16_dtype_0, x = var_676_shape_cast_fp16)[name = string("cast_222")]; + uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_676_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")]; + string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_52 = const()[name = string("gather_52"), val = int32(64)]; + tensor var_683_axes_0 = const()[name = string("op_683_axes_0"), val = tensor([2])]; + tensor var_683_cast_fp16 = expand_dims(axes = var_683_axes_0, x = var_669_cast_fp16)[name = string("op_683_cast_fp16")]; + tensor shape_57_cast_fp16 = shape(x = var_683_cast_fp16)[name = string("shape_57_cast_fp16")]; + int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; + bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; + int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_221")]; + tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (gather_49, gather_50, var_85, gather_51_cast_uint16_to_int32, gather_52))[name = string("concat_51")]; + tensor real_div_4 = real_div(x = concat_51, y = shape_57_cast_fp16)[name = string("real_div_4")]; + tensor hidden_states_71_cast_fp16 = tile(reps = real_div_4, x = var_683_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; + tensor concat_52x = const()[name = string("concat_52x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_11_cast_fp16 = reshape(shape = concat_52x, x = hidden_states_71_cast_fp16)[name = string("key_states_11_cast_fp16")]; + tensor var_693_shape_cast_fp16 = shape(x = var_674_cast_fp16)[name = string("op_693_shape_cast_fp16")]; + int32 gather_53 = const()[name = string("gather_53"), val = int32(1)]; + int32 gather_54 = const()[name = string("gather_54"), val = int32(3)]; + int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; + int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; + bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; + string var_693_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_693_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(2)]; + tensor var_693_shape_cast_fp16_to_uint16 = cast(dtype = var_693_shape_cast_fp16_to_uint16_dtype_0, x = var_693_shape_cast_fp16)[name = string("cast_220")]; + uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_693_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; + string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_56 = const()[name = string("gather_56"), val = int32(64)]; + tensor var_700_axes_0 = const()[name = string("op_700_axes_0"), val = tensor([2])]; + tensor var_700_cast_fp16 = expand_dims(axes = var_700_axes_0, x = var_674_cast_fp16)[name = string("op_700_cast_fp16")]; + tensor shape_62_cast_fp16 = shape(x = var_700_cast_fp16)[name = string("shape_62_cast_fp16")]; + int32 concat_53_axis_0 = const()[name = string("concat_53_axis_0"), val = int32(0)]; + bool concat_53_interleave_0 = const()[name = string("concat_53_interleave_0"), val = bool(false)]; + int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_219")]; + tensor concat_53 = concat(axis = concat_53_axis_0, interleave = concat_53_interleave_0, values = (gather_53, gather_54, var_85, gather_55_cast_uint16_to_int32, gather_56))[name = string("concat_53")]; + tensor real_div_5 = real_div(x = concat_53, y = shape_62_cast_fp16)[name = string("real_div_5")]; + tensor hidden_states_75_cast_fp16 = tile(reps = real_div_5, x = var_700_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; + tensor concat_54x = const()[name = string("concat_54x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_11_cast_fp16 = reshape(shape = concat_54x, x = hidden_states_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; + tensor var_710_shape_cast_fp16 = shape(x = key_states_11_cast_fp16)[name = string("op_710_shape_cast_fp16")]; + int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; + int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; + bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; + string var_710_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_710_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(2)]; + tensor var_710_shape_cast_fp16_to_uint16 = cast(dtype = var_710_shape_cast_fp16_to_uint16_dtype_0, x = var_710_shape_cast_fp16)[name = string("cast_218")]; + uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_710_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")]; + string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_55_values0_0 = const()[name = string("concat_55_values0_0"), val = int32(1)]; + int32 concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = int32(1)]; + int32 concat_55_values2_0 = const()[name = string("concat_55_values2_0"), val = int32(0)]; + int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; + bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; + int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_217")]; + tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (concat_55_values0_0, concat_55_values1_0, concat_55_values2_0, gather_57_cast_uint16_to_int32))[name = string("concat_55")]; + tensor causal_mask_7_begin_0 = const()[name = string("causal_mask_7_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_7_end_mask_0 = const()[name = string("causal_mask_7_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_7_cast_fp16 = slice_by_index(begin = causal_mask_7_begin_0, end = concat_55, end_mask = causal_mask_7_end_mask_0, x = causal_mask)[name = string("causal_mask_7_cast_fp16")]; + tensor attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_7_cast_fp16, key = key_states_11_cast_fp16, query = query_states_11_cast_fp16, value = value_states_11_cast_fp16)[name = string("attn_output_9_cast_fp16")]; + tensor var_716_perm_0 = const()[name = string("op_716_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; + bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; + int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_216")]; + tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (gather_40, gather_41_cast_uint16_to_int32, var_81))[name = string("concat_56")]; + tensor var_716_cast_fp16 = transpose(perm = var_716_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_108")]; + tensor input_17_cast_fp16 = reshape(shape = concat_56, x = var_716_cast_fp16)[name = string("input_17_cast_fp16")]; + tensor model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20230976))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20396928))))[name = string("model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_17_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized, x = input_17_cast_fp16)[name = string("linear_17_cast_fp16")]; + tensor hidden_states_79_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = linear_17_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; + fp16 var_76_promoted_5_to_fp16 = const()[name = string("op_76_promoted_5_to_fp16"), val = fp16(0x1p+1)]; + tensor var_725_cast_fp16 = pow(x = hidden_states_79_cast_fp16, y = var_76_promoted_5_to_fp16)[name = string("op_725_cast_fp16")]; + tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; + bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; + tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_725_cast_fp16)[name = string("variance_11_cast_fp16")]; + fp16 var_728_to_fp16 = const()[name = string("op_728_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_729_cast_fp16 = add(x = variance_11_cast_fp16, y = var_728_to_fp16)[name = string("op_729_cast_fp16")]; + fp32 var_730_epsilon_0 = const()[name = string("op_730_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_730_cast_fp16 = rsqrt(epsilon = var_730_epsilon_0, x = var_729_cast_fp16)[name = string("op_730_cast_fp16")]; + tensor hidden_states_83_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = var_730_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; + tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20417728)))]; + tensor input_19_cast_fp16 = mul(x = model_model_layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_83_cast_fp16)[name = string("input_19_cast_fp16")]; + tensor model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20418944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20861376))))[name = string("model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_18_cast_fp16")]; + tensor var_742_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_742_cast_fp16")]; + tensor model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(20916736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21359168))))[name = string("model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_19_cast_fp16")]; + tensor input_23_cast_fp16 = mul(x = var_742_cast_fp16, y = linear_19_cast_fp16)[name = string("input_23_cast_fp16")]; + tensor model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21414528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21856960))))[name = string("model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = string("linear_20_cast_fp16")]; + tensor hidden_states_89_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = linear_20_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; + fp16 var_76_promoted_6_to_fp16 = const()[name = string("op_76_promoted_6_to_fp16"), val = fp16(0x1p+1)]; + tensor var_755_cast_fp16 = pow(x = hidden_states_89_cast_fp16, y = var_76_promoted_6_to_fp16)[name = string("op_755_cast_fp16")]; + tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; + bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; + tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_755_cast_fp16)[name = string("variance_13_cast_fp16")]; + fp16 var_758_to_fp16 = const()[name = string("op_758_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_759_cast_fp16 = add(x = variance_13_cast_fp16, y = var_758_to_fp16)[name = string("op_759_cast_fp16")]; + fp32 var_760_epsilon_0 = const()[name = string("op_760_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_760_cast_fp16 = rsqrt(epsilon = var_760_epsilon_0, x = var_759_cast_fp16)[name = string("op_760_cast_fp16")]; + tensor hidden_states_93_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = var_760_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; + tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21912320)))]; + tensor hidden_states_97_cast_fp16 = mul(x = model_model_layers_3_input_layernorm_weight_to_fp16, y = hidden_states_93_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; + tensor var_771_shape_cast_fp16 = shape(x = hidden_states_97_cast_fp16)[name = string("op_771_shape_cast_fp16")]; + int32 gather_58 = const()[name = string("gather_58"), val = int32(1)]; + int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)]; + int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)]; + bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)]; + string var_771_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_771_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)]; + tensor var_771_shape_cast_fp16_to_uint16 = cast(dtype = var_771_shape_cast_fp16_to_uint16_dtype_0, x = var_771_shape_cast_fp16)[name = string("cast_215")]; + uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_771_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")]; + string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(21913536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22079488))))[name = string("model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_21_cast_fp16")]; + tensor model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22100288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22155648))))[name = string("model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_22_cast_fp16")]; + tensor model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22162624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22217984))))[name = string("model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_23_cast_fp16")]; + tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 9, 64])]; + tensor var_780_cast_fp16 = reshape(shape = concat_57x, x = linear_21_cast_fp16)[name = string("op_780_cast_fp16")]; + tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 3, 64])]; + tensor var_783_cast_fp16 = reshape(shape = concat_58x, x = linear_22_cast_fp16)[name = string("op_783_cast_fp16")]; + tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_59x = const()[name = string("concat_59x"), val = tensor([1, -1, 3, 64])]; + tensor var_786_cast_fp16 = reshape(shape = concat_59x, x = linear_23_cast_fp16)[name = string("op_786_cast_fp16")]; + tensor v_state_7_perm_0 = const()[name = string("v_state_7_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_7_cast_fp16 = transpose(perm = q_7_perm_0, x = var_780_cast_fp16)[name = string("transpose_107")]; + tensor var_790_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_790_cast_fp16")]; + tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; + tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; + fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_801_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_801_cast_fp16")]; + bool var_803_interleave_0 = const()[name = string("op_803_interleave_0"), val = bool(false)]; + tensor var_803_cast_fp16 = concat(axis = var_81, interleave = var_803_interleave_0, values = (var_801_cast_fp16, x1_13_cast_fp16))[name = string("op_803_cast_fp16")]; + tensor var_804_cast_fp16 = mul(x = var_803_cast_fp16, y = sin_7_cast_fp16)[name = string("op_804_cast_fp16")]; + tensor query_states_15_cast_fp16 = add(x = var_790_cast_fp16, y = var_804_cast_fp16)[name = string("query_states_15_cast_fp16")]; + tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = var_783_cast_fp16)[name = string("transpose_106")]; + tensor var_806_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_806_cast_fp16")]; + tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; + tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; + fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_817_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_817_cast_fp16")]; + bool var_819_interleave_0 = const()[name = string("op_819_interleave_0"), val = bool(false)]; + tensor var_819_cast_fp16 = concat(axis = var_81, interleave = var_819_interleave_0, values = (var_817_cast_fp16, x1_15_cast_fp16))[name = string("op_819_cast_fp16")]; + tensor var_820_cast_fp16 = mul(x = var_819_cast_fp16, y = sin_7_cast_fp16)[name = string("op_820_cast_fp16")]; + tensor k_state_7_cast_fp16 = add(x = var_806_cast_fp16, y = var_820_cast_fp16)[name = string("k_state_7_cast_fp16")]; + tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([0])]; + tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; + tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; + tensor concat_62_values0_0 = const()[name = string("concat_62_values0_0"), val = tensor([3])]; + int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; + bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; + tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (concat_62_values0_0, expand_dims_36, expand_dims_37, expand_dims_2, expand_dims_39))[name = string("concat_62")]; + tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_66_write_state")]; + tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_66")]; + tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_7_cast_fp16 = transpose(perm = v_state_7_perm_0, x = var_786_cast_fp16)[name = string("transpose_105")]; + tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_67_write_state")]; + tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_67")]; + tensor var_843_begin_0 = const()[name = string("op_843_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_843_end_0 = const()[name = string("op_843_end_0"), val = tensor([4, 1, 3, 2048, 64])]; + tensor var_843_end_mask_0 = const()[name = string("op_843_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_843_squeeze_mask_0 = const()[name = string("op_843_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_843_cast_fp16 = slice_by_index(begin = var_843_begin_0, end = var_843_end_0, end_mask = var_843_end_mask_0, squeeze_mask = var_843_squeeze_mask_0, x = coreml_update_state_66)[name = string("op_843_cast_fp16")]; + tensor var_846_begin_0 = const()[name = string("op_846_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_846_end_mask_0 = const()[name = string("op_846_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_846_cast_fp16 = slice_by_index(begin = var_846_begin_0, end = concat_11, end_mask = var_846_end_mask_0, x = var_843_cast_fp16)[name = string("op_846_cast_fp16")]; + tensor var_848_begin_0 = const()[name = string("op_848_begin_0"), val = tensor([3, 0, 0, 0, 0])]; + tensor var_848_end_0 = const()[name = string("op_848_end_0"), val = tensor([4, 1, 3, 2048, 64])]; + tensor var_848_end_mask_0 = const()[name = string("op_848_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_848_squeeze_mask_0 = const()[name = string("op_848_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_848_cast_fp16 = slice_by_index(begin = var_848_begin_0, end = var_848_end_0, end_mask = var_848_end_mask_0, squeeze_mask = var_848_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_848_cast_fp16")]; + tensor var_851_begin_0 = const()[name = string("op_851_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_851_end_mask_0 = const()[name = string("op_851_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_851_cast_fp16 = slice_by_index(begin = var_851_begin_0, end = concat_11, end_mask = var_851_end_mask_0, x = var_848_cast_fp16)[name = string("op_851_cast_fp16")]; + tensor var_853_shape_cast_fp16 = shape(x = var_846_cast_fp16)[name = string("op_853_shape_cast_fp16")]; + int32 gather_67 = const()[name = string("gather_67"), val = int32(1)]; + int32 gather_68 = const()[name = string("gather_68"), val = int32(3)]; + int32 gather_69_axis_0 = const()[name = string("gather_69_axis_0"), val = int32(0)]; + int32 gather_69_batch_dims_0 = const()[name = string("gather_69_batch_dims_0"), val = int32(0)]; + bool gather_69_validate_indices_0 = const()[name = string("gather_69_validate_indices_0"), val = bool(false)]; + string var_853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_69_to_uint16 = const()[name = string("select_69_to_uint16"), val = uint16(2)]; + tensor var_853_shape_cast_fp16_to_uint16 = cast(dtype = var_853_shape_cast_fp16_to_uint16_dtype_0, x = var_853_shape_cast_fp16)[name = string("cast_214")]; + uint16 gather_69_cast_uint16 = gather(axis = gather_69_axis_0, batch_dims = gather_69_batch_dims_0, indices = select_69_to_uint16, validate_indices = gather_69_validate_indices_0, x = var_853_shape_cast_fp16_to_uint16)[name = string("gather_69_cast_uint16")]; + string gather_69_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_69_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_70 = const()[name = string("gather_70"), val = int32(64)]; + tensor var_860_axes_0 = const()[name = string("op_860_axes_0"), val = tensor([2])]; + tensor var_860_cast_fp16 = expand_dims(axes = var_860_axes_0, x = var_846_cast_fp16)[name = string("op_860_cast_fp16")]; + tensor shape_77_cast_fp16 = shape(x = var_860_cast_fp16)[name = string("shape_77_cast_fp16")]; + int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; + bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; + int32 gather_69_cast_uint16_to_int32 = cast(dtype = gather_69_cast_uint16_to_int32_dtype_0, x = gather_69_cast_uint16)[name = string("cast_213")]; + tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (gather_67, gather_68, var_85, gather_69_cast_uint16_to_int32, gather_70))[name = string("concat_70")]; + tensor real_div_6 = real_div(x = concat_70, y = shape_77_cast_fp16)[name = string("real_div_6")]; + tensor hidden_states_101_cast_fp16 = tile(reps = real_div_6, x = var_860_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; + tensor concat_71x = const()[name = string("concat_71x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_15_cast_fp16 = reshape(shape = concat_71x, x = hidden_states_101_cast_fp16)[name = string("key_states_15_cast_fp16")]; + tensor var_870_shape_cast_fp16 = shape(x = var_851_cast_fp16)[name = string("op_870_shape_cast_fp16")]; + int32 gather_71 = const()[name = string("gather_71"), val = int32(1)]; + int32 gather_72 = const()[name = string("gather_72"), val = int32(3)]; + int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; + int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; + bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; + string var_870_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_870_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_73_to_uint16 = const()[name = string("select_73_to_uint16"), val = uint16(2)]; + tensor var_870_shape_cast_fp16_to_uint16 = cast(dtype = var_870_shape_cast_fp16_to_uint16_dtype_0, x = var_870_shape_cast_fp16)[name = string("cast_212")]; + uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_870_shape_cast_fp16_to_uint16)[name = string("gather_73_cast_uint16")]; + string gather_73_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_73_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_74 = const()[name = string("gather_74"), val = int32(64)]; + tensor var_877_axes_0 = const()[name = string("op_877_axes_0"), val = tensor([2])]; + tensor var_877_cast_fp16 = expand_dims(axes = var_877_axes_0, x = var_851_cast_fp16)[name = string("op_877_cast_fp16")]; + tensor shape_82_cast_fp16 = shape(x = var_877_cast_fp16)[name = string("shape_82_cast_fp16")]; + int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; + bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; + int32 gather_73_cast_uint16_to_int32 = cast(dtype = gather_73_cast_uint16_to_int32_dtype_0, x = gather_73_cast_uint16)[name = string("cast_211")]; + tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (gather_71, gather_72, var_85, gather_73_cast_uint16_to_int32, gather_74))[name = string("concat_72")]; + tensor real_div_7 = real_div(x = concat_72, y = shape_82_cast_fp16)[name = string("real_div_7")]; + tensor hidden_states_105_cast_fp16 = tile(reps = real_div_7, x = var_877_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; + tensor concat_73x = const()[name = string("concat_73x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_15_cast_fp16 = reshape(shape = concat_73x, x = hidden_states_105_cast_fp16)[name = string("value_states_15_cast_fp16")]; + tensor var_887_shape_cast_fp16 = shape(x = key_states_15_cast_fp16)[name = string("op_887_shape_cast_fp16")]; + int32 gather_75_axis_0 = const()[name = string("gather_75_axis_0"), val = int32(0)]; + int32 gather_75_batch_dims_0 = const()[name = string("gather_75_batch_dims_0"), val = int32(0)]; + bool gather_75_validate_indices_0 = const()[name = string("gather_75_validate_indices_0"), val = bool(false)]; + string var_887_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_887_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_75_to_uint16 = const()[name = string("select_75_to_uint16"), val = uint16(2)]; + tensor var_887_shape_cast_fp16_to_uint16 = cast(dtype = var_887_shape_cast_fp16_to_uint16_dtype_0, x = var_887_shape_cast_fp16)[name = string("cast_210")]; + uint16 gather_75_cast_uint16 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75_to_uint16, validate_indices = gather_75_validate_indices_0, x = var_887_shape_cast_fp16_to_uint16)[name = string("gather_75_cast_uint16")]; + string gather_75_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_75_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_74_values0_0 = const()[name = string("concat_74_values0_0"), val = int32(1)]; + int32 concat_74_values1_0 = const()[name = string("concat_74_values1_0"), val = int32(1)]; + int32 concat_74_values2_0 = const()[name = string("concat_74_values2_0"), val = int32(0)]; + int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; + bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; + int32 gather_75_cast_uint16_to_int32 = cast(dtype = gather_75_cast_uint16_to_int32_dtype_0, x = gather_75_cast_uint16)[name = string("cast_209")]; + tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (concat_74_values0_0, concat_74_values1_0, concat_74_values2_0, gather_75_cast_uint16_to_int32))[name = string("concat_74")]; + tensor causal_mask_9_begin_0 = const()[name = string("causal_mask_9_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_9_end_mask_0 = const()[name = string("causal_mask_9_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_9_cast_fp16 = slice_by_index(begin = causal_mask_9_begin_0, end = concat_74, end_mask = causal_mask_9_end_mask_0, x = causal_mask)[name = string("causal_mask_9_cast_fp16")]; + tensor attn_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_9_cast_fp16, key = key_states_15_cast_fp16, query = query_states_15_cast_fp16, value = value_states_15_cast_fp16)[name = string("attn_output_13_cast_fp16")]; + tensor var_893_perm_0 = const()[name = string("op_893_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; + bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; + int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_208")]; + tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (gather_58, gather_59_cast_uint16_to_int32, var_81))[name = string("concat_75")]; + tensor var_893_cast_fp16 = transpose(perm = var_893_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_104")]; + tensor input_25_cast_fp16 = reshape(shape = concat_75, x = var_893_cast_fp16)[name = string("input_25_cast_fp16")]; + tensor model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22224960))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22390912))))[name = string("model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_24_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_24_cast_fp16")]; + tensor hidden_states_109_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = linear_24_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; + fp16 var_76_promoted_7_to_fp16 = const()[name = string("op_76_promoted_7_to_fp16"), val = fp16(0x1p+1)]; + tensor var_902_cast_fp16 = pow(x = hidden_states_109_cast_fp16, y = var_76_promoted_7_to_fp16)[name = string("op_902_cast_fp16")]; + tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; + bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; + tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_902_cast_fp16)[name = string("variance_15_cast_fp16")]; + fp16 var_905_to_fp16 = const()[name = string("op_905_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_906_cast_fp16 = add(x = variance_15_cast_fp16, y = var_905_to_fp16)[name = string("op_906_cast_fp16")]; + fp32 var_907_epsilon_0 = const()[name = string("op_907_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_907_cast_fp16 = rsqrt(epsilon = var_907_epsilon_0, x = var_906_cast_fp16)[name = string("op_907_cast_fp16")]; + tensor hidden_states_113_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = var_907_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; + tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22411712)))]; + tensor input_27_cast_fp16 = mul(x = model_model_layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_113_cast_fp16)[name = string("input_27_cast_fp16")]; + tensor model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22412928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22855360))))[name = string("model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_25_cast_fp16")]; + tensor var_919_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_919_cast_fp16")]; + tensor model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(22910720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23353152))))[name = string("model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_26_cast_fp16")]; + tensor input_31_cast_fp16 = mul(x = var_919_cast_fp16, y = linear_26_cast_fp16)[name = string("input_31_cast_fp16")]; + tensor model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23408512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23850944))))[name = string("model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_27_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized, x = input_31_cast_fp16)[name = string("linear_27_cast_fp16")]; + tensor hidden_states_119_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = linear_27_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; + fp16 var_76_promoted_8_to_fp16 = const()[name = string("op_76_promoted_8_to_fp16"), val = fp16(0x1p+1)]; + tensor var_932_cast_fp16 = pow(x = hidden_states_119_cast_fp16, y = var_76_promoted_8_to_fp16)[name = string("op_932_cast_fp16")]; + tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; + bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; + tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_932_cast_fp16)[name = string("variance_17_cast_fp16")]; + fp16 var_935_to_fp16 = const()[name = string("op_935_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_936_cast_fp16 = add(x = variance_17_cast_fp16, y = var_935_to_fp16)[name = string("op_936_cast_fp16")]; + fp32 var_937_epsilon_0 = const()[name = string("op_937_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_937_cast_fp16 = rsqrt(epsilon = var_937_epsilon_0, x = var_936_cast_fp16)[name = string("op_937_cast_fp16")]; + tensor hidden_states_123_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = var_937_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; + tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23906304)))]; + tensor hidden_states_127_cast_fp16 = mul(x = model_model_layers_4_input_layernorm_weight_to_fp16, y = hidden_states_123_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; + tensor var_948_shape_cast_fp16 = shape(x = hidden_states_127_cast_fp16)[name = string("op_948_shape_cast_fp16")]; + int32 gather_76 = const()[name = string("gather_76"), val = int32(1)]; + int32 gather_77_axis_0 = const()[name = string("gather_77_axis_0"), val = int32(0)]; + int32 gather_77_batch_dims_0 = const()[name = string("gather_77_batch_dims_0"), val = int32(0)]; + bool gather_77_validate_indices_0 = const()[name = string("gather_77_validate_indices_0"), val = bool(false)]; + string var_948_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_948_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_77_to_uint16 = const()[name = string("select_77_to_uint16"), val = uint16(1)]; + tensor var_948_shape_cast_fp16_to_uint16 = cast(dtype = var_948_shape_cast_fp16_to_uint16_dtype_0, x = var_948_shape_cast_fp16)[name = string("cast_207")]; + uint16 gather_77_cast_uint16 = gather(axis = gather_77_axis_0, batch_dims = gather_77_batch_dims_0, indices = select_77_to_uint16, validate_indices = gather_77_validate_indices_0, x = var_948_shape_cast_fp16_to_uint16)[name = string("gather_77_cast_uint16")]; + string gather_77_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_77_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(23907520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24073472))))[name = string("model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_28_cast_fp16")]; + tensor model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24094272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24149632))))[name = string("model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_29_cast_fp16")]; + tensor model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24156608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24211968))))[name = string("model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_30_cast_fp16")]; + tensor concat_76x = const()[name = string("concat_76x"), val = tensor([1, -1, 9, 64])]; + tensor var_957_cast_fp16 = reshape(shape = concat_76x, x = linear_28_cast_fp16)[name = string("op_957_cast_fp16")]; + tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_77x = const()[name = string("concat_77x"), val = tensor([1, -1, 3, 64])]; + tensor var_960_cast_fp16 = reshape(shape = concat_77x, x = linear_29_cast_fp16)[name = string("op_960_cast_fp16")]; + tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 3, 64])]; + tensor var_963_cast_fp16 = reshape(shape = concat_78x, x = linear_30_cast_fp16)[name = string("op_963_cast_fp16")]; + tensor v_state_9_perm_0 = const()[name = string("v_state_9_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_957_cast_fp16)[name = string("transpose_103")]; + tensor var_967_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_967_cast_fp16")]; + tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; + tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; + fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_978_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_978_cast_fp16")]; + bool var_980_interleave_0 = const()[name = string("op_980_interleave_0"), val = bool(false)]; + tensor var_980_cast_fp16 = concat(axis = var_81, interleave = var_980_interleave_0, values = (var_978_cast_fp16, x1_17_cast_fp16))[name = string("op_980_cast_fp16")]; + tensor var_981_cast_fp16 = mul(x = var_980_cast_fp16, y = sin_7_cast_fp16)[name = string("op_981_cast_fp16")]; + tensor query_states_19_cast_fp16 = add(x = var_967_cast_fp16, y = var_981_cast_fp16)[name = string("query_states_19_cast_fp16")]; + tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_960_cast_fp16)[name = string("transpose_102")]; + tensor var_983_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_983_cast_fp16")]; + tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; + tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; + fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_994_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_994_cast_fp16")]; + bool var_996_interleave_0 = const()[name = string("op_996_interleave_0"), val = bool(false)]; + tensor var_996_cast_fp16 = concat(axis = var_81, interleave = var_996_interleave_0, values = (var_994_cast_fp16, x1_19_cast_fp16))[name = string("op_996_cast_fp16")]; + tensor var_997_cast_fp16 = mul(x = var_996_cast_fp16, y = sin_7_cast_fp16)[name = string("op_997_cast_fp16")]; + tensor k_state_9_cast_fp16 = add(x = var_983_cast_fp16, y = var_997_cast_fp16)[name = string("k_state_9_cast_fp16")]; + tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; + tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; + tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; + tensor concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor([4])]; + int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; + bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; + tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, expand_dims_48, expand_dims_49, expand_dims_2, expand_dims_51))[name = string("concat_81")]; + tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_68_write_state")]; + tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_68")]; + tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_9_cast_fp16 = transpose(perm = v_state_9_perm_0, x = var_963_cast_fp16)[name = string("transpose_101")]; + tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_69_write_state")]; + tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_69")]; + tensor var_1020_begin_0 = const()[name = string("op_1020_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_1020_end_0 = const()[name = string("op_1020_end_0"), val = tensor([5, 1, 3, 2048, 64])]; + tensor var_1020_end_mask_0 = const()[name = string("op_1020_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1020_squeeze_mask_0 = const()[name = string("op_1020_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1020_cast_fp16 = slice_by_index(begin = var_1020_begin_0, end = var_1020_end_0, end_mask = var_1020_end_mask_0, squeeze_mask = var_1020_squeeze_mask_0, x = coreml_update_state_68)[name = string("op_1020_cast_fp16")]; + tensor var_1023_begin_0 = const()[name = string("op_1023_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1023_end_mask_0 = const()[name = string("op_1023_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1023_cast_fp16 = slice_by_index(begin = var_1023_begin_0, end = concat_11, end_mask = var_1023_end_mask_0, x = var_1020_cast_fp16)[name = string("op_1023_cast_fp16")]; + tensor var_1025_begin_0 = const()[name = string("op_1025_begin_0"), val = tensor([4, 0, 0, 0, 0])]; + tensor var_1025_end_0 = const()[name = string("op_1025_end_0"), val = tensor([5, 1, 3, 2048, 64])]; + tensor var_1025_end_mask_0 = const()[name = string("op_1025_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1025_squeeze_mask_0 = const()[name = string("op_1025_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1025_cast_fp16 = slice_by_index(begin = var_1025_begin_0, end = var_1025_end_0, end_mask = var_1025_end_mask_0, squeeze_mask = var_1025_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_1025_cast_fp16")]; + tensor var_1028_begin_0 = const()[name = string("op_1028_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1028_end_mask_0 = const()[name = string("op_1028_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1028_cast_fp16 = slice_by_index(begin = var_1028_begin_0, end = concat_11, end_mask = var_1028_end_mask_0, x = var_1025_cast_fp16)[name = string("op_1028_cast_fp16")]; + tensor var_1030_shape_cast_fp16 = shape(x = var_1023_cast_fp16)[name = string("op_1030_shape_cast_fp16")]; + int32 gather_85 = const()[name = string("gather_85"), val = int32(1)]; + int32 gather_86 = const()[name = string("gather_86"), val = int32(3)]; + int32 gather_87_axis_0 = const()[name = string("gather_87_axis_0"), val = int32(0)]; + int32 gather_87_batch_dims_0 = const()[name = string("gather_87_batch_dims_0"), val = int32(0)]; + bool gather_87_validate_indices_0 = const()[name = string("gather_87_validate_indices_0"), val = bool(false)]; + string var_1030_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1030_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_87_to_uint16 = const()[name = string("select_87_to_uint16"), val = uint16(2)]; + tensor var_1030_shape_cast_fp16_to_uint16 = cast(dtype = var_1030_shape_cast_fp16_to_uint16_dtype_0, x = var_1030_shape_cast_fp16)[name = string("cast_206")]; + uint16 gather_87_cast_uint16 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = select_87_to_uint16, validate_indices = gather_87_validate_indices_0, x = var_1030_shape_cast_fp16_to_uint16)[name = string("gather_87_cast_uint16")]; + string gather_87_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_87_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_88 = const()[name = string("gather_88"), val = int32(64)]; + tensor var_1037_axes_0 = const()[name = string("op_1037_axes_0"), val = tensor([2])]; + tensor var_1037_cast_fp16 = expand_dims(axes = var_1037_axes_0, x = var_1023_cast_fp16)[name = string("op_1037_cast_fp16")]; + tensor shape_97_cast_fp16 = shape(x = var_1037_cast_fp16)[name = string("shape_97_cast_fp16")]; + int32 concat_89_axis_0 = const()[name = string("concat_89_axis_0"), val = int32(0)]; + bool concat_89_interleave_0 = const()[name = string("concat_89_interleave_0"), val = bool(false)]; + int32 gather_87_cast_uint16_to_int32 = cast(dtype = gather_87_cast_uint16_to_int32_dtype_0, x = gather_87_cast_uint16)[name = string("cast_205")]; + tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_85, gather_86, var_85, gather_87_cast_uint16_to_int32, gather_88))[name = string("concat_89")]; + tensor real_div_8 = real_div(x = concat_89, y = shape_97_cast_fp16)[name = string("real_div_8")]; + tensor hidden_states_131_cast_fp16 = tile(reps = real_div_8, x = var_1037_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; + tensor concat_90x = const()[name = string("concat_90x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_19_cast_fp16 = reshape(shape = concat_90x, x = hidden_states_131_cast_fp16)[name = string("key_states_19_cast_fp16")]; + tensor var_1047_shape_cast_fp16 = shape(x = var_1028_cast_fp16)[name = string("op_1047_shape_cast_fp16")]; + int32 gather_89 = const()[name = string("gather_89"), val = int32(1)]; + int32 gather_90 = const()[name = string("gather_90"), val = int32(3)]; + int32 gather_91_axis_0 = const()[name = string("gather_91_axis_0"), val = int32(0)]; + int32 gather_91_batch_dims_0 = const()[name = string("gather_91_batch_dims_0"), val = int32(0)]; + bool gather_91_validate_indices_0 = const()[name = string("gather_91_validate_indices_0"), val = bool(false)]; + string var_1047_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1047_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_91_to_uint16 = const()[name = string("select_91_to_uint16"), val = uint16(2)]; + tensor var_1047_shape_cast_fp16_to_uint16 = cast(dtype = var_1047_shape_cast_fp16_to_uint16_dtype_0, x = var_1047_shape_cast_fp16)[name = string("cast_204")]; + uint16 gather_91_cast_uint16 = gather(axis = gather_91_axis_0, batch_dims = gather_91_batch_dims_0, indices = select_91_to_uint16, validate_indices = gather_91_validate_indices_0, x = var_1047_shape_cast_fp16_to_uint16)[name = string("gather_91_cast_uint16")]; + string gather_91_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_91_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_92 = const()[name = string("gather_92"), val = int32(64)]; + tensor var_1054_axes_0 = const()[name = string("op_1054_axes_0"), val = tensor([2])]; + tensor var_1054_cast_fp16 = expand_dims(axes = var_1054_axes_0, x = var_1028_cast_fp16)[name = string("op_1054_cast_fp16")]; + tensor shape_102_cast_fp16 = shape(x = var_1054_cast_fp16)[name = string("shape_102_cast_fp16")]; + int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; + bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; + int32 gather_91_cast_uint16_to_int32 = cast(dtype = gather_91_cast_uint16_to_int32_dtype_0, x = gather_91_cast_uint16)[name = string("cast_203")]; + tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (gather_89, gather_90, var_85, gather_91_cast_uint16_to_int32, gather_92))[name = string("concat_91")]; + tensor real_div_9 = real_div(x = concat_91, y = shape_102_cast_fp16)[name = string("real_div_9")]; + tensor hidden_states_135_cast_fp16 = tile(reps = real_div_9, x = var_1054_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; + tensor concat_92x = const()[name = string("concat_92x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_19_cast_fp16 = reshape(shape = concat_92x, x = hidden_states_135_cast_fp16)[name = string("value_states_19_cast_fp16")]; + tensor var_1064_shape_cast_fp16 = shape(x = key_states_19_cast_fp16)[name = string("op_1064_shape_cast_fp16")]; + int32 gather_93_axis_0 = const()[name = string("gather_93_axis_0"), val = int32(0)]; + int32 gather_93_batch_dims_0 = const()[name = string("gather_93_batch_dims_0"), val = int32(0)]; + bool gather_93_validate_indices_0 = const()[name = string("gather_93_validate_indices_0"), val = bool(false)]; + string var_1064_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1064_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_93_to_uint16 = const()[name = string("select_93_to_uint16"), val = uint16(2)]; + tensor var_1064_shape_cast_fp16_to_uint16 = cast(dtype = var_1064_shape_cast_fp16_to_uint16_dtype_0, x = var_1064_shape_cast_fp16)[name = string("cast_202")]; + uint16 gather_93_cast_uint16 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93_to_uint16, validate_indices = gather_93_validate_indices_0, x = var_1064_shape_cast_fp16_to_uint16)[name = string("gather_93_cast_uint16")]; + string gather_93_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_93_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = int32(1)]; + int32 concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = int32(1)]; + int32 concat_93_values2_0 = const()[name = string("concat_93_values2_0"), val = int32(0)]; + int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; + bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; + int32 gather_93_cast_uint16_to_int32 = cast(dtype = gather_93_cast_uint16_to_int32_dtype_0, x = gather_93_cast_uint16)[name = string("cast_201")]; + tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, concat_93_values2_0, gather_93_cast_uint16_to_int32))[name = string("concat_93")]; + tensor causal_mask_11_begin_0 = const()[name = string("causal_mask_11_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_11_end_mask_0 = const()[name = string("causal_mask_11_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_11_cast_fp16 = slice_by_index(begin = causal_mask_11_begin_0, end = concat_93, end_mask = causal_mask_11_end_mask_0, x = causal_mask)[name = string("causal_mask_11_cast_fp16")]; + tensor attn_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_11_cast_fp16, key = key_states_19_cast_fp16, query = query_states_19_cast_fp16, value = value_states_19_cast_fp16)[name = string("attn_output_17_cast_fp16")]; + tensor var_1070_perm_0 = const()[name = string("op_1070_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; + bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; + int32 gather_77_cast_uint16_to_int32 = cast(dtype = gather_77_cast_uint16_to_int32_dtype_0, x = gather_77_cast_uint16)[name = string("cast_200")]; + tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (gather_76, gather_77_cast_uint16_to_int32, var_81))[name = string("concat_94")]; + tensor var_1070_cast_fp16 = transpose(perm = var_1070_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_100")]; + tensor input_33_cast_fp16 = reshape(shape = concat_94, x = var_1070_cast_fp16)[name = string("input_33_cast_fp16")]; + tensor model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24218944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24384896))))[name = string("model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_31_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = string("linear_31_cast_fp16")]; + tensor hidden_states_139_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = linear_31_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; + fp16 var_76_promoted_9_to_fp16 = const()[name = string("op_76_promoted_9_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1079_cast_fp16 = pow(x = hidden_states_139_cast_fp16, y = var_76_promoted_9_to_fp16)[name = string("op_1079_cast_fp16")]; + tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; + bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; + tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1079_cast_fp16)[name = string("variance_19_cast_fp16")]; + fp16 var_1082_to_fp16 = const()[name = string("op_1082_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1083_cast_fp16 = add(x = variance_19_cast_fp16, y = var_1082_to_fp16)[name = string("op_1083_cast_fp16")]; + fp32 var_1084_epsilon_0 = const()[name = string("op_1084_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1084_cast_fp16 = rsqrt(epsilon = var_1084_epsilon_0, x = var_1083_cast_fp16)[name = string("op_1084_cast_fp16")]; + tensor hidden_states_143_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = var_1084_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; + tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24405696)))]; + tensor input_35_cast_fp16 = mul(x = model_model_layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_143_cast_fp16)[name = string("input_35_cast_fp16")]; + tensor model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24406912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24849344))))[name = string("model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_32_cast_fp16")]; + tensor var_1096_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1096_cast_fp16")]; + tensor model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(24904704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25347136))))[name = string("model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_33_cast_fp16")]; + tensor input_39_cast_fp16 = mul(x = var_1096_cast_fp16, y = linear_33_cast_fp16)[name = string("input_39_cast_fp16")]; + tensor model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25402496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25844928))))[name = string("model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_34_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = string("linear_34_cast_fp16")]; + tensor hidden_states_149_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = linear_34_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; + fp16 var_76_promoted_10_to_fp16 = const()[name = string("op_76_promoted_10_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1109_cast_fp16 = pow(x = hidden_states_149_cast_fp16, y = var_76_promoted_10_to_fp16)[name = string("op_1109_cast_fp16")]; + tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; + bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; + tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1109_cast_fp16)[name = string("variance_21_cast_fp16")]; + fp16 var_1112_to_fp16 = const()[name = string("op_1112_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1113_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1112_to_fp16)[name = string("op_1113_cast_fp16")]; + fp32 var_1114_epsilon_0 = const()[name = string("op_1114_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1114_cast_fp16 = rsqrt(epsilon = var_1114_epsilon_0, x = var_1113_cast_fp16)[name = string("op_1114_cast_fp16")]; + tensor hidden_states_153_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = var_1114_cast_fp16)[name = string("hidden_states_153_cast_fp16")]; + tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25900288)))]; + tensor hidden_states_157_cast_fp16 = mul(x = model_model_layers_5_input_layernorm_weight_to_fp16, y = hidden_states_153_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; + tensor var_1125_shape_cast_fp16 = shape(x = hidden_states_157_cast_fp16)[name = string("op_1125_shape_cast_fp16")]; + int32 gather_94 = const()[name = string("gather_94"), val = int32(1)]; + int32 gather_95_axis_0 = const()[name = string("gather_95_axis_0"), val = int32(0)]; + int32 gather_95_batch_dims_0 = const()[name = string("gather_95_batch_dims_0"), val = int32(0)]; + bool gather_95_validate_indices_0 = const()[name = string("gather_95_validate_indices_0"), val = bool(false)]; + string var_1125_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1125_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_95_to_uint16 = const()[name = string("select_95_to_uint16"), val = uint16(1)]; + tensor var_1125_shape_cast_fp16_to_uint16 = cast(dtype = var_1125_shape_cast_fp16_to_uint16_dtype_0, x = var_1125_shape_cast_fp16)[name = string("cast_199")]; + uint16 gather_95_cast_uint16 = gather(axis = gather_95_axis_0, batch_dims = gather_95_batch_dims_0, indices = select_95_to_uint16, validate_indices = gather_95_validate_indices_0, x = var_1125_shape_cast_fp16_to_uint16)[name = string("gather_95_cast_uint16")]; + string gather_95_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_95_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(25901504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26067456))))[name = string("model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_35_cast_fp16")]; + tensor model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26088256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26143616))))[name = string("model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_36_cast_fp16")]; + tensor model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26150592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26205952))))[name = string("model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_37_cast_fp16")]; + tensor concat_95x = const()[name = string("concat_95x"), val = tensor([1, -1, 9, 64])]; + tensor var_1134_cast_fp16 = reshape(shape = concat_95x, x = linear_35_cast_fp16)[name = string("op_1134_cast_fp16")]; + tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_96x = const()[name = string("concat_96x"), val = tensor([1, -1, 3, 64])]; + tensor var_1137_cast_fp16 = reshape(shape = concat_96x, x = linear_36_cast_fp16)[name = string("op_1137_cast_fp16")]; + tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_97x = const()[name = string("concat_97x"), val = tensor([1, -1, 3, 64])]; + tensor var_1140_cast_fp16 = reshape(shape = concat_97x, x = linear_37_cast_fp16)[name = string("op_1140_cast_fp16")]; + tensor v_state_11_perm_0 = const()[name = string("v_state_11_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_11_cast_fp16 = transpose(perm = q_11_perm_0, x = var_1134_cast_fp16)[name = string("transpose_99")]; + tensor var_1144_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1144_cast_fp16")]; + tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; + tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; + fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1155_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1155_cast_fp16")]; + bool var_1157_interleave_0 = const()[name = string("op_1157_interleave_0"), val = bool(false)]; + tensor var_1157_cast_fp16 = concat(axis = var_81, interleave = var_1157_interleave_0, values = (var_1155_cast_fp16, x1_21_cast_fp16))[name = string("op_1157_cast_fp16")]; + tensor var_1158_cast_fp16 = mul(x = var_1157_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1158_cast_fp16")]; + tensor query_states_23_cast_fp16 = add(x = var_1144_cast_fp16, y = var_1158_cast_fp16)[name = string("query_states_23_cast_fp16")]; + tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = var_1137_cast_fp16)[name = string("transpose_98")]; + tensor var_1160_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1160_cast_fp16")]; + tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; + tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; + fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1171_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1171_cast_fp16")]; + bool var_1173_interleave_0 = const()[name = string("op_1173_interleave_0"), val = bool(false)]; + tensor var_1173_cast_fp16 = concat(axis = var_81, interleave = var_1173_interleave_0, values = (var_1171_cast_fp16, x1_23_cast_fp16))[name = string("op_1173_cast_fp16")]; + tensor var_1174_cast_fp16 = mul(x = var_1173_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1174_cast_fp16")]; + tensor k_state_11_cast_fp16 = add(x = var_1160_cast_fp16, y = var_1174_cast_fp16)[name = string("k_state_11_cast_fp16")]; + tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([0])]; + tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; + tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; + tensor concat_100_values0_0 = const()[name = string("concat_100_values0_0"), val = tensor([5])]; + int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; + bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; + tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (concat_100_values0_0, expand_dims_60, expand_dims_61, expand_dims_2, expand_dims_63))[name = string("concat_100")]; + tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_70_write_state")]; + tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_70")]; + tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_11_cast_fp16 = transpose(perm = v_state_11_perm_0, x = var_1140_cast_fp16)[name = string("transpose_97")]; + tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_71_write_state")]; + tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_71")]; + tensor var_1197_begin_0 = const()[name = string("op_1197_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_1197_end_0 = const()[name = string("op_1197_end_0"), val = tensor([6, 1, 3, 2048, 64])]; + tensor var_1197_end_mask_0 = const()[name = string("op_1197_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1197_squeeze_mask_0 = const()[name = string("op_1197_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1197_cast_fp16 = slice_by_index(begin = var_1197_begin_0, end = var_1197_end_0, end_mask = var_1197_end_mask_0, squeeze_mask = var_1197_squeeze_mask_0, x = coreml_update_state_70)[name = string("op_1197_cast_fp16")]; + tensor var_1200_begin_0 = const()[name = string("op_1200_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1200_end_mask_0 = const()[name = string("op_1200_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1200_cast_fp16 = slice_by_index(begin = var_1200_begin_0, end = concat_11, end_mask = var_1200_end_mask_0, x = var_1197_cast_fp16)[name = string("op_1200_cast_fp16")]; + tensor var_1202_begin_0 = const()[name = string("op_1202_begin_0"), val = tensor([5, 0, 0, 0, 0])]; + tensor var_1202_end_0 = const()[name = string("op_1202_end_0"), val = tensor([6, 1, 3, 2048, 64])]; + tensor var_1202_end_mask_0 = const()[name = string("op_1202_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1202_squeeze_mask_0 = const()[name = string("op_1202_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1202_cast_fp16 = slice_by_index(begin = var_1202_begin_0, end = var_1202_end_0, end_mask = var_1202_end_mask_0, squeeze_mask = var_1202_squeeze_mask_0, x = coreml_update_state_71)[name = string("op_1202_cast_fp16")]; + tensor var_1205_begin_0 = const()[name = string("op_1205_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1205_end_mask_0 = const()[name = string("op_1205_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1205_cast_fp16 = slice_by_index(begin = var_1205_begin_0, end = concat_11, end_mask = var_1205_end_mask_0, x = var_1202_cast_fp16)[name = string("op_1205_cast_fp16")]; + tensor var_1207_shape_cast_fp16 = shape(x = var_1200_cast_fp16)[name = string("op_1207_shape_cast_fp16")]; + int32 gather_103 = const()[name = string("gather_103"), val = int32(1)]; + int32 gather_104 = const()[name = string("gather_104"), val = int32(3)]; + int32 gather_105_axis_0 = const()[name = string("gather_105_axis_0"), val = int32(0)]; + int32 gather_105_batch_dims_0 = const()[name = string("gather_105_batch_dims_0"), val = int32(0)]; + bool gather_105_validate_indices_0 = const()[name = string("gather_105_validate_indices_0"), val = bool(false)]; + string var_1207_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1207_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_105_to_uint16 = const()[name = string("select_105_to_uint16"), val = uint16(2)]; + tensor var_1207_shape_cast_fp16_to_uint16 = cast(dtype = var_1207_shape_cast_fp16_to_uint16_dtype_0, x = var_1207_shape_cast_fp16)[name = string("cast_198")]; + uint16 gather_105_cast_uint16 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105_to_uint16, validate_indices = gather_105_validate_indices_0, x = var_1207_shape_cast_fp16_to_uint16)[name = string("gather_105_cast_uint16")]; + string gather_105_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_105_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_106 = const()[name = string("gather_106"), val = int32(64)]; + tensor var_1214_axes_0 = const()[name = string("op_1214_axes_0"), val = tensor([2])]; + tensor var_1214_cast_fp16 = expand_dims(axes = var_1214_axes_0, x = var_1200_cast_fp16)[name = string("op_1214_cast_fp16")]; + tensor shape_117_cast_fp16 = shape(x = var_1214_cast_fp16)[name = string("shape_117_cast_fp16")]; + int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; + bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; + int32 gather_105_cast_uint16_to_int32 = cast(dtype = gather_105_cast_uint16_to_int32_dtype_0, x = gather_105_cast_uint16)[name = string("cast_197")]; + tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (gather_103, gather_104, var_85, gather_105_cast_uint16_to_int32, gather_106))[name = string("concat_108")]; + tensor real_div_10 = real_div(x = concat_108, y = shape_117_cast_fp16)[name = string("real_div_10")]; + tensor hidden_states_161_cast_fp16 = tile(reps = real_div_10, x = var_1214_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; + tensor concat_109x = const()[name = string("concat_109x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_23_cast_fp16 = reshape(shape = concat_109x, x = hidden_states_161_cast_fp16)[name = string("key_states_23_cast_fp16")]; + tensor var_1224_shape_cast_fp16 = shape(x = var_1205_cast_fp16)[name = string("op_1224_shape_cast_fp16")]; + int32 gather_107 = const()[name = string("gather_107"), val = int32(1)]; + int32 gather_108 = const()[name = string("gather_108"), val = int32(3)]; + int32 gather_109_axis_0 = const()[name = string("gather_109_axis_0"), val = int32(0)]; + int32 gather_109_batch_dims_0 = const()[name = string("gather_109_batch_dims_0"), val = int32(0)]; + bool gather_109_validate_indices_0 = const()[name = string("gather_109_validate_indices_0"), val = bool(false)]; + string var_1224_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1224_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_109_to_uint16 = const()[name = string("select_109_to_uint16"), val = uint16(2)]; + tensor var_1224_shape_cast_fp16_to_uint16 = cast(dtype = var_1224_shape_cast_fp16_to_uint16_dtype_0, x = var_1224_shape_cast_fp16)[name = string("cast_196")]; + uint16 gather_109_cast_uint16 = gather(axis = gather_109_axis_0, batch_dims = gather_109_batch_dims_0, indices = select_109_to_uint16, validate_indices = gather_109_validate_indices_0, x = var_1224_shape_cast_fp16_to_uint16)[name = string("gather_109_cast_uint16")]; + string gather_109_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_109_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_110 = const()[name = string("gather_110"), val = int32(64)]; + tensor var_1231_axes_0 = const()[name = string("op_1231_axes_0"), val = tensor([2])]; + tensor var_1231_cast_fp16 = expand_dims(axes = var_1231_axes_0, x = var_1205_cast_fp16)[name = string("op_1231_cast_fp16")]; + tensor shape_122_cast_fp16 = shape(x = var_1231_cast_fp16)[name = string("shape_122_cast_fp16")]; + int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; + bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; + int32 gather_109_cast_uint16_to_int32 = cast(dtype = gather_109_cast_uint16_to_int32_dtype_0, x = gather_109_cast_uint16)[name = string("cast_195")]; + tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (gather_107, gather_108, var_85, gather_109_cast_uint16_to_int32, gather_110))[name = string("concat_110")]; + tensor real_div_11 = real_div(x = concat_110, y = shape_122_cast_fp16)[name = string("real_div_11")]; + tensor hidden_states_165_cast_fp16 = tile(reps = real_div_11, x = var_1231_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; + tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_23_cast_fp16 = reshape(shape = concat_111x, x = hidden_states_165_cast_fp16)[name = string("value_states_23_cast_fp16")]; + tensor var_1241_shape_cast_fp16 = shape(x = key_states_23_cast_fp16)[name = string("op_1241_shape_cast_fp16")]; + int32 gather_111_axis_0 = const()[name = string("gather_111_axis_0"), val = int32(0)]; + int32 gather_111_batch_dims_0 = const()[name = string("gather_111_batch_dims_0"), val = int32(0)]; + bool gather_111_validate_indices_0 = const()[name = string("gather_111_validate_indices_0"), val = bool(false)]; + string var_1241_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1241_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_111_to_uint16 = const()[name = string("select_111_to_uint16"), val = uint16(2)]; + tensor var_1241_shape_cast_fp16_to_uint16 = cast(dtype = var_1241_shape_cast_fp16_to_uint16_dtype_0, x = var_1241_shape_cast_fp16)[name = string("cast_194")]; + uint16 gather_111_cast_uint16 = gather(axis = gather_111_axis_0, batch_dims = gather_111_batch_dims_0, indices = select_111_to_uint16, validate_indices = gather_111_validate_indices_0, x = var_1241_shape_cast_fp16_to_uint16)[name = string("gather_111_cast_uint16")]; + string gather_111_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_111_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_112_values0_0 = const()[name = string("concat_112_values0_0"), val = int32(1)]; + int32 concat_112_values1_0 = const()[name = string("concat_112_values1_0"), val = int32(1)]; + int32 concat_112_values2_0 = const()[name = string("concat_112_values2_0"), val = int32(0)]; + int32 concat_112_axis_0 = const()[name = string("concat_112_axis_0"), val = int32(0)]; + bool concat_112_interleave_0 = const()[name = string("concat_112_interleave_0"), val = bool(false)]; + int32 gather_111_cast_uint16_to_int32 = cast(dtype = gather_111_cast_uint16_to_int32_dtype_0, x = gather_111_cast_uint16)[name = string("cast_193")]; + tensor concat_112 = concat(axis = concat_112_axis_0, interleave = concat_112_interleave_0, values = (concat_112_values0_0, concat_112_values1_0, concat_112_values2_0, gather_111_cast_uint16_to_int32))[name = string("concat_112")]; + tensor causal_mask_13_begin_0 = const()[name = string("causal_mask_13_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_13_end_mask_0 = const()[name = string("causal_mask_13_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_13_cast_fp16 = slice_by_index(begin = causal_mask_13_begin_0, end = concat_112, end_mask = causal_mask_13_end_mask_0, x = causal_mask)[name = string("causal_mask_13_cast_fp16")]; + tensor attn_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_13_cast_fp16, key = key_states_23_cast_fp16, query = query_states_23_cast_fp16, value = value_states_23_cast_fp16)[name = string("attn_output_21_cast_fp16")]; + tensor var_1247_perm_0 = const()[name = string("op_1247_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_113_axis_0 = const()[name = string("concat_113_axis_0"), val = int32(0)]; + bool concat_113_interleave_0 = const()[name = string("concat_113_interleave_0"), val = bool(false)]; + int32 gather_95_cast_uint16_to_int32 = cast(dtype = gather_95_cast_uint16_to_int32_dtype_0, x = gather_95_cast_uint16)[name = string("cast_192")]; + tensor concat_113 = concat(axis = concat_113_axis_0, interleave = concat_113_interleave_0, values = (gather_94, gather_95_cast_uint16_to_int32, var_81))[name = string("concat_113")]; + tensor var_1247_cast_fp16 = transpose(perm = var_1247_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_96")]; + tensor input_41_cast_fp16 = reshape(shape = concat_113, x = var_1247_cast_fp16)[name = string("input_41_cast_fp16")]; + tensor model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26212928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26378880))))[name = string("model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_38_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_38_cast_fp16")]; + tensor hidden_states_169_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = linear_38_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; + fp16 var_76_promoted_11_to_fp16 = const()[name = string("op_76_promoted_11_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1256_cast_fp16 = pow(x = hidden_states_169_cast_fp16, y = var_76_promoted_11_to_fp16)[name = string("op_1256_cast_fp16")]; + tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; + bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; + tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1256_cast_fp16)[name = string("variance_23_cast_fp16")]; + fp16 var_1259_to_fp16 = const()[name = string("op_1259_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1260_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1259_to_fp16)[name = string("op_1260_cast_fp16")]; + fp32 var_1261_epsilon_0 = const()[name = string("op_1261_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1261_cast_fp16 = rsqrt(epsilon = var_1261_epsilon_0, x = var_1260_cast_fp16)[name = string("op_1261_cast_fp16")]; + tensor hidden_states_173_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = var_1261_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; + tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26399680)))]; + tensor input_43_cast_fp16 = mul(x = model_model_layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_173_cast_fp16)[name = string("input_43_cast_fp16")]; + tensor model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26400896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26843328))))[name = string("model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_39_cast_fp16")]; + tensor var_1273_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1273_cast_fp16")]; + tensor model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(26898688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27341120))))[name = string("model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_40_cast_fp16")]; + tensor input_47_cast_fp16 = mul(x = var_1273_cast_fp16, y = linear_40_cast_fp16)[name = string("input_47_cast_fp16")]; + tensor model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27396480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27838912))))[name = string("model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_41_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_41_cast_fp16")]; + tensor hidden_states_179_cast_fp16 = add(x = hidden_states_169_cast_fp16, y = linear_41_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; + fp16 var_76_promoted_12_to_fp16 = const()[name = string("op_76_promoted_12_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1286_cast_fp16 = pow(x = hidden_states_179_cast_fp16, y = var_76_promoted_12_to_fp16)[name = string("op_1286_cast_fp16")]; + tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; + bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; + tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1286_cast_fp16)[name = string("variance_25_cast_fp16")]; + fp16 var_1289_to_fp16 = const()[name = string("op_1289_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1290_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1289_to_fp16)[name = string("op_1290_cast_fp16")]; + fp32 var_1291_epsilon_0 = const()[name = string("op_1291_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1291_cast_fp16 = rsqrt(epsilon = var_1291_epsilon_0, x = var_1290_cast_fp16)[name = string("op_1291_cast_fp16")]; + tensor hidden_states_183_cast_fp16 = mul(x = hidden_states_179_cast_fp16, y = var_1291_cast_fp16)[name = string("hidden_states_183_cast_fp16")]; + tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27894272)))]; + tensor hidden_states_187_cast_fp16 = mul(x = model_model_layers_6_input_layernorm_weight_to_fp16, y = hidden_states_183_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; + tensor var_1302_shape_cast_fp16 = shape(x = hidden_states_187_cast_fp16)[name = string("op_1302_shape_cast_fp16")]; + int32 gather_112 = const()[name = string("gather_112"), val = int32(1)]; + int32 gather_113_axis_0 = const()[name = string("gather_113_axis_0"), val = int32(0)]; + int32 gather_113_batch_dims_0 = const()[name = string("gather_113_batch_dims_0"), val = int32(0)]; + bool gather_113_validate_indices_0 = const()[name = string("gather_113_validate_indices_0"), val = bool(false)]; + string var_1302_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1302_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_113_to_uint16 = const()[name = string("select_113_to_uint16"), val = uint16(1)]; + tensor var_1302_shape_cast_fp16_to_uint16 = cast(dtype = var_1302_shape_cast_fp16_to_uint16_dtype_0, x = var_1302_shape_cast_fp16)[name = string("cast_191")]; + uint16 gather_113_cast_uint16 = gather(axis = gather_113_axis_0, batch_dims = gather_113_batch_dims_0, indices = select_113_to_uint16, validate_indices = gather_113_validate_indices_0, x = var_1302_shape_cast_fp16_to_uint16)[name = string("gather_113_cast_uint16")]; + string gather_113_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_113_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(27895488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28061440))))[name = string("model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_42_cast_fp16")]; + tensor model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28082240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28137600))))[name = string("model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_43_cast_fp16")]; + tensor model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28144576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28199936))))[name = string("model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_44_cast_fp16")]; + tensor concat_114x = const()[name = string("concat_114x"), val = tensor([1, -1, 9, 64])]; + tensor var_1311_cast_fp16 = reshape(shape = concat_114x, x = linear_42_cast_fp16)[name = string("op_1311_cast_fp16")]; + tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_115x = const()[name = string("concat_115x"), val = tensor([1, -1, 3, 64])]; + tensor var_1314_cast_fp16 = reshape(shape = concat_115x, x = linear_43_cast_fp16)[name = string("op_1314_cast_fp16")]; + tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_116x = const()[name = string("concat_116x"), val = tensor([1, -1, 3, 64])]; + tensor var_1317_cast_fp16 = reshape(shape = concat_116x, x = linear_44_cast_fp16)[name = string("op_1317_cast_fp16")]; + tensor v_state_13_perm_0 = const()[name = string("v_state_13_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = var_1311_cast_fp16)[name = string("transpose_95")]; + tensor var_1321_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1321_cast_fp16")]; + tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; + tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; + fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1332_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_15_promoted_to_fp16)[name = string("op_1332_cast_fp16")]; + bool var_1334_interleave_0 = const()[name = string("op_1334_interleave_0"), val = bool(false)]; + tensor var_1334_cast_fp16 = concat(axis = var_81, interleave = var_1334_interleave_0, values = (var_1332_cast_fp16, x1_25_cast_fp16))[name = string("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = mul(x = var_1334_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1335_cast_fp16")]; + tensor query_states_27_cast_fp16 = add(x = var_1321_cast_fp16, y = var_1335_cast_fp16)[name = string("query_states_27_cast_fp16")]; + tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = var_1314_cast_fp16)[name = string("transpose_94")]; + tensor var_1337_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1337_cast_fp16")]; + tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; + tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; + fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1348_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1348_cast_fp16")]; + bool var_1350_interleave_0 = const()[name = string("op_1350_interleave_0"), val = bool(false)]; + tensor var_1350_cast_fp16 = concat(axis = var_81, interleave = var_1350_interleave_0, values = (var_1348_cast_fp16, x1_27_cast_fp16))[name = string("op_1350_cast_fp16")]; + tensor var_1351_cast_fp16 = mul(x = var_1350_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1351_cast_fp16")]; + tensor k_state_13_cast_fp16 = add(x = var_1337_cast_fp16, y = var_1351_cast_fp16)[name = string("k_state_13_cast_fp16")]; + tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; + tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; + tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; + tensor concat_119_values0_0 = const()[name = string("concat_119_values0_0"), val = tensor([6])]; + int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; + bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; + tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (concat_119_values0_0, expand_dims_72, expand_dims_73, expand_dims_2, expand_dims_75))[name = string("concat_119")]; + tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_72_write_state")]; + tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_72")]; + tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_13_cast_fp16 = transpose(perm = v_state_13_perm_0, x = var_1317_cast_fp16)[name = string("transpose_93")]; + tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_73_write_state")]; + tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_73")]; + tensor var_1374_begin_0 = const()[name = string("op_1374_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1374_end_0 = const()[name = string("op_1374_end_0"), val = tensor([7, 1, 3, 2048, 64])]; + tensor var_1374_end_mask_0 = const()[name = string("op_1374_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1374_squeeze_mask_0 = const()[name = string("op_1374_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1374_cast_fp16 = slice_by_index(begin = var_1374_begin_0, end = var_1374_end_0, end_mask = var_1374_end_mask_0, squeeze_mask = var_1374_squeeze_mask_0, x = coreml_update_state_72)[name = string("op_1374_cast_fp16")]; + tensor var_1377_begin_0 = const()[name = string("op_1377_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1377_end_mask_0 = const()[name = string("op_1377_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1377_cast_fp16 = slice_by_index(begin = var_1377_begin_0, end = concat_11, end_mask = var_1377_end_mask_0, x = var_1374_cast_fp16)[name = string("op_1377_cast_fp16")]; + tensor var_1379_begin_0 = const()[name = string("op_1379_begin_0"), val = tensor([6, 0, 0, 0, 0])]; + tensor var_1379_end_0 = const()[name = string("op_1379_end_0"), val = tensor([7, 1, 3, 2048, 64])]; + tensor var_1379_end_mask_0 = const()[name = string("op_1379_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1379_squeeze_mask_0 = const()[name = string("op_1379_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1379_cast_fp16 = slice_by_index(begin = var_1379_begin_0, end = var_1379_end_0, end_mask = var_1379_end_mask_0, squeeze_mask = var_1379_squeeze_mask_0, x = coreml_update_state_73)[name = string("op_1379_cast_fp16")]; + tensor var_1382_begin_0 = const()[name = string("op_1382_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1382_end_mask_0 = const()[name = string("op_1382_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1382_cast_fp16 = slice_by_index(begin = var_1382_begin_0, end = concat_11, end_mask = var_1382_end_mask_0, x = var_1379_cast_fp16)[name = string("op_1382_cast_fp16")]; + tensor var_1384_shape_cast_fp16 = shape(x = var_1377_cast_fp16)[name = string("op_1384_shape_cast_fp16")]; + int32 gather_121 = const()[name = string("gather_121"), val = int32(1)]; + int32 gather_122 = const()[name = string("gather_122"), val = int32(3)]; + int32 gather_123_axis_0 = const()[name = string("gather_123_axis_0"), val = int32(0)]; + int32 gather_123_batch_dims_0 = const()[name = string("gather_123_batch_dims_0"), val = int32(0)]; + bool gather_123_validate_indices_0 = const()[name = string("gather_123_validate_indices_0"), val = bool(false)]; + string var_1384_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1384_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_123_to_uint16 = const()[name = string("select_123_to_uint16"), val = uint16(2)]; + tensor var_1384_shape_cast_fp16_to_uint16 = cast(dtype = var_1384_shape_cast_fp16_to_uint16_dtype_0, x = var_1384_shape_cast_fp16)[name = string("cast_190")]; + uint16 gather_123_cast_uint16 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123_to_uint16, validate_indices = gather_123_validate_indices_0, x = var_1384_shape_cast_fp16_to_uint16)[name = string("gather_123_cast_uint16")]; + string gather_123_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_123_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_124 = const()[name = string("gather_124"), val = int32(64)]; + tensor var_1391_axes_0 = const()[name = string("op_1391_axes_0"), val = tensor([2])]; + tensor var_1391_cast_fp16 = expand_dims(axes = var_1391_axes_0, x = var_1377_cast_fp16)[name = string("op_1391_cast_fp16")]; + tensor shape_137_cast_fp16 = shape(x = var_1391_cast_fp16)[name = string("shape_137_cast_fp16")]; + int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; + bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; + int32 gather_123_cast_uint16_to_int32 = cast(dtype = gather_123_cast_uint16_to_int32_dtype_0, x = gather_123_cast_uint16)[name = string("cast_189")]; + tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (gather_121, gather_122, var_85, gather_123_cast_uint16_to_int32, gather_124))[name = string("concat_127")]; + tensor real_div_12 = real_div(x = concat_127, y = shape_137_cast_fp16)[name = string("real_div_12")]; + tensor hidden_states_191_cast_fp16 = tile(reps = real_div_12, x = var_1391_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; + tensor concat_128x = const()[name = string("concat_128x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_27_cast_fp16 = reshape(shape = concat_128x, x = hidden_states_191_cast_fp16)[name = string("key_states_27_cast_fp16")]; + tensor var_1401_shape_cast_fp16 = shape(x = var_1382_cast_fp16)[name = string("op_1401_shape_cast_fp16")]; + int32 gather_125 = const()[name = string("gather_125"), val = int32(1)]; + int32 gather_126 = const()[name = string("gather_126"), val = int32(3)]; + int32 gather_127_axis_0 = const()[name = string("gather_127_axis_0"), val = int32(0)]; + int32 gather_127_batch_dims_0 = const()[name = string("gather_127_batch_dims_0"), val = int32(0)]; + bool gather_127_validate_indices_0 = const()[name = string("gather_127_validate_indices_0"), val = bool(false)]; + string var_1401_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1401_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_127_to_uint16 = const()[name = string("select_127_to_uint16"), val = uint16(2)]; + tensor var_1401_shape_cast_fp16_to_uint16 = cast(dtype = var_1401_shape_cast_fp16_to_uint16_dtype_0, x = var_1401_shape_cast_fp16)[name = string("cast_188")]; + uint16 gather_127_cast_uint16 = gather(axis = gather_127_axis_0, batch_dims = gather_127_batch_dims_0, indices = select_127_to_uint16, validate_indices = gather_127_validate_indices_0, x = var_1401_shape_cast_fp16_to_uint16)[name = string("gather_127_cast_uint16")]; + string gather_127_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_127_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_128 = const()[name = string("gather_128"), val = int32(64)]; + tensor var_1408_axes_0 = const()[name = string("op_1408_axes_0"), val = tensor([2])]; + tensor var_1408_cast_fp16 = expand_dims(axes = var_1408_axes_0, x = var_1382_cast_fp16)[name = string("op_1408_cast_fp16")]; + tensor shape_142_cast_fp16 = shape(x = var_1408_cast_fp16)[name = string("shape_142_cast_fp16")]; + int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; + bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; + int32 gather_127_cast_uint16_to_int32 = cast(dtype = gather_127_cast_uint16_to_int32_dtype_0, x = gather_127_cast_uint16)[name = string("cast_187")]; + tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (gather_125, gather_126, var_85, gather_127_cast_uint16_to_int32, gather_128))[name = string("concat_129")]; + tensor real_div_13 = real_div(x = concat_129, y = shape_142_cast_fp16)[name = string("real_div_13")]; + tensor hidden_states_195_cast_fp16 = tile(reps = real_div_13, x = var_1408_cast_fp16)[name = string("hidden_states_195_cast_fp16")]; + tensor concat_130x = const()[name = string("concat_130x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_27_cast_fp16 = reshape(shape = concat_130x, x = hidden_states_195_cast_fp16)[name = string("value_states_27_cast_fp16")]; + tensor var_1418_shape_cast_fp16 = shape(x = key_states_27_cast_fp16)[name = string("op_1418_shape_cast_fp16")]; + int32 gather_129_axis_0 = const()[name = string("gather_129_axis_0"), val = int32(0)]; + int32 gather_129_batch_dims_0 = const()[name = string("gather_129_batch_dims_0"), val = int32(0)]; + bool gather_129_validate_indices_0 = const()[name = string("gather_129_validate_indices_0"), val = bool(false)]; + string var_1418_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1418_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_129_to_uint16 = const()[name = string("select_129_to_uint16"), val = uint16(2)]; + tensor var_1418_shape_cast_fp16_to_uint16 = cast(dtype = var_1418_shape_cast_fp16_to_uint16_dtype_0, x = var_1418_shape_cast_fp16)[name = string("cast_186")]; + uint16 gather_129_cast_uint16 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = select_129_to_uint16, validate_indices = gather_129_validate_indices_0, x = var_1418_shape_cast_fp16_to_uint16)[name = string("gather_129_cast_uint16")]; + string gather_129_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_129_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_131_values0_0 = const()[name = string("concat_131_values0_0"), val = int32(1)]; + int32 concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = int32(1)]; + int32 concat_131_values2_0 = const()[name = string("concat_131_values2_0"), val = int32(0)]; + int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; + bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; + int32 gather_129_cast_uint16_to_int32 = cast(dtype = gather_129_cast_uint16_to_int32_dtype_0, x = gather_129_cast_uint16)[name = string("cast_185")]; + tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (concat_131_values0_0, concat_131_values1_0, concat_131_values2_0, gather_129_cast_uint16_to_int32))[name = string("concat_131")]; + tensor causal_mask_15_begin_0 = const()[name = string("causal_mask_15_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_15_end_mask_0 = const()[name = string("causal_mask_15_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_15_cast_fp16 = slice_by_index(begin = causal_mask_15_begin_0, end = concat_131, end_mask = causal_mask_15_end_mask_0, x = causal_mask)[name = string("causal_mask_15_cast_fp16")]; + tensor attn_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_15_cast_fp16, key = key_states_27_cast_fp16, query = query_states_27_cast_fp16, value = value_states_27_cast_fp16)[name = string("attn_output_25_cast_fp16")]; + tensor var_1424_perm_0 = const()[name = string("op_1424_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; + bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; + int32 gather_113_cast_uint16_to_int32 = cast(dtype = gather_113_cast_uint16_to_int32_dtype_0, x = gather_113_cast_uint16)[name = string("cast_184")]; + tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (gather_112, gather_113_cast_uint16_to_int32, var_81))[name = string("concat_132")]; + tensor var_1424_cast_fp16 = transpose(perm = var_1424_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_92")]; + tensor input_49_cast_fp16 = reshape(shape = concat_132, x = var_1424_cast_fp16)[name = string("input_49_cast_fp16")]; + tensor model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28206912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28372864))))[name = string("model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = string("linear_45_cast_fp16")]; + tensor hidden_states_199_cast_fp16 = add(x = hidden_states_179_cast_fp16, y = linear_45_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; + fp16 var_76_promoted_13_to_fp16 = const()[name = string("op_76_promoted_13_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1433_cast_fp16 = pow(x = hidden_states_199_cast_fp16, y = var_76_promoted_13_to_fp16)[name = string("op_1433_cast_fp16")]; + tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; + bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; + tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1433_cast_fp16)[name = string("variance_27_cast_fp16")]; + fp16 var_1436_to_fp16 = const()[name = string("op_1436_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1437_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1436_to_fp16)[name = string("op_1437_cast_fp16")]; + fp32 var_1438_epsilon_0 = const()[name = string("op_1438_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1438_cast_fp16 = rsqrt(epsilon = var_1438_epsilon_0, x = var_1437_cast_fp16)[name = string("op_1438_cast_fp16")]; + tensor hidden_states_203_cast_fp16 = mul(x = hidden_states_199_cast_fp16, y = var_1438_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; + tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28393664)))]; + tensor input_51_cast_fp16 = mul(x = model_model_layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_203_cast_fp16)[name = string("input_51_cast_fp16")]; + tensor model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28394880))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28837312))))[name = string("model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_46_cast_fp16")]; + tensor var_1450_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1450_cast_fp16")]; + tensor model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(28892672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29335104))))[name = string("model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_47_cast_fp16")]; + tensor input_55_cast_fp16 = mul(x = var_1450_cast_fp16, y = linear_47_cast_fp16)[name = string("input_55_cast_fp16")]; + tensor model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29390464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29832896))))[name = string("model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_48_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = string("linear_48_cast_fp16")]; + tensor hidden_states_209_cast_fp16 = add(x = hidden_states_199_cast_fp16, y = linear_48_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; + fp16 var_76_promoted_14_to_fp16 = const()[name = string("op_76_promoted_14_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1463_cast_fp16 = pow(x = hidden_states_209_cast_fp16, y = var_76_promoted_14_to_fp16)[name = string("op_1463_cast_fp16")]; + tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; + bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; + tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1463_cast_fp16)[name = string("variance_29_cast_fp16")]; + fp16 var_1466_to_fp16 = const()[name = string("op_1466_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1467_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1466_to_fp16)[name = string("op_1467_cast_fp16")]; + fp32 var_1468_epsilon_0 = const()[name = string("op_1468_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1468_cast_fp16 = rsqrt(epsilon = var_1468_epsilon_0, x = var_1467_cast_fp16)[name = string("op_1468_cast_fp16")]; + tensor hidden_states_213_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = var_1468_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; + tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29888256)))]; + tensor hidden_states_217_cast_fp16 = mul(x = model_model_layers_7_input_layernorm_weight_to_fp16, y = hidden_states_213_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; + tensor var_1479_shape_cast_fp16 = shape(x = hidden_states_217_cast_fp16)[name = string("op_1479_shape_cast_fp16")]; + int32 gather_130 = const()[name = string("gather_130"), val = int32(1)]; + int32 gather_131_axis_0 = const()[name = string("gather_131_axis_0"), val = int32(0)]; + int32 gather_131_batch_dims_0 = const()[name = string("gather_131_batch_dims_0"), val = int32(0)]; + bool gather_131_validate_indices_0 = const()[name = string("gather_131_validate_indices_0"), val = bool(false)]; + string var_1479_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1479_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_131_to_uint16 = const()[name = string("select_131_to_uint16"), val = uint16(1)]; + tensor var_1479_shape_cast_fp16_to_uint16 = cast(dtype = var_1479_shape_cast_fp16_to_uint16_dtype_0, x = var_1479_shape_cast_fp16)[name = string("cast_183")]; + uint16 gather_131_cast_uint16 = gather(axis = gather_131_axis_0, batch_dims = gather_131_batch_dims_0, indices = select_131_to_uint16, validate_indices = gather_131_validate_indices_0, x = var_1479_shape_cast_fp16_to_uint16)[name = string("gather_131_cast_uint16")]; + string gather_131_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_131_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(29889472))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30055424))))[name = string("model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_49_cast_fp16")]; + tensor model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30076224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30131584))))[name = string("model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_50_cast_fp16")]; + tensor model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30138560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30193920))))[name = string("model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_51_cast_fp16")]; + tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 9, 64])]; + tensor var_1488_cast_fp16 = reshape(shape = concat_133x, x = linear_49_cast_fp16)[name = string("op_1488_cast_fp16")]; + tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_134x = const()[name = string("concat_134x"), val = tensor([1, -1, 3, 64])]; + tensor var_1491_cast_fp16 = reshape(shape = concat_134x, x = linear_50_cast_fp16)[name = string("op_1491_cast_fp16")]; + tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_135x = const()[name = string("concat_135x"), val = tensor([1, -1, 3, 64])]; + tensor var_1494_cast_fp16 = reshape(shape = concat_135x, x = linear_51_cast_fp16)[name = string("op_1494_cast_fp16")]; + tensor v_state_15_perm_0 = const()[name = string("v_state_15_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_15_cast_fp16 = transpose(perm = q_15_perm_0, x = var_1488_cast_fp16)[name = string("transpose_91")]; + tensor var_1498_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1498_cast_fp16")]; + tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; + tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; + fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1509_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1509_cast_fp16")]; + bool var_1511_interleave_0 = const()[name = string("op_1511_interleave_0"), val = bool(false)]; + tensor var_1511_cast_fp16 = concat(axis = var_81, interleave = var_1511_interleave_0, values = (var_1509_cast_fp16, x1_29_cast_fp16))[name = string("op_1511_cast_fp16")]; + tensor var_1512_cast_fp16 = mul(x = var_1511_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1512_cast_fp16")]; + tensor query_states_31_cast_fp16 = add(x = var_1498_cast_fp16, y = var_1512_cast_fp16)[name = string("query_states_31_cast_fp16")]; + tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = var_1491_cast_fp16)[name = string("transpose_90")]; + tensor var_1514_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1514_cast_fp16")]; + tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; + tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; + fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1525_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1525_cast_fp16")]; + bool var_1527_interleave_0 = const()[name = string("op_1527_interleave_0"), val = bool(false)]; + tensor var_1527_cast_fp16 = concat(axis = var_81, interleave = var_1527_interleave_0, values = (var_1525_cast_fp16, x1_31_cast_fp16))[name = string("op_1527_cast_fp16")]; + tensor var_1528_cast_fp16 = mul(x = var_1527_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1528_cast_fp16")]; + tensor k_state_15_cast_fp16 = add(x = var_1514_cast_fp16, y = var_1528_cast_fp16)[name = string("k_state_15_cast_fp16")]; + tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([0])]; + tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; + tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; + tensor concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor([7])]; + int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; + bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; + tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, expand_dims_84, expand_dims_85, expand_dims_2, expand_dims_87))[name = string("concat_138")]; + tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_74_write_state")]; + tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_74")]; + tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_15_cast_fp16 = transpose(perm = v_state_15_perm_0, x = var_1494_cast_fp16)[name = string("transpose_89")]; + tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_75_write_state")]; + tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_75")]; + tensor var_1551_begin_0 = const()[name = string("op_1551_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1551_end_0 = const()[name = string("op_1551_end_0"), val = tensor([8, 1, 3, 2048, 64])]; + tensor var_1551_end_mask_0 = const()[name = string("op_1551_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1551_squeeze_mask_0 = const()[name = string("op_1551_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1551_cast_fp16 = slice_by_index(begin = var_1551_begin_0, end = var_1551_end_0, end_mask = var_1551_end_mask_0, squeeze_mask = var_1551_squeeze_mask_0, x = coreml_update_state_74)[name = string("op_1551_cast_fp16")]; + tensor var_1554_begin_0 = const()[name = string("op_1554_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1554_end_mask_0 = const()[name = string("op_1554_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1554_cast_fp16 = slice_by_index(begin = var_1554_begin_0, end = concat_11, end_mask = var_1554_end_mask_0, x = var_1551_cast_fp16)[name = string("op_1554_cast_fp16")]; + tensor var_1556_begin_0 = const()[name = string("op_1556_begin_0"), val = tensor([7, 0, 0, 0, 0])]; + tensor var_1556_end_0 = const()[name = string("op_1556_end_0"), val = tensor([8, 1, 3, 2048, 64])]; + tensor var_1556_end_mask_0 = const()[name = string("op_1556_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1556_squeeze_mask_0 = const()[name = string("op_1556_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1556_cast_fp16 = slice_by_index(begin = var_1556_begin_0, end = var_1556_end_0, end_mask = var_1556_end_mask_0, squeeze_mask = var_1556_squeeze_mask_0, x = coreml_update_state_75)[name = string("op_1556_cast_fp16")]; + tensor var_1559_begin_0 = const()[name = string("op_1559_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1559_end_mask_0 = const()[name = string("op_1559_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1559_cast_fp16 = slice_by_index(begin = var_1559_begin_0, end = concat_11, end_mask = var_1559_end_mask_0, x = var_1556_cast_fp16)[name = string("op_1559_cast_fp16")]; + tensor var_1561_shape_cast_fp16 = shape(x = var_1554_cast_fp16)[name = string("op_1561_shape_cast_fp16")]; + int32 gather_139 = const()[name = string("gather_139"), val = int32(1)]; + int32 gather_140 = const()[name = string("gather_140"), val = int32(3)]; + int32 gather_141_axis_0 = const()[name = string("gather_141_axis_0"), val = int32(0)]; + int32 gather_141_batch_dims_0 = const()[name = string("gather_141_batch_dims_0"), val = int32(0)]; + bool gather_141_validate_indices_0 = const()[name = string("gather_141_validate_indices_0"), val = bool(false)]; + string var_1561_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1561_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_141_to_uint16 = const()[name = string("select_141_to_uint16"), val = uint16(2)]; + tensor var_1561_shape_cast_fp16_to_uint16 = cast(dtype = var_1561_shape_cast_fp16_to_uint16_dtype_0, x = var_1561_shape_cast_fp16)[name = string("cast_182")]; + uint16 gather_141_cast_uint16 = gather(axis = gather_141_axis_0, batch_dims = gather_141_batch_dims_0, indices = select_141_to_uint16, validate_indices = gather_141_validate_indices_0, x = var_1561_shape_cast_fp16_to_uint16)[name = string("gather_141_cast_uint16")]; + string gather_141_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_141_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_142 = const()[name = string("gather_142"), val = int32(64)]; + tensor var_1568_axes_0 = const()[name = string("op_1568_axes_0"), val = tensor([2])]; + tensor var_1568_cast_fp16 = expand_dims(axes = var_1568_axes_0, x = var_1554_cast_fp16)[name = string("op_1568_cast_fp16")]; + tensor shape_157_cast_fp16 = shape(x = var_1568_cast_fp16)[name = string("shape_157_cast_fp16")]; + int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; + bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; + int32 gather_141_cast_uint16_to_int32 = cast(dtype = gather_141_cast_uint16_to_int32_dtype_0, x = gather_141_cast_uint16)[name = string("cast_181")]; + tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (gather_139, gather_140, var_85, gather_141_cast_uint16_to_int32, gather_142))[name = string("concat_146")]; + tensor real_div_14 = real_div(x = concat_146, y = shape_157_cast_fp16)[name = string("real_div_14")]; + tensor hidden_states_221_cast_fp16 = tile(reps = real_div_14, x = var_1568_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; + tensor concat_147x = const()[name = string("concat_147x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_31_cast_fp16 = reshape(shape = concat_147x, x = hidden_states_221_cast_fp16)[name = string("key_states_31_cast_fp16")]; + tensor var_1578_shape_cast_fp16 = shape(x = var_1559_cast_fp16)[name = string("op_1578_shape_cast_fp16")]; + int32 gather_143 = const()[name = string("gather_143"), val = int32(1)]; + int32 gather_144 = const()[name = string("gather_144"), val = int32(3)]; + int32 gather_145_axis_0 = const()[name = string("gather_145_axis_0"), val = int32(0)]; + int32 gather_145_batch_dims_0 = const()[name = string("gather_145_batch_dims_0"), val = int32(0)]; + bool gather_145_validate_indices_0 = const()[name = string("gather_145_validate_indices_0"), val = bool(false)]; + string var_1578_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1578_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_145_to_uint16 = const()[name = string("select_145_to_uint16"), val = uint16(2)]; + tensor var_1578_shape_cast_fp16_to_uint16 = cast(dtype = var_1578_shape_cast_fp16_to_uint16_dtype_0, x = var_1578_shape_cast_fp16)[name = string("cast_180")]; + uint16 gather_145_cast_uint16 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145_to_uint16, validate_indices = gather_145_validate_indices_0, x = var_1578_shape_cast_fp16_to_uint16)[name = string("gather_145_cast_uint16")]; + string gather_145_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_145_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_146 = const()[name = string("gather_146"), val = int32(64)]; + tensor var_1585_axes_0 = const()[name = string("op_1585_axes_0"), val = tensor([2])]; + tensor var_1585_cast_fp16 = expand_dims(axes = var_1585_axes_0, x = var_1559_cast_fp16)[name = string("op_1585_cast_fp16")]; + tensor shape_162_cast_fp16 = shape(x = var_1585_cast_fp16)[name = string("shape_162_cast_fp16")]; + int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; + bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; + int32 gather_145_cast_uint16_to_int32 = cast(dtype = gather_145_cast_uint16_to_int32_dtype_0, x = gather_145_cast_uint16)[name = string("cast_179")]; + tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (gather_143, gather_144, var_85, gather_145_cast_uint16_to_int32, gather_146))[name = string("concat_148")]; + tensor real_div_15 = real_div(x = concat_148, y = shape_162_cast_fp16)[name = string("real_div_15")]; + tensor hidden_states_225_cast_fp16 = tile(reps = real_div_15, x = var_1585_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; + tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_31_cast_fp16 = reshape(shape = concat_149x, x = hidden_states_225_cast_fp16)[name = string("value_states_31_cast_fp16")]; + tensor var_1595_shape_cast_fp16 = shape(x = key_states_31_cast_fp16)[name = string("op_1595_shape_cast_fp16")]; + int32 gather_147_axis_0 = const()[name = string("gather_147_axis_0"), val = int32(0)]; + int32 gather_147_batch_dims_0 = const()[name = string("gather_147_batch_dims_0"), val = int32(0)]; + bool gather_147_validate_indices_0 = const()[name = string("gather_147_validate_indices_0"), val = bool(false)]; + string var_1595_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1595_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_147_to_uint16 = const()[name = string("select_147_to_uint16"), val = uint16(2)]; + tensor var_1595_shape_cast_fp16_to_uint16 = cast(dtype = var_1595_shape_cast_fp16_to_uint16_dtype_0, x = var_1595_shape_cast_fp16)[name = string("cast_178")]; + uint16 gather_147_cast_uint16 = gather(axis = gather_147_axis_0, batch_dims = gather_147_batch_dims_0, indices = select_147_to_uint16, validate_indices = gather_147_validate_indices_0, x = var_1595_shape_cast_fp16_to_uint16)[name = string("gather_147_cast_uint16")]; + string gather_147_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_147_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = int32(1)]; + int32 concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = int32(1)]; + int32 concat_150_values2_0 = const()[name = string("concat_150_values2_0"), val = int32(0)]; + int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; + bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; + int32 gather_147_cast_uint16_to_int32 = cast(dtype = gather_147_cast_uint16_to_int32_dtype_0, x = gather_147_cast_uint16)[name = string("cast_177")]; + tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, concat_150_values2_0, gather_147_cast_uint16_to_int32))[name = string("concat_150")]; + tensor causal_mask_17_begin_0 = const()[name = string("causal_mask_17_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_17_end_mask_0 = const()[name = string("causal_mask_17_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_17_cast_fp16 = slice_by_index(begin = causal_mask_17_begin_0, end = concat_150, end_mask = causal_mask_17_end_mask_0, x = causal_mask)[name = string("causal_mask_17_cast_fp16")]; + tensor attn_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_17_cast_fp16, key = key_states_31_cast_fp16, query = query_states_31_cast_fp16, value = value_states_31_cast_fp16)[name = string("attn_output_29_cast_fp16")]; + tensor var_1601_perm_0 = const()[name = string("op_1601_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; + bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; + int32 gather_131_cast_uint16_to_int32 = cast(dtype = gather_131_cast_uint16_to_int32_dtype_0, x = gather_131_cast_uint16)[name = string("cast_176")]; + tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (gather_130, gather_131_cast_uint16_to_int32, var_81))[name = string("concat_151")]; + tensor var_1601_cast_fp16 = transpose(perm = var_1601_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_88")]; + tensor input_57_cast_fp16 = reshape(shape = concat_151, x = var_1601_cast_fp16)[name = string("input_57_cast_fp16")]; + tensor model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30200896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30366848))))[name = string("model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_52_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = string("linear_52_cast_fp16")]; + tensor hidden_states_229_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = linear_52_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; + fp16 var_76_promoted_15_to_fp16 = const()[name = string("op_76_promoted_15_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1610_cast_fp16 = pow(x = hidden_states_229_cast_fp16, y = var_76_promoted_15_to_fp16)[name = string("op_1610_cast_fp16")]; + tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; + bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; + tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1610_cast_fp16)[name = string("variance_31_cast_fp16")]; + fp16 var_1613_to_fp16 = const()[name = string("op_1613_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1614_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1613_to_fp16)[name = string("op_1614_cast_fp16")]; + fp32 var_1615_epsilon_0 = const()[name = string("op_1615_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1615_cast_fp16 = rsqrt(epsilon = var_1615_epsilon_0, x = var_1614_cast_fp16)[name = string("op_1615_cast_fp16")]; + tensor hidden_states_233_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = var_1615_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; + tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30387648)))]; + tensor input_59_cast_fp16 = mul(x = model_model_layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_233_cast_fp16)[name = string("input_59_cast_fp16")]; + tensor model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30388864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30831296))))[name = string("model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_53_cast_fp16")]; + tensor var_1627_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1627_cast_fp16")]; + tensor model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(30886656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31329088))))[name = string("model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_54_cast_fp16")]; + tensor input_63_cast_fp16 = mul(x = var_1627_cast_fp16, y = linear_54_cast_fp16)[name = string("input_63_cast_fp16")]; + tensor model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31384448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31826880))))[name = string("model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = string("linear_55_cast_fp16")]; + tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = linear_55_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; + fp16 var_76_promoted_16_to_fp16 = const()[name = string("op_76_promoted_16_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1640_cast_fp16 = pow(x = hidden_states_239_cast_fp16, y = var_76_promoted_16_to_fp16)[name = string("op_1640_cast_fp16")]; + tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; + bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; + tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1640_cast_fp16)[name = string("variance_33_cast_fp16")]; + fp16 var_1643_to_fp16 = const()[name = string("op_1643_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1644_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1643_to_fp16)[name = string("op_1644_cast_fp16")]; + fp32 var_1645_epsilon_0 = const()[name = string("op_1645_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1645_cast_fp16 = rsqrt(epsilon = var_1645_epsilon_0, x = var_1644_cast_fp16)[name = string("op_1645_cast_fp16")]; + tensor hidden_states_243_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = var_1645_cast_fp16)[name = string("hidden_states_243_cast_fp16")]; + tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31882240)))]; + tensor hidden_states_247_cast_fp16 = mul(x = model_model_layers_8_input_layernorm_weight_to_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_247_cast_fp16")]; + tensor var_1656_shape_cast_fp16 = shape(x = hidden_states_247_cast_fp16)[name = string("op_1656_shape_cast_fp16")]; + int32 gather_148 = const()[name = string("gather_148"), val = int32(1)]; + int32 gather_149_axis_0 = const()[name = string("gather_149_axis_0"), val = int32(0)]; + int32 gather_149_batch_dims_0 = const()[name = string("gather_149_batch_dims_0"), val = int32(0)]; + bool gather_149_validate_indices_0 = const()[name = string("gather_149_validate_indices_0"), val = bool(false)]; + string var_1656_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1656_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_149_to_uint16 = const()[name = string("select_149_to_uint16"), val = uint16(1)]; + tensor var_1656_shape_cast_fp16_to_uint16 = cast(dtype = var_1656_shape_cast_fp16_to_uint16_dtype_0, x = var_1656_shape_cast_fp16)[name = string("cast_175")]; + uint16 gather_149_cast_uint16 = gather(axis = gather_149_axis_0, batch_dims = gather_149_batch_dims_0, indices = select_149_to_uint16, validate_indices = gather_149_validate_indices_0, x = var_1656_shape_cast_fp16_to_uint16)[name = string("gather_149_cast_uint16")]; + string gather_149_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_149_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(31883456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32049408))))[name = string("model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_56_cast_fp16")]; + tensor model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32070208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32125568))))[name = string("model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_57_cast_fp16")]; + tensor model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32132544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32187904))))[name = string("model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_58_cast_fp16")]; + tensor concat_152x = const()[name = string("concat_152x"), val = tensor([1, -1, 9, 64])]; + tensor var_1665_cast_fp16 = reshape(shape = concat_152x, x = linear_56_cast_fp16)[name = string("op_1665_cast_fp16")]; + tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_153x = const()[name = string("concat_153x"), val = tensor([1, -1, 3, 64])]; + tensor var_1668_cast_fp16 = reshape(shape = concat_153x, x = linear_57_cast_fp16)[name = string("op_1668_cast_fp16")]; + tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 3, 64])]; + tensor var_1671_cast_fp16 = reshape(shape = concat_154x, x = linear_58_cast_fp16)[name = string("op_1671_cast_fp16")]; + tensor v_state_17_perm_0 = const()[name = string("v_state_17_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_1665_cast_fp16)[name = string("transpose_87")]; + tensor var_1675_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1675_cast_fp16")]; + tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; + tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; + fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1686_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1686_cast_fp16")]; + bool var_1688_interleave_0 = const()[name = string("op_1688_interleave_0"), val = bool(false)]; + tensor var_1688_cast_fp16 = concat(axis = var_81, interleave = var_1688_interleave_0, values = (var_1686_cast_fp16, x1_33_cast_fp16))[name = string("op_1688_cast_fp16")]; + tensor var_1689_cast_fp16 = mul(x = var_1688_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1689_cast_fp16")]; + tensor query_states_35_cast_fp16 = add(x = var_1675_cast_fp16, y = var_1689_cast_fp16)[name = string("query_states_35_cast_fp16")]; + tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_1668_cast_fp16)[name = string("transpose_86")]; + tensor var_1691_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1691_cast_fp16")]; + tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; + tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; + fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1702_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1702_cast_fp16")]; + bool var_1704_interleave_0 = const()[name = string("op_1704_interleave_0"), val = bool(false)]; + tensor var_1704_cast_fp16 = concat(axis = var_81, interleave = var_1704_interleave_0, values = (var_1702_cast_fp16, x1_35_cast_fp16))[name = string("op_1704_cast_fp16")]; + tensor var_1705_cast_fp16 = mul(x = var_1704_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1705_cast_fp16")]; + tensor k_state_17_cast_fp16 = add(x = var_1691_cast_fp16, y = var_1705_cast_fp16)[name = string("k_state_17_cast_fp16")]; + tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; + tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; + tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; + tensor concat_157_values0_0 = const()[name = string("concat_157_values0_0"), val = tensor([8])]; + int32 concat_157_axis_0 = const()[name = string("concat_157_axis_0"), val = int32(0)]; + bool concat_157_interleave_0 = const()[name = string("concat_157_interleave_0"), val = bool(false)]; + tensor concat_157 = concat(axis = concat_157_axis_0, interleave = concat_157_interleave_0, values = (concat_157_values0_0, expand_dims_96, expand_dims_97, expand_dims_2, expand_dims_99))[name = string("concat_157")]; + tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_76_write_state")]; + tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_76")]; + tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_17_cast_fp16 = transpose(perm = v_state_17_perm_0, x = var_1671_cast_fp16)[name = string("transpose_85")]; + tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_77_write_state")]; + tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_77")]; + tensor var_1728_begin_0 = const()[name = string("op_1728_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1728_end_0 = const()[name = string("op_1728_end_0"), val = tensor([9, 1, 3, 2048, 64])]; + tensor var_1728_end_mask_0 = const()[name = string("op_1728_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1728_squeeze_mask_0 = const()[name = string("op_1728_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1728_cast_fp16 = slice_by_index(begin = var_1728_begin_0, end = var_1728_end_0, end_mask = var_1728_end_mask_0, squeeze_mask = var_1728_squeeze_mask_0, x = coreml_update_state_76)[name = string("op_1728_cast_fp16")]; + tensor var_1731_begin_0 = const()[name = string("op_1731_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1731_end_mask_0 = const()[name = string("op_1731_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1731_cast_fp16 = slice_by_index(begin = var_1731_begin_0, end = concat_11, end_mask = var_1731_end_mask_0, x = var_1728_cast_fp16)[name = string("op_1731_cast_fp16")]; + tensor var_1733_begin_0 = const()[name = string("op_1733_begin_0"), val = tensor([8, 0, 0, 0, 0])]; + tensor var_1733_end_0 = const()[name = string("op_1733_end_0"), val = tensor([9, 1, 3, 2048, 64])]; + tensor var_1733_end_mask_0 = const()[name = string("op_1733_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1733_squeeze_mask_0 = const()[name = string("op_1733_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1733_cast_fp16 = slice_by_index(begin = var_1733_begin_0, end = var_1733_end_0, end_mask = var_1733_end_mask_0, squeeze_mask = var_1733_squeeze_mask_0, x = coreml_update_state_77)[name = string("op_1733_cast_fp16")]; + tensor var_1736_begin_0 = const()[name = string("op_1736_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1736_end_mask_0 = const()[name = string("op_1736_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1736_cast_fp16 = slice_by_index(begin = var_1736_begin_0, end = concat_11, end_mask = var_1736_end_mask_0, x = var_1733_cast_fp16)[name = string("op_1736_cast_fp16")]; + tensor var_1738_shape_cast_fp16 = shape(x = var_1731_cast_fp16)[name = string("op_1738_shape_cast_fp16")]; + int32 gather_157 = const()[name = string("gather_157"), val = int32(1)]; + int32 gather_158 = const()[name = string("gather_158"), val = int32(3)]; + int32 gather_159_axis_0 = const()[name = string("gather_159_axis_0"), val = int32(0)]; + int32 gather_159_batch_dims_0 = const()[name = string("gather_159_batch_dims_0"), val = int32(0)]; + bool gather_159_validate_indices_0 = const()[name = string("gather_159_validate_indices_0"), val = bool(false)]; + string var_1738_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1738_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_159_to_uint16 = const()[name = string("select_159_to_uint16"), val = uint16(2)]; + tensor var_1738_shape_cast_fp16_to_uint16 = cast(dtype = var_1738_shape_cast_fp16_to_uint16_dtype_0, x = var_1738_shape_cast_fp16)[name = string("cast_174")]; + uint16 gather_159_cast_uint16 = gather(axis = gather_159_axis_0, batch_dims = gather_159_batch_dims_0, indices = select_159_to_uint16, validate_indices = gather_159_validate_indices_0, x = var_1738_shape_cast_fp16_to_uint16)[name = string("gather_159_cast_uint16")]; + string gather_159_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_159_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_160 = const()[name = string("gather_160"), val = int32(64)]; + tensor var_1745_axes_0 = const()[name = string("op_1745_axes_0"), val = tensor([2])]; + tensor var_1745_cast_fp16 = expand_dims(axes = var_1745_axes_0, x = var_1731_cast_fp16)[name = string("op_1745_cast_fp16")]; + tensor shape_177_cast_fp16 = shape(x = var_1745_cast_fp16)[name = string("shape_177_cast_fp16")]; + int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; + bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; + int32 gather_159_cast_uint16_to_int32 = cast(dtype = gather_159_cast_uint16_to_int32_dtype_0, x = gather_159_cast_uint16)[name = string("cast_173")]; + tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (gather_157, gather_158, var_85, gather_159_cast_uint16_to_int32, gather_160))[name = string("concat_165")]; + tensor real_div_16 = real_div(x = concat_165, y = shape_177_cast_fp16)[name = string("real_div_16")]; + tensor hidden_states_251_cast_fp16 = tile(reps = real_div_16, x = var_1745_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; + tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_35_cast_fp16 = reshape(shape = concat_166x, x = hidden_states_251_cast_fp16)[name = string("key_states_35_cast_fp16")]; + tensor var_1755_shape_cast_fp16 = shape(x = var_1736_cast_fp16)[name = string("op_1755_shape_cast_fp16")]; + int32 gather_161 = const()[name = string("gather_161"), val = int32(1)]; + int32 gather_162 = const()[name = string("gather_162"), val = int32(3)]; + int32 gather_163_axis_0 = const()[name = string("gather_163_axis_0"), val = int32(0)]; + int32 gather_163_batch_dims_0 = const()[name = string("gather_163_batch_dims_0"), val = int32(0)]; + bool gather_163_validate_indices_0 = const()[name = string("gather_163_validate_indices_0"), val = bool(false)]; + string var_1755_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1755_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_163_to_uint16 = const()[name = string("select_163_to_uint16"), val = uint16(2)]; + tensor var_1755_shape_cast_fp16_to_uint16 = cast(dtype = var_1755_shape_cast_fp16_to_uint16_dtype_0, x = var_1755_shape_cast_fp16)[name = string("cast_172")]; + uint16 gather_163_cast_uint16 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163_to_uint16, validate_indices = gather_163_validate_indices_0, x = var_1755_shape_cast_fp16_to_uint16)[name = string("gather_163_cast_uint16")]; + string gather_163_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_163_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_164 = const()[name = string("gather_164"), val = int32(64)]; + tensor var_1762_axes_0 = const()[name = string("op_1762_axes_0"), val = tensor([2])]; + tensor var_1762_cast_fp16 = expand_dims(axes = var_1762_axes_0, x = var_1736_cast_fp16)[name = string("op_1762_cast_fp16")]; + tensor shape_182_cast_fp16 = shape(x = var_1762_cast_fp16)[name = string("shape_182_cast_fp16")]; + int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; + bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; + int32 gather_163_cast_uint16_to_int32 = cast(dtype = gather_163_cast_uint16_to_int32_dtype_0, x = gather_163_cast_uint16)[name = string("cast_171")]; + tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (gather_161, gather_162, var_85, gather_163_cast_uint16_to_int32, gather_164))[name = string("concat_167")]; + tensor real_div_17 = real_div(x = concat_167, y = shape_182_cast_fp16)[name = string("real_div_17")]; + tensor hidden_states_255_cast_fp16 = tile(reps = real_div_17, x = var_1762_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; + tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_35_cast_fp16 = reshape(shape = concat_168x, x = hidden_states_255_cast_fp16)[name = string("value_states_35_cast_fp16")]; + tensor var_1772_shape_cast_fp16 = shape(x = key_states_35_cast_fp16)[name = string("op_1772_shape_cast_fp16")]; + int32 gather_165_axis_0 = const()[name = string("gather_165_axis_0"), val = int32(0)]; + int32 gather_165_batch_dims_0 = const()[name = string("gather_165_batch_dims_0"), val = int32(0)]; + bool gather_165_validate_indices_0 = const()[name = string("gather_165_validate_indices_0"), val = bool(false)]; + string var_1772_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1772_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_165_to_uint16 = const()[name = string("select_165_to_uint16"), val = uint16(2)]; + tensor var_1772_shape_cast_fp16_to_uint16 = cast(dtype = var_1772_shape_cast_fp16_to_uint16_dtype_0, x = var_1772_shape_cast_fp16)[name = string("cast_170")]; + uint16 gather_165_cast_uint16 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165_to_uint16, validate_indices = gather_165_validate_indices_0, x = var_1772_shape_cast_fp16_to_uint16)[name = string("gather_165_cast_uint16")]; + string gather_165_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_165_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_169_values0_0 = const()[name = string("concat_169_values0_0"), val = int32(1)]; + int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(1)]; + int32 concat_169_values2_0 = const()[name = string("concat_169_values2_0"), val = int32(0)]; + int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; + bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; + int32 gather_165_cast_uint16_to_int32 = cast(dtype = gather_165_cast_uint16_to_int32_dtype_0, x = gather_165_cast_uint16)[name = string("cast_169")]; + tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (concat_169_values0_0, concat_169_values1_0, concat_169_values2_0, gather_165_cast_uint16_to_int32))[name = string("concat_169")]; + tensor causal_mask_19_begin_0 = const()[name = string("causal_mask_19_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_19_end_mask_0 = const()[name = string("causal_mask_19_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_19_cast_fp16 = slice_by_index(begin = causal_mask_19_begin_0, end = concat_169, end_mask = causal_mask_19_end_mask_0, x = causal_mask)[name = string("causal_mask_19_cast_fp16")]; + tensor attn_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_19_cast_fp16, key = key_states_35_cast_fp16, query = query_states_35_cast_fp16, value = value_states_35_cast_fp16)[name = string("attn_output_33_cast_fp16")]; + tensor var_1778_perm_0 = const()[name = string("op_1778_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; + bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; + int32 gather_149_cast_uint16_to_int32 = cast(dtype = gather_149_cast_uint16_to_int32_dtype_0, x = gather_149_cast_uint16)[name = string("cast_168")]; + tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (gather_148, gather_149_cast_uint16_to_int32, var_81))[name = string("concat_170")]; + tensor var_1778_cast_fp16 = transpose(perm = var_1778_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_84")]; + tensor input_65_cast_fp16 = reshape(shape = concat_170, x = var_1778_cast_fp16)[name = string("input_65_cast_fp16")]; + tensor model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32194880))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32360832))))[name = string("model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_59_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized, x = input_65_cast_fp16)[name = string("linear_59_cast_fp16")]; + tensor hidden_states_259_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = linear_59_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; + fp16 var_76_promoted_17_to_fp16 = const()[name = string("op_76_promoted_17_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1787_cast_fp16 = pow(x = hidden_states_259_cast_fp16, y = var_76_promoted_17_to_fp16)[name = string("op_1787_cast_fp16")]; + tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; + bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; + tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1787_cast_fp16)[name = string("variance_35_cast_fp16")]; + fp16 var_1790_to_fp16 = const()[name = string("op_1790_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1791_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1790_to_fp16)[name = string("op_1791_cast_fp16")]; + fp32 var_1792_epsilon_0 = const()[name = string("op_1792_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1792_cast_fp16 = rsqrt(epsilon = var_1792_epsilon_0, x = var_1791_cast_fp16)[name = string("op_1792_cast_fp16")]; + tensor hidden_states_263_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = var_1792_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; + tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32381632)))]; + tensor input_67_cast_fp16 = mul(x = model_model_layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_263_cast_fp16)[name = string("input_67_cast_fp16")]; + tensor model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32382848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32825280))))[name = string("model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_60_cast_fp16")]; + tensor var_1804_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1804_cast_fp16")]; + tensor model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(32880640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33323072))))[name = string("model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_61_cast_fp16")]; + tensor input_71_cast_fp16 = mul(x = var_1804_cast_fp16, y = linear_61_cast_fp16)[name = string("input_71_cast_fp16")]; + tensor model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33378432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33820864))))[name = string("model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_62_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = string("linear_62_cast_fp16")]; + tensor hidden_states_269_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = linear_62_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; + fp16 var_76_promoted_18_to_fp16 = const()[name = string("op_76_promoted_18_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1817_cast_fp16 = pow(x = hidden_states_269_cast_fp16, y = var_76_promoted_18_to_fp16)[name = string("op_1817_cast_fp16")]; + tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; + bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; + tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1817_cast_fp16)[name = string("variance_37_cast_fp16")]; + fp16 var_1820_to_fp16 = const()[name = string("op_1820_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1821_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1820_to_fp16)[name = string("op_1821_cast_fp16")]; + fp32 var_1822_epsilon_0 = const()[name = string("op_1822_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1822_cast_fp16 = rsqrt(epsilon = var_1822_epsilon_0, x = var_1821_cast_fp16)[name = string("op_1822_cast_fp16")]; + tensor hidden_states_273_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = var_1822_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; + tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33876224)))]; + tensor hidden_states_277_cast_fp16 = mul(x = model_model_layers_9_input_layernorm_weight_to_fp16, y = hidden_states_273_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; + tensor var_1833_shape_cast_fp16 = shape(x = hidden_states_277_cast_fp16)[name = string("op_1833_shape_cast_fp16")]; + int32 gather_166 = const()[name = string("gather_166"), val = int32(1)]; + int32 gather_167_axis_0 = const()[name = string("gather_167_axis_0"), val = int32(0)]; + int32 gather_167_batch_dims_0 = const()[name = string("gather_167_batch_dims_0"), val = int32(0)]; + bool gather_167_validate_indices_0 = const()[name = string("gather_167_validate_indices_0"), val = bool(false)]; + string var_1833_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1833_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_167_to_uint16 = const()[name = string("select_167_to_uint16"), val = uint16(1)]; + tensor var_1833_shape_cast_fp16_to_uint16 = cast(dtype = var_1833_shape_cast_fp16_to_uint16_dtype_0, x = var_1833_shape_cast_fp16)[name = string("cast_167")]; + uint16 gather_167_cast_uint16 = gather(axis = gather_167_axis_0, batch_dims = gather_167_batch_dims_0, indices = select_167_to_uint16, validate_indices = gather_167_validate_indices_0, x = var_1833_shape_cast_fp16_to_uint16)[name = string("gather_167_cast_uint16")]; + string gather_167_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_167_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(33877440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34043392))))[name = string("model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_63_cast_fp16")]; + tensor model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34064192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34119552))))[name = string("model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_64_cast_fp16")]; + tensor model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34126528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34181888))))[name = string("model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_65_cast_fp16")]; + tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 9, 64])]; + tensor var_1842_cast_fp16 = reshape(shape = concat_171x, x = linear_63_cast_fp16)[name = string("op_1842_cast_fp16")]; + tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_172x = const()[name = string("concat_172x"), val = tensor([1, -1, 3, 64])]; + tensor var_1845_cast_fp16 = reshape(shape = concat_172x, x = linear_64_cast_fp16)[name = string("op_1845_cast_fp16")]; + tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_173x = const()[name = string("concat_173x"), val = tensor([1, -1, 3, 64])]; + tensor var_1848_cast_fp16 = reshape(shape = concat_173x, x = linear_65_cast_fp16)[name = string("op_1848_cast_fp16")]; + tensor v_state_19_perm_0 = const()[name = string("v_state_19_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_19_cast_fp16 = transpose(perm = q_19_perm_0, x = var_1842_cast_fp16)[name = string("transpose_83")]; + tensor var_1852_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1852_cast_fp16")]; + tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; + tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; + fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1863_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1863_cast_fp16")]; + bool var_1865_interleave_0 = const()[name = string("op_1865_interleave_0"), val = bool(false)]; + tensor var_1865_cast_fp16 = concat(axis = var_81, interleave = var_1865_interleave_0, values = (var_1863_cast_fp16, x1_37_cast_fp16))[name = string("op_1865_cast_fp16")]; + tensor var_1866_cast_fp16 = mul(x = var_1865_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1866_cast_fp16")]; + tensor query_states_39_cast_fp16 = add(x = var_1852_cast_fp16, y = var_1866_cast_fp16)[name = string("query_states_39_cast_fp16")]; + tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = var_1845_cast_fp16)[name = string("transpose_82")]; + tensor var_1868_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1868_cast_fp16")]; + tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; + tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; + fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_1879_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1879_cast_fp16")]; + bool var_1881_interleave_0 = const()[name = string("op_1881_interleave_0"), val = bool(false)]; + tensor var_1881_cast_fp16 = concat(axis = var_81, interleave = var_1881_interleave_0, values = (var_1879_cast_fp16, x1_39_cast_fp16))[name = string("op_1881_cast_fp16")]; + tensor var_1882_cast_fp16 = mul(x = var_1881_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1882_cast_fp16")]; + tensor k_state_19_cast_fp16 = add(x = var_1868_cast_fp16, y = var_1882_cast_fp16)[name = string("k_state_19_cast_fp16")]; + tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([0])]; + tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; + tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; + tensor concat_176_values0_0 = const()[name = string("concat_176_values0_0"), val = tensor([9])]; + int32 concat_176_axis_0 = const()[name = string("concat_176_axis_0"), val = int32(0)]; + bool concat_176_interleave_0 = const()[name = string("concat_176_interleave_0"), val = bool(false)]; + tensor concat_176 = concat(axis = concat_176_axis_0, interleave = concat_176_interleave_0, values = (concat_176_values0_0, expand_dims_108, expand_dims_109, expand_dims_2, expand_dims_111))[name = string("concat_176")]; + tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_78_write_state")]; + tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_78")]; + tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_19_cast_fp16 = transpose(perm = v_state_19_perm_0, x = var_1848_cast_fp16)[name = string("transpose_81")]; + tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_79_write_state")]; + tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_79")]; + tensor var_1905_begin_0 = const()[name = string("op_1905_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1905_end_0 = const()[name = string("op_1905_end_0"), val = tensor([10, 1, 3, 2048, 64])]; + tensor var_1905_end_mask_0 = const()[name = string("op_1905_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1905_squeeze_mask_0 = const()[name = string("op_1905_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1905_cast_fp16 = slice_by_index(begin = var_1905_begin_0, end = var_1905_end_0, end_mask = var_1905_end_mask_0, squeeze_mask = var_1905_squeeze_mask_0, x = coreml_update_state_78)[name = string("op_1905_cast_fp16")]; + tensor var_1908_begin_0 = const()[name = string("op_1908_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1908_end_mask_0 = const()[name = string("op_1908_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1908_cast_fp16 = slice_by_index(begin = var_1908_begin_0, end = concat_11, end_mask = var_1908_end_mask_0, x = var_1905_cast_fp16)[name = string("op_1908_cast_fp16")]; + tensor var_1910_begin_0 = const()[name = string("op_1910_begin_0"), val = tensor([9, 0, 0, 0, 0])]; + tensor var_1910_end_0 = const()[name = string("op_1910_end_0"), val = tensor([10, 1, 3, 2048, 64])]; + tensor var_1910_end_mask_0 = const()[name = string("op_1910_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_1910_squeeze_mask_0 = const()[name = string("op_1910_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_1910_cast_fp16 = slice_by_index(begin = var_1910_begin_0, end = var_1910_end_0, end_mask = var_1910_end_mask_0, squeeze_mask = var_1910_squeeze_mask_0, x = coreml_update_state_79)[name = string("op_1910_cast_fp16")]; + tensor var_1913_begin_0 = const()[name = string("op_1913_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1913_end_mask_0 = const()[name = string("op_1913_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_1913_cast_fp16 = slice_by_index(begin = var_1913_begin_0, end = concat_11, end_mask = var_1913_end_mask_0, x = var_1910_cast_fp16)[name = string("op_1913_cast_fp16")]; + tensor var_1915_shape_cast_fp16 = shape(x = var_1908_cast_fp16)[name = string("op_1915_shape_cast_fp16")]; + int32 gather_175 = const()[name = string("gather_175"), val = int32(1)]; + int32 gather_176 = const()[name = string("gather_176"), val = int32(3)]; + int32 gather_177_axis_0 = const()[name = string("gather_177_axis_0"), val = int32(0)]; + int32 gather_177_batch_dims_0 = const()[name = string("gather_177_batch_dims_0"), val = int32(0)]; + bool gather_177_validate_indices_0 = const()[name = string("gather_177_validate_indices_0"), val = bool(false)]; + string var_1915_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1915_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_177_to_uint16 = const()[name = string("select_177_to_uint16"), val = uint16(2)]; + tensor var_1915_shape_cast_fp16_to_uint16 = cast(dtype = var_1915_shape_cast_fp16_to_uint16_dtype_0, x = var_1915_shape_cast_fp16)[name = string("cast_166")]; + uint16 gather_177_cast_uint16 = gather(axis = gather_177_axis_0, batch_dims = gather_177_batch_dims_0, indices = select_177_to_uint16, validate_indices = gather_177_validate_indices_0, x = var_1915_shape_cast_fp16_to_uint16)[name = string("gather_177_cast_uint16")]; + string gather_177_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_177_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_178 = const()[name = string("gather_178"), val = int32(64)]; + tensor var_1922_axes_0 = const()[name = string("op_1922_axes_0"), val = tensor([2])]; + tensor var_1922_cast_fp16 = expand_dims(axes = var_1922_axes_0, x = var_1908_cast_fp16)[name = string("op_1922_cast_fp16")]; + tensor shape_197_cast_fp16 = shape(x = var_1922_cast_fp16)[name = string("shape_197_cast_fp16")]; + int32 concat_184_axis_0 = const()[name = string("concat_184_axis_0"), val = int32(0)]; + bool concat_184_interleave_0 = const()[name = string("concat_184_interleave_0"), val = bool(false)]; + int32 gather_177_cast_uint16_to_int32 = cast(dtype = gather_177_cast_uint16_to_int32_dtype_0, x = gather_177_cast_uint16)[name = string("cast_165")]; + tensor concat_184 = concat(axis = concat_184_axis_0, interleave = concat_184_interleave_0, values = (gather_175, gather_176, var_85, gather_177_cast_uint16_to_int32, gather_178))[name = string("concat_184")]; + tensor real_div_18 = real_div(x = concat_184, y = shape_197_cast_fp16)[name = string("real_div_18")]; + tensor hidden_states_281_cast_fp16 = tile(reps = real_div_18, x = var_1922_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; + tensor concat_185x = const()[name = string("concat_185x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_39_cast_fp16 = reshape(shape = concat_185x, x = hidden_states_281_cast_fp16)[name = string("key_states_39_cast_fp16")]; + tensor var_1932_shape_cast_fp16 = shape(x = var_1913_cast_fp16)[name = string("op_1932_shape_cast_fp16")]; + int32 gather_179 = const()[name = string("gather_179"), val = int32(1)]; + int32 gather_180 = const()[name = string("gather_180"), val = int32(3)]; + int32 gather_181_axis_0 = const()[name = string("gather_181_axis_0"), val = int32(0)]; + int32 gather_181_batch_dims_0 = const()[name = string("gather_181_batch_dims_0"), val = int32(0)]; + bool gather_181_validate_indices_0 = const()[name = string("gather_181_validate_indices_0"), val = bool(false)]; + string var_1932_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1932_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_181_to_uint16 = const()[name = string("select_181_to_uint16"), val = uint16(2)]; + tensor var_1932_shape_cast_fp16_to_uint16 = cast(dtype = var_1932_shape_cast_fp16_to_uint16_dtype_0, x = var_1932_shape_cast_fp16)[name = string("cast_164")]; + uint16 gather_181_cast_uint16 = gather(axis = gather_181_axis_0, batch_dims = gather_181_batch_dims_0, indices = select_181_to_uint16, validate_indices = gather_181_validate_indices_0, x = var_1932_shape_cast_fp16_to_uint16)[name = string("gather_181_cast_uint16")]; + string gather_181_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_181_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_182 = const()[name = string("gather_182"), val = int32(64)]; + tensor var_1939_axes_0 = const()[name = string("op_1939_axes_0"), val = tensor([2])]; + tensor var_1939_cast_fp16 = expand_dims(axes = var_1939_axes_0, x = var_1913_cast_fp16)[name = string("op_1939_cast_fp16")]; + tensor shape_202_cast_fp16 = shape(x = var_1939_cast_fp16)[name = string("shape_202_cast_fp16")]; + int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; + bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; + int32 gather_181_cast_uint16_to_int32 = cast(dtype = gather_181_cast_uint16_to_int32_dtype_0, x = gather_181_cast_uint16)[name = string("cast_163")]; + tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (gather_179, gather_180, var_85, gather_181_cast_uint16_to_int32, gather_182))[name = string("concat_186")]; + tensor real_div_19 = real_div(x = concat_186, y = shape_202_cast_fp16)[name = string("real_div_19")]; + tensor hidden_states_285_cast_fp16 = tile(reps = real_div_19, x = var_1939_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; + tensor concat_187x = const()[name = string("concat_187x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_39_cast_fp16 = reshape(shape = concat_187x, x = hidden_states_285_cast_fp16)[name = string("value_states_39_cast_fp16")]; + tensor var_1949_shape_cast_fp16 = shape(x = key_states_39_cast_fp16)[name = string("op_1949_shape_cast_fp16")]; + int32 gather_183_axis_0 = const()[name = string("gather_183_axis_0"), val = int32(0)]; + int32 gather_183_batch_dims_0 = const()[name = string("gather_183_batch_dims_0"), val = int32(0)]; + bool gather_183_validate_indices_0 = const()[name = string("gather_183_validate_indices_0"), val = bool(false)]; + string var_1949_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1949_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_183_to_uint16 = const()[name = string("select_183_to_uint16"), val = uint16(2)]; + tensor var_1949_shape_cast_fp16_to_uint16 = cast(dtype = var_1949_shape_cast_fp16_to_uint16_dtype_0, x = var_1949_shape_cast_fp16)[name = string("cast_162")]; + uint16 gather_183_cast_uint16 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183_to_uint16, validate_indices = gather_183_validate_indices_0, x = var_1949_shape_cast_fp16_to_uint16)[name = string("gather_183_cast_uint16")]; + string gather_183_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_183_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_188_values0_0 = const()[name = string("concat_188_values0_0"), val = int32(1)]; + int32 concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = int32(1)]; + int32 concat_188_values2_0 = const()[name = string("concat_188_values2_0"), val = int32(0)]; + int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; + bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; + int32 gather_183_cast_uint16_to_int32 = cast(dtype = gather_183_cast_uint16_to_int32_dtype_0, x = gather_183_cast_uint16)[name = string("cast_161")]; + tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (concat_188_values0_0, concat_188_values1_0, concat_188_values2_0, gather_183_cast_uint16_to_int32))[name = string("concat_188")]; + tensor causal_mask_21_begin_0 = const()[name = string("causal_mask_21_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_21_end_mask_0 = const()[name = string("causal_mask_21_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_21_cast_fp16 = slice_by_index(begin = causal_mask_21_begin_0, end = concat_188, end_mask = causal_mask_21_end_mask_0, x = causal_mask)[name = string("causal_mask_21_cast_fp16")]; + tensor attn_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_21_cast_fp16, key = key_states_39_cast_fp16, query = query_states_39_cast_fp16, value = value_states_39_cast_fp16)[name = string("attn_output_37_cast_fp16")]; + tensor var_1955_perm_0 = const()[name = string("op_1955_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)]; + bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)]; + int32 gather_167_cast_uint16_to_int32 = cast(dtype = gather_167_cast_uint16_to_int32_dtype_0, x = gather_167_cast_uint16)[name = string("cast_160")]; + tensor concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (gather_166, gather_167_cast_uint16_to_int32, var_81))[name = string("concat_189")]; + tensor var_1955_cast_fp16 = transpose(perm = var_1955_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_80")]; + tensor input_73_cast_fp16 = reshape(shape = concat_189, x = var_1955_cast_fp16)[name = string("input_73_cast_fp16")]; + tensor model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34188864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34354816))))[name = string("model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_66_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized, x = input_73_cast_fp16)[name = string("linear_66_cast_fp16")]; + tensor hidden_states_289_cast_fp16 = add(x = hidden_states_269_cast_fp16, y = linear_66_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; + fp16 var_76_promoted_19_to_fp16 = const()[name = string("op_76_promoted_19_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1964_cast_fp16 = pow(x = hidden_states_289_cast_fp16, y = var_76_promoted_19_to_fp16)[name = string("op_1964_cast_fp16")]; + tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; + bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; + tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1964_cast_fp16)[name = string("variance_39_cast_fp16")]; + fp16 var_1967_to_fp16 = const()[name = string("op_1967_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1968_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1967_to_fp16)[name = string("op_1968_cast_fp16")]; + fp32 var_1969_epsilon_0 = const()[name = string("op_1969_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1969_cast_fp16 = rsqrt(epsilon = var_1969_epsilon_0, x = var_1968_cast_fp16)[name = string("op_1969_cast_fp16")]; + tensor hidden_states_293_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = var_1969_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; + tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34375616)))]; + tensor input_75_cast_fp16 = mul(x = model_model_layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_293_cast_fp16)[name = string("input_75_cast_fp16")]; + tensor model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34376832))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34819264))))[name = string("model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_67_cast_fp16")]; + tensor var_1981_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_1981_cast_fp16")]; + tensor model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(34874624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35317056))))[name = string("model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_68_cast_fp16")]; + tensor input_79_cast_fp16 = mul(x = var_1981_cast_fp16, y = linear_68_cast_fp16)[name = string("input_79_cast_fp16")]; + tensor model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35372416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35814848))))[name = string("model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_69_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized, x = input_79_cast_fp16)[name = string("linear_69_cast_fp16")]; + tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = linear_69_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; + fp16 var_76_promoted_20_to_fp16 = const()[name = string("op_76_promoted_20_to_fp16"), val = fp16(0x1p+1)]; + tensor var_1994_cast_fp16 = pow(x = hidden_states_299_cast_fp16, y = var_76_promoted_20_to_fp16)[name = string("op_1994_cast_fp16")]; + tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; + bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; + tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1994_cast_fp16)[name = string("variance_41_cast_fp16")]; + fp16 var_1997_to_fp16 = const()[name = string("op_1997_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_1998_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1997_to_fp16)[name = string("op_1998_cast_fp16")]; + fp32 var_1999_epsilon_0 = const()[name = string("op_1999_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_1999_cast_fp16 = rsqrt(epsilon = var_1999_epsilon_0, x = var_1998_cast_fp16)[name = string("op_1999_cast_fp16")]; + tensor hidden_states_303_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = var_1999_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; + tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35870208)))]; + tensor hidden_states_307_cast_fp16 = mul(x = model_model_layers_10_input_layernorm_weight_to_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; + tensor var_2010_shape_cast_fp16 = shape(x = hidden_states_307_cast_fp16)[name = string("op_2010_shape_cast_fp16")]; + int32 gather_184 = const()[name = string("gather_184"), val = int32(1)]; + int32 gather_185_axis_0 = const()[name = string("gather_185_axis_0"), val = int32(0)]; + int32 gather_185_batch_dims_0 = const()[name = string("gather_185_batch_dims_0"), val = int32(0)]; + bool gather_185_validate_indices_0 = const()[name = string("gather_185_validate_indices_0"), val = bool(false)]; + string var_2010_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2010_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_185_to_uint16 = const()[name = string("select_185_to_uint16"), val = uint16(1)]; + tensor var_2010_shape_cast_fp16_to_uint16 = cast(dtype = var_2010_shape_cast_fp16_to_uint16_dtype_0, x = var_2010_shape_cast_fp16)[name = string("cast_159")]; + uint16 gather_185_cast_uint16 = gather(axis = gather_185_axis_0, batch_dims = gather_185_batch_dims_0, indices = select_185_to_uint16, validate_indices = gather_185_validate_indices_0, x = var_2010_shape_cast_fp16_to_uint16)[name = string("gather_185_cast_uint16")]; + string gather_185_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_185_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(35871424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36037376))))[name = string("model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_70_cast_fp16")]; + tensor model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36058176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36113536))))[name = string("model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_71_cast_fp16")]; + tensor model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36120512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36175872))))[name = string("model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_72_cast_fp16")]; + tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 9, 64])]; + tensor var_2019_cast_fp16 = reshape(shape = concat_190x, x = linear_70_cast_fp16)[name = string("op_2019_cast_fp16")]; + tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_191x = const()[name = string("concat_191x"), val = tensor([1, -1, 3, 64])]; + tensor var_2022_cast_fp16 = reshape(shape = concat_191x, x = linear_71_cast_fp16)[name = string("op_2022_cast_fp16")]; + tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_192x = const()[name = string("concat_192x"), val = tensor([1, -1, 3, 64])]; + tensor var_2025_cast_fp16 = reshape(shape = concat_192x, x = linear_72_cast_fp16)[name = string("op_2025_cast_fp16")]; + tensor v_state_21_perm_0 = const()[name = string("v_state_21_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = var_2019_cast_fp16)[name = string("transpose_79")]; + tensor var_2029_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2029_cast_fp16")]; + tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; + tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; + fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2040_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_2040_cast_fp16")]; + bool var_2042_interleave_0 = const()[name = string("op_2042_interleave_0"), val = bool(false)]; + tensor var_2042_cast_fp16 = concat(axis = var_81, interleave = var_2042_interleave_0, values = (var_2040_cast_fp16, x1_41_cast_fp16))[name = string("op_2042_cast_fp16")]; + tensor var_2043_cast_fp16 = mul(x = var_2042_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2043_cast_fp16")]; + tensor query_states_43_cast_fp16 = add(x = var_2029_cast_fp16, y = var_2043_cast_fp16)[name = string("query_states_43_cast_fp16")]; + tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = var_2022_cast_fp16)[name = string("transpose_78")]; + tensor var_2045_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2045_cast_fp16")]; + tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; + tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; + fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2056_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_2056_cast_fp16")]; + bool var_2058_interleave_0 = const()[name = string("op_2058_interleave_0"), val = bool(false)]; + tensor var_2058_cast_fp16 = concat(axis = var_81, interleave = var_2058_interleave_0, values = (var_2056_cast_fp16, x1_43_cast_fp16))[name = string("op_2058_cast_fp16")]; + tensor var_2059_cast_fp16 = mul(x = var_2058_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2059_cast_fp16")]; + tensor k_state_21_cast_fp16 = add(x = var_2045_cast_fp16, y = var_2059_cast_fp16)[name = string("k_state_21_cast_fp16")]; + tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([0])]; + tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; + tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; + tensor concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor([10])]; + int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; + bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; + tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, expand_dims_120, expand_dims_121, expand_dims_2, expand_dims_123))[name = string("concat_195")]; + tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_80_write_state")]; + tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_80")]; + tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_21_cast_fp16 = transpose(perm = v_state_21_perm_0, x = var_2025_cast_fp16)[name = string("transpose_77")]; + tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_81_write_state")]; + tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_81")]; + tensor var_2082_begin_0 = const()[name = string("op_2082_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_2082_end_0 = const()[name = string("op_2082_end_0"), val = tensor([11, 1, 3, 2048, 64])]; + tensor var_2082_end_mask_0 = const()[name = string("op_2082_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2082_squeeze_mask_0 = const()[name = string("op_2082_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2082_cast_fp16 = slice_by_index(begin = var_2082_begin_0, end = var_2082_end_0, end_mask = var_2082_end_mask_0, squeeze_mask = var_2082_squeeze_mask_0, x = coreml_update_state_80)[name = string("op_2082_cast_fp16")]; + tensor var_2085_begin_0 = const()[name = string("op_2085_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2085_end_mask_0 = const()[name = string("op_2085_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2085_cast_fp16 = slice_by_index(begin = var_2085_begin_0, end = concat_11, end_mask = var_2085_end_mask_0, x = var_2082_cast_fp16)[name = string("op_2085_cast_fp16")]; + tensor var_2087_begin_0 = const()[name = string("op_2087_begin_0"), val = tensor([10, 0, 0, 0, 0])]; + tensor var_2087_end_0 = const()[name = string("op_2087_end_0"), val = tensor([11, 1, 3, 2048, 64])]; + tensor var_2087_end_mask_0 = const()[name = string("op_2087_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2087_squeeze_mask_0 = const()[name = string("op_2087_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2087_cast_fp16 = slice_by_index(begin = var_2087_begin_0, end = var_2087_end_0, end_mask = var_2087_end_mask_0, squeeze_mask = var_2087_squeeze_mask_0, x = coreml_update_state_81)[name = string("op_2087_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = string("op_2090_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2090_end_mask_0 = const()[name = string("op_2090_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = concat_11, end_mask = var_2090_end_mask_0, x = var_2087_cast_fp16)[name = string("op_2090_cast_fp16")]; + tensor var_2092_shape_cast_fp16 = shape(x = var_2085_cast_fp16)[name = string("op_2092_shape_cast_fp16")]; + int32 gather_193 = const()[name = string("gather_193"), val = int32(1)]; + int32 gather_194 = const()[name = string("gather_194"), val = int32(3)]; + int32 gather_195_axis_0 = const()[name = string("gather_195_axis_0"), val = int32(0)]; + int32 gather_195_batch_dims_0 = const()[name = string("gather_195_batch_dims_0"), val = int32(0)]; + bool gather_195_validate_indices_0 = const()[name = string("gather_195_validate_indices_0"), val = bool(false)]; + string var_2092_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2092_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_195_to_uint16 = const()[name = string("select_195_to_uint16"), val = uint16(2)]; + tensor var_2092_shape_cast_fp16_to_uint16 = cast(dtype = var_2092_shape_cast_fp16_to_uint16_dtype_0, x = var_2092_shape_cast_fp16)[name = string("cast_158")]; + uint16 gather_195_cast_uint16 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195_to_uint16, validate_indices = gather_195_validate_indices_0, x = var_2092_shape_cast_fp16_to_uint16)[name = string("gather_195_cast_uint16")]; + string gather_195_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_195_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_196 = const()[name = string("gather_196"), val = int32(64)]; + tensor var_2099_axes_0 = const()[name = string("op_2099_axes_0"), val = tensor([2])]; + tensor var_2099_cast_fp16 = expand_dims(axes = var_2099_axes_0, x = var_2085_cast_fp16)[name = string("op_2099_cast_fp16")]; + tensor shape_217_cast_fp16 = shape(x = var_2099_cast_fp16)[name = string("shape_217_cast_fp16")]; + int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; + bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; + int32 gather_195_cast_uint16_to_int32 = cast(dtype = gather_195_cast_uint16_to_int32_dtype_0, x = gather_195_cast_uint16)[name = string("cast_157")]; + tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (gather_193, gather_194, var_85, gather_195_cast_uint16_to_int32, gather_196))[name = string("concat_203")]; + tensor real_div_20 = real_div(x = concat_203, y = shape_217_cast_fp16)[name = string("real_div_20")]; + tensor hidden_states_311_cast_fp16 = tile(reps = real_div_20, x = var_2099_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; + tensor concat_204x = const()[name = string("concat_204x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_43_cast_fp16 = reshape(shape = concat_204x, x = hidden_states_311_cast_fp16)[name = string("key_states_43_cast_fp16")]; + tensor var_2109_shape_cast_fp16 = shape(x = var_2090_cast_fp16)[name = string("op_2109_shape_cast_fp16")]; + int32 gather_197 = const()[name = string("gather_197"), val = int32(1)]; + int32 gather_198 = const()[name = string("gather_198"), val = int32(3)]; + int32 gather_199_axis_0 = const()[name = string("gather_199_axis_0"), val = int32(0)]; + int32 gather_199_batch_dims_0 = const()[name = string("gather_199_batch_dims_0"), val = int32(0)]; + bool gather_199_validate_indices_0 = const()[name = string("gather_199_validate_indices_0"), val = bool(false)]; + string var_2109_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2109_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_199_to_uint16 = const()[name = string("select_199_to_uint16"), val = uint16(2)]; + tensor var_2109_shape_cast_fp16_to_uint16 = cast(dtype = var_2109_shape_cast_fp16_to_uint16_dtype_0, x = var_2109_shape_cast_fp16)[name = string("cast_156")]; + uint16 gather_199_cast_uint16 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = select_199_to_uint16, validate_indices = gather_199_validate_indices_0, x = var_2109_shape_cast_fp16_to_uint16)[name = string("gather_199_cast_uint16")]; + string gather_199_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_199_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_200 = const()[name = string("gather_200"), val = int32(64)]; + tensor var_2116_axes_0 = const()[name = string("op_2116_axes_0"), val = tensor([2])]; + tensor var_2116_cast_fp16 = expand_dims(axes = var_2116_axes_0, x = var_2090_cast_fp16)[name = string("op_2116_cast_fp16")]; + tensor shape_222_cast_fp16 = shape(x = var_2116_cast_fp16)[name = string("shape_222_cast_fp16")]; + int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; + bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; + int32 gather_199_cast_uint16_to_int32 = cast(dtype = gather_199_cast_uint16_to_int32_dtype_0, x = gather_199_cast_uint16)[name = string("cast_155")]; + tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (gather_197, gather_198, var_85, gather_199_cast_uint16_to_int32, gather_200))[name = string("concat_205")]; + tensor real_div_21 = real_div(x = concat_205, y = shape_222_cast_fp16)[name = string("real_div_21")]; + tensor hidden_states_315_cast_fp16 = tile(reps = real_div_21, x = var_2116_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; + tensor concat_206x = const()[name = string("concat_206x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_43_cast_fp16 = reshape(shape = concat_206x, x = hidden_states_315_cast_fp16)[name = string("value_states_43_cast_fp16")]; + tensor var_2126_shape_cast_fp16 = shape(x = key_states_43_cast_fp16)[name = string("op_2126_shape_cast_fp16")]; + int32 gather_201_axis_0 = const()[name = string("gather_201_axis_0"), val = int32(0)]; + int32 gather_201_batch_dims_0 = const()[name = string("gather_201_batch_dims_0"), val = int32(0)]; + bool gather_201_validate_indices_0 = const()[name = string("gather_201_validate_indices_0"), val = bool(false)]; + string var_2126_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2126_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_201_to_uint16 = const()[name = string("select_201_to_uint16"), val = uint16(2)]; + tensor var_2126_shape_cast_fp16_to_uint16 = cast(dtype = var_2126_shape_cast_fp16_to_uint16_dtype_0, x = var_2126_shape_cast_fp16)[name = string("cast_154")]; + uint16 gather_201_cast_uint16 = gather(axis = gather_201_axis_0, batch_dims = gather_201_batch_dims_0, indices = select_201_to_uint16, validate_indices = gather_201_validate_indices_0, x = var_2126_shape_cast_fp16_to_uint16)[name = string("gather_201_cast_uint16")]; + string gather_201_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_201_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_207_values0_0 = const()[name = string("concat_207_values0_0"), val = int32(1)]; + int32 concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = int32(1)]; + int32 concat_207_values2_0 = const()[name = string("concat_207_values2_0"), val = int32(0)]; + int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; + bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; + int32 gather_201_cast_uint16_to_int32 = cast(dtype = gather_201_cast_uint16_to_int32_dtype_0, x = gather_201_cast_uint16)[name = string("cast_153")]; + tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (concat_207_values0_0, concat_207_values1_0, concat_207_values2_0, gather_201_cast_uint16_to_int32))[name = string("concat_207")]; + tensor causal_mask_23_begin_0 = const()[name = string("causal_mask_23_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_23_end_mask_0 = const()[name = string("causal_mask_23_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_23_cast_fp16 = slice_by_index(begin = causal_mask_23_begin_0, end = concat_207, end_mask = causal_mask_23_end_mask_0, x = causal_mask)[name = string("causal_mask_23_cast_fp16")]; + tensor attn_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_23_cast_fp16, key = key_states_43_cast_fp16, query = query_states_43_cast_fp16, value = value_states_43_cast_fp16)[name = string("attn_output_41_cast_fp16")]; + tensor var_2132_perm_0 = const()[name = string("op_2132_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; + bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; + int32 gather_185_cast_uint16_to_int32 = cast(dtype = gather_185_cast_uint16_to_int32_dtype_0, x = gather_185_cast_uint16)[name = string("cast_152")]; + tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (gather_184, gather_185_cast_uint16_to_int32, var_81))[name = string("concat_208")]; + tensor var_2132_cast_fp16 = transpose(perm = var_2132_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_76")]; + tensor input_81_cast_fp16 = reshape(shape = concat_208, x = var_2132_cast_fp16)[name = string("input_81_cast_fp16")]; + tensor model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36182848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36348800))))[name = string("model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_73_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = string("linear_73_cast_fp16")]; + tensor hidden_states_319_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = linear_73_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; + fp16 var_76_promoted_21_to_fp16 = const()[name = string("op_76_promoted_21_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2141_cast_fp16 = pow(x = hidden_states_319_cast_fp16, y = var_76_promoted_21_to_fp16)[name = string("op_2141_cast_fp16")]; + tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; + bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; + tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_2141_cast_fp16)[name = string("variance_43_cast_fp16")]; + fp16 var_2144_to_fp16 = const()[name = string("op_2144_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2145_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2144_to_fp16)[name = string("op_2145_cast_fp16")]; + fp32 var_2146_epsilon_0 = const()[name = string("op_2146_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2146_cast_fp16 = rsqrt(epsilon = var_2146_epsilon_0, x = var_2145_cast_fp16)[name = string("op_2146_cast_fp16")]; + tensor hidden_states_323_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = var_2146_cast_fp16)[name = string("hidden_states_323_cast_fp16")]; + tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36369600)))]; + tensor input_83_cast_fp16 = mul(x = model_model_layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_323_cast_fp16)[name = string("input_83_cast_fp16")]; + tensor model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36370816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36813248))))[name = string("model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_74_cast_fp16")]; + tensor var_2158_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2158_cast_fp16")]; + tensor model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(36868608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37311040))))[name = string("model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_75_cast_fp16")]; + tensor input_87_cast_fp16 = mul(x = var_2158_cast_fp16, y = linear_75_cast_fp16)[name = string("input_87_cast_fp16")]; + tensor model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37366400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37808832))))[name = string("model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_76_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = string("linear_76_cast_fp16")]; + tensor hidden_states_329_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = linear_76_cast_fp16)[name = string("hidden_states_329_cast_fp16")]; + fp16 var_76_promoted_22_to_fp16 = const()[name = string("op_76_promoted_22_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2171_cast_fp16 = pow(x = hidden_states_329_cast_fp16, y = var_76_promoted_22_to_fp16)[name = string("op_2171_cast_fp16")]; + tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; + bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; + tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_2171_cast_fp16)[name = string("variance_45_cast_fp16")]; + fp16 var_2174_to_fp16 = const()[name = string("op_2174_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2175_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2174_to_fp16)[name = string("op_2175_cast_fp16")]; + fp32 var_2176_epsilon_0 = const()[name = string("op_2176_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2176_cast_fp16 = rsqrt(epsilon = var_2176_epsilon_0, x = var_2175_cast_fp16)[name = string("op_2176_cast_fp16")]; + tensor hidden_states_333_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = var_2176_cast_fp16)[name = string("hidden_states_333_cast_fp16")]; + tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37864192)))]; + tensor hidden_states_337_cast_fp16 = mul(x = model_model_layers_11_input_layernorm_weight_to_fp16, y = hidden_states_333_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; + tensor var_2187_shape_cast_fp16 = shape(x = hidden_states_337_cast_fp16)[name = string("op_2187_shape_cast_fp16")]; + int32 gather_202 = const()[name = string("gather_202"), val = int32(1)]; + int32 gather_203_axis_0 = const()[name = string("gather_203_axis_0"), val = int32(0)]; + int32 gather_203_batch_dims_0 = const()[name = string("gather_203_batch_dims_0"), val = int32(0)]; + bool gather_203_validate_indices_0 = const()[name = string("gather_203_validate_indices_0"), val = bool(false)]; + string var_2187_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2187_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_203_to_uint16 = const()[name = string("select_203_to_uint16"), val = uint16(1)]; + tensor var_2187_shape_cast_fp16_to_uint16 = cast(dtype = var_2187_shape_cast_fp16_to_uint16_dtype_0, x = var_2187_shape_cast_fp16)[name = string("cast_151")]; + uint16 gather_203_cast_uint16 = gather(axis = gather_203_axis_0, batch_dims = gather_203_batch_dims_0, indices = select_203_to_uint16, validate_indices = gather_203_validate_indices_0, x = var_2187_shape_cast_fp16_to_uint16)[name = string("gather_203_cast_uint16")]; + string gather_203_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_203_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(37865408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38031360))))[name = string("model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_77_cast_fp16")]; + tensor model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38052160))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38107520))))[name = string("model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_78_cast_fp16")]; + tensor model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38114496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38169856))))[name = string("model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_79_cast_fp16")]; + tensor concat_209x = const()[name = string("concat_209x"), val = tensor([1, -1, 9, 64])]; + tensor var_2196_cast_fp16 = reshape(shape = concat_209x, x = linear_77_cast_fp16)[name = string("op_2196_cast_fp16")]; + tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 3, 64])]; + tensor var_2199_cast_fp16 = reshape(shape = concat_210x, x = linear_78_cast_fp16)[name = string("op_2199_cast_fp16")]; + tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 3, 64])]; + tensor var_2202_cast_fp16 = reshape(shape = concat_211x, x = linear_79_cast_fp16)[name = string("op_2202_cast_fp16")]; + tensor v_state_23_perm_0 = const()[name = string("v_state_23_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_23_cast_fp16 = transpose(perm = q_23_perm_0, x = var_2196_cast_fp16)[name = string("transpose_75")]; + tensor var_2206_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2206_cast_fp16")]; + tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; + tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; + fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2217_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_2217_cast_fp16")]; + bool var_2219_interleave_0 = const()[name = string("op_2219_interleave_0"), val = bool(false)]; + tensor var_2219_cast_fp16 = concat(axis = var_81, interleave = var_2219_interleave_0, values = (var_2217_cast_fp16, x1_45_cast_fp16))[name = string("op_2219_cast_fp16")]; + tensor var_2220_cast_fp16 = mul(x = var_2219_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2220_cast_fp16")]; + tensor query_states_47_cast_fp16 = add(x = var_2206_cast_fp16, y = var_2220_cast_fp16)[name = string("query_states_47_cast_fp16")]; + tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = var_2199_cast_fp16)[name = string("transpose_74")]; + tensor var_2222_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2222_cast_fp16")]; + tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; + tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; + fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2233_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2233_cast_fp16")]; + bool var_2235_interleave_0 = const()[name = string("op_2235_interleave_0"), val = bool(false)]; + tensor var_2235_cast_fp16 = concat(axis = var_81, interleave = var_2235_interleave_0, values = (var_2233_cast_fp16, x1_47_cast_fp16))[name = string("op_2235_cast_fp16")]; + tensor var_2236_cast_fp16 = mul(x = var_2235_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2236_cast_fp16")]; + tensor k_state_23_cast_fp16 = add(x = var_2222_cast_fp16, y = var_2236_cast_fp16)[name = string("k_state_23_cast_fp16")]; + tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; + tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; + tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; + tensor concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = tensor([11])]; + int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; + bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; + tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, expand_dims_132, expand_dims_133, expand_dims_2, expand_dims_135))[name = string("concat_214")]; + tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_82_write_state")]; + tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_82")]; + tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_23_cast_fp16 = transpose(perm = v_state_23_perm_0, x = var_2202_cast_fp16)[name = string("transpose_73")]; + tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_83_write_state")]; + tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_83")]; + tensor var_2259_begin_0 = const()[name = string("op_2259_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_2259_end_0 = const()[name = string("op_2259_end_0"), val = tensor([12, 1, 3, 2048, 64])]; + tensor var_2259_end_mask_0 = const()[name = string("op_2259_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2259_squeeze_mask_0 = const()[name = string("op_2259_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2259_cast_fp16 = slice_by_index(begin = var_2259_begin_0, end = var_2259_end_0, end_mask = var_2259_end_mask_0, squeeze_mask = var_2259_squeeze_mask_0, x = coreml_update_state_82)[name = string("op_2259_cast_fp16")]; + tensor var_2262_begin_0 = const()[name = string("op_2262_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2262_end_mask_0 = const()[name = string("op_2262_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2262_cast_fp16 = slice_by_index(begin = var_2262_begin_0, end = concat_11, end_mask = var_2262_end_mask_0, x = var_2259_cast_fp16)[name = string("op_2262_cast_fp16")]; + tensor var_2264_begin_0 = const()[name = string("op_2264_begin_0"), val = tensor([11, 0, 0, 0, 0])]; + tensor var_2264_end_0 = const()[name = string("op_2264_end_0"), val = tensor([12, 1, 3, 2048, 64])]; + tensor var_2264_end_mask_0 = const()[name = string("op_2264_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2264_squeeze_mask_0 = const()[name = string("op_2264_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2264_cast_fp16 = slice_by_index(begin = var_2264_begin_0, end = var_2264_end_0, end_mask = var_2264_end_mask_0, squeeze_mask = var_2264_squeeze_mask_0, x = coreml_update_state_83)[name = string("op_2264_cast_fp16")]; + tensor var_2267_begin_0 = const()[name = string("op_2267_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2267_end_mask_0 = const()[name = string("op_2267_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2267_cast_fp16 = slice_by_index(begin = var_2267_begin_0, end = concat_11, end_mask = var_2267_end_mask_0, x = var_2264_cast_fp16)[name = string("op_2267_cast_fp16")]; + tensor var_2269_shape_cast_fp16 = shape(x = var_2262_cast_fp16)[name = string("op_2269_shape_cast_fp16")]; + int32 gather_211 = const()[name = string("gather_211"), val = int32(1)]; + int32 gather_212 = const()[name = string("gather_212"), val = int32(3)]; + int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; + int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; + bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; + string var_2269_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2269_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_213_to_uint16 = const()[name = string("select_213_to_uint16"), val = uint16(2)]; + tensor var_2269_shape_cast_fp16_to_uint16 = cast(dtype = var_2269_shape_cast_fp16_to_uint16_dtype_0, x = var_2269_shape_cast_fp16)[name = string("cast_150")]; + uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_2269_shape_cast_fp16_to_uint16)[name = string("gather_213_cast_uint16")]; + string gather_213_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_213_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_214 = const()[name = string("gather_214"), val = int32(64)]; + tensor var_2276_axes_0 = const()[name = string("op_2276_axes_0"), val = tensor([2])]; + tensor var_2276_cast_fp16 = expand_dims(axes = var_2276_axes_0, x = var_2262_cast_fp16)[name = string("op_2276_cast_fp16")]; + tensor shape_237_cast_fp16 = shape(x = var_2276_cast_fp16)[name = string("shape_237_cast_fp16")]; + int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; + bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; + int32 gather_213_cast_uint16_to_int32 = cast(dtype = gather_213_cast_uint16_to_int32_dtype_0, x = gather_213_cast_uint16)[name = string("cast_149")]; + tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (gather_211, gather_212, var_85, gather_213_cast_uint16_to_int32, gather_214))[name = string("concat_222")]; + tensor real_div_22 = real_div(x = concat_222, y = shape_237_cast_fp16)[name = string("real_div_22")]; + tensor hidden_states_341_cast_fp16 = tile(reps = real_div_22, x = var_2276_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; + tensor concat_223x = const()[name = string("concat_223x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_47_cast_fp16 = reshape(shape = concat_223x, x = hidden_states_341_cast_fp16)[name = string("key_states_47_cast_fp16")]; + tensor var_2286_shape_cast_fp16 = shape(x = var_2267_cast_fp16)[name = string("op_2286_shape_cast_fp16")]; + int32 gather_215 = const()[name = string("gather_215"), val = int32(1)]; + int32 gather_216 = const()[name = string("gather_216"), val = int32(3)]; + int32 gather_217_axis_0 = const()[name = string("gather_217_axis_0"), val = int32(0)]; + int32 gather_217_batch_dims_0 = const()[name = string("gather_217_batch_dims_0"), val = int32(0)]; + bool gather_217_validate_indices_0 = const()[name = string("gather_217_validate_indices_0"), val = bool(false)]; + string var_2286_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2286_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_217_to_uint16 = const()[name = string("select_217_to_uint16"), val = uint16(2)]; + tensor var_2286_shape_cast_fp16_to_uint16 = cast(dtype = var_2286_shape_cast_fp16_to_uint16_dtype_0, x = var_2286_shape_cast_fp16)[name = string("cast_148")]; + uint16 gather_217_cast_uint16 = gather(axis = gather_217_axis_0, batch_dims = gather_217_batch_dims_0, indices = select_217_to_uint16, validate_indices = gather_217_validate_indices_0, x = var_2286_shape_cast_fp16_to_uint16)[name = string("gather_217_cast_uint16")]; + string gather_217_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_217_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_218 = const()[name = string("gather_218"), val = int32(64)]; + tensor var_2293_axes_0 = const()[name = string("op_2293_axes_0"), val = tensor([2])]; + tensor var_2293_cast_fp16 = expand_dims(axes = var_2293_axes_0, x = var_2267_cast_fp16)[name = string("op_2293_cast_fp16")]; + tensor shape_242_cast_fp16 = shape(x = var_2293_cast_fp16)[name = string("shape_242_cast_fp16")]; + int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; + bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; + int32 gather_217_cast_uint16_to_int32 = cast(dtype = gather_217_cast_uint16_to_int32_dtype_0, x = gather_217_cast_uint16)[name = string("cast_147")]; + tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (gather_215, gather_216, var_85, gather_217_cast_uint16_to_int32, gather_218))[name = string("concat_224")]; + tensor real_div_23 = real_div(x = concat_224, y = shape_242_cast_fp16)[name = string("real_div_23")]; + tensor hidden_states_345_cast_fp16 = tile(reps = real_div_23, x = var_2293_cast_fp16)[name = string("hidden_states_345_cast_fp16")]; + tensor concat_225x = const()[name = string("concat_225x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_47_cast_fp16 = reshape(shape = concat_225x, x = hidden_states_345_cast_fp16)[name = string("value_states_47_cast_fp16")]; + tensor var_2303_shape_cast_fp16 = shape(x = key_states_47_cast_fp16)[name = string("op_2303_shape_cast_fp16")]; + int32 gather_219_axis_0 = const()[name = string("gather_219_axis_0"), val = int32(0)]; + int32 gather_219_batch_dims_0 = const()[name = string("gather_219_batch_dims_0"), val = int32(0)]; + bool gather_219_validate_indices_0 = const()[name = string("gather_219_validate_indices_0"), val = bool(false)]; + string var_2303_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2303_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_219_to_uint16 = const()[name = string("select_219_to_uint16"), val = uint16(2)]; + tensor var_2303_shape_cast_fp16_to_uint16 = cast(dtype = var_2303_shape_cast_fp16_to_uint16_dtype_0, x = var_2303_shape_cast_fp16)[name = string("cast_146")]; + uint16 gather_219_cast_uint16 = gather(axis = gather_219_axis_0, batch_dims = gather_219_batch_dims_0, indices = select_219_to_uint16, validate_indices = gather_219_validate_indices_0, x = var_2303_shape_cast_fp16_to_uint16)[name = string("gather_219_cast_uint16")]; + string gather_219_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_219_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_226_values0_0 = const()[name = string("concat_226_values0_0"), val = int32(1)]; + int32 concat_226_values1_0 = const()[name = string("concat_226_values1_0"), val = int32(1)]; + int32 concat_226_values2_0 = const()[name = string("concat_226_values2_0"), val = int32(0)]; + int32 concat_226_axis_0 = const()[name = string("concat_226_axis_0"), val = int32(0)]; + bool concat_226_interleave_0 = const()[name = string("concat_226_interleave_0"), val = bool(false)]; + int32 gather_219_cast_uint16_to_int32 = cast(dtype = gather_219_cast_uint16_to_int32_dtype_0, x = gather_219_cast_uint16)[name = string("cast_145")]; + tensor concat_226 = concat(axis = concat_226_axis_0, interleave = concat_226_interleave_0, values = (concat_226_values0_0, concat_226_values1_0, concat_226_values2_0, gather_219_cast_uint16_to_int32))[name = string("concat_226")]; + tensor causal_mask_25_begin_0 = const()[name = string("causal_mask_25_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_25_end_mask_0 = const()[name = string("causal_mask_25_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_25_cast_fp16 = slice_by_index(begin = causal_mask_25_begin_0, end = concat_226, end_mask = causal_mask_25_end_mask_0, x = causal_mask)[name = string("causal_mask_25_cast_fp16")]; + tensor attn_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_25_cast_fp16, key = key_states_47_cast_fp16, query = query_states_47_cast_fp16, value = value_states_47_cast_fp16)[name = string("attn_output_45_cast_fp16")]; + tensor var_2309_perm_0 = const()[name = string("op_2309_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_227_axis_0 = const()[name = string("concat_227_axis_0"), val = int32(0)]; + bool concat_227_interleave_0 = const()[name = string("concat_227_interleave_0"), val = bool(false)]; + int32 gather_203_cast_uint16_to_int32 = cast(dtype = gather_203_cast_uint16_to_int32_dtype_0, x = gather_203_cast_uint16)[name = string("cast_144")]; + tensor concat_227 = concat(axis = concat_227_axis_0, interleave = concat_227_interleave_0, values = (gather_202, gather_203_cast_uint16_to_int32, var_81))[name = string("concat_227")]; + tensor var_2309_cast_fp16 = transpose(perm = var_2309_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_72")]; + tensor input_89_cast_fp16 = reshape(shape = concat_227, x = var_2309_cast_fp16)[name = string("input_89_cast_fp16")]; + tensor model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38176832))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38342784))))[name = string("model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_80_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = string("linear_80_cast_fp16")]; + tensor hidden_states_349_cast_fp16 = add(x = hidden_states_329_cast_fp16, y = linear_80_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; + fp16 var_76_promoted_23_to_fp16 = const()[name = string("op_76_promoted_23_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2318_cast_fp16 = pow(x = hidden_states_349_cast_fp16, y = var_76_promoted_23_to_fp16)[name = string("op_2318_cast_fp16")]; + tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; + bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; + tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_2318_cast_fp16)[name = string("variance_47_cast_fp16")]; + fp16 var_2321_to_fp16 = const()[name = string("op_2321_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2322_cast_fp16 = add(x = variance_47_cast_fp16, y = var_2321_to_fp16)[name = string("op_2322_cast_fp16")]; + fp32 var_2323_epsilon_0 = const()[name = string("op_2323_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2323_cast_fp16 = rsqrt(epsilon = var_2323_epsilon_0, x = var_2322_cast_fp16)[name = string("op_2323_cast_fp16")]; + tensor hidden_states_353_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = var_2323_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; + tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38363584)))]; + tensor input_91_cast_fp16 = mul(x = model_model_layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_353_cast_fp16)[name = string("input_91_cast_fp16")]; + tensor model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38364800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38807232))))[name = string("model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_81_cast_fp16")]; + tensor var_2335_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2335_cast_fp16")]; + tensor model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(38862592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39305024))))[name = string("model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_82_cast_fp16")]; + tensor input_95_cast_fp16 = mul(x = var_2335_cast_fp16, y = linear_82_cast_fp16)[name = string("input_95_cast_fp16")]; + tensor model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39360384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39802816))))[name = string("model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_83_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = string("linear_83_cast_fp16")]; + tensor hidden_states_359_cast_fp16 = add(x = hidden_states_349_cast_fp16, y = linear_83_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; + fp16 var_76_promoted_24_to_fp16 = const()[name = string("op_76_promoted_24_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2348_cast_fp16 = pow(x = hidden_states_359_cast_fp16, y = var_76_promoted_24_to_fp16)[name = string("op_2348_cast_fp16")]; + tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; + bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; + tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_2348_cast_fp16)[name = string("variance_49_cast_fp16")]; + fp16 var_2351_to_fp16 = const()[name = string("op_2351_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2352_cast_fp16 = add(x = variance_49_cast_fp16, y = var_2351_to_fp16)[name = string("op_2352_cast_fp16")]; + fp32 var_2353_epsilon_0 = const()[name = string("op_2353_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2353_cast_fp16 = rsqrt(epsilon = var_2353_epsilon_0, x = var_2352_cast_fp16)[name = string("op_2353_cast_fp16")]; + tensor hidden_states_363_cast_fp16 = mul(x = hidden_states_359_cast_fp16, y = var_2353_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; + tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39858176)))]; + tensor hidden_states_367_cast_fp16 = mul(x = model_model_layers_12_input_layernorm_weight_to_fp16, y = hidden_states_363_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; + tensor var_2364_shape_cast_fp16 = shape(x = hidden_states_367_cast_fp16)[name = string("op_2364_shape_cast_fp16")]; + int32 gather_220 = const()[name = string("gather_220"), val = int32(1)]; + int32 gather_221_axis_0 = const()[name = string("gather_221_axis_0"), val = int32(0)]; + int32 gather_221_batch_dims_0 = const()[name = string("gather_221_batch_dims_0"), val = int32(0)]; + bool gather_221_validate_indices_0 = const()[name = string("gather_221_validate_indices_0"), val = bool(false)]; + string var_2364_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2364_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_221_to_uint16 = const()[name = string("select_221_to_uint16"), val = uint16(1)]; + tensor var_2364_shape_cast_fp16_to_uint16 = cast(dtype = var_2364_shape_cast_fp16_to_uint16_dtype_0, x = var_2364_shape_cast_fp16)[name = string("cast_143")]; + uint16 gather_221_cast_uint16 = gather(axis = gather_221_axis_0, batch_dims = gather_221_batch_dims_0, indices = select_221_to_uint16, validate_indices = gather_221_validate_indices_0, x = var_2364_shape_cast_fp16_to_uint16)[name = string("gather_221_cast_uint16")]; + string gather_221_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_221_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(39859392))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40025344))))[name = string("model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_84_cast_fp16")]; + tensor model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40046144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40101504))))[name = string("model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_85_cast_fp16")]; + tensor model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40108480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40163840))))[name = string("model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_86_cast_fp16")]; + tensor concat_228x = const()[name = string("concat_228x"), val = tensor([1, -1, 9, 64])]; + tensor var_2373_cast_fp16 = reshape(shape = concat_228x, x = linear_84_cast_fp16)[name = string("op_2373_cast_fp16")]; + tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_229x = const()[name = string("concat_229x"), val = tensor([1, -1, 3, 64])]; + tensor var_2376_cast_fp16 = reshape(shape = concat_229x, x = linear_85_cast_fp16)[name = string("op_2376_cast_fp16")]; + tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_230x = const()[name = string("concat_230x"), val = tensor([1, -1, 3, 64])]; + tensor var_2379_cast_fp16 = reshape(shape = concat_230x, x = linear_86_cast_fp16)[name = string("op_2379_cast_fp16")]; + tensor v_state_25_perm_0 = const()[name = string("v_state_25_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_2373_cast_fp16)[name = string("transpose_71")]; + tensor var_2383_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2383_cast_fp16")]; + tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; + tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; + fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2394_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_2394_cast_fp16")]; + bool var_2396_interleave_0 = const()[name = string("op_2396_interleave_0"), val = bool(false)]; + tensor var_2396_cast_fp16 = concat(axis = var_81, interleave = var_2396_interleave_0, values = (var_2394_cast_fp16, x1_49_cast_fp16))[name = string("op_2396_cast_fp16")]; + tensor var_2397_cast_fp16 = mul(x = var_2396_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2397_cast_fp16")]; + tensor query_states_51_cast_fp16 = add(x = var_2383_cast_fp16, y = var_2397_cast_fp16)[name = string("query_states_51_cast_fp16")]; + tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_2376_cast_fp16)[name = string("transpose_70")]; + tensor var_2399_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2399_cast_fp16")]; + tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; + tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; + fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2410_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_2410_cast_fp16")]; + bool var_2412_interleave_0 = const()[name = string("op_2412_interleave_0"), val = bool(false)]; + tensor var_2412_cast_fp16 = concat(axis = var_81, interleave = var_2412_interleave_0, values = (var_2410_cast_fp16, x1_51_cast_fp16))[name = string("op_2412_cast_fp16")]; + tensor var_2413_cast_fp16 = mul(x = var_2412_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2413_cast_fp16")]; + tensor k_state_25_cast_fp16 = add(x = var_2399_cast_fp16, y = var_2413_cast_fp16)[name = string("k_state_25_cast_fp16")]; + tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; + tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; + tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; + tensor concat_233_values0_0 = const()[name = string("concat_233_values0_0"), val = tensor([12])]; + int32 concat_233_axis_0 = const()[name = string("concat_233_axis_0"), val = int32(0)]; + bool concat_233_interleave_0 = const()[name = string("concat_233_interleave_0"), val = bool(false)]; + tensor concat_233 = concat(axis = concat_233_axis_0, interleave = concat_233_interleave_0, values = (concat_233_values0_0, expand_dims_144, expand_dims_145, expand_dims_2, expand_dims_147))[name = string("concat_233")]; + tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_84_write_state")]; + tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_84")]; + tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_25_cast_fp16 = transpose(perm = v_state_25_perm_0, x = var_2379_cast_fp16)[name = string("transpose_69")]; + tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_85_write_state")]; + tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_85")]; + tensor var_2436_begin_0 = const()[name = string("op_2436_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_2436_end_0 = const()[name = string("op_2436_end_0"), val = tensor([13, 1, 3, 2048, 64])]; + tensor var_2436_end_mask_0 = const()[name = string("op_2436_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2436_squeeze_mask_0 = const()[name = string("op_2436_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2436_cast_fp16 = slice_by_index(begin = var_2436_begin_0, end = var_2436_end_0, end_mask = var_2436_end_mask_0, squeeze_mask = var_2436_squeeze_mask_0, x = coreml_update_state_84)[name = string("op_2436_cast_fp16")]; + tensor var_2439_begin_0 = const()[name = string("op_2439_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2439_end_mask_0 = const()[name = string("op_2439_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2439_cast_fp16 = slice_by_index(begin = var_2439_begin_0, end = concat_11, end_mask = var_2439_end_mask_0, x = var_2436_cast_fp16)[name = string("op_2439_cast_fp16")]; + tensor var_2441_begin_0 = const()[name = string("op_2441_begin_0"), val = tensor([12, 0, 0, 0, 0])]; + tensor var_2441_end_0 = const()[name = string("op_2441_end_0"), val = tensor([13, 1, 3, 2048, 64])]; + tensor var_2441_end_mask_0 = const()[name = string("op_2441_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2441_squeeze_mask_0 = const()[name = string("op_2441_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2441_cast_fp16 = slice_by_index(begin = var_2441_begin_0, end = var_2441_end_0, end_mask = var_2441_end_mask_0, squeeze_mask = var_2441_squeeze_mask_0, x = coreml_update_state_85)[name = string("op_2441_cast_fp16")]; + tensor var_2444_begin_0 = const()[name = string("op_2444_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2444_end_mask_0 = const()[name = string("op_2444_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2444_cast_fp16 = slice_by_index(begin = var_2444_begin_0, end = concat_11, end_mask = var_2444_end_mask_0, x = var_2441_cast_fp16)[name = string("op_2444_cast_fp16")]; + tensor var_2446_shape_cast_fp16 = shape(x = var_2439_cast_fp16)[name = string("op_2446_shape_cast_fp16")]; + int32 gather_229 = const()[name = string("gather_229"), val = int32(1)]; + int32 gather_230 = const()[name = string("gather_230"), val = int32(3)]; + int32 gather_231_axis_0 = const()[name = string("gather_231_axis_0"), val = int32(0)]; + int32 gather_231_batch_dims_0 = const()[name = string("gather_231_batch_dims_0"), val = int32(0)]; + bool gather_231_validate_indices_0 = const()[name = string("gather_231_validate_indices_0"), val = bool(false)]; + string var_2446_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2446_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_231_to_uint16 = const()[name = string("select_231_to_uint16"), val = uint16(2)]; + tensor var_2446_shape_cast_fp16_to_uint16 = cast(dtype = var_2446_shape_cast_fp16_to_uint16_dtype_0, x = var_2446_shape_cast_fp16)[name = string("cast_142")]; + uint16 gather_231_cast_uint16 = gather(axis = gather_231_axis_0, batch_dims = gather_231_batch_dims_0, indices = select_231_to_uint16, validate_indices = gather_231_validate_indices_0, x = var_2446_shape_cast_fp16_to_uint16)[name = string("gather_231_cast_uint16")]; + string gather_231_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_231_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_232 = const()[name = string("gather_232"), val = int32(64)]; + tensor var_2453_axes_0 = const()[name = string("op_2453_axes_0"), val = tensor([2])]; + tensor var_2453_cast_fp16 = expand_dims(axes = var_2453_axes_0, x = var_2439_cast_fp16)[name = string("op_2453_cast_fp16")]; + tensor shape_257_cast_fp16 = shape(x = var_2453_cast_fp16)[name = string("shape_257_cast_fp16")]; + int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; + bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; + int32 gather_231_cast_uint16_to_int32 = cast(dtype = gather_231_cast_uint16_to_int32_dtype_0, x = gather_231_cast_uint16)[name = string("cast_141")]; + tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (gather_229, gather_230, var_85, gather_231_cast_uint16_to_int32, gather_232))[name = string("concat_241")]; + tensor real_div_24 = real_div(x = concat_241, y = shape_257_cast_fp16)[name = string("real_div_24")]; + tensor hidden_states_371_cast_fp16 = tile(reps = real_div_24, x = var_2453_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; + tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_51_cast_fp16 = reshape(shape = concat_242x, x = hidden_states_371_cast_fp16)[name = string("key_states_51_cast_fp16")]; + tensor var_2463_shape_cast_fp16 = shape(x = var_2444_cast_fp16)[name = string("op_2463_shape_cast_fp16")]; + int32 gather_233 = const()[name = string("gather_233"), val = int32(1)]; + int32 gather_234 = const()[name = string("gather_234"), val = int32(3)]; + int32 gather_235_axis_0 = const()[name = string("gather_235_axis_0"), val = int32(0)]; + int32 gather_235_batch_dims_0 = const()[name = string("gather_235_batch_dims_0"), val = int32(0)]; + bool gather_235_validate_indices_0 = const()[name = string("gather_235_validate_indices_0"), val = bool(false)]; + string var_2463_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2463_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_235_to_uint16 = const()[name = string("select_235_to_uint16"), val = uint16(2)]; + tensor var_2463_shape_cast_fp16_to_uint16 = cast(dtype = var_2463_shape_cast_fp16_to_uint16_dtype_0, x = var_2463_shape_cast_fp16)[name = string("cast_140")]; + uint16 gather_235_cast_uint16 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235_to_uint16, validate_indices = gather_235_validate_indices_0, x = var_2463_shape_cast_fp16_to_uint16)[name = string("gather_235_cast_uint16")]; + string gather_235_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_235_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_236 = const()[name = string("gather_236"), val = int32(64)]; + tensor var_2470_axes_0 = const()[name = string("op_2470_axes_0"), val = tensor([2])]; + tensor var_2470_cast_fp16 = expand_dims(axes = var_2470_axes_0, x = var_2444_cast_fp16)[name = string("op_2470_cast_fp16")]; + tensor shape_262_cast_fp16 = shape(x = var_2470_cast_fp16)[name = string("shape_262_cast_fp16")]; + int32 concat_243_axis_0 = const()[name = string("concat_243_axis_0"), val = int32(0)]; + bool concat_243_interleave_0 = const()[name = string("concat_243_interleave_0"), val = bool(false)]; + int32 gather_235_cast_uint16_to_int32 = cast(dtype = gather_235_cast_uint16_to_int32_dtype_0, x = gather_235_cast_uint16)[name = string("cast_139")]; + tensor concat_243 = concat(axis = concat_243_axis_0, interleave = concat_243_interleave_0, values = (gather_233, gather_234, var_85, gather_235_cast_uint16_to_int32, gather_236))[name = string("concat_243")]; + tensor real_div_25 = real_div(x = concat_243, y = shape_262_cast_fp16)[name = string("real_div_25")]; + tensor hidden_states_375_cast_fp16 = tile(reps = real_div_25, x = var_2470_cast_fp16)[name = string("hidden_states_375_cast_fp16")]; + tensor concat_244x = const()[name = string("concat_244x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_51_cast_fp16 = reshape(shape = concat_244x, x = hidden_states_375_cast_fp16)[name = string("value_states_51_cast_fp16")]; + tensor var_2480_shape_cast_fp16 = shape(x = key_states_51_cast_fp16)[name = string("op_2480_shape_cast_fp16")]; + int32 gather_237_axis_0 = const()[name = string("gather_237_axis_0"), val = int32(0)]; + int32 gather_237_batch_dims_0 = const()[name = string("gather_237_batch_dims_0"), val = int32(0)]; + bool gather_237_validate_indices_0 = const()[name = string("gather_237_validate_indices_0"), val = bool(false)]; + string var_2480_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2480_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_237_to_uint16 = const()[name = string("select_237_to_uint16"), val = uint16(2)]; + tensor var_2480_shape_cast_fp16_to_uint16 = cast(dtype = var_2480_shape_cast_fp16_to_uint16_dtype_0, x = var_2480_shape_cast_fp16)[name = string("cast_138")]; + uint16 gather_237_cast_uint16 = gather(axis = gather_237_axis_0, batch_dims = gather_237_batch_dims_0, indices = select_237_to_uint16, validate_indices = gather_237_validate_indices_0, x = var_2480_shape_cast_fp16_to_uint16)[name = string("gather_237_cast_uint16")]; + string gather_237_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_237_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_245_values0_0 = const()[name = string("concat_245_values0_0"), val = int32(1)]; + int32 concat_245_values1_0 = const()[name = string("concat_245_values1_0"), val = int32(1)]; + int32 concat_245_values2_0 = const()[name = string("concat_245_values2_0"), val = int32(0)]; + int32 concat_245_axis_0 = const()[name = string("concat_245_axis_0"), val = int32(0)]; + bool concat_245_interleave_0 = const()[name = string("concat_245_interleave_0"), val = bool(false)]; + int32 gather_237_cast_uint16_to_int32 = cast(dtype = gather_237_cast_uint16_to_int32_dtype_0, x = gather_237_cast_uint16)[name = string("cast_137")]; + tensor concat_245 = concat(axis = concat_245_axis_0, interleave = concat_245_interleave_0, values = (concat_245_values0_0, concat_245_values1_0, concat_245_values2_0, gather_237_cast_uint16_to_int32))[name = string("concat_245")]; + tensor causal_mask_27_begin_0 = const()[name = string("causal_mask_27_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_27_end_mask_0 = const()[name = string("causal_mask_27_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_27_cast_fp16 = slice_by_index(begin = causal_mask_27_begin_0, end = concat_245, end_mask = causal_mask_27_end_mask_0, x = causal_mask)[name = string("causal_mask_27_cast_fp16")]; + tensor attn_output_49_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_27_cast_fp16, key = key_states_51_cast_fp16, query = query_states_51_cast_fp16, value = value_states_51_cast_fp16)[name = string("attn_output_49_cast_fp16")]; + tensor var_2486_perm_0 = const()[name = string("op_2486_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; + bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; + int32 gather_221_cast_uint16_to_int32 = cast(dtype = gather_221_cast_uint16_to_int32_dtype_0, x = gather_221_cast_uint16)[name = string("cast_136")]; + tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (gather_220, gather_221_cast_uint16_to_int32, var_81))[name = string("concat_246")]; + tensor var_2486_cast_fp16 = transpose(perm = var_2486_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_68")]; + tensor input_97_cast_fp16 = reshape(shape = concat_246, x = var_2486_cast_fp16)[name = string("input_97_cast_fp16")]; + tensor model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40170816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40336768))))[name = string("model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_87_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = string("linear_87_cast_fp16")]; + tensor hidden_states_379_cast_fp16 = add(x = hidden_states_359_cast_fp16, y = linear_87_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; + fp16 var_76_promoted_25_to_fp16 = const()[name = string("op_76_promoted_25_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2495_cast_fp16 = pow(x = hidden_states_379_cast_fp16, y = var_76_promoted_25_to_fp16)[name = string("op_2495_cast_fp16")]; + tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; + bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; + tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_2495_cast_fp16)[name = string("variance_51_cast_fp16")]; + fp16 var_2498_to_fp16 = const()[name = string("op_2498_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2499_cast_fp16 = add(x = variance_51_cast_fp16, y = var_2498_to_fp16)[name = string("op_2499_cast_fp16")]; + fp32 var_2500_epsilon_0 = const()[name = string("op_2500_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2500_cast_fp16 = rsqrt(epsilon = var_2500_epsilon_0, x = var_2499_cast_fp16)[name = string("op_2500_cast_fp16")]; + tensor hidden_states_383_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = var_2500_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; + tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40357568)))]; + tensor input_99_cast_fp16 = mul(x = model_model_layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_383_cast_fp16)[name = string("input_99_cast_fp16")]; + tensor model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40358784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40801216))))[name = string("model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_88_cast_fp16")]; + tensor var_2512_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2512_cast_fp16")]; + tensor model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(40856576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41299008))))[name = string("model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_89_cast_fp16")]; + tensor input_103_cast_fp16 = mul(x = var_2512_cast_fp16, y = linear_89_cast_fp16)[name = string("input_103_cast_fp16")]; + tensor model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41354368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41796800))))[name = string("model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_90_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = string("linear_90_cast_fp16")]; + tensor hidden_states_389_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = linear_90_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; + fp16 var_76_promoted_26_to_fp16 = const()[name = string("op_76_promoted_26_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2525_cast_fp16 = pow(x = hidden_states_389_cast_fp16, y = var_76_promoted_26_to_fp16)[name = string("op_2525_cast_fp16")]; + tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; + bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; + tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_2525_cast_fp16)[name = string("variance_53_cast_fp16")]; + fp16 var_2528_to_fp16 = const()[name = string("op_2528_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2529_cast_fp16 = add(x = variance_53_cast_fp16, y = var_2528_to_fp16)[name = string("op_2529_cast_fp16")]; + fp32 var_2530_epsilon_0 = const()[name = string("op_2530_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2530_cast_fp16 = rsqrt(epsilon = var_2530_epsilon_0, x = var_2529_cast_fp16)[name = string("op_2530_cast_fp16")]; + tensor hidden_states_393_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = var_2530_cast_fp16)[name = string("hidden_states_393_cast_fp16")]; + tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41852160)))]; + tensor hidden_states_397_cast_fp16 = mul(x = model_model_layers_13_input_layernorm_weight_to_fp16, y = hidden_states_393_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; + tensor var_2541_shape_cast_fp16 = shape(x = hidden_states_397_cast_fp16)[name = string("op_2541_shape_cast_fp16")]; + int32 gather_238 = const()[name = string("gather_238"), val = int32(1)]; + int32 gather_239_axis_0 = const()[name = string("gather_239_axis_0"), val = int32(0)]; + int32 gather_239_batch_dims_0 = const()[name = string("gather_239_batch_dims_0"), val = int32(0)]; + bool gather_239_validate_indices_0 = const()[name = string("gather_239_validate_indices_0"), val = bool(false)]; + string var_2541_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2541_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_239_to_uint16 = const()[name = string("select_239_to_uint16"), val = uint16(1)]; + tensor var_2541_shape_cast_fp16_to_uint16 = cast(dtype = var_2541_shape_cast_fp16_to_uint16_dtype_0, x = var_2541_shape_cast_fp16)[name = string("cast_135")]; + uint16 gather_239_cast_uint16 = gather(axis = gather_239_axis_0, batch_dims = gather_239_batch_dims_0, indices = select_239_to_uint16, validate_indices = gather_239_validate_indices_0, x = var_2541_shape_cast_fp16_to_uint16)[name = string("gather_239_cast_uint16")]; + string gather_239_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_239_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(41853376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42019328))))[name = string("model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_91_cast_fp16")]; + tensor model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42040128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42095488))))[name = string("model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_92_cast_fp16")]; + tensor model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42102464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42157824))))[name = string("model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_93_cast_fp16")]; + tensor concat_247x = const()[name = string("concat_247x"), val = tensor([1, -1, 9, 64])]; + tensor var_2550_cast_fp16 = reshape(shape = concat_247x, x = linear_91_cast_fp16)[name = string("op_2550_cast_fp16")]; + tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_248x = const()[name = string("concat_248x"), val = tensor([1, -1, 3, 64])]; + tensor var_2553_cast_fp16 = reshape(shape = concat_248x, x = linear_92_cast_fp16)[name = string("op_2553_cast_fp16")]; + tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_249x = const()[name = string("concat_249x"), val = tensor([1, -1, 3, 64])]; + tensor var_2556_cast_fp16 = reshape(shape = concat_249x, x = linear_93_cast_fp16)[name = string("op_2556_cast_fp16")]; + tensor v_state_27_perm_0 = const()[name = string("v_state_27_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_27_cast_fp16 = transpose(perm = q_27_perm_0, x = var_2550_cast_fp16)[name = string("transpose_67")]; + tensor var_2560_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2560_cast_fp16")]; + tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; + tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; + fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2571_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2571_cast_fp16")]; + bool var_2573_interleave_0 = const()[name = string("op_2573_interleave_0"), val = bool(false)]; + tensor var_2573_cast_fp16 = concat(axis = var_81, interleave = var_2573_interleave_0, values = (var_2571_cast_fp16, x1_53_cast_fp16))[name = string("op_2573_cast_fp16")]; + tensor var_2574_cast_fp16 = mul(x = var_2573_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2574_cast_fp16")]; + tensor query_states_55_cast_fp16 = add(x = var_2560_cast_fp16, y = var_2574_cast_fp16)[name = string("query_states_55_cast_fp16")]; + tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = var_2553_cast_fp16)[name = string("transpose_66")]; + tensor var_2576_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2576_cast_fp16")]; + tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; + tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; + fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2587_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2587_cast_fp16")]; + bool var_2589_interleave_0 = const()[name = string("op_2589_interleave_0"), val = bool(false)]; + tensor var_2589_cast_fp16 = concat(axis = var_81, interleave = var_2589_interleave_0, values = (var_2587_cast_fp16, x1_55_cast_fp16))[name = string("op_2589_cast_fp16")]; + tensor var_2590_cast_fp16 = mul(x = var_2589_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2590_cast_fp16")]; + tensor k_state_27_cast_fp16 = add(x = var_2576_cast_fp16, y = var_2590_cast_fp16)[name = string("k_state_27_cast_fp16")]; + tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([0])]; + tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; + tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; + tensor concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = tensor([13])]; + int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; + bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; + tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, expand_dims_156, expand_dims_157, expand_dims_2, expand_dims_159))[name = string("concat_252")]; + tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_86_write_state")]; + tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_86")]; + tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_27_cast_fp16 = transpose(perm = v_state_27_perm_0, x = var_2556_cast_fp16)[name = string("transpose_65")]; + tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_87_write_state")]; + tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_87")]; + tensor var_2613_begin_0 = const()[name = string("op_2613_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2613_end_0 = const()[name = string("op_2613_end_0"), val = tensor([14, 1, 3, 2048, 64])]; + tensor var_2613_end_mask_0 = const()[name = string("op_2613_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2613_squeeze_mask_0 = const()[name = string("op_2613_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2613_cast_fp16 = slice_by_index(begin = var_2613_begin_0, end = var_2613_end_0, end_mask = var_2613_end_mask_0, squeeze_mask = var_2613_squeeze_mask_0, x = coreml_update_state_86)[name = string("op_2613_cast_fp16")]; + tensor var_2616_begin_0 = const()[name = string("op_2616_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2616_end_mask_0 = const()[name = string("op_2616_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2616_cast_fp16 = slice_by_index(begin = var_2616_begin_0, end = concat_11, end_mask = var_2616_end_mask_0, x = var_2613_cast_fp16)[name = string("op_2616_cast_fp16")]; + tensor var_2618_begin_0 = const()[name = string("op_2618_begin_0"), val = tensor([13, 0, 0, 0, 0])]; + tensor var_2618_end_0 = const()[name = string("op_2618_end_0"), val = tensor([14, 1, 3, 2048, 64])]; + tensor var_2618_end_mask_0 = const()[name = string("op_2618_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2618_squeeze_mask_0 = const()[name = string("op_2618_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2618_cast_fp16 = slice_by_index(begin = var_2618_begin_0, end = var_2618_end_0, end_mask = var_2618_end_mask_0, squeeze_mask = var_2618_squeeze_mask_0, x = coreml_update_state_87)[name = string("op_2618_cast_fp16")]; + tensor var_2621_begin_0 = const()[name = string("op_2621_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2621_end_mask_0 = const()[name = string("op_2621_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2621_cast_fp16 = slice_by_index(begin = var_2621_begin_0, end = concat_11, end_mask = var_2621_end_mask_0, x = var_2618_cast_fp16)[name = string("op_2621_cast_fp16")]; + tensor var_2623_shape_cast_fp16 = shape(x = var_2616_cast_fp16)[name = string("op_2623_shape_cast_fp16")]; + int32 gather_247 = const()[name = string("gather_247"), val = int32(1)]; + int32 gather_248 = const()[name = string("gather_248"), val = int32(3)]; + int32 gather_249_axis_0 = const()[name = string("gather_249_axis_0"), val = int32(0)]; + int32 gather_249_batch_dims_0 = const()[name = string("gather_249_batch_dims_0"), val = int32(0)]; + bool gather_249_validate_indices_0 = const()[name = string("gather_249_validate_indices_0"), val = bool(false)]; + string var_2623_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2623_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_249_to_uint16 = const()[name = string("select_249_to_uint16"), val = uint16(2)]; + tensor var_2623_shape_cast_fp16_to_uint16 = cast(dtype = var_2623_shape_cast_fp16_to_uint16_dtype_0, x = var_2623_shape_cast_fp16)[name = string("cast_134")]; + uint16 gather_249_cast_uint16 = gather(axis = gather_249_axis_0, batch_dims = gather_249_batch_dims_0, indices = select_249_to_uint16, validate_indices = gather_249_validate_indices_0, x = var_2623_shape_cast_fp16_to_uint16)[name = string("gather_249_cast_uint16")]; + string gather_249_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_249_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_250 = const()[name = string("gather_250"), val = int32(64)]; + tensor var_2630_axes_0 = const()[name = string("op_2630_axes_0"), val = tensor([2])]; + tensor var_2630_cast_fp16 = expand_dims(axes = var_2630_axes_0, x = var_2616_cast_fp16)[name = string("op_2630_cast_fp16")]; + tensor shape_277_cast_fp16 = shape(x = var_2630_cast_fp16)[name = string("shape_277_cast_fp16")]; + int32 concat_260_axis_0 = const()[name = string("concat_260_axis_0"), val = int32(0)]; + bool concat_260_interleave_0 = const()[name = string("concat_260_interleave_0"), val = bool(false)]; + int32 gather_249_cast_uint16_to_int32 = cast(dtype = gather_249_cast_uint16_to_int32_dtype_0, x = gather_249_cast_uint16)[name = string("cast_133")]; + tensor concat_260 = concat(axis = concat_260_axis_0, interleave = concat_260_interleave_0, values = (gather_247, gather_248, var_85, gather_249_cast_uint16_to_int32, gather_250))[name = string("concat_260")]; + tensor real_div_26 = real_div(x = concat_260, y = shape_277_cast_fp16)[name = string("real_div_26")]; + tensor hidden_states_401_cast_fp16 = tile(reps = real_div_26, x = var_2630_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; + tensor concat_261x = const()[name = string("concat_261x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_55_cast_fp16 = reshape(shape = concat_261x, x = hidden_states_401_cast_fp16)[name = string("key_states_55_cast_fp16")]; + tensor var_2640_shape_cast_fp16 = shape(x = var_2621_cast_fp16)[name = string("op_2640_shape_cast_fp16")]; + int32 gather_251 = const()[name = string("gather_251"), val = int32(1)]; + int32 gather_252 = const()[name = string("gather_252"), val = int32(3)]; + int32 gather_253_axis_0 = const()[name = string("gather_253_axis_0"), val = int32(0)]; + int32 gather_253_batch_dims_0 = const()[name = string("gather_253_batch_dims_0"), val = int32(0)]; + bool gather_253_validate_indices_0 = const()[name = string("gather_253_validate_indices_0"), val = bool(false)]; + string var_2640_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2640_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_253_to_uint16 = const()[name = string("select_253_to_uint16"), val = uint16(2)]; + tensor var_2640_shape_cast_fp16_to_uint16 = cast(dtype = var_2640_shape_cast_fp16_to_uint16_dtype_0, x = var_2640_shape_cast_fp16)[name = string("cast_132")]; + uint16 gather_253_cast_uint16 = gather(axis = gather_253_axis_0, batch_dims = gather_253_batch_dims_0, indices = select_253_to_uint16, validate_indices = gather_253_validate_indices_0, x = var_2640_shape_cast_fp16_to_uint16)[name = string("gather_253_cast_uint16")]; + string gather_253_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_253_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_254 = const()[name = string("gather_254"), val = int32(64)]; + tensor var_2647_axes_0 = const()[name = string("op_2647_axes_0"), val = tensor([2])]; + tensor var_2647_cast_fp16 = expand_dims(axes = var_2647_axes_0, x = var_2621_cast_fp16)[name = string("op_2647_cast_fp16")]; + tensor shape_282_cast_fp16 = shape(x = var_2647_cast_fp16)[name = string("shape_282_cast_fp16")]; + int32 concat_262_axis_0 = const()[name = string("concat_262_axis_0"), val = int32(0)]; + bool concat_262_interleave_0 = const()[name = string("concat_262_interleave_0"), val = bool(false)]; + int32 gather_253_cast_uint16_to_int32 = cast(dtype = gather_253_cast_uint16_to_int32_dtype_0, x = gather_253_cast_uint16)[name = string("cast_131")]; + tensor concat_262 = concat(axis = concat_262_axis_0, interleave = concat_262_interleave_0, values = (gather_251, gather_252, var_85, gather_253_cast_uint16_to_int32, gather_254))[name = string("concat_262")]; + tensor real_div_27 = real_div(x = concat_262, y = shape_282_cast_fp16)[name = string("real_div_27")]; + tensor hidden_states_405_cast_fp16 = tile(reps = real_div_27, x = var_2647_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; + tensor concat_263x = const()[name = string("concat_263x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_55_cast_fp16 = reshape(shape = concat_263x, x = hidden_states_405_cast_fp16)[name = string("value_states_55_cast_fp16")]; + tensor var_2657_shape_cast_fp16 = shape(x = key_states_55_cast_fp16)[name = string("op_2657_shape_cast_fp16")]; + int32 gather_255_axis_0 = const()[name = string("gather_255_axis_0"), val = int32(0)]; + int32 gather_255_batch_dims_0 = const()[name = string("gather_255_batch_dims_0"), val = int32(0)]; + bool gather_255_validate_indices_0 = const()[name = string("gather_255_validate_indices_0"), val = bool(false)]; + string var_2657_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2657_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_255_to_uint16 = const()[name = string("select_255_to_uint16"), val = uint16(2)]; + tensor var_2657_shape_cast_fp16_to_uint16 = cast(dtype = var_2657_shape_cast_fp16_to_uint16_dtype_0, x = var_2657_shape_cast_fp16)[name = string("cast_130")]; + uint16 gather_255_cast_uint16 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = select_255_to_uint16, validate_indices = gather_255_validate_indices_0, x = var_2657_shape_cast_fp16_to_uint16)[name = string("gather_255_cast_uint16")]; + string gather_255_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_255_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_264_values0_0 = const()[name = string("concat_264_values0_0"), val = int32(1)]; + int32 concat_264_values1_0 = const()[name = string("concat_264_values1_0"), val = int32(1)]; + int32 concat_264_values2_0 = const()[name = string("concat_264_values2_0"), val = int32(0)]; + int32 concat_264_axis_0 = const()[name = string("concat_264_axis_0"), val = int32(0)]; + bool concat_264_interleave_0 = const()[name = string("concat_264_interleave_0"), val = bool(false)]; + int32 gather_255_cast_uint16_to_int32 = cast(dtype = gather_255_cast_uint16_to_int32_dtype_0, x = gather_255_cast_uint16)[name = string("cast_129")]; + tensor concat_264 = concat(axis = concat_264_axis_0, interleave = concat_264_interleave_0, values = (concat_264_values0_0, concat_264_values1_0, concat_264_values2_0, gather_255_cast_uint16_to_int32))[name = string("concat_264")]; + tensor causal_mask_29_begin_0 = const()[name = string("causal_mask_29_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_29_end_mask_0 = const()[name = string("causal_mask_29_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_29_cast_fp16 = slice_by_index(begin = causal_mask_29_begin_0, end = concat_264, end_mask = causal_mask_29_end_mask_0, x = causal_mask)[name = string("causal_mask_29_cast_fp16")]; + tensor attn_output_53_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_29_cast_fp16, key = key_states_55_cast_fp16, query = query_states_55_cast_fp16, value = value_states_55_cast_fp16)[name = string("attn_output_53_cast_fp16")]; + tensor var_2663_perm_0 = const()[name = string("op_2663_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_265_axis_0 = const()[name = string("concat_265_axis_0"), val = int32(0)]; + bool concat_265_interleave_0 = const()[name = string("concat_265_interleave_0"), val = bool(false)]; + int32 gather_239_cast_uint16_to_int32 = cast(dtype = gather_239_cast_uint16_to_int32_dtype_0, x = gather_239_cast_uint16)[name = string("cast_128")]; + tensor concat_265 = concat(axis = concat_265_axis_0, interleave = concat_265_interleave_0, values = (gather_238, gather_239_cast_uint16_to_int32, var_81))[name = string("concat_265")]; + tensor var_2663_cast_fp16 = transpose(perm = var_2663_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_64")]; + tensor input_105_cast_fp16 = reshape(shape = concat_265, x = var_2663_cast_fp16)[name = string("input_105_cast_fp16")]; + tensor model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42164800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42330752))))[name = string("model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_94_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = string("linear_94_cast_fp16")]; + tensor hidden_states_409_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = linear_94_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; + fp16 var_76_promoted_27_to_fp16 = const()[name = string("op_76_promoted_27_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2672_cast_fp16 = pow(x = hidden_states_409_cast_fp16, y = var_76_promoted_27_to_fp16)[name = string("op_2672_cast_fp16")]; + tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; + bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; + tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_2672_cast_fp16)[name = string("variance_55_cast_fp16")]; + fp16 var_2675_to_fp16 = const()[name = string("op_2675_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2676_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2675_to_fp16)[name = string("op_2676_cast_fp16")]; + fp32 var_2677_epsilon_0 = const()[name = string("op_2677_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2677_cast_fp16 = rsqrt(epsilon = var_2677_epsilon_0, x = var_2676_cast_fp16)[name = string("op_2677_cast_fp16")]; + tensor hidden_states_413_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = var_2677_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; + tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42351552)))]; + tensor input_107_cast_fp16 = mul(x = model_model_layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_413_cast_fp16)[name = string("input_107_cast_fp16")]; + tensor model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42352768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42795200))))[name = string("model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_95_cast_fp16")]; + tensor var_2689_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2689_cast_fp16")]; + tensor model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(42850560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43292992))))[name = string("model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_96_cast_fp16")]; + tensor input_111_cast_fp16 = mul(x = var_2689_cast_fp16, y = linear_96_cast_fp16)[name = string("input_111_cast_fp16")]; + tensor model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43348352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43790784))))[name = string("model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_97_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = string("linear_97_cast_fp16")]; + tensor hidden_states_419_cast_fp16 = add(x = hidden_states_409_cast_fp16, y = linear_97_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; + fp16 var_76_promoted_28_to_fp16 = const()[name = string("op_76_promoted_28_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2702_cast_fp16 = pow(x = hidden_states_419_cast_fp16, y = var_76_promoted_28_to_fp16)[name = string("op_2702_cast_fp16")]; + tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; + bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; + tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_2702_cast_fp16)[name = string("variance_57_cast_fp16")]; + fp16 var_2705_to_fp16 = const()[name = string("op_2705_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2706_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2705_to_fp16)[name = string("op_2706_cast_fp16")]; + fp32 var_2707_epsilon_0 = const()[name = string("op_2707_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2707_cast_fp16 = rsqrt(epsilon = var_2707_epsilon_0, x = var_2706_cast_fp16)[name = string("op_2707_cast_fp16")]; + tensor hidden_states_423_cast_fp16 = mul(x = hidden_states_419_cast_fp16, y = var_2707_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; + tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43846144)))]; + tensor hidden_states_427_cast_fp16 = mul(x = model_model_layers_14_input_layernorm_weight_to_fp16, y = hidden_states_423_cast_fp16)[name = string("hidden_states_427_cast_fp16")]; + tensor var_2718_shape_cast_fp16 = shape(x = hidden_states_427_cast_fp16)[name = string("op_2718_shape_cast_fp16")]; + int32 gather_256 = const()[name = string("gather_256"), val = int32(1)]; + int32 gather_257_axis_0 = const()[name = string("gather_257_axis_0"), val = int32(0)]; + int32 gather_257_batch_dims_0 = const()[name = string("gather_257_batch_dims_0"), val = int32(0)]; + bool gather_257_validate_indices_0 = const()[name = string("gather_257_validate_indices_0"), val = bool(false)]; + string var_2718_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2718_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_257_to_uint16 = const()[name = string("select_257_to_uint16"), val = uint16(1)]; + tensor var_2718_shape_cast_fp16_to_uint16 = cast(dtype = var_2718_shape_cast_fp16_to_uint16_dtype_0, x = var_2718_shape_cast_fp16)[name = string("cast_127")]; + uint16 gather_257_cast_uint16 = gather(axis = gather_257_axis_0, batch_dims = gather_257_batch_dims_0, indices = select_257_to_uint16, validate_indices = gather_257_validate_indices_0, x = var_2718_shape_cast_fp16_to_uint16)[name = string("gather_257_cast_uint16")]; + string gather_257_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_257_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(43847360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44013312))))[name = string("model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_98_cast_fp16")]; + tensor model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44034112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44089472))))[name = string("model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_99_cast_fp16")]; + tensor model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44096448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44151808))))[name = string("model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_100_cast_fp16")]; + tensor concat_266x = const()[name = string("concat_266x"), val = tensor([1, -1, 9, 64])]; + tensor var_2727_cast_fp16 = reshape(shape = concat_266x, x = linear_98_cast_fp16)[name = string("op_2727_cast_fp16")]; + tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_267x = const()[name = string("concat_267x"), val = tensor([1, -1, 3, 64])]; + tensor var_2730_cast_fp16 = reshape(shape = concat_267x, x = linear_99_cast_fp16)[name = string("op_2730_cast_fp16")]; + tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_268x = const()[name = string("concat_268x"), val = tensor([1, -1, 3, 64])]; + tensor var_2733_cast_fp16 = reshape(shape = concat_268x, x = linear_100_cast_fp16)[name = string("op_2733_cast_fp16")]; + tensor v_state_29_perm_0 = const()[name = string("v_state_29_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = var_2727_cast_fp16)[name = string("transpose_63")]; + tensor var_2737_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2737_cast_fp16")]; + tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; + tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; + fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2748_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2748_cast_fp16")]; + bool var_2750_interleave_0 = const()[name = string("op_2750_interleave_0"), val = bool(false)]; + tensor var_2750_cast_fp16 = concat(axis = var_81, interleave = var_2750_interleave_0, values = (var_2748_cast_fp16, x1_57_cast_fp16))[name = string("op_2750_cast_fp16")]; + tensor var_2751_cast_fp16 = mul(x = var_2750_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2751_cast_fp16")]; + tensor query_states_59_cast_fp16 = add(x = var_2737_cast_fp16, y = var_2751_cast_fp16)[name = string("query_states_59_cast_fp16")]; + tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = var_2730_cast_fp16)[name = string("transpose_62")]; + tensor var_2753_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2753_cast_fp16")]; + tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; + tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; + fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2764_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2764_cast_fp16")]; + bool var_2766_interleave_0 = const()[name = string("op_2766_interleave_0"), val = bool(false)]; + tensor var_2766_cast_fp16 = concat(axis = var_81, interleave = var_2766_interleave_0, values = (var_2764_cast_fp16, x1_59_cast_fp16))[name = string("op_2766_cast_fp16")]; + tensor var_2767_cast_fp16 = mul(x = var_2766_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2767_cast_fp16")]; + tensor k_state_29_cast_fp16 = add(x = var_2753_cast_fp16, y = var_2767_cast_fp16)[name = string("k_state_29_cast_fp16")]; + tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([0])]; + tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; + tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; + tensor concat_271_values0_0 = const()[name = string("concat_271_values0_0"), val = tensor([14])]; + int32 concat_271_axis_0 = const()[name = string("concat_271_axis_0"), val = int32(0)]; + bool concat_271_interleave_0 = const()[name = string("concat_271_interleave_0"), val = bool(false)]; + tensor concat_271 = concat(axis = concat_271_axis_0, interleave = concat_271_interleave_0, values = (concat_271_values0_0, expand_dims_168, expand_dims_169, expand_dims_2, expand_dims_171))[name = string("concat_271")]; + tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_88_write_state")]; + tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_88")]; + tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_29_cast_fp16 = transpose(perm = v_state_29_perm_0, x = var_2733_cast_fp16)[name = string("transpose_61")]; + tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_89_write_state")]; + tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_89")]; + tensor var_2790_begin_0 = const()[name = string("op_2790_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2790_end_0 = const()[name = string("op_2790_end_0"), val = tensor([15, 1, 3, 2048, 64])]; + tensor var_2790_end_mask_0 = const()[name = string("op_2790_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2790_squeeze_mask_0 = const()[name = string("op_2790_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2790_cast_fp16 = slice_by_index(begin = var_2790_begin_0, end = var_2790_end_0, end_mask = var_2790_end_mask_0, squeeze_mask = var_2790_squeeze_mask_0, x = coreml_update_state_88)[name = string("op_2790_cast_fp16")]; + tensor var_2793_begin_0 = const()[name = string("op_2793_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2793_end_mask_0 = const()[name = string("op_2793_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2793_cast_fp16 = slice_by_index(begin = var_2793_begin_0, end = concat_11, end_mask = var_2793_end_mask_0, x = var_2790_cast_fp16)[name = string("op_2793_cast_fp16")]; + tensor var_2795_begin_0 = const()[name = string("op_2795_begin_0"), val = tensor([14, 0, 0, 0, 0])]; + tensor var_2795_end_0 = const()[name = string("op_2795_end_0"), val = tensor([15, 1, 3, 2048, 64])]; + tensor var_2795_end_mask_0 = const()[name = string("op_2795_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2795_squeeze_mask_0 = const()[name = string("op_2795_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2795_cast_fp16 = slice_by_index(begin = var_2795_begin_0, end = var_2795_end_0, end_mask = var_2795_end_mask_0, squeeze_mask = var_2795_squeeze_mask_0, x = coreml_update_state_89)[name = string("op_2795_cast_fp16")]; + tensor var_2798_begin_0 = const()[name = string("op_2798_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2798_end_mask_0 = const()[name = string("op_2798_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2798_cast_fp16 = slice_by_index(begin = var_2798_begin_0, end = concat_11, end_mask = var_2798_end_mask_0, x = var_2795_cast_fp16)[name = string("op_2798_cast_fp16")]; + tensor var_2800_shape_cast_fp16 = shape(x = var_2793_cast_fp16)[name = string("op_2800_shape_cast_fp16")]; + int32 gather_265 = const()[name = string("gather_265"), val = int32(1)]; + int32 gather_266 = const()[name = string("gather_266"), val = int32(3)]; + int32 gather_267_axis_0 = const()[name = string("gather_267_axis_0"), val = int32(0)]; + int32 gather_267_batch_dims_0 = const()[name = string("gather_267_batch_dims_0"), val = int32(0)]; + bool gather_267_validate_indices_0 = const()[name = string("gather_267_validate_indices_0"), val = bool(false)]; + string var_2800_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2800_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_267_to_uint16 = const()[name = string("select_267_to_uint16"), val = uint16(2)]; + tensor var_2800_shape_cast_fp16_to_uint16 = cast(dtype = var_2800_shape_cast_fp16_to_uint16_dtype_0, x = var_2800_shape_cast_fp16)[name = string("cast_126")]; + uint16 gather_267_cast_uint16 = gather(axis = gather_267_axis_0, batch_dims = gather_267_batch_dims_0, indices = select_267_to_uint16, validate_indices = gather_267_validate_indices_0, x = var_2800_shape_cast_fp16_to_uint16)[name = string("gather_267_cast_uint16")]; + string gather_267_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_267_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_268 = const()[name = string("gather_268"), val = int32(64)]; + tensor var_2807_axes_0 = const()[name = string("op_2807_axes_0"), val = tensor([2])]; + tensor var_2807_cast_fp16 = expand_dims(axes = var_2807_axes_0, x = var_2793_cast_fp16)[name = string("op_2807_cast_fp16")]; + tensor shape_297_cast_fp16 = shape(x = var_2807_cast_fp16)[name = string("shape_297_cast_fp16")]; + int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; + bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; + int32 gather_267_cast_uint16_to_int32 = cast(dtype = gather_267_cast_uint16_to_int32_dtype_0, x = gather_267_cast_uint16)[name = string("cast_125")]; + tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_265, gather_266, var_85, gather_267_cast_uint16_to_int32, gather_268))[name = string("concat_279")]; + tensor real_div_28 = real_div(x = concat_279, y = shape_297_cast_fp16)[name = string("real_div_28")]; + tensor hidden_states_431_cast_fp16 = tile(reps = real_div_28, x = var_2807_cast_fp16)[name = string("hidden_states_431_cast_fp16")]; + tensor concat_280x = const()[name = string("concat_280x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_59_cast_fp16 = reshape(shape = concat_280x, x = hidden_states_431_cast_fp16)[name = string("key_states_59_cast_fp16")]; + tensor var_2817_shape_cast_fp16 = shape(x = var_2798_cast_fp16)[name = string("op_2817_shape_cast_fp16")]; + int32 gather_269 = const()[name = string("gather_269"), val = int32(1)]; + int32 gather_270 = const()[name = string("gather_270"), val = int32(3)]; + int32 gather_271_axis_0 = const()[name = string("gather_271_axis_0"), val = int32(0)]; + int32 gather_271_batch_dims_0 = const()[name = string("gather_271_batch_dims_0"), val = int32(0)]; + bool gather_271_validate_indices_0 = const()[name = string("gather_271_validate_indices_0"), val = bool(false)]; + string var_2817_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2817_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_271_to_uint16 = const()[name = string("select_271_to_uint16"), val = uint16(2)]; + tensor var_2817_shape_cast_fp16_to_uint16 = cast(dtype = var_2817_shape_cast_fp16_to_uint16_dtype_0, x = var_2817_shape_cast_fp16)[name = string("cast_124")]; + uint16 gather_271_cast_uint16 = gather(axis = gather_271_axis_0, batch_dims = gather_271_batch_dims_0, indices = select_271_to_uint16, validate_indices = gather_271_validate_indices_0, x = var_2817_shape_cast_fp16_to_uint16)[name = string("gather_271_cast_uint16")]; + string gather_271_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_271_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_272 = const()[name = string("gather_272"), val = int32(64)]; + tensor var_2824_axes_0 = const()[name = string("op_2824_axes_0"), val = tensor([2])]; + tensor var_2824_cast_fp16 = expand_dims(axes = var_2824_axes_0, x = var_2798_cast_fp16)[name = string("op_2824_cast_fp16")]; + tensor shape_302_cast_fp16 = shape(x = var_2824_cast_fp16)[name = string("shape_302_cast_fp16")]; + int32 concat_281_axis_0 = const()[name = string("concat_281_axis_0"), val = int32(0)]; + bool concat_281_interleave_0 = const()[name = string("concat_281_interleave_0"), val = bool(false)]; + int32 gather_271_cast_uint16_to_int32 = cast(dtype = gather_271_cast_uint16_to_int32_dtype_0, x = gather_271_cast_uint16)[name = string("cast_123")]; + tensor concat_281 = concat(axis = concat_281_axis_0, interleave = concat_281_interleave_0, values = (gather_269, gather_270, var_85, gather_271_cast_uint16_to_int32, gather_272))[name = string("concat_281")]; + tensor real_div_29 = real_div(x = concat_281, y = shape_302_cast_fp16)[name = string("real_div_29")]; + tensor hidden_states_435_cast_fp16 = tile(reps = real_div_29, x = var_2824_cast_fp16)[name = string("hidden_states_435_cast_fp16")]; + tensor concat_282x = const()[name = string("concat_282x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_59_cast_fp16 = reshape(shape = concat_282x, x = hidden_states_435_cast_fp16)[name = string("value_states_59_cast_fp16")]; + tensor var_2834_shape_cast_fp16 = shape(x = key_states_59_cast_fp16)[name = string("op_2834_shape_cast_fp16")]; + int32 gather_273_axis_0 = const()[name = string("gather_273_axis_0"), val = int32(0)]; + int32 gather_273_batch_dims_0 = const()[name = string("gather_273_batch_dims_0"), val = int32(0)]; + bool gather_273_validate_indices_0 = const()[name = string("gather_273_validate_indices_0"), val = bool(false)]; + string var_2834_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2834_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_273_to_uint16 = const()[name = string("select_273_to_uint16"), val = uint16(2)]; + tensor var_2834_shape_cast_fp16_to_uint16 = cast(dtype = var_2834_shape_cast_fp16_to_uint16_dtype_0, x = var_2834_shape_cast_fp16)[name = string("cast_122")]; + uint16 gather_273_cast_uint16 = gather(axis = gather_273_axis_0, batch_dims = gather_273_batch_dims_0, indices = select_273_to_uint16, validate_indices = gather_273_validate_indices_0, x = var_2834_shape_cast_fp16_to_uint16)[name = string("gather_273_cast_uint16")]; + string gather_273_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_273_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_283_values0_0 = const()[name = string("concat_283_values0_0"), val = int32(1)]; + int32 concat_283_values1_0 = const()[name = string("concat_283_values1_0"), val = int32(1)]; + int32 concat_283_values2_0 = const()[name = string("concat_283_values2_0"), val = int32(0)]; + int32 concat_283_axis_0 = const()[name = string("concat_283_axis_0"), val = int32(0)]; + bool concat_283_interleave_0 = const()[name = string("concat_283_interleave_0"), val = bool(false)]; + int32 gather_273_cast_uint16_to_int32 = cast(dtype = gather_273_cast_uint16_to_int32_dtype_0, x = gather_273_cast_uint16)[name = string("cast_121")]; + tensor concat_283 = concat(axis = concat_283_axis_0, interleave = concat_283_interleave_0, values = (concat_283_values0_0, concat_283_values1_0, concat_283_values2_0, gather_273_cast_uint16_to_int32))[name = string("concat_283")]; + tensor causal_mask_31_begin_0 = const()[name = string("causal_mask_31_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_31_end_mask_0 = const()[name = string("causal_mask_31_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_31_cast_fp16 = slice_by_index(begin = causal_mask_31_begin_0, end = concat_283, end_mask = causal_mask_31_end_mask_0, x = causal_mask)[name = string("causal_mask_31_cast_fp16")]; + tensor attn_output_57_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_31_cast_fp16, key = key_states_59_cast_fp16, query = query_states_59_cast_fp16, value = value_states_59_cast_fp16)[name = string("attn_output_57_cast_fp16")]; + tensor var_2840_perm_0 = const()[name = string("op_2840_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_284_axis_0 = const()[name = string("concat_284_axis_0"), val = int32(0)]; + bool concat_284_interleave_0 = const()[name = string("concat_284_interleave_0"), val = bool(false)]; + int32 gather_257_cast_uint16_to_int32 = cast(dtype = gather_257_cast_uint16_to_int32_dtype_0, x = gather_257_cast_uint16)[name = string("cast_120")]; + tensor concat_284 = concat(axis = concat_284_axis_0, interleave = concat_284_interleave_0, values = (gather_256, gather_257_cast_uint16_to_int32, var_81))[name = string("concat_284")]; + tensor var_2840_cast_fp16 = transpose(perm = var_2840_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_60")]; + tensor input_113_cast_fp16 = reshape(shape = concat_284, x = var_2840_cast_fp16)[name = string("input_113_cast_fp16")]; + tensor model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44158784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44324736))))[name = string("model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_101_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = string("linear_101_cast_fp16")]; + tensor hidden_states_439_cast_fp16 = add(x = hidden_states_419_cast_fp16, y = linear_101_cast_fp16)[name = string("hidden_states_439_cast_fp16")]; + fp16 var_76_promoted_29_to_fp16 = const()[name = string("op_76_promoted_29_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2849_cast_fp16 = pow(x = hidden_states_439_cast_fp16, y = var_76_promoted_29_to_fp16)[name = string("op_2849_cast_fp16")]; + tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; + bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; + tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_2849_cast_fp16)[name = string("variance_59_cast_fp16")]; + fp16 var_2852_to_fp16 = const()[name = string("op_2852_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2853_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2852_to_fp16)[name = string("op_2853_cast_fp16")]; + fp32 var_2854_epsilon_0 = const()[name = string("op_2854_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2854_cast_fp16 = rsqrt(epsilon = var_2854_epsilon_0, x = var_2853_cast_fp16)[name = string("op_2854_cast_fp16")]; + tensor hidden_states_443_cast_fp16 = mul(x = hidden_states_439_cast_fp16, y = var_2854_cast_fp16)[name = string("hidden_states_443_cast_fp16")]; + tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44345536)))]; + tensor input_115_cast_fp16 = mul(x = model_model_layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_443_cast_fp16)[name = string("input_115_cast_fp16")]; + tensor model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44346752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44789184))))[name = string("model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_102_cast_fp16")]; + tensor var_2866_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_2866_cast_fp16")]; + tensor model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(44844544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45286976))))[name = string("model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_103_cast_fp16")]; + tensor input_119_cast_fp16 = mul(x = var_2866_cast_fp16, y = linear_103_cast_fp16)[name = string("input_119_cast_fp16")]; + tensor model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45342336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45784768))))[name = string("model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_104_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = string("linear_104_cast_fp16")]; + tensor hidden_states_449_cast_fp16 = add(x = hidden_states_439_cast_fp16, y = linear_104_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; + fp16 var_76_promoted_30_to_fp16 = const()[name = string("op_76_promoted_30_to_fp16"), val = fp16(0x1p+1)]; + tensor var_2879_cast_fp16 = pow(x = hidden_states_449_cast_fp16, y = var_76_promoted_30_to_fp16)[name = string("op_2879_cast_fp16")]; + tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; + bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; + tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_2879_cast_fp16)[name = string("variance_61_cast_fp16")]; + fp16 var_2882_to_fp16 = const()[name = string("op_2882_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_2883_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2882_to_fp16)[name = string("op_2883_cast_fp16")]; + fp32 var_2884_epsilon_0 = const()[name = string("op_2884_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_2884_cast_fp16 = rsqrt(epsilon = var_2884_epsilon_0, x = var_2883_cast_fp16)[name = string("op_2884_cast_fp16")]; + tensor hidden_states_453_cast_fp16 = mul(x = hidden_states_449_cast_fp16, y = var_2884_cast_fp16)[name = string("hidden_states_453_cast_fp16")]; + tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45840128)))]; + tensor hidden_states_457_cast_fp16 = mul(x = model_model_layers_15_input_layernorm_weight_to_fp16, y = hidden_states_453_cast_fp16)[name = string("hidden_states_457_cast_fp16")]; + tensor var_2895_shape_cast_fp16 = shape(x = hidden_states_457_cast_fp16)[name = string("op_2895_shape_cast_fp16")]; + int32 gather_274 = const()[name = string("gather_274"), val = int32(1)]; + int32 gather_275_axis_0 = const()[name = string("gather_275_axis_0"), val = int32(0)]; + int32 gather_275_batch_dims_0 = const()[name = string("gather_275_batch_dims_0"), val = int32(0)]; + bool gather_275_validate_indices_0 = const()[name = string("gather_275_validate_indices_0"), val = bool(false)]; + string var_2895_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2895_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_275_to_uint16 = const()[name = string("select_275_to_uint16"), val = uint16(1)]; + tensor var_2895_shape_cast_fp16_to_uint16 = cast(dtype = var_2895_shape_cast_fp16_to_uint16_dtype_0, x = var_2895_shape_cast_fp16)[name = string("cast_119")]; + uint16 gather_275_cast_uint16 = gather(axis = gather_275_axis_0, batch_dims = gather_275_batch_dims_0, indices = select_275_to_uint16, validate_indices = gather_275_validate_indices_0, x = var_2895_shape_cast_fp16_to_uint16)[name = string("gather_275_cast_uint16")]; + string gather_275_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_275_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(45841344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46007296))))[name = string("model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_105_cast_fp16")]; + tensor model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46028096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46083456))))[name = string("model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_106_cast_fp16")]; + tensor model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46090432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46145792))))[name = string("model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_107_cast_fp16")]; + tensor concat_285x = const()[name = string("concat_285x"), val = tensor([1, -1, 9, 64])]; + tensor var_2904_cast_fp16 = reshape(shape = concat_285x, x = linear_105_cast_fp16)[name = string("op_2904_cast_fp16")]; + tensor q_31_perm_0 = const()[name = string("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 3, 64])]; + tensor var_2907_cast_fp16 = reshape(shape = concat_286x, x = linear_106_cast_fp16)[name = string("op_2907_cast_fp16")]; + tensor k_31_perm_0 = const()[name = string("k_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 3, 64])]; + tensor var_2910_cast_fp16 = reshape(shape = concat_287x, x = linear_107_cast_fp16)[name = string("op_2910_cast_fp16")]; + tensor v_state_31_perm_0 = const()[name = string("v_state_31_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_31_cast_fp16 = transpose(perm = q_31_perm_0, x = var_2904_cast_fp16)[name = string("transpose_59")]; + tensor var_2914_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2914_cast_fp16")]; + tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; + tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; + fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2925_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2925_cast_fp16")]; + bool var_2927_interleave_0 = const()[name = string("op_2927_interleave_0"), val = bool(false)]; + tensor var_2927_cast_fp16 = concat(axis = var_81, interleave = var_2927_interleave_0, values = (var_2925_cast_fp16, x1_61_cast_fp16))[name = string("op_2927_cast_fp16")]; + tensor var_2928_cast_fp16 = mul(x = var_2927_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2928_cast_fp16")]; + tensor query_states_63_cast_fp16 = add(x = var_2914_cast_fp16, y = var_2928_cast_fp16)[name = string("query_states_63_cast_fp16")]; + tensor k_31_cast_fp16 = transpose(perm = k_31_perm_0, x = var_2907_cast_fp16)[name = string("transpose_58")]; + tensor var_2930_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2930_cast_fp16")]; + tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; + tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; + fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_2941_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2941_cast_fp16")]; + bool var_2943_interleave_0 = const()[name = string("op_2943_interleave_0"), val = bool(false)]; + tensor var_2943_cast_fp16 = concat(axis = var_81, interleave = var_2943_interleave_0, values = (var_2941_cast_fp16, x1_63_cast_fp16))[name = string("op_2943_cast_fp16")]; + tensor var_2944_cast_fp16 = mul(x = var_2943_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2944_cast_fp16")]; + tensor k_state_31_cast_fp16 = add(x = var_2930_cast_fp16, y = var_2944_cast_fp16)[name = string("k_state_31_cast_fp16")]; + tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([0])]; + tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; + tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; + tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([15])]; + int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; + bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; + tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_180, expand_dims_181, expand_dims_2, expand_dims_183))[name = string("concat_290")]; + tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state_31_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_90_write_state")]; + tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_90")]; + tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_31_cast_fp16 = transpose(perm = v_state_31_perm_0, x = var_2910_cast_fp16)[name = string("transpose_57")]; + tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state_31_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_91_write_state")]; + tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_91")]; + tensor var_2967_begin_0 = const()[name = string("op_2967_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2967_end_0 = const()[name = string("op_2967_end_0"), val = tensor([16, 1, 3, 2048, 64])]; + tensor var_2967_end_mask_0 = const()[name = string("op_2967_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2967_squeeze_mask_0 = const()[name = string("op_2967_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2967_cast_fp16 = slice_by_index(begin = var_2967_begin_0, end = var_2967_end_0, end_mask = var_2967_end_mask_0, squeeze_mask = var_2967_squeeze_mask_0, x = coreml_update_state_90)[name = string("op_2967_cast_fp16")]; + tensor var_2970_begin_0 = const()[name = string("op_2970_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2970_end_mask_0 = const()[name = string("op_2970_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2970_cast_fp16 = slice_by_index(begin = var_2970_begin_0, end = concat_11, end_mask = var_2970_end_mask_0, x = var_2967_cast_fp16)[name = string("op_2970_cast_fp16")]; + tensor var_2972_begin_0 = const()[name = string("op_2972_begin_0"), val = tensor([15, 0, 0, 0, 0])]; + tensor var_2972_end_0 = const()[name = string("op_2972_end_0"), val = tensor([16, 1, 3, 2048, 64])]; + tensor var_2972_end_mask_0 = const()[name = string("op_2972_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_2972_squeeze_mask_0 = const()[name = string("op_2972_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_2972_cast_fp16 = slice_by_index(begin = var_2972_begin_0, end = var_2972_end_0, end_mask = var_2972_end_mask_0, squeeze_mask = var_2972_squeeze_mask_0, x = coreml_update_state_91)[name = string("op_2972_cast_fp16")]; + tensor var_2975_begin_0 = const()[name = string("op_2975_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2975_end_mask_0 = const()[name = string("op_2975_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2975_cast_fp16 = slice_by_index(begin = var_2975_begin_0, end = concat_11, end_mask = var_2975_end_mask_0, x = var_2972_cast_fp16)[name = string("op_2975_cast_fp16")]; + tensor var_2977_shape_cast_fp16 = shape(x = var_2970_cast_fp16)[name = string("op_2977_shape_cast_fp16")]; + int32 gather_283 = const()[name = string("gather_283"), val = int32(1)]; + int32 gather_284 = const()[name = string("gather_284"), val = int32(3)]; + int32 gather_285_axis_0 = const()[name = string("gather_285_axis_0"), val = int32(0)]; + int32 gather_285_batch_dims_0 = const()[name = string("gather_285_batch_dims_0"), val = int32(0)]; + bool gather_285_validate_indices_0 = const()[name = string("gather_285_validate_indices_0"), val = bool(false)]; + string var_2977_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2977_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_285_to_uint16 = const()[name = string("select_285_to_uint16"), val = uint16(2)]; + tensor var_2977_shape_cast_fp16_to_uint16 = cast(dtype = var_2977_shape_cast_fp16_to_uint16_dtype_0, x = var_2977_shape_cast_fp16)[name = string("cast_118")]; + uint16 gather_285_cast_uint16 = gather(axis = gather_285_axis_0, batch_dims = gather_285_batch_dims_0, indices = select_285_to_uint16, validate_indices = gather_285_validate_indices_0, x = var_2977_shape_cast_fp16_to_uint16)[name = string("gather_285_cast_uint16")]; + string gather_285_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_285_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_286 = const()[name = string("gather_286"), val = int32(64)]; + tensor var_2984_axes_0 = const()[name = string("op_2984_axes_0"), val = tensor([2])]; + tensor var_2984_cast_fp16 = expand_dims(axes = var_2984_axes_0, x = var_2970_cast_fp16)[name = string("op_2984_cast_fp16")]; + tensor shape_317_cast_fp16 = shape(x = var_2984_cast_fp16)[name = string("shape_317_cast_fp16")]; + int32 concat_298_axis_0 = const()[name = string("concat_298_axis_0"), val = int32(0)]; + bool concat_298_interleave_0 = const()[name = string("concat_298_interleave_0"), val = bool(false)]; + int32 gather_285_cast_uint16_to_int32 = cast(dtype = gather_285_cast_uint16_to_int32_dtype_0, x = gather_285_cast_uint16)[name = string("cast_117")]; + tensor concat_298 = concat(axis = concat_298_axis_0, interleave = concat_298_interleave_0, values = (gather_283, gather_284, var_85, gather_285_cast_uint16_to_int32, gather_286))[name = string("concat_298")]; + tensor real_div_30 = real_div(x = concat_298, y = shape_317_cast_fp16)[name = string("real_div_30")]; + tensor hidden_states_461_cast_fp16 = tile(reps = real_div_30, x = var_2984_cast_fp16)[name = string("hidden_states_461_cast_fp16")]; + tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_63_cast_fp16 = reshape(shape = concat_299x, x = hidden_states_461_cast_fp16)[name = string("key_states_63_cast_fp16")]; + tensor var_2994_shape_cast_fp16 = shape(x = var_2975_cast_fp16)[name = string("op_2994_shape_cast_fp16")]; + int32 gather_287 = const()[name = string("gather_287"), val = int32(1)]; + int32 gather_288 = const()[name = string("gather_288"), val = int32(3)]; + int32 gather_289_axis_0 = const()[name = string("gather_289_axis_0"), val = int32(0)]; + int32 gather_289_batch_dims_0 = const()[name = string("gather_289_batch_dims_0"), val = int32(0)]; + bool gather_289_validate_indices_0 = const()[name = string("gather_289_validate_indices_0"), val = bool(false)]; + string var_2994_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2994_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_289_to_uint16 = const()[name = string("select_289_to_uint16"), val = uint16(2)]; + tensor var_2994_shape_cast_fp16_to_uint16 = cast(dtype = var_2994_shape_cast_fp16_to_uint16_dtype_0, x = var_2994_shape_cast_fp16)[name = string("cast_116")]; + uint16 gather_289_cast_uint16 = gather(axis = gather_289_axis_0, batch_dims = gather_289_batch_dims_0, indices = select_289_to_uint16, validate_indices = gather_289_validate_indices_0, x = var_2994_shape_cast_fp16_to_uint16)[name = string("gather_289_cast_uint16")]; + string gather_289_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_289_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_290 = const()[name = string("gather_290"), val = int32(64)]; + tensor var_3001_axes_0 = const()[name = string("op_3001_axes_0"), val = tensor([2])]; + tensor var_3001_cast_fp16 = expand_dims(axes = var_3001_axes_0, x = var_2975_cast_fp16)[name = string("op_3001_cast_fp16")]; + tensor shape_322_cast_fp16 = shape(x = var_3001_cast_fp16)[name = string("shape_322_cast_fp16")]; + int32 concat_300_axis_0 = const()[name = string("concat_300_axis_0"), val = int32(0)]; + bool concat_300_interleave_0 = const()[name = string("concat_300_interleave_0"), val = bool(false)]; + int32 gather_289_cast_uint16_to_int32 = cast(dtype = gather_289_cast_uint16_to_int32_dtype_0, x = gather_289_cast_uint16)[name = string("cast_115")]; + tensor concat_300 = concat(axis = concat_300_axis_0, interleave = concat_300_interleave_0, values = (gather_287, gather_288, var_85, gather_289_cast_uint16_to_int32, gather_290))[name = string("concat_300")]; + tensor real_div_31 = real_div(x = concat_300, y = shape_322_cast_fp16)[name = string("real_div_31")]; + tensor hidden_states_465_cast_fp16 = tile(reps = real_div_31, x = var_3001_cast_fp16)[name = string("hidden_states_465_cast_fp16")]; + tensor concat_301x = const()[name = string("concat_301x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_63_cast_fp16 = reshape(shape = concat_301x, x = hidden_states_465_cast_fp16)[name = string("value_states_63_cast_fp16")]; + tensor var_3011_shape_cast_fp16 = shape(x = key_states_63_cast_fp16)[name = string("op_3011_shape_cast_fp16")]; + int32 gather_291_axis_0 = const()[name = string("gather_291_axis_0"), val = int32(0)]; + int32 gather_291_batch_dims_0 = const()[name = string("gather_291_batch_dims_0"), val = int32(0)]; + bool gather_291_validate_indices_0 = const()[name = string("gather_291_validate_indices_0"), val = bool(false)]; + string var_3011_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3011_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_291_to_uint16 = const()[name = string("select_291_to_uint16"), val = uint16(2)]; + tensor var_3011_shape_cast_fp16_to_uint16 = cast(dtype = var_3011_shape_cast_fp16_to_uint16_dtype_0, x = var_3011_shape_cast_fp16)[name = string("cast_114")]; + uint16 gather_291_cast_uint16 = gather(axis = gather_291_axis_0, batch_dims = gather_291_batch_dims_0, indices = select_291_to_uint16, validate_indices = gather_291_validate_indices_0, x = var_3011_shape_cast_fp16_to_uint16)[name = string("gather_291_cast_uint16")]; + string gather_291_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_291_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(1)]; + int32 concat_302_values1_0 = const()[name = string("concat_302_values1_0"), val = int32(1)]; + int32 concat_302_values2_0 = const()[name = string("concat_302_values2_0"), val = int32(0)]; + int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; + bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; + int32 gather_291_cast_uint16_to_int32 = cast(dtype = gather_291_cast_uint16_to_int32_dtype_0, x = gather_291_cast_uint16)[name = string("cast_113")]; + tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, concat_302_values1_0, concat_302_values2_0, gather_291_cast_uint16_to_int32))[name = string("concat_302")]; + tensor causal_mask_33_begin_0 = const()[name = string("causal_mask_33_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_33_end_mask_0 = const()[name = string("causal_mask_33_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_33_cast_fp16 = slice_by_index(begin = causal_mask_33_begin_0, end = concat_302, end_mask = causal_mask_33_end_mask_0, x = causal_mask)[name = string("causal_mask_33_cast_fp16")]; + tensor attn_output_61_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_33_cast_fp16, key = key_states_63_cast_fp16, query = query_states_63_cast_fp16, value = value_states_63_cast_fp16)[name = string("attn_output_61_cast_fp16")]; + tensor var_3017_perm_0 = const()[name = string("op_3017_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_303_axis_0 = const()[name = string("concat_303_axis_0"), val = int32(0)]; + bool concat_303_interleave_0 = const()[name = string("concat_303_interleave_0"), val = bool(false)]; + int32 gather_275_cast_uint16_to_int32 = cast(dtype = gather_275_cast_uint16_to_int32_dtype_0, x = gather_275_cast_uint16)[name = string("cast_112")]; + tensor concat_303 = concat(axis = concat_303_axis_0, interleave = concat_303_interleave_0, values = (gather_274, gather_275_cast_uint16_to_int32, var_81))[name = string("concat_303")]; + tensor var_3017_cast_fp16 = transpose(perm = var_3017_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_56")]; + tensor input_121_cast_fp16 = reshape(shape = concat_303, x = var_3017_cast_fp16)[name = string("input_121_cast_fp16")]; + tensor model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46152768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46318720))))[name = string("model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_108_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized, x = input_121_cast_fp16)[name = string("linear_108_cast_fp16")]; + tensor hidden_states_469_cast_fp16 = add(x = hidden_states_449_cast_fp16, y = linear_108_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; + fp16 var_76_promoted_31_to_fp16 = const()[name = string("op_76_promoted_31_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3026_cast_fp16 = pow(x = hidden_states_469_cast_fp16, y = var_76_promoted_31_to_fp16)[name = string("op_3026_cast_fp16")]; + tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; + bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; + tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_3026_cast_fp16)[name = string("variance_63_cast_fp16")]; + fp16 var_3029_to_fp16 = const()[name = string("op_3029_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3030_cast_fp16 = add(x = variance_63_cast_fp16, y = var_3029_to_fp16)[name = string("op_3030_cast_fp16")]; + fp32 var_3031_epsilon_0 = const()[name = string("op_3031_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3031_cast_fp16 = rsqrt(epsilon = var_3031_epsilon_0, x = var_3030_cast_fp16)[name = string("op_3031_cast_fp16")]; + tensor hidden_states_473_cast_fp16 = mul(x = hidden_states_469_cast_fp16, y = var_3031_cast_fp16)[name = string("hidden_states_473_cast_fp16")]; + tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46339520)))]; + tensor input_123_cast_fp16 = mul(x = model_model_layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_473_cast_fp16)[name = string("input_123_cast_fp16")]; + tensor model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46340736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46783168))))[name = string("model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_109_cast_fp16")]; + tensor var_3043_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_3043_cast_fp16")]; + tensor model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(46838528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47280960))))[name = string("model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_110_cast_fp16")]; + tensor input_127_cast_fp16 = mul(x = var_3043_cast_fp16, y = linear_110_cast_fp16)[name = string("input_127_cast_fp16")]; + tensor model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47336320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47778752))))[name = string("model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_111_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized, x = input_127_cast_fp16)[name = string("linear_111_cast_fp16")]; + tensor hidden_states_479_cast_fp16 = add(x = hidden_states_469_cast_fp16, y = linear_111_cast_fp16)[name = string("hidden_states_479_cast_fp16")]; + fp16 var_76_promoted_32_to_fp16 = const()[name = string("op_76_promoted_32_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3056_cast_fp16 = pow(x = hidden_states_479_cast_fp16, y = var_76_promoted_32_to_fp16)[name = string("op_3056_cast_fp16")]; + tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; + bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; + tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_3056_cast_fp16)[name = string("variance_65_cast_fp16")]; + fp16 var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3060_cast_fp16 = add(x = variance_65_cast_fp16, y = var_3059_to_fp16)[name = string("op_3060_cast_fp16")]; + fp32 var_3061_epsilon_0 = const()[name = string("op_3061_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3061_cast_fp16 = rsqrt(epsilon = var_3061_epsilon_0, x = var_3060_cast_fp16)[name = string("op_3061_cast_fp16")]; + tensor hidden_states_483_cast_fp16 = mul(x = hidden_states_479_cast_fp16, y = var_3061_cast_fp16)[name = string("hidden_states_483_cast_fp16")]; + tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47834112)))]; + tensor hidden_states_487_cast_fp16 = mul(x = model_model_layers_16_input_layernorm_weight_to_fp16, y = hidden_states_483_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; + tensor var_3072_shape_cast_fp16 = shape(x = hidden_states_487_cast_fp16)[name = string("op_3072_shape_cast_fp16")]; + int32 gather_292 = const()[name = string("gather_292"), val = int32(1)]; + int32 gather_293_axis_0 = const()[name = string("gather_293_axis_0"), val = int32(0)]; + int32 gather_293_batch_dims_0 = const()[name = string("gather_293_batch_dims_0"), val = int32(0)]; + bool gather_293_validate_indices_0 = const()[name = string("gather_293_validate_indices_0"), val = bool(false)]; + string var_3072_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3072_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_293_to_uint16 = const()[name = string("select_293_to_uint16"), val = uint16(1)]; + tensor var_3072_shape_cast_fp16_to_uint16 = cast(dtype = var_3072_shape_cast_fp16_to_uint16_dtype_0, x = var_3072_shape_cast_fp16)[name = string("cast_111")]; + uint16 gather_293_cast_uint16 = gather(axis = gather_293_axis_0, batch_dims = gather_293_batch_dims_0, indices = select_293_to_uint16, validate_indices = gather_293_validate_indices_0, x = var_3072_shape_cast_fp16_to_uint16)[name = string("gather_293_cast_uint16")]; + string gather_293_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_293_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(47835328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48001280))))[name = string("model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_112_cast_fp16")]; + tensor model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48022080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48077440))))[name = string("model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_113_cast_fp16")]; + tensor model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48084416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48139776))))[name = string("model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_114_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_114_cast_fp16")]; + tensor concat_304x = const()[name = string("concat_304x"), val = tensor([1, -1, 9, 64])]; + tensor var_3081_cast_fp16 = reshape(shape = concat_304x, x = linear_112_cast_fp16)[name = string("op_3081_cast_fp16")]; + tensor q_33_perm_0 = const()[name = string("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_305x = const()[name = string("concat_305x"), val = tensor([1, -1, 3, 64])]; + tensor var_3084_cast_fp16 = reshape(shape = concat_305x, x = linear_113_cast_fp16)[name = string("op_3084_cast_fp16")]; + tensor k_33_perm_0 = const()[name = string("k_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_306x = const()[name = string("concat_306x"), val = tensor([1, -1, 3, 64])]; + tensor var_3087_cast_fp16 = reshape(shape = concat_306x, x = linear_114_cast_fp16)[name = string("op_3087_cast_fp16")]; + tensor v_state_33_perm_0 = const()[name = string("v_state_33_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_33_cast_fp16 = transpose(perm = q_33_perm_0, x = var_3081_cast_fp16)[name = string("transpose_55")]; + tensor var_3091_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3091_cast_fp16")]; + tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; + tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; + fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3102_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_3102_cast_fp16")]; + bool var_3104_interleave_0 = const()[name = string("op_3104_interleave_0"), val = bool(false)]; + tensor var_3104_cast_fp16 = concat(axis = var_81, interleave = var_3104_interleave_0, values = (var_3102_cast_fp16, x1_65_cast_fp16))[name = string("op_3104_cast_fp16")]; + tensor var_3105_cast_fp16 = mul(x = var_3104_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3105_cast_fp16")]; + tensor query_states_67_cast_fp16 = add(x = var_3091_cast_fp16, y = var_3105_cast_fp16)[name = string("query_states_67_cast_fp16")]; + tensor k_33_cast_fp16 = transpose(perm = k_33_perm_0, x = var_3084_cast_fp16)[name = string("transpose_54")]; + tensor var_3107_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3107_cast_fp16")]; + tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; + tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; + fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3118_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_3118_cast_fp16")]; + bool var_3120_interleave_0 = const()[name = string("op_3120_interleave_0"), val = bool(false)]; + tensor var_3120_cast_fp16 = concat(axis = var_81, interleave = var_3120_interleave_0, values = (var_3118_cast_fp16, x1_67_cast_fp16))[name = string("op_3120_cast_fp16")]; + tensor var_3121_cast_fp16 = mul(x = var_3120_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3121_cast_fp16")]; + tensor k_state_33_cast_fp16 = add(x = var_3107_cast_fp16, y = var_3121_cast_fp16)[name = string("k_state_33_cast_fp16")]; + tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; + tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; + tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; + tensor concat_309_values0_0 = const()[name = string("concat_309_values0_0"), val = tensor([16])]; + int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; + bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; + tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (concat_309_values0_0, expand_dims_192, expand_dims_193, expand_dims_2, expand_dims_195))[name = string("concat_309")]; + tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_309, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = k_state_33_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_92_write_state")]; + tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_92")]; + tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_33_cast_fp16 = transpose(perm = v_state_33_perm_0, x = var_3087_cast_fp16)[name = string("transpose_53")]; + tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_309, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = v_state_33_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_93_write_state")]; + tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_93")]; + tensor var_3144_begin_0 = const()[name = string("op_3144_begin_0"), val = tensor([16, 0, 0, 0, 0])]; + tensor var_3144_end_0 = const()[name = string("op_3144_end_0"), val = tensor([17, 1, 3, 2048, 64])]; + tensor var_3144_end_mask_0 = const()[name = string("op_3144_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3144_squeeze_mask_0 = const()[name = string("op_3144_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3144_cast_fp16 = slice_by_index(begin = var_3144_begin_0, end = var_3144_end_0, end_mask = var_3144_end_mask_0, squeeze_mask = var_3144_squeeze_mask_0, x = coreml_update_state_92)[name = string("op_3144_cast_fp16")]; + tensor var_3147_begin_0 = const()[name = string("op_3147_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3147_end_mask_0 = const()[name = string("op_3147_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3147_cast_fp16 = slice_by_index(begin = var_3147_begin_0, end = concat_11, end_mask = var_3147_end_mask_0, x = var_3144_cast_fp16)[name = string("op_3147_cast_fp16")]; + tensor var_3149_begin_0 = const()[name = string("op_3149_begin_0"), val = tensor([16, 0, 0, 0, 0])]; + tensor var_3149_end_0 = const()[name = string("op_3149_end_0"), val = tensor([17, 1, 3, 2048, 64])]; + tensor var_3149_end_mask_0 = const()[name = string("op_3149_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3149_squeeze_mask_0 = const()[name = string("op_3149_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3149_cast_fp16 = slice_by_index(begin = var_3149_begin_0, end = var_3149_end_0, end_mask = var_3149_end_mask_0, squeeze_mask = var_3149_squeeze_mask_0, x = coreml_update_state_93)[name = string("op_3149_cast_fp16")]; + tensor var_3152_begin_0 = const()[name = string("op_3152_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3152_end_mask_0 = const()[name = string("op_3152_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3152_cast_fp16 = slice_by_index(begin = var_3152_begin_0, end = concat_11, end_mask = var_3152_end_mask_0, x = var_3149_cast_fp16)[name = string("op_3152_cast_fp16")]; + tensor var_3154_shape_cast_fp16 = shape(x = var_3147_cast_fp16)[name = string("op_3154_shape_cast_fp16")]; + int32 gather_301 = const()[name = string("gather_301"), val = int32(1)]; + int32 gather_302 = const()[name = string("gather_302"), val = int32(3)]; + int32 gather_303_axis_0 = const()[name = string("gather_303_axis_0"), val = int32(0)]; + int32 gather_303_batch_dims_0 = const()[name = string("gather_303_batch_dims_0"), val = int32(0)]; + bool gather_303_validate_indices_0 = const()[name = string("gather_303_validate_indices_0"), val = bool(false)]; + string var_3154_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3154_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_303_to_uint16 = const()[name = string("select_303_to_uint16"), val = uint16(2)]; + tensor var_3154_shape_cast_fp16_to_uint16 = cast(dtype = var_3154_shape_cast_fp16_to_uint16_dtype_0, x = var_3154_shape_cast_fp16)[name = string("cast_110")]; + uint16 gather_303_cast_uint16 = gather(axis = gather_303_axis_0, batch_dims = gather_303_batch_dims_0, indices = select_303_to_uint16, validate_indices = gather_303_validate_indices_0, x = var_3154_shape_cast_fp16_to_uint16)[name = string("gather_303_cast_uint16")]; + string gather_303_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_303_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_304 = const()[name = string("gather_304"), val = int32(64)]; + tensor var_3161_axes_0 = const()[name = string("op_3161_axes_0"), val = tensor([2])]; + tensor var_3161_cast_fp16 = expand_dims(axes = var_3161_axes_0, x = var_3147_cast_fp16)[name = string("op_3161_cast_fp16")]; + tensor shape_337_cast_fp16 = shape(x = var_3161_cast_fp16)[name = string("shape_337_cast_fp16")]; + int32 concat_317_axis_0 = const()[name = string("concat_317_axis_0"), val = int32(0)]; + bool concat_317_interleave_0 = const()[name = string("concat_317_interleave_0"), val = bool(false)]; + int32 gather_303_cast_uint16_to_int32 = cast(dtype = gather_303_cast_uint16_to_int32_dtype_0, x = gather_303_cast_uint16)[name = string("cast_109")]; + tensor concat_317 = concat(axis = concat_317_axis_0, interleave = concat_317_interleave_0, values = (gather_301, gather_302, var_85, gather_303_cast_uint16_to_int32, gather_304))[name = string("concat_317")]; + tensor real_div_32 = real_div(x = concat_317, y = shape_337_cast_fp16)[name = string("real_div_32")]; + tensor hidden_states_491_cast_fp16 = tile(reps = real_div_32, x = var_3161_cast_fp16)[name = string("hidden_states_491_cast_fp16")]; + tensor concat_318x = const()[name = string("concat_318x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_67_cast_fp16 = reshape(shape = concat_318x, x = hidden_states_491_cast_fp16)[name = string("key_states_67_cast_fp16")]; + tensor var_3171_shape_cast_fp16 = shape(x = var_3152_cast_fp16)[name = string("op_3171_shape_cast_fp16")]; + int32 gather_305 = const()[name = string("gather_305"), val = int32(1)]; + int32 gather_306 = const()[name = string("gather_306"), val = int32(3)]; + int32 gather_307_axis_0 = const()[name = string("gather_307_axis_0"), val = int32(0)]; + int32 gather_307_batch_dims_0 = const()[name = string("gather_307_batch_dims_0"), val = int32(0)]; + bool gather_307_validate_indices_0 = const()[name = string("gather_307_validate_indices_0"), val = bool(false)]; + string var_3171_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3171_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_307_to_uint16 = const()[name = string("select_307_to_uint16"), val = uint16(2)]; + tensor var_3171_shape_cast_fp16_to_uint16 = cast(dtype = var_3171_shape_cast_fp16_to_uint16_dtype_0, x = var_3171_shape_cast_fp16)[name = string("cast_108")]; + uint16 gather_307_cast_uint16 = gather(axis = gather_307_axis_0, batch_dims = gather_307_batch_dims_0, indices = select_307_to_uint16, validate_indices = gather_307_validate_indices_0, x = var_3171_shape_cast_fp16_to_uint16)[name = string("gather_307_cast_uint16")]; + string gather_307_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_307_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_308 = const()[name = string("gather_308"), val = int32(64)]; + tensor var_3178_axes_0 = const()[name = string("op_3178_axes_0"), val = tensor([2])]; + tensor var_3178_cast_fp16 = expand_dims(axes = var_3178_axes_0, x = var_3152_cast_fp16)[name = string("op_3178_cast_fp16")]; + tensor shape_342_cast_fp16 = shape(x = var_3178_cast_fp16)[name = string("shape_342_cast_fp16")]; + int32 concat_319_axis_0 = const()[name = string("concat_319_axis_0"), val = int32(0)]; + bool concat_319_interleave_0 = const()[name = string("concat_319_interleave_0"), val = bool(false)]; + int32 gather_307_cast_uint16_to_int32 = cast(dtype = gather_307_cast_uint16_to_int32_dtype_0, x = gather_307_cast_uint16)[name = string("cast_107")]; + tensor concat_319 = concat(axis = concat_319_axis_0, interleave = concat_319_interleave_0, values = (gather_305, gather_306, var_85, gather_307_cast_uint16_to_int32, gather_308))[name = string("concat_319")]; + tensor real_div_33 = real_div(x = concat_319, y = shape_342_cast_fp16)[name = string("real_div_33")]; + tensor hidden_states_495_cast_fp16 = tile(reps = real_div_33, x = var_3178_cast_fp16)[name = string("hidden_states_495_cast_fp16")]; + tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_67_cast_fp16 = reshape(shape = concat_320x, x = hidden_states_495_cast_fp16)[name = string("value_states_67_cast_fp16")]; + tensor var_3188_shape_cast_fp16 = shape(x = key_states_67_cast_fp16)[name = string("op_3188_shape_cast_fp16")]; + int32 gather_309_axis_0 = const()[name = string("gather_309_axis_0"), val = int32(0)]; + int32 gather_309_batch_dims_0 = const()[name = string("gather_309_batch_dims_0"), val = int32(0)]; + bool gather_309_validate_indices_0 = const()[name = string("gather_309_validate_indices_0"), val = bool(false)]; + string var_3188_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3188_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_309_to_uint16 = const()[name = string("select_309_to_uint16"), val = uint16(2)]; + tensor var_3188_shape_cast_fp16_to_uint16 = cast(dtype = var_3188_shape_cast_fp16_to_uint16_dtype_0, x = var_3188_shape_cast_fp16)[name = string("cast_106")]; + uint16 gather_309_cast_uint16 = gather(axis = gather_309_axis_0, batch_dims = gather_309_batch_dims_0, indices = select_309_to_uint16, validate_indices = gather_309_validate_indices_0, x = var_3188_shape_cast_fp16_to_uint16)[name = string("gather_309_cast_uint16")]; + string gather_309_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_309_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_321_values0_0 = const()[name = string("concat_321_values0_0"), val = int32(1)]; + int32 concat_321_values1_0 = const()[name = string("concat_321_values1_0"), val = int32(1)]; + int32 concat_321_values2_0 = const()[name = string("concat_321_values2_0"), val = int32(0)]; + int32 concat_321_axis_0 = const()[name = string("concat_321_axis_0"), val = int32(0)]; + bool concat_321_interleave_0 = const()[name = string("concat_321_interleave_0"), val = bool(false)]; + int32 gather_309_cast_uint16_to_int32 = cast(dtype = gather_309_cast_uint16_to_int32_dtype_0, x = gather_309_cast_uint16)[name = string("cast_105")]; + tensor concat_321 = concat(axis = concat_321_axis_0, interleave = concat_321_interleave_0, values = (concat_321_values0_0, concat_321_values1_0, concat_321_values2_0, gather_309_cast_uint16_to_int32))[name = string("concat_321")]; + tensor causal_mask_35_begin_0 = const()[name = string("causal_mask_35_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_35_end_mask_0 = const()[name = string("causal_mask_35_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_35_cast_fp16 = slice_by_index(begin = causal_mask_35_begin_0, end = concat_321, end_mask = causal_mask_35_end_mask_0, x = causal_mask)[name = string("causal_mask_35_cast_fp16")]; + tensor attn_output_65_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_35_cast_fp16, key = key_states_67_cast_fp16, query = query_states_67_cast_fp16, value = value_states_67_cast_fp16)[name = string("attn_output_65_cast_fp16")]; + tensor var_3194_perm_0 = const()[name = string("op_3194_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_322_axis_0 = const()[name = string("concat_322_axis_0"), val = int32(0)]; + bool concat_322_interleave_0 = const()[name = string("concat_322_interleave_0"), val = bool(false)]; + int32 gather_293_cast_uint16_to_int32 = cast(dtype = gather_293_cast_uint16_to_int32_dtype_0, x = gather_293_cast_uint16)[name = string("cast_104")]; + tensor concat_322 = concat(axis = concat_322_axis_0, interleave = concat_322_interleave_0, values = (gather_292, gather_293_cast_uint16_to_int32, var_81))[name = string("concat_322")]; + tensor var_3194_cast_fp16 = transpose(perm = var_3194_perm_0, x = attn_output_65_cast_fp16)[name = string("transpose_52")]; + tensor input_129_cast_fp16 = reshape(shape = concat_322, x = var_3194_cast_fp16)[name = string("input_129_cast_fp16")]; + tensor model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48146752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48312704))))[name = string("model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_115_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized, x = input_129_cast_fp16)[name = string("linear_115_cast_fp16")]; + tensor hidden_states_499_cast_fp16 = add(x = hidden_states_479_cast_fp16, y = linear_115_cast_fp16)[name = string("hidden_states_499_cast_fp16")]; + fp16 var_76_promoted_33_to_fp16 = const()[name = string("op_76_promoted_33_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3203_cast_fp16 = pow(x = hidden_states_499_cast_fp16, y = var_76_promoted_33_to_fp16)[name = string("op_3203_cast_fp16")]; + tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; + bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; + tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_3203_cast_fp16)[name = string("variance_67_cast_fp16")]; + fp16 var_3206_to_fp16 = const()[name = string("op_3206_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3207_cast_fp16 = add(x = variance_67_cast_fp16, y = var_3206_to_fp16)[name = string("op_3207_cast_fp16")]; + fp32 var_3208_epsilon_0 = const()[name = string("op_3208_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3208_cast_fp16 = rsqrt(epsilon = var_3208_epsilon_0, x = var_3207_cast_fp16)[name = string("op_3208_cast_fp16")]; + tensor hidden_states_503_cast_fp16 = mul(x = hidden_states_499_cast_fp16, y = var_3208_cast_fp16)[name = string("hidden_states_503_cast_fp16")]; + tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48333504)))]; + tensor input_131_cast_fp16 = mul(x = model_model_layers_16_post_attention_layernorm_weight_to_fp16, y = hidden_states_503_cast_fp16)[name = string("input_131_cast_fp16")]; + tensor model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48334720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48777152))))[name = string("model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_116_cast_fp16")]; + tensor var_3220_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_3220_cast_fp16")]; + tensor model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(48832512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49274944))))[name = string("model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_117_cast_fp16")]; + tensor input_135_cast_fp16 = mul(x = var_3220_cast_fp16, y = linear_117_cast_fp16)[name = string("input_135_cast_fp16")]; + tensor model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49330304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49772736))))[name = string("model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_118_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized, x = input_135_cast_fp16)[name = string("linear_118_cast_fp16")]; + tensor hidden_states_509_cast_fp16 = add(x = hidden_states_499_cast_fp16, y = linear_118_cast_fp16)[name = string("hidden_states_509_cast_fp16")]; + fp16 var_76_promoted_34_to_fp16 = const()[name = string("op_76_promoted_34_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3233_cast_fp16 = pow(x = hidden_states_509_cast_fp16, y = var_76_promoted_34_to_fp16)[name = string("op_3233_cast_fp16")]; + tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; + bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; + tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_3233_cast_fp16)[name = string("variance_69_cast_fp16")]; + fp16 var_3236_to_fp16 = const()[name = string("op_3236_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3237_cast_fp16 = add(x = variance_69_cast_fp16, y = var_3236_to_fp16)[name = string("op_3237_cast_fp16")]; + fp32 var_3238_epsilon_0 = const()[name = string("op_3238_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3238_cast_fp16 = rsqrt(epsilon = var_3238_epsilon_0, x = var_3237_cast_fp16)[name = string("op_3238_cast_fp16")]; + tensor hidden_states_513_cast_fp16 = mul(x = hidden_states_509_cast_fp16, y = var_3238_cast_fp16)[name = string("hidden_states_513_cast_fp16")]; + tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49828096)))]; + tensor hidden_states_517_cast_fp16 = mul(x = model_model_layers_17_input_layernorm_weight_to_fp16, y = hidden_states_513_cast_fp16)[name = string("hidden_states_517_cast_fp16")]; + tensor var_3249_shape_cast_fp16 = shape(x = hidden_states_517_cast_fp16)[name = string("op_3249_shape_cast_fp16")]; + int32 gather_310 = const()[name = string("gather_310"), val = int32(1)]; + int32 gather_311_axis_0 = const()[name = string("gather_311_axis_0"), val = int32(0)]; + int32 gather_311_batch_dims_0 = const()[name = string("gather_311_batch_dims_0"), val = int32(0)]; + bool gather_311_validate_indices_0 = const()[name = string("gather_311_validate_indices_0"), val = bool(false)]; + string var_3249_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3249_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_311_to_uint16 = const()[name = string("select_311_to_uint16"), val = uint16(1)]; + tensor var_3249_shape_cast_fp16_to_uint16 = cast(dtype = var_3249_shape_cast_fp16_to_uint16_dtype_0, x = var_3249_shape_cast_fp16)[name = string("cast_103")]; + uint16 gather_311_cast_uint16 = gather(axis = gather_311_axis_0, batch_dims = gather_311_batch_dims_0, indices = select_311_to_uint16, validate_indices = gather_311_validate_indices_0, x = var_3249_shape_cast_fp16_to_uint16)[name = string("gather_311_cast_uint16")]; + string gather_311_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_311_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49829312))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(49995264))))[name = string("model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_119_cast_fp16")]; + tensor model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50016064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50071424))))[name = string("model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_120_cast_fp16")]; + tensor model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50078400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50133760))))[name = string("model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_121_cast_fp16")]; + tensor concat_323x = const()[name = string("concat_323x"), val = tensor([1, -1, 9, 64])]; + tensor var_3258_cast_fp16 = reshape(shape = concat_323x, x = linear_119_cast_fp16)[name = string("op_3258_cast_fp16")]; + tensor q_35_perm_0 = const()[name = string("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_324x = const()[name = string("concat_324x"), val = tensor([1, -1, 3, 64])]; + tensor var_3261_cast_fp16 = reshape(shape = concat_324x, x = linear_120_cast_fp16)[name = string("op_3261_cast_fp16")]; + tensor k_35_perm_0 = const()[name = string("k_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 3, 64])]; + tensor var_3264_cast_fp16 = reshape(shape = concat_325x, x = linear_121_cast_fp16)[name = string("op_3264_cast_fp16")]; + tensor v_state_35_perm_0 = const()[name = string("v_state_35_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_35_cast_fp16 = transpose(perm = q_35_perm_0, x = var_3258_cast_fp16)[name = string("transpose_51")]; + tensor var_3268_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3268_cast_fp16")]; + tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; + tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; + fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3279_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_3279_cast_fp16")]; + bool var_3281_interleave_0 = const()[name = string("op_3281_interleave_0"), val = bool(false)]; + tensor var_3281_cast_fp16 = concat(axis = var_81, interleave = var_3281_interleave_0, values = (var_3279_cast_fp16, x1_69_cast_fp16))[name = string("op_3281_cast_fp16")]; + tensor var_3282_cast_fp16 = mul(x = var_3281_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3282_cast_fp16")]; + tensor query_states_71_cast_fp16 = add(x = var_3268_cast_fp16, y = var_3282_cast_fp16)[name = string("query_states_71_cast_fp16")]; + tensor k_35_cast_fp16 = transpose(perm = k_35_perm_0, x = var_3261_cast_fp16)[name = string("transpose_50")]; + tensor var_3284_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3284_cast_fp16")]; + tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; + tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; + fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3295_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_3295_cast_fp16")]; + bool var_3297_interleave_0 = const()[name = string("op_3297_interleave_0"), val = bool(false)]; + tensor var_3297_cast_fp16 = concat(axis = var_81, interleave = var_3297_interleave_0, values = (var_3295_cast_fp16, x1_71_cast_fp16))[name = string("op_3297_cast_fp16")]; + tensor var_3298_cast_fp16 = mul(x = var_3297_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3298_cast_fp16")]; + tensor k_state_35_cast_fp16 = add(x = var_3284_cast_fp16, y = var_3298_cast_fp16)[name = string("k_state_35_cast_fp16")]; + tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([0])]; + tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; + tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; + tensor concat_328_values0_0 = const()[name = string("concat_328_values0_0"), val = tensor([17])]; + int32 concat_328_axis_0 = const()[name = string("concat_328_axis_0"), val = int32(0)]; + bool concat_328_interleave_0 = const()[name = string("concat_328_interleave_0"), val = bool(false)]; + tensor concat_328 = concat(axis = concat_328_axis_0, interleave = concat_328_interleave_0, values = (concat_328_values0_0, expand_dims_204, expand_dims_205, expand_dims_2, expand_dims_207))[name = string("concat_328")]; + tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_328, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = k_state_35_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_94_write_state")]; + tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_94")]; + tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_35_cast_fp16 = transpose(perm = v_state_35_perm_0, x = var_3264_cast_fp16)[name = string("transpose_49")]; + tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_328, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = v_state_35_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_95_write_state")]; + tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_95")]; + tensor var_3321_begin_0 = const()[name = string("op_3321_begin_0"), val = tensor([17, 0, 0, 0, 0])]; + tensor var_3321_end_0 = const()[name = string("op_3321_end_0"), val = tensor([18, 1, 3, 2048, 64])]; + tensor var_3321_end_mask_0 = const()[name = string("op_3321_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3321_squeeze_mask_0 = const()[name = string("op_3321_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3321_cast_fp16 = slice_by_index(begin = var_3321_begin_0, end = var_3321_end_0, end_mask = var_3321_end_mask_0, squeeze_mask = var_3321_squeeze_mask_0, x = coreml_update_state_94)[name = string("op_3321_cast_fp16")]; + tensor var_3324_begin_0 = const()[name = string("op_3324_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3324_end_mask_0 = const()[name = string("op_3324_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3324_cast_fp16 = slice_by_index(begin = var_3324_begin_0, end = concat_11, end_mask = var_3324_end_mask_0, x = var_3321_cast_fp16)[name = string("op_3324_cast_fp16")]; + tensor var_3326_begin_0 = const()[name = string("op_3326_begin_0"), val = tensor([17, 0, 0, 0, 0])]; + tensor var_3326_end_0 = const()[name = string("op_3326_end_0"), val = tensor([18, 1, 3, 2048, 64])]; + tensor var_3326_end_mask_0 = const()[name = string("op_3326_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3326_squeeze_mask_0 = const()[name = string("op_3326_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3326_cast_fp16 = slice_by_index(begin = var_3326_begin_0, end = var_3326_end_0, end_mask = var_3326_end_mask_0, squeeze_mask = var_3326_squeeze_mask_0, x = coreml_update_state_95)[name = string("op_3326_cast_fp16")]; + tensor var_3329_begin_0 = const()[name = string("op_3329_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3329_end_mask_0 = const()[name = string("op_3329_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3329_cast_fp16 = slice_by_index(begin = var_3329_begin_0, end = concat_11, end_mask = var_3329_end_mask_0, x = var_3326_cast_fp16)[name = string("op_3329_cast_fp16")]; + tensor var_3331_shape_cast_fp16 = shape(x = var_3324_cast_fp16)[name = string("op_3331_shape_cast_fp16")]; + int32 gather_319 = const()[name = string("gather_319"), val = int32(1)]; + int32 gather_320 = const()[name = string("gather_320"), val = int32(3)]; + int32 gather_321_axis_0 = const()[name = string("gather_321_axis_0"), val = int32(0)]; + int32 gather_321_batch_dims_0 = const()[name = string("gather_321_batch_dims_0"), val = int32(0)]; + bool gather_321_validate_indices_0 = const()[name = string("gather_321_validate_indices_0"), val = bool(false)]; + string var_3331_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3331_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_321_to_uint16 = const()[name = string("select_321_to_uint16"), val = uint16(2)]; + tensor var_3331_shape_cast_fp16_to_uint16 = cast(dtype = var_3331_shape_cast_fp16_to_uint16_dtype_0, x = var_3331_shape_cast_fp16)[name = string("cast_102")]; + uint16 gather_321_cast_uint16 = gather(axis = gather_321_axis_0, batch_dims = gather_321_batch_dims_0, indices = select_321_to_uint16, validate_indices = gather_321_validate_indices_0, x = var_3331_shape_cast_fp16_to_uint16)[name = string("gather_321_cast_uint16")]; + string gather_321_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_321_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_322 = const()[name = string("gather_322"), val = int32(64)]; + tensor var_3338_axes_0 = const()[name = string("op_3338_axes_0"), val = tensor([2])]; + tensor var_3338_cast_fp16 = expand_dims(axes = var_3338_axes_0, x = var_3324_cast_fp16)[name = string("op_3338_cast_fp16")]; + tensor shape_357_cast_fp16 = shape(x = var_3338_cast_fp16)[name = string("shape_357_cast_fp16")]; + int32 concat_336_axis_0 = const()[name = string("concat_336_axis_0"), val = int32(0)]; + bool concat_336_interleave_0 = const()[name = string("concat_336_interleave_0"), val = bool(false)]; + int32 gather_321_cast_uint16_to_int32 = cast(dtype = gather_321_cast_uint16_to_int32_dtype_0, x = gather_321_cast_uint16)[name = string("cast_101")]; + tensor concat_336 = concat(axis = concat_336_axis_0, interleave = concat_336_interleave_0, values = (gather_319, gather_320, var_85, gather_321_cast_uint16_to_int32, gather_322))[name = string("concat_336")]; + tensor real_div_34 = real_div(x = concat_336, y = shape_357_cast_fp16)[name = string("real_div_34")]; + tensor hidden_states_521_cast_fp16 = tile(reps = real_div_34, x = var_3338_cast_fp16)[name = string("hidden_states_521_cast_fp16")]; + tensor concat_337x = const()[name = string("concat_337x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_71_cast_fp16 = reshape(shape = concat_337x, x = hidden_states_521_cast_fp16)[name = string("key_states_71_cast_fp16")]; + tensor var_3348_shape_cast_fp16 = shape(x = var_3329_cast_fp16)[name = string("op_3348_shape_cast_fp16")]; + int32 gather_323 = const()[name = string("gather_323"), val = int32(1)]; + int32 gather_324 = const()[name = string("gather_324"), val = int32(3)]; + int32 gather_325_axis_0 = const()[name = string("gather_325_axis_0"), val = int32(0)]; + int32 gather_325_batch_dims_0 = const()[name = string("gather_325_batch_dims_0"), val = int32(0)]; + bool gather_325_validate_indices_0 = const()[name = string("gather_325_validate_indices_0"), val = bool(false)]; + string var_3348_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3348_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_325_to_uint16 = const()[name = string("select_325_to_uint16"), val = uint16(2)]; + tensor var_3348_shape_cast_fp16_to_uint16 = cast(dtype = var_3348_shape_cast_fp16_to_uint16_dtype_0, x = var_3348_shape_cast_fp16)[name = string("cast_100")]; + uint16 gather_325_cast_uint16 = gather(axis = gather_325_axis_0, batch_dims = gather_325_batch_dims_0, indices = select_325_to_uint16, validate_indices = gather_325_validate_indices_0, x = var_3348_shape_cast_fp16_to_uint16)[name = string("gather_325_cast_uint16")]; + string gather_325_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_325_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_326 = const()[name = string("gather_326"), val = int32(64)]; + tensor var_3355_axes_0 = const()[name = string("op_3355_axes_0"), val = tensor([2])]; + tensor var_3355_cast_fp16 = expand_dims(axes = var_3355_axes_0, x = var_3329_cast_fp16)[name = string("op_3355_cast_fp16")]; + tensor shape_362_cast_fp16 = shape(x = var_3355_cast_fp16)[name = string("shape_362_cast_fp16")]; + int32 concat_338_axis_0 = const()[name = string("concat_338_axis_0"), val = int32(0)]; + bool concat_338_interleave_0 = const()[name = string("concat_338_interleave_0"), val = bool(false)]; + int32 gather_325_cast_uint16_to_int32 = cast(dtype = gather_325_cast_uint16_to_int32_dtype_0, x = gather_325_cast_uint16)[name = string("cast_99")]; + tensor concat_338 = concat(axis = concat_338_axis_0, interleave = concat_338_interleave_0, values = (gather_323, gather_324, var_85, gather_325_cast_uint16_to_int32, gather_326))[name = string("concat_338")]; + tensor real_div_35 = real_div(x = concat_338, y = shape_362_cast_fp16)[name = string("real_div_35")]; + tensor hidden_states_525_cast_fp16 = tile(reps = real_div_35, x = var_3355_cast_fp16)[name = string("hidden_states_525_cast_fp16")]; + tensor concat_339x = const()[name = string("concat_339x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_71_cast_fp16 = reshape(shape = concat_339x, x = hidden_states_525_cast_fp16)[name = string("value_states_71_cast_fp16")]; + tensor var_3365_shape_cast_fp16 = shape(x = key_states_71_cast_fp16)[name = string("op_3365_shape_cast_fp16")]; + int32 gather_327_axis_0 = const()[name = string("gather_327_axis_0"), val = int32(0)]; + int32 gather_327_batch_dims_0 = const()[name = string("gather_327_batch_dims_0"), val = int32(0)]; + bool gather_327_validate_indices_0 = const()[name = string("gather_327_validate_indices_0"), val = bool(false)]; + string var_3365_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3365_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_327_to_uint16 = const()[name = string("select_327_to_uint16"), val = uint16(2)]; + tensor var_3365_shape_cast_fp16_to_uint16 = cast(dtype = var_3365_shape_cast_fp16_to_uint16_dtype_0, x = var_3365_shape_cast_fp16)[name = string("cast_98")]; + uint16 gather_327_cast_uint16 = gather(axis = gather_327_axis_0, batch_dims = gather_327_batch_dims_0, indices = select_327_to_uint16, validate_indices = gather_327_validate_indices_0, x = var_3365_shape_cast_fp16_to_uint16)[name = string("gather_327_cast_uint16")]; + string gather_327_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_327_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; + int32 concat_340_values1_0 = const()[name = string("concat_340_values1_0"), val = int32(1)]; + int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(0)]; + int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; + bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; + int32 gather_327_cast_uint16_to_int32 = cast(dtype = gather_327_cast_uint16_to_int32_dtype_0, x = gather_327_cast_uint16)[name = string("cast_97")]; + tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, concat_340_values1_0, concat_340_values2_0, gather_327_cast_uint16_to_int32))[name = string("concat_340")]; + tensor causal_mask_37_begin_0 = const()[name = string("causal_mask_37_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_37_end_mask_0 = const()[name = string("causal_mask_37_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_37_cast_fp16 = slice_by_index(begin = causal_mask_37_begin_0, end = concat_340, end_mask = causal_mask_37_end_mask_0, x = causal_mask)[name = string("causal_mask_37_cast_fp16")]; + tensor attn_output_69_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_37_cast_fp16, key = key_states_71_cast_fp16, query = query_states_71_cast_fp16, value = value_states_71_cast_fp16)[name = string("attn_output_69_cast_fp16")]; + tensor var_3371_perm_0 = const()[name = string("op_3371_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_341_axis_0 = const()[name = string("concat_341_axis_0"), val = int32(0)]; + bool concat_341_interleave_0 = const()[name = string("concat_341_interleave_0"), val = bool(false)]; + int32 gather_311_cast_uint16_to_int32 = cast(dtype = gather_311_cast_uint16_to_int32_dtype_0, x = gather_311_cast_uint16)[name = string("cast_96")]; + tensor concat_341 = concat(axis = concat_341_axis_0, interleave = concat_341_interleave_0, values = (gather_310, gather_311_cast_uint16_to_int32, var_81))[name = string("concat_341")]; + tensor var_3371_cast_fp16 = transpose(perm = var_3371_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_48")]; + tensor input_137_cast_fp16 = reshape(shape = concat_341, x = var_3371_cast_fp16)[name = string("input_137_cast_fp16")]; + tensor model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50140736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50306688))))[name = string("model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_122_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = string("linear_122_cast_fp16")]; + tensor hidden_states_529_cast_fp16 = add(x = hidden_states_509_cast_fp16, y = linear_122_cast_fp16)[name = string("hidden_states_529_cast_fp16")]; + fp16 var_76_promoted_35_to_fp16 = const()[name = string("op_76_promoted_35_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3380_cast_fp16 = pow(x = hidden_states_529_cast_fp16, y = var_76_promoted_35_to_fp16)[name = string("op_3380_cast_fp16")]; + tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; + bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; + tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_3380_cast_fp16)[name = string("variance_71_cast_fp16")]; + fp16 var_3383_to_fp16 = const()[name = string("op_3383_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3384_cast_fp16 = add(x = variance_71_cast_fp16, y = var_3383_to_fp16)[name = string("op_3384_cast_fp16")]; + fp32 var_3385_epsilon_0 = const()[name = string("op_3385_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3385_cast_fp16 = rsqrt(epsilon = var_3385_epsilon_0, x = var_3384_cast_fp16)[name = string("op_3385_cast_fp16")]; + tensor hidden_states_533_cast_fp16 = mul(x = hidden_states_529_cast_fp16, y = var_3385_cast_fp16)[name = string("hidden_states_533_cast_fp16")]; + tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50327488)))]; + tensor input_139_cast_fp16 = mul(x = model_model_layers_17_post_attention_layernorm_weight_to_fp16, y = hidden_states_533_cast_fp16)[name = string("input_139_cast_fp16")]; + tensor model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50328704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50771136))))[name = string("model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_123_cast_fp16")]; + tensor var_3397_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_3397_cast_fp16")]; + tensor model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(50826496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51268928))))[name = string("model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_124_cast_fp16")]; + tensor input_143_cast_fp16 = mul(x = var_3397_cast_fp16, y = linear_124_cast_fp16)[name = string("input_143_cast_fp16")]; + tensor model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51324288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51766720))))[name = string("model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_125_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized, x = input_143_cast_fp16)[name = string("linear_125_cast_fp16")]; + tensor hidden_states_539_cast_fp16 = add(x = hidden_states_529_cast_fp16, y = linear_125_cast_fp16)[name = string("hidden_states_539_cast_fp16")]; + fp16 var_76_promoted_36_to_fp16 = const()[name = string("op_76_promoted_36_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3410_cast_fp16 = pow(x = hidden_states_539_cast_fp16, y = var_76_promoted_36_to_fp16)[name = string("op_3410_cast_fp16")]; + tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; + bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; + tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_3410_cast_fp16)[name = string("variance_73_cast_fp16")]; + fp16 var_3413_to_fp16 = const()[name = string("op_3413_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3414_cast_fp16 = add(x = variance_73_cast_fp16, y = var_3413_to_fp16)[name = string("op_3414_cast_fp16")]; + fp32 var_3415_epsilon_0 = const()[name = string("op_3415_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3415_cast_fp16 = rsqrt(epsilon = var_3415_epsilon_0, x = var_3414_cast_fp16)[name = string("op_3415_cast_fp16")]; + tensor hidden_states_543_cast_fp16 = mul(x = hidden_states_539_cast_fp16, y = var_3415_cast_fp16)[name = string("hidden_states_543_cast_fp16")]; + tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51822080)))]; + tensor hidden_states_547_cast_fp16 = mul(x = model_model_layers_18_input_layernorm_weight_to_fp16, y = hidden_states_543_cast_fp16)[name = string("hidden_states_547_cast_fp16")]; + tensor var_3426_shape_cast_fp16 = shape(x = hidden_states_547_cast_fp16)[name = string("op_3426_shape_cast_fp16")]; + int32 gather_328 = const()[name = string("gather_328"), val = int32(1)]; + int32 gather_329_axis_0 = const()[name = string("gather_329_axis_0"), val = int32(0)]; + int32 gather_329_batch_dims_0 = const()[name = string("gather_329_batch_dims_0"), val = int32(0)]; + bool gather_329_validate_indices_0 = const()[name = string("gather_329_validate_indices_0"), val = bool(false)]; + string var_3426_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3426_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_329_to_uint16 = const()[name = string("select_329_to_uint16"), val = uint16(1)]; + tensor var_3426_shape_cast_fp16_to_uint16 = cast(dtype = var_3426_shape_cast_fp16_to_uint16_dtype_0, x = var_3426_shape_cast_fp16)[name = string("cast_95")]; + uint16 gather_329_cast_uint16 = gather(axis = gather_329_axis_0, batch_dims = gather_329_batch_dims_0, indices = select_329_to_uint16, validate_indices = gather_329_validate_indices_0, x = var_3426_shape_cast_fp16_to_uint16)[name = string("gather_329_cast_uint16")]; + string gather_329_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_329_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51823296))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(51989248))))[name = string("model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_126_cast_fp16")]; + tensor model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52010048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52065408))))[name = string("model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_127_cast_fp16")]; + tensor model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52072384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52127744))))[name = string("model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_128_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_128_cast_fp16")]; + tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 9, 64])]; + tensor var_3435_cast_fp16 = reshape(shape = concat_342x, x = linear_126_cast_fp16)[name = string("op_3435_cast_fp16")]; + tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 3, 64])]; + tensor var_3438_cast_fp16 = reshape(shape = concat_343x, x = linear_127_cast_fp16)[name = string("op_3438_cast_fp16")]; + tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 3, 64])]; + tensor var_3441_cast_fp16 = reshape(shape = concat_344x, x = linear_128_cast_fp16)[name = string("op_3441_cast_fp16")]; + tensor v_state_37_perm_0 = const()[name = string("v_state_37_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_37_cast_fp16 = transpose(perm = q_37_perm_0, x = var_3435_cast_fp16)[name = string("transpose_47")]; + tensor var_3445_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3445_cast_fp16")]; + tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; + tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; + fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3456_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_3456_cast_fp16")]; + bool var_3458_interleave_0 = const()[name = string("op_3458_interleave_0"), val = bool(false)]; + tensor var_3458_cast_fp16 = concat(axis = var_81, interleave = var_3458_interleave_0, values = (var_3456_cast_fp16, x1_73_cast_fp16))[name = string("op_3458_cast_fp16")]; + tensor var_3459_cast_fp16 = mul(x = var_3458_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3459_cast_fp16")]; + tensor query_states_75_cast_fp16 = add(x = var_3445_cast_fp16, y = var_3459_cast_fp16)[name = string("query_states_75_cast_fp16")]; + tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = var_3438_cast_fp16)[name = string("transpose_46")]; + tensor var_3461_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3461_cast_fp16")]; + tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; + tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; + fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3472_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_3472_cast_fp16")]; + bool var_3474_interleave_0 = const()[name = string("op_3474_interleave_0"), val = bool(false)]; + tensor var_3474_cast_fp16 = concat(axis = var_81, interleave = var_3474_interleave_0, values = (var_3472_cast_fp16, x1_75_cast_fp16))[name = string("op_3474_cast_fp16")]; + tensor var_3475_cast_fp16 = mul(x = var_3474_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3475_cast_fp16")]; + tensor k_state_37_cast_fp16 = add(x = var_3461_cast_fp16, y = var_3475_cast_fp16)[name = string("k_state_37_cast_fp16")]; + tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([0])]; + tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; + tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; + tensor concat_347_values0_0 = const()[name = string("concat_347_values0_0"), val = tensor([18])]; + int32 concat_347_axis_0 = const()[name = string("concat_347_axis_0"), val = int32(0)]; + bool concat_347_interleave_0 = const()[name = string("concat_347_interleave_0"), val = bool(false)]; + tensor concat_347 = concat(axis = concat_347_axis_0, interleave = concat_347_interleave_0, values = (concat_347_values0_0, expand_dims_216, expand_dims_217, expand_dims_2, expand_dims_219))[name = string("concat_347")]; + tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_347, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = k_state_37_cast_fp16, x = coreml_update_state_94)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_96_write_state")]; + tensor coreml_update_state_96 = read_state(input = key_cache)[name = string("coreml_update_state_96")]; + tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_37_cast_fp16 = transpose(perm = v_state_37_perm_0, x = var_3441_cast_fp16)[name = string("transpose_45")]; + tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_347, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = v_state_37_cast_fp16, x = coreml_update_state_95)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_97_write_state")]; + tensor coreml_update_state_97 = read_state(input = value_cache)[name = string("coreml_update_state_97")]; + tensor var_3498_begin_0 = const()[name = string("op_3498_begin_0"), val = tensor([18, 0, 0, 0, 0])]; + tensor var_3498_end_0 = const()[name = string("op_3498_end_0"), val = tensor([19, 1, 3, 2048, 64])]; + tensor var_3498_end_mask_0 = const()[name = string("op_3498_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3498_squeeze_mask_0 = const()[name = string("op_3498_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3498_cast_fp16 = slice_by_index(begin = var_3498_begin_0, end = var_3498_end_0, end_mask = var_3498_end_mask_0, squeeze_mask = var_3498_squeeze_mask_0, x = coreml_update_state_96)[name = string("op_3498_cast_fp16")]; + tensor var_3501_begin_0 = const()[name = string("op_3501_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3501_end_mask_0 = const()[name = string("op_3501_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3501_cast_fp16 = slice_by_index(begin = var_3501_begin_0, end = concat_11, end_mask = var_3501_end_mask_0, x = var_3498_cast_fp16)[name = string("op_3501_cast_fp16")]; + tensor var_3503_begin_0 = const()[name = string("op_3503_begin_0"), val = tensor([18, 0, 0, 0, 0])]; + tensor var_3503_end_0 = const()[name = string("op_3503_end_0"), val = tensor([19, 1, 3, 2048, 64])]; + tensor var_3503_end_mask_0 = const()[name = string("op_3503_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3503_squeeze_mask_0 = const()[name = string("op_3503_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3503_cast_fp16 = slice_by_index(begin = var_3503_begin_0, end = var_3503_end_0, end_mask = var_3503_end_mask_0, squeeze_mask = var_3503_squeeze_mask_0, x = coreml_update_state_97)[name = string("op_3503_cast_fp16")]; + tensor var_3506_begin_0 = const()[name = string("op_3506_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3506_end_mask_0 = const()[name = string("op_3506_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3506_cast_fp16 = slice_by_index(begin = var_3506_begin_0, end = concat_11, end_mask = var_3506_end_mask_0, x = var_3503_cast_fp16)[name = string("op_3506_cast_fp16")]; + tensor var_3508_shape_cast_fp16 = shape(x = var_3501_cast_fp16)[name = string("op_3508_shape_cast_fp16")]; + int32 gather_337 = const()[name = string("gather_337"), val = int32(1)]; + int32 gather_338 = const()[name = string("gather_338"), val = int32(3)]; + int32 gather_339_axis_0 = const()[name = string("gather_339_axis_0"), val = int32(0)]; + int32 gather_339_batch_dims_0 = const()[name = string("gather_339_batch_dims_0"), val = int32(0)]; + bool gather_339_validate_indices_0 = const()[name = string("gather_339_validate_indices_0"), val = bool(false)]; + string var_3508_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3508_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_339_to_uint16 = const()[name = string("select_339_to_uint16"), val = uint16(2)]; + tensor var_3508_shape_cast_fp16_to_uint16 = cast(dtype = var_3508_shape_cast_fp16_to_uint16_dtype_0, x = var_3508_shape_cast_fp16)[name = string("cast_94")]; + uint16 gather_339_cast_uint16 = gather(axis = gather_339_axis_0, batch_dims = gather_339_batch_dims_0, indices = select_339_to_uint16, validate_indices = gather_339_validate_indices_0, x = var_3508_shape_cast_fp16_to_uint16)[name = string("gather_339_cast_uint16")]; + string gather_339_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_339_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_340 = const()[name = string("gather_340"), val = int32(64)]; + tensor var_3515_axes_0 = const()[name = string("op_3515_axes_0"), val = tensor([2])]; + tensor var_3515_cast_fp16 = expand_dims(axes = var_3515_axes_0, x = var_3501_cast_fp16)[name = string("op_3515_cast_fp16")]; + tensor shape_377_cast_fp16 = shape(x = var_3515_cast_fp16)[name = string("shape_377_cast_fp16")]; + int32 concat_355_axis_0 = const()[name = string("concat_355_axis_0"), val = int32(0)]; + bool concat_355_interleave_0 = const()[name = string("concat_355_interleave_0"), val = bool(false)]; + int32 gather_339_cast_uint16_to_int32 = cast(dtype = gather_339_cast_uint16_to_int32_dtype_0, x = gather_339_cast_uint16)[name = string("cast_93")]; + tensor concat_355 = concat(axis = concat_355_axis_0, interleave = concat_355_interleave_0, values = (gather_337, gather_338, var_85, gather_339_cast_uint16_to_int32, gather_340))[name = string("concat_355")]; + tensor real_div_36 = real_div(x = concat_355, y = shape_377_cast_fp16)[name = string("real_div_36")]; + tensor hidden_states_551_cast_fp16 = tile(reps = real_div_36, x = var_3515_cast_fp16)[name = string("hidden_states_551_cast_fp16")]; + tensor concat_356x = const()[name = string("concat_356x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_75_cast_fp16 = reshape(shape = concat_356x, x = hidden_states_551_cast_fp16)[name = string("key_states_75_cast_fp16")]; + tensor var_3525_shape_cast_fp16 = shape(x = var_3506_cast_fp16)[name = string("op_3525_shape_cast_fp16")]; + int32 gather_341 = const()[name = string("gather_341"), val = int32(1)]; + int32 gather_342 = const()[name = string("gather_342"), val = int32(3)]; + int32 gather_343_axis_0 = const()[name = string("gather_343_axis_0"), val = int32(0)]; + int32 gather_343_batch_dims_0 = const()[name = string("gather_343_batch_dims_0"), val = int32(0)]; + bool gather_343_validate_indices_0 = const()[name = string("gather_343_validate_indices_0"), val = bool(false)]; + string var_3525_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3525_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_343_to_uint16 = const()[name = string("select_343_to_uint16"), val = uint16(2)]; + tensor var_3525_shape_cast_fp16_to_uint16 = cast(dtype = var_3525_shape_cast_fp16_to_uint16_dtype_0, x = var_3525_shape_cast_fp16)[name = string("cast_92")]; + uint16 gather_343_cast_uint16 = gather(axis = gather_343_axis_0, batch_dims = gather_343_batch_dims_0, indices = select_343_to_uint16, validate_indices = gather_343_validate_indices_0, x = var_3525_shape_cast_fp16_to_uint16)[name = string("gather_343_cast_uint16")]; + string gather_343_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_343_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_344 = const()[name = string("gather_344"), val = int32(64)]; + tensor var_3532_axes_0 = const()[name = string("op_3532_axes_0"), val = tensor([2])]; + tensor var_3532_cast_fp16 = expand_dims(axes = var_3532_axes_0, x = var_3506_cast_fp16)[name = string("op_3532_cast_fp16")]; + tensor shape_382_cast_fp16 = shape(x = var_3532_cast_fp16)[name = string("shape_382_cast_fp16")]; + int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; + bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; + int32 gather_343_cast_uint16_to_int32 = cast(dtype = gather_343_cast_uint16_to_int32_dtype_0, x = gather_343_cast_uint16)[name = string("cast_91")]; + tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (gather_341, gather_342, var_85, gather_343_cast_uint16_to_int32, gather_344))[name = string("concat_357")]; + tensor real_div_37 = real_div(x = concat_357, y = shape_382_cast_fp16)[name = string("real_div_37")]; + tensor hidden_states_555_cast_fp16 = tile(reps = real_div_37, x = var_3532_cast_fp16)[name = string("hidden_states_555_cast_fp16")]; + tensor concat_358x = const()[name = string("concat_358x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_75_cast_fp16 = reshape(shape = concat_358x, x = hidden_states_555_cast_fp16)[name = string("value_states_75_cast_fp16")]; + tensor var_3542_shape_cast_fp16 = shape(x = key_states_75_cast_fp16)[name = string("op_3542_shape_cast_fp16")]; + int32 gather_345_axis_0 = const()[name = string("gather_345_axis_0"), val = int32(0)]; + int32 gather_345_batch_dims_0 = const()[name = string("gather_345_batch_dims_0"), val = int32(0)]; + bool gather_345_validate_indices_0 = const()[name = string("gather_345_validate_indices_0"), val = bool(false)]; + string var_3542_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3542_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_345_to_uint16 = const()[name = string("select_345_to_uint16"), val = uint16(2)]; + tensor var_3542_shape_cast_fp16_to_uint16 = cast(dtype = var_3542_shape_cast_fp16_to_uint16_dtype_0, x = var_3542_shape_cast_fp16)[name = string("cast_90")]; + uint16 gather_345_cast_uint16 = gather(axis = gather_345_axis_0, batch_dims = gather_345_batch_dims_0, indices = select_345_to_uint16, validate_indices = gather_345_validate_indices_0, x = var_3542_shape_cast_fp16_to_uint16)[name = string("gather_345_cast_uint16")]; + string gather_345_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_345_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_359_values0_0 = const()[name = string("concat_359_values0_0"), val = int32(1)]; + int32 concat_359_values1_0 = const()[name = string("concat_359_values1_0"), val = int32(1)]; + int32 concat_359_values2_0 = const()[name = string("concat_359_values2_0"), val = int32(0)]; + int32 concat_359_axis_0 = const()[name = string("concat_359_axis_0"), val = int32(0)]; + bool concat_359_interleave_0 = const()[name = string("concat_359_interleave_0"), val = bool(false)]; + int32 gather_345_cast_uint16_to_int32 = cast(dtype = gather_345_cast_uint16_to_int32_dtype_0, x = gather_345_cast_uint16)[name = string("cast_89")]; + tensor concat_359 = concat(axis = concat_359_axis_0, interleave = concat_359_interleave_0, values = (concat_359_values0_0, concat_359_values1_0, concat_359_values2_0, gather_345_cast_uint16_to_int32))[name = string("concat_359")]; + tensor causal_mask_39_begin_0 = const()[name = string("causal_mask_39_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_39_end_mask_0 = const()[name = string("causal_mask_39_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_39_cast_fp16 = slice_by_index(begin = causal_mask_39_begin_0, end = concat_359, end_mask = causal_mask_39_end_mask_0, x = causal_mask)[name = string("causal_mask_39_cast_fp16")]; + tensor attn_output_73_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_39_cast_fp16, key = key_states_75_cast_fp16, query = query_states_75_cast_fp16, value = value_states_75_cast_fp16)[name = string("attn_output_73_cast_fp16")]; + tensor var_3548_perm_0 = const()[name = string("op_3548_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_360_axis_0 = const()[name = string("concat_360_axis_0"), val = int32(0)]; + bool concat_360_interleave_0 = const()[name = string("concat_360_interleave_0"), val = bool(false)]; + int32 gather_329_cast_uint16_to_int32 = cast(dtype = gather_329_cast_uint16_to_int32_dtype_0, x = gather_329_cast_uint16)[name = string("cast_88")]; + tensor concat_360 = concat(axis = concat_360_axis_0, interleave = concat_360_interleave_0, values = (gather_328, gather_329_cast_uint16_to_int32, var_81))[name = string("concat_360")]; + tensor var_3548_cast_fp16 = transpose(perm = var_3548_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_44")]; + tensor input_145_cast_fp16 = reshape(shape = concat_360, x = var_3548_cast_fp16)[name = string("input_145_cast_fp16")]; + tensor model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52134720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52300672))))[name = string("model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_129_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = string("linear_129_cast_fp16")]; + tensor hidden_states_559_cast_fp16 = add(x = hidden_states_539_cast_fp16, y = linear_129_cast_fp16)[name = string("hidden_states_559_cast_fp16")]; + fp16 var_76_promoted_37_to_fp16 = const()[name = string("op_76_promoted_37_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3557_cast_fp16 = pow(x = hidden_states_559_cast_fp16, y = var_76_promoted_37_to_fp16)[name = string("op_3557_cast_fp16")]; + tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; + bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; + tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_3557_cast_fp16)[name = string("variance_75_cast_fp16")]; + fp16 var_3560_to_fp16 = const()[name = string("op_3560_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3561_cast_fp16 = add(x = variance_75_cast_fp16, y = var_3560_to_fp16)[name = string("op_3561_cast_fp16")]; + fp32 var_3562_epsilon_0 = const()[name = string("op_3562_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3562_cast_fp16 = rsqrt(epsilon = var_3562_epsilon_0, x = var_3561_cast_fp16)[name = string("op_3562_cast_fp16")]; + tensor hidden_states_563_cast_fp16 = mul(x = hidden_states_559_cast_fp16, y = var_3562_cast_fp16)[name = string("hidden_states_563_cast_fp16")]; + tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52321472)))]; + tensor input_147_cast_fp16 = mul(x = model_model_layers_18_post_attention_layernorm_weight_to_fp16, y = hidden_states_563_cast_fp16)[name = string("input_147_cast_fp16")]; + tensor model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52322688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52765120))))[name = string("model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_130_cast_fp16")]; + tensor var_3574_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_3574_cast_fp16")]; + tensor model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(52820480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53262912))))[name = string("model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_131_cast_fp16")]; + tensor input_151_cast_fp16 = mul(x = var_3574_cast_fp16, y = linear_131_cast_fp16)[name = string("input_151_cast_fp16")]; + tensor model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53318272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53760704))))[name = string("model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_132_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = string("linear_132_cast_fp16")]; + tensor hidden_states_569_cast_fp16 = add(x = hidden_states_559_cast_fp16, y = linear_132_cast_fp16)[name = string("hidden_states_569_cast_fp16")]; + fp16 var_76_promoted_38_to_fp16 = const()[name = string("op_76_promoted_38_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3587_cast_fp16 = pow(x = hidden_states_569_cast_fp16, y = var_76_promoted_38_to_fp16)[name = string("op_3587_cast_fp16")]; + tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; + bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; + tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_3587_cast_fp16)[name = string("variance_77_cast_fp16")]; + fp16 var_3590_to_fp16 = const()[name = string("op_3590_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3591_cast_fp16 = add(x = variance_77_cast_fp16, y = var_3590_to_fp16)[name = string("op_3591_cast_fp16")]; + fp32 var_3592_epsilon_0 = const()[name = string("op_3592_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3592_cast_fp16 = rsqrt(epsilon = var_3592_epsilon_0, x = var_3591_cast_fp16)[name = string("op_3592_cast_fp16")]; + tensor hidden_states_573_cast_fp16 = mul(x = hidden_states_569_cast_fp16, y = var_3592_cast_fp16)[name = string("hidden_states_573_cast_fp16")]; + tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53816064)))]; + tensor hidden_states_577_cast_fp16 = mul(x = model_model_layers_19_input_layernorm_weight_to_fp16, y = hidden_states_573_cast_fp16)[name = string("hidden_states_577_cast_fp16")]; + tensor var_3603_shape_cast_fp16 = shape(x = hidden_states_577_cast_fp16)[name = string("op_3603_shape_cast_fp16")]; + int32 gather_346 = const()[name = string("gather_346"), val = int32(1)]; + int32 gather_347_axis_0 = const()[name = string("gather_347_axis_0"), val = int32(0)]; + int32 gather_347_batch_dims_0 = const()[name = string("gather_347_batch_dims_0"), val = int32(0)]; + bool gather_347_validate_indices_0 = const()[name = string("gather_347_validate_indices_0"), val = bool(false)]; + string var_3603_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3603_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_347_to_uint16 = const()[name = string("select_347_to_uint16"), val = uint16(1)]; + tensor var_3603_shape_cast_fp16_to_uint16 = cast(dtype = var_3603_shape_cast_fp16_to_uint16_dtype_0, x = var_3603_shape_cast_fp16)[name = string("cast_87")]; + uint16 gather_347_cast_uint16 = gather(axis = gather_347_axis_0, batch_dims = gather_347_batch_dims_0, indices = select_347_to_uint16, validate_indices = gather_347_validate_indices_0, x = var_3603_shape_cast_fp16_to_uint16)[name = string("gather_347_cast_uint16")]; + string gather_347_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_347_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53817280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(53983232))))[name = string("model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_133_cast_fp16")]; + tensor model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54004032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54059392))))[name = string("model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_134_cast_fp16")]; + tensor model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54066368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54121728))))[name = string("model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_135_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_135_cast_fp16")]; + tensor concat_361x = const()[name = string("concat_361x"), val = tensor([1, -1, 9, 64])]; + tensor var_3612_cast_fp16 = reshape(shape = concat_361x, x = linear_133_cast_fp16)[name = string("op_3612_cast_fp16")]; + tensor q_39_perm_0 = const()[name = string("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_362x = const()[name = string("concat_362x"), val = tensor([1, -1, 3, 64])]; + tensor var_3615_cast_fp16 = reshape(shape = concat_362x, x = linear_134_cast_fp16)[name = string("op_3615_cast_fp16")]; + tensor k_39_perm_0 = const()[name = string("k_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_363x = const()[name = string("concat_363x"), val = tensor([1, -1, 3, 64])]; + tensor var_3618_cast_fp16 = reshape(shape = concat_363x, x = linear_135_cast_fp16)[name = string("op_3618_cast_fp16")]; + tensor v_state_39_perm_0 = const()[name = string("v_state_39_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_39_cast_fp16 = transpose(perm = q_39_perm_0, x = var_3612_cast_fp16)[name = string("transpose_43")]; + tensor var_3622_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3622_cast_fp16")]; + tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; + tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; + fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3633_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_41_promoted_to_fp16)[name = string("op_3633_cast_fp16")]; + bool var_3635_interleave_0 = const()[name = string("op_3635_interleave_0"), val = bool(false)]; + tensor var_3635_cast_fp16 = concat(axis = var_81, interleave = var_3635_interleave_0, values = (var_3633_cast_fp16, x1_77_cast_fp16))[name = string("op_3635_cast_fp16")]; + tensor var_3636_cast_fp16 = mul(x = var_3635_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3636_cast_fp16")]; + tensor query_states_79_cast_fp16 = add(x = var_3622_cast_fp16, y = var_3636_cast_fp16)[name = string("query_states_79_cast_fp16")]; + tensor k_39_cast_fp16 = transpose(perm = k_39_perm_0, x = var_3615_cast_fp16)[name = string("transpose_42")]; + tensor var_3638_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3638_cast_fp16")]; + tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; + tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; + fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3649_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_3649_cast_fp16")]; + bool var_3651_interleave_0 = const()[name = string("op_3651_interleave_0"), val = bool(false)]; + tensor var_3651_cast_fp16 = concat(axis = var_81, interleave = var_3651_interleave_0, values = (var_3649_cast_fp16, x1_79_cast_fp16))[name = string("op_3651_cast_fp16")]; + tensor var_3652_cast_fp16 = mul(x = var_3651_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3652_cast_fp16")]; + tensor k_state_39_cast_fp16 = add(x = var_3638_cast_fp16, y = var_3652_cast_fp16)[name = string("k_state_39_cast_fp16")]; + tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([0])]; + tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; + tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; + tensor concat_366_values0_0 = const()[name = string("concat_366_values0_0"), val = tensor([19])]; + int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; + bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; + tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (concat_366_values0_0, expand_dims_228, expand_dims_229, expand_dims_2, expand_dims_231))[name = string("concat_366")]; + tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_366, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = k_state_39_cast_fp16, x = coreml_update_state_96)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_98_write_state")]; + tensor coreml_update_state_98 = read_state(input = key_cache)[name = string("coreml_update_state_98")]; + tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_39_cast_fp16 = transpose(perm = v_state_39_perm_0, x = var_3618_cast_fp16)[name = string("transpose_41")]; + tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_366, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = v_state_39_cast_fp16, x = coreml_update_state_97)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_99_write_state")]; + tensor coreml_update_state_99 = read_state(input = value_cache)[name = string("coreml_update_state_99")]; + tensor var_3675_begin_0 = const()[name = string("op_3675_begin_0"), val = tensor([19, 0, 0, 0, 0])]; + tensor var_3675_end_0 = const()[name = string("op_3675_end_0"), val = tensor([20, 1, 3, 2048, 64])]; + tensor var_3675_end_mask_0 = const()[name = string("op_3675_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3675_squeeze_mask_0 = const()[name = string("op_3675_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3675_cast_fp16 = slice_by_index(begin = var_3675_begin_0, end = var_3675_end_0, end_mask = var_3675_end_mask_0, squeeze_mask = var_3675_squeeze_mask_0, x = coreml_update_state_98)[name = string("op_3675_cast_fp16")]; + tensor var_3678_begin_0 = const()[name = string("op_3678_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3678_end_mask_0 = const()[name = string("op_3678_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3678_cast_fp16 = slice_by_index(begin = var_3678_begin_0, end = concat_11, end_mask = var_3678_end_mask_0, x = var_3675_cast_fp16)[name = string("op_3678_cast_fp16")]; + tensor var_3680_begin_0 = const()[name = string("op_3680_begin_0"), val = tensor([19, 0, 0, 0, 0])]; + tensor var_3680_end_0 = const()[name = string("op_3680_end_0"), val = tensor([20, 1, 3, 2048, 64])]; + tensor var_3680_end_mask_0 = const()[name = string("op_3680_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3680_squeeze_mask_0 = const()[name = string("op_3680_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3680_cast_fp16 = slice_by_index(begin = var_3680_begin_0, end = var_3680_end_0, end_mask = var_3680_end_mask_0, squeeze_mask = var_3680_squeeze_mask_0, x = coreml_update_state_99)[name = string("op_3680_cast_fp16")]; + tensor var_3683_begin_0 = const()[name = string("op_3683_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3683_end_mask_0 = const()[name = string("op_3683_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3683_cast_fp16 = slice_by_index(begin = var_3683_begin_0, end = concat_11, end_mask = var_3683_end_mask_0, x = var_3680_cast_fp16)[name = string("op_3683_cast_fp16")]; + tensor var_3685_shape_cast_fp16 = shape(x = var_3678_cast_fp16)[name = string("op_3685_shape_cast_fp16")]; + int32 gather_355 = const()[name = string("gather_355"), val = int32(1)]; + int32 gather_356 = const()[name = string("gather_356"), val = int32(3)]; + int32 gather_357_axis_0 = const()[name = string("gather_357_axis_0"), val = int32(0)]; + int32 gather_357_batch_dims_0 = const()[name = string("gather_357_batch_dims_0"), val = int32(0)]; + bool gather_357_validate_indices_0 = const()[name = string("gather_357_validate_indices_0"), val = bool(false)]; + string var_3685_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3685_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_357_to_uint16 = const()[name = string("select_357_to_uint16"), val = uint16(2)]; + tensor var_3685_shape_cast_fp16_to_uint16 = cast(dtype = var_3685_shape_cast_fp16_to_uint16_dtype_0, x = var_3685_shape_cast_fp16)[name = string("cast_86")]; + uint16 gather_357_cast_uint16 = gather(axis = gather_357_axis_0, batch_dims = gather_357_batch_dims_0, indices = select_357_to_uint16, validate_indices = gather_357_validate_indices_0, x = var_3685_shape_cast_fp16_to_uint16)[name = string("gather_357_cast_uint16")]; + string gather_357_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_357_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_358 = const()[name = string("gather_358"), val = int32(64)]; + tensor var_3692_axes_0 = const()[name = string("op_3692_axes_0"), val = tensor([2])]; + tensor var_3692_cast_fp16 = expand_dims(axes = var_3692_axes_0, x = var_3678_cast_fp16)[name = string("op_3692_cast_fp16")]; + tensor shape_397_cast_fp16 = shape(x = var_3692_cast_fp16)[name = string("shape_397_cast_fp16")]; + int32 concat_374_axis_0 = const()[name = string("concat_374_axis_0"), val = int32(0)]; + bool concat_374_interleave_0 = const()[name = string("concat_374_interleave_0"), val = bool(false)]; + int32 gather_357_cast_uint16_to_int32 = cast(dtype = gather_357_cast_uint16_to_int32_dtype_0, x = gather_357_cast_uint16)[name = string("cast_85")]; + tensor concat_374 = concat(axis = concat_374_axis_0, interleave = concat_374_interleave_0, values = (gather_355, gather_356, var_85, gather_357_cast_uint16_to_int32, gather_358))[name = string("concat_374")]; + tensor real_div_38 = real_div(x = concat_374, y = shape_397_cast_fp16)[name = string("real_div_38")]; + tensor hidden_states_581_cast_fp16 = tile(reps = real_div_38, x = var_3692_cast_fp16)[name = string("hidden_states_581_cast_fp16")]; + tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_79_cast_fp16 = reshape(shape = concat_375x, x = hidden_states_581_cast_fp16)[name = string("key_states_79_cast_fp16")]; + tensor var_3702_shape_cast_fp16 = shape(x = var_3683_cast_fp16)[name = string("op_3702_shape_cast_fp16")]; + int32 gather_359 = const()[name = string("gather_359"), val = int32(1)]; + int32 gather_360 = const()[name = string("gather_360"), val = int32(3)]; + int32 gather_361_axis_0 = const()[name = string("gather_361_axis_0"), val = int32(0)]; + int32 gather_361_batch_dims_0 = const()[name = string("gather_361_batch_dims_0"), val = int32(0)]; + bool gather_361_validate_indices_0 = const()[name = string("gather_361_validate_indices_0"), val = bool(false)]; + string var_3702_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3702_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_361_to_uint16 = const()[name = string("select_361_to_uint16"), val = uint16(2)]; + tensor var_3702_shape_cast_fp16_to_uint16 = cast(dtype = var_3702_shape_cast_fp16_to_uint16_dtype_0, x = var_3702_shape_cast_fp16)[name = string("cast_84")]; + uint16 gather_361_cast_uint16 = gather(axis = gather_361_axis_0, batch_dims = gather_361_batch_dims_0, indices = select_361_to_uint16, validate_indices = gather_361_validate_indices_0, x = var_3702_shape_cast_fp16_to_uint16)[name = string("gather_361_cast_uint16")]; + string gather_361_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_361_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_362 = const()[name = string("gather_362"), val = int32(64)]; + tensor var_3709_axes_0 = const()[name = string("op_3709_axes_0"), val = tensor([2])]; + tensor var_3709_cast_fp16 = expand_dims(axes = var_3709_axes_0, x = var_3683_cast_fp16)[name = string("op_3709_cast_fp16")]; + tensor shape_402_cast_fp16 = shape(x = var_3709_cast_fp16)[name = string("shape_402_cast_fp16")]; + int32 concat_376_axis_0 = const()[name = string("concat_376_axis_0"), val = int32(0)]; + bool concat_376_interleave_0 = const()[name = string("concat_376_interleave_0"), val = bool(false)]; + int32 gather_361_cast_uint16_to_int32 = cast(dtype = gather_361_cast_uint16_to_int32_dtype_0, x = gather_361_cast_uint16)[name = string("cast_83")]; + tensor concat_376 = concat(axis = concat_376_axis_0, interleave = concat_376_interleave_0, values = (gather_359, gather_360, var_85, gather_361_cast_uint16_to_int32, gather_362))[name = string("concat_376")]; + tensor real_div_39 = real_div(x = concat_376, y = shape_402_cast_fp16)[name = string("real_div_39")]; + tensor hidden_states_585_cast_fp16 = tile(reps = real_div_39, x = var_3709_cast_fp16)[name = string("hidden_states_585_cast_fp16")]; + tensor concat_377x = const()[name = string("concat_377x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_79_cast_fp16 = reshape(shape = concat_377x, x = hidden_states_585_cast_fp16)[name = string("value_states_79_cast_fp16")]; + tensor var_3719_shape_cast_fp16 = shape(x = key_states_79_cast_fp16)[name = string("op_3719_shape_cast_fp16")]; + int32 gather_363_axis_0 = const()[name = string("gather_363_axis_0"), val = int32(0)]; + int32 gather_363_batch_dims_0 = const()[name = string("gather_363_batch_dims_0"), val = int32(0)]; + bool gather_363_validate_indices_0 = const()[name = string("gather_363_validate_indices_0"), val = bool(false)]; + string var_3719_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3719_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_363_to_uint16 = const()[name = string("select_363_to_uint16"), val = uint16(2)]; + tensor var_3719_shape_cast_fp16_to_uint16 = cast(dtype = var_3719_shape_cast_fp16_to_uint16_dtype_0, x = var_3719_shape_cast_fp16)[name = string("cast_82")]; + uint16 gather_363_cast_uint16 = gather(axis = gather_363_axis_0, batch_dims = gather_363_batch_dims_0, indices = select_363_to_uint16, validate_indices = gather_363_validate_indices_0, x = var_3719_shape_cast_fp16_to_uint16)[name = string("gather_363_cast_uint16")]; + string gather_363_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_363_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = int32(1)]; + int32 concat_378_values1_0 = const()[name = string("concat_378_values1_0"), val = int32(1)]; + int32 concat_378_values2_0 = const()[name = string("concat_378_values2_0"), val = int32(0)]; + int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; + bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; + int32 gather_363_cast_uint16_to_int32 = cast(dtype = gather_363_cast_uint16_to_int32_dtype_0, x = gather_363_cast_uint16)[name = string("cast_81")]; + tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, concat_378_values1_0, concat_378_values2_0, gather_363_cast_uint16_to_int32))[name = string("concat_378")]; + tensor causal_mask_41_begin_0 = const()[name = string("causal_mask_41_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_41_end_mask_0 = const()[name = string("causal_mask_41_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_41_cast_fp16 = slice_by_index(begin = causal_mask_41_begin_0, end = concat_378, end_mask = causal_mask_41_end_mask_0, x = causal_mask)[name = string("causal_mask_41_cast_fp16")]; + tensor attn_output_77_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_41_cast_fp16, key = key_states_79_cast_fp16, query = query_states_79_cast_fp16, value = value_states_79_cast_fp16)[name = string("attn_output_77_cast_fp16")]; + tensor var_3725_perm_0 = const()[name = string("op_3725_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; + bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; + int32 gather_347_cast_uint16_to_int32 = cast(dtype = gather_347_cast_uint16_to_int32_dtype_0, x = gather_347_cast_uint16)[name = string("cast_80")]; + tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (gather_346, gather_347_cast_uint16_to_int32, var_81))[name = string("concat_379")]; + tensor var_3725_cast_fp16 = transpose(perm = var_3725_perm_0, x = attn_output_77_cast_fp16)[name = string("transpose_40")]; + tensor input_153_cast_fp16 = reshape(shape = concat_379, x = var_3725_cast_fp16)[name = string("input_153_cast_fp16")]; + tensor model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54128704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54294656))))[name = string("model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_136_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = string("linear_136_cast_fp16")]; + tensor hidden_states_589_cast_fp16 = add(x = hidden_states_569_cast_fp16, y = linear_136_cast_fp16)[name = string("hidden_states_589_cast_fp16")]; + fp16 var_76_promoted_39_to_fp16 = const()[name = string("op_76_promoted_39_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3734_cast_fp16 = pow(x = hidden_states_589_cast_fp16, y = var_76_promoted_39_to_fp16)[name = string("op_3734_cast_fp16")]; + tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; + bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; + tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_3734_cast_fp16)[name = string("variance_79_cast_fp16")]; + fp16 var_3737_to_fp16 = const()[name = string("op_3737_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3738_cast_fp16 = add(x = variance_79_cast_fp16, y = var_3737_to_fp16)[name = string("op_3738_cast_fp16")]; + fp32 var_3739_epsilon_0 = const()[name = string("op_3739_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3739_cast_fp16 = rsqrt(epsilon = var_3739_epsilon_0, x = var_3738_cast_fp16)[name = string("op_3739_cast_fp16")]; + tensor hidden_states_593_cast_fp16 = mul(x = hidden_states_589_cast_fp16, y = var_3739_cast_fp16)[name = string("hidden_states_593_cast_fp16")]; + tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54315456)))]; + tensor input_155_cast_fp16 = mul(x = model_model_layers_19_post_attention_layernorm_weight_to_fp16, y = hidden_states_593_cast_fp16)[name = string("input_155_cast_fp16")]; + tensor model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54316672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54759104))))[name = string("model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_137_cast_fp16")]; + tensor var_3751_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_3751_cast_fp16")]; + tensor model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(54814464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55256896))))[name = string("model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_138_cast_fp16")]; + tensor input_159_cast_fp16 = mul(x = var_3751_cast_fp16, y = linear_138_cast_fp16)[name = string("input_159_cast_fp16")]; + tensor model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55312256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55754688))))[name = string("model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_139_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = string("linear_139_cast_fp16")]; + tensor hidden_states_599_cast_fp16 = add(x = hidden_states_589_cast_fp16, y = linear_139_cast_fp16)[name = string("hidden_states_599_cast_fp16")]; + fp16 var_76_promoted_40_to_fp16 = const()[name = string("op_76_promoted_40_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3764_cast_fp16 = pow(x = hidden_states_599_cast_fp16, y = var_76_promoted_40_to_fp16)[name = string("op_3764_cast_fp16")]; + tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; + bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; + tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_3764_cast_fp16)[name = string("variance_81_cast_fp16")]; + fp16 var_3767_to_fp16 = const()[name = string("op_3767_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3768_cast_fp16 = add(x = variance_81_cast_fp16, y = var_3767_to_fp16)[name = string("op_3768_cast_fp16")]; + fp32 var_3769_epsilon_0 = const()[name = string("op_3769_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3769_cast_fp16 = rsqrt(epsilon = var_3769_epsilon_0, x = var_3768_cast_fp16)[name = string("op_3769_cast_fp16")]; + tensor hidden_states_603_cast_fp16 = mul(x = hidden_states_599_cast_fp16, y = var_3769_cast_fp16)[name = string("hidden_states_603_cast_fp16")]; + tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55810048)))]; + tensor hidden_states_607_cast_fp16 = mul(x = model_model_layers_20_input_layernorm_weight_to_fp16, y = hidden_states_603_cast_fp16)[name = string("hidden_states_607_cast_fp16")]; + tensor var_3780_shape_cast_fp16 = shape(x = hidden_states_607_cast_fp16)[name = string("op_3780_shape_cast_fp16")]; + int32 gather_364 = const()[name = string("gather_364"), val = int32(1)]; + int32 gather_365_axis_0 = const()[name = string("gather_365_axis_0"), val = int32(0)]; + int32 gather_365_batch_dims_0 = const()[name = string("gather_365_batch_dims_0"), val = int32(0)]; + bool gather_365_validate_indices_0 = const()[name = string("gather_365_validate_indices_0"), val = bool(false)]; + string var_3780_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3780_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_365_to_uint16 = const()[name = string("select_365_to_uint16"), val = uint16(1)]; + tensor var_3780_shape_cast_fp16_to_uint16 = cast(dtype = var_3780_shape_cast_fp16_to_uint16_dtype_0, x = var_3780_shape_cast_fp16)[name = string("cast_79")]; + uint16 gather_365_cast_uint16 = gather(axis = gather_365_axis_0, batch_dims = gather_365_batch_dims_0, indices = select_365_to_uint16, validate_indices = gather_365_validate_indices_0, x = var_3780_shape_cast_fp16_to_uint16)[name = string("gather_365_cast_uint16")]; + string gather_365_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_365_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55811264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55977216))))[name = string("model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_140_cast_fp16")]; + tensor model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(55998016))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56053376))))[name = string("model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_141_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_141_cast_fp16")]; + tensor model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56060352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56115712))))[name = string("model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_142_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_142_cast_fp16")]; + tensor concat_380x = const()[name = string("concat_380x"), val = tensor([1, -1, 9, 64])]; + tensor var_3789_cast_fp16 = reshape(shape = concat_380x, x = linear_140_cast_fp16)[name = string("op_3789_cast_fp16")]; + tensor q_41_perm_0 = const()[name = string("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_381x = const()[name = string("concat_381x"), val = tensor([1, -1, 3, 64])]; + tensor var_3792_cast_fp16 = reshape(shape = concat_381x, x = linear_141_cast_fp16)[name = string("op_3792_cast_fp16")]; + tensor k_41_perm_0 = const()[name = string("k_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_382x = const()[name = string("concat_382x"), val = tensor([1, -1, 3, 64])]; + tensor var_3795_cast_fp16 = reshape(shape = concat_382x, x = linear_142_cast_fp16)[name = string("op_3795_cast_fp16")]; + tensor v_state_41_perm_0 = const()[name = string("v_state_41_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_41_cast_fp16 = transpose(perm = q_41_perm_0, x = var_3789_cast_fp16)[name = string("transpose_39")]; + tensor var_3799_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3799_cast_fp16")]; + tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; + tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; + fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3810_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_3810_cast_fp16")]; + bool var_3812_interleave_0 = const()[name = string("op_3812_interleave_0"), val = bool(false)]; + tensor var_3812_cast_fp16 = concat(axis = var_81, interleave = var_3812_interleave_0, values = (var_3810_cast_fp16, x1_81_cast_fp16))[name = string("op_3812_cast_fp16")]; + tensor var_3813_cast_fp16 = mul(x = var_3812_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3813_cast_fp16")]; + tensor query_states_83_cast_fp16 = add(x = var_3799_cast_fp16, y = var_3813_cast_fp16)[name = string("query_states_83_cast_fp16")]; + tensor k_41_cast_fp16 = transpose(perm = k_41_perm_0, x = var_3792_cast_fp16)[name = string("transpose_38")]; + tensor var_3815_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3815_cast_fp16")]; + tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; + tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; + fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3826_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_3826_cast_fp16")]; + bool var_3828_interleave_0 = const()[name = string("op_3828_interleave_0"), val = bool(false)]; + tensor var_3828_cast_fp16 = concat(axis = var_81, interleave = var_3828_interleave_0, values = (var_3826_cast_fp16, x1_83_cast_fp16))[name = string("op_3828_cast_fp16")]; + tensor var_3829_cast_fp16 = mul(x = var_3828_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3829_cast_fp16")]; + tensor k_state_41_cast_fp16 = add(x = var_3815_cast_fp16, y = var_3829_cast_fp16)[name = string("k_state_41_cast_fp16")]; + tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; + tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; + tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; + tensor concat_385_values0_0 = const()[name = string("concat_385_values0_0"), val = tensor([20])]; + int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; + bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; + tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (concat_385_values0_0, expand_dims_240, expand_dims_241, expand_dims_2, expand_dims_243))[name = string("concat_385")]; + tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_385, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = k_state_41_cast_fp16, x = coreml_update_state_98)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_100_write_state")]; + tensor coreml_update_state_100 = read_state(input = key_cache)[name = string("coreml_update_state_100")]; + tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_41_cast_fp16 = transpose(perm = v_state_41_perm_0, x = var_3795_cast_fp16)[name = string("transpose_37")]; + tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_385, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = v_state_41_cast_fp16, x = coreml_update_state_99)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_101_write_state")]; + tensor coreml_update_state_101 = read_state(input = value_cache)[name = string("coreml_update_state_101")]; + tensor var_3852_begin_0 = const()[name = string("op_3852_begin_0"), val = tensor([20, 0, 0, 0, 0])]; + tensor var_3852_end_0 = const()[name = string("op_3852_end_0"), val = tensor([21, 1, 3, 2048, 64])]; + tensor var_3852_end_mask_0 = const()[name = string("op_3852_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3852_squeeze_mask_0 = const()[name = string("op_3852_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3852_cast_fp16 = slice_by_index(begin = var_3852_begin_0, end = var_3852_end_0, end_mask = var_3852_end_mask_0, squeeze_mask = var_3852_squeeze_mask_0, x = coreml_update_state_100)[name = string("op_3852_cast_fp16")]; + tensor var_3855_begin_0 = const()[name = string("op_3855_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3855_end_mask_0 = const()[name = string("op_3855_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3855_cast_fp16 = slice_by_index(begin = var_3855_begin_0, end = concat_11, end_mask = var_3855_end_mask_0, x = var_3852_cast_fp16)[name = string("op_3855_cast_fp16")]; + tensor var_3857_begin_0 = const()[name = string("op_3857_begin_0"), val = tensor([20, 0, 0, 0, 0])]; + tensor var_3857_end_0 = const()[name = string("op_3857_end_0"), val = tensor([21, 1, 3, 2048, 64])]; + tensor var_3857_end_mask_0 = const()[name = string("op_3857_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_3857_squeeze_mask_0 = const()[name = string("op_3857_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_3857_cast_fp16 = slice_by_index(begin = var_3857_begin_0, end = var_3857_end_0, end_mask = var_3857_end_mask_0, squeeze_mask = var_3857_squeeze_mask_0, x = coreml_update_state_101)[name = string("op_3857_cast_fp16")]; + tensor var_3860_begin_0 = const()[name = string("op_3860_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3860_end_mask_0 = const()[name = string("op_3860_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3860_cast_fp16 = slice_by_index(begin = var_3860_begin_0, end = concat_11, end_mask = var_3860_end_mask_0, x = var_3857_cast_fp16)[name = string("op_3860_cast_fp16")]; + tensor var_3862_shape_cast_fp16 = shape(x = var_3855_cast_fp16)[name = string("op_3862_shape_cast_fp16")]; + int32 gather_373 = const()[name = string("gather_373"), val = int32(1)]; + int32 gather_374 = const()[name = string("gather_374"), val = int32(3)]; + int32 gather_375_axis_0 = const()[name = string("gather_375_axis_0"), val = int32(0)]; + int32 gather_375_batch_dims_0 = const()[name = string("gather_375_batch_dims_0"), val = int32(0)]; + bool gather_375_validate_indices_0 = const()[name = string("gather_375_validate_indices_0"), val = bool(false)]; + string var_3862_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3862_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_375_to_uint16 = const()[name = string("select_375_to_uint16"), val = uint16(2)]; + tensor var_3862_shape_cast_fp16_to_uint16 = cast(dtype = var_3862_shape_cast_fp16_to_uint16_dtype_0, x = var_3862_shape_cast_fp16)[name = string("cast_78")]; + uint16 gather_375_cast_uint16 = gather(axis = gather_375_axis_0, batch_dims = gather_375_batch_dims_0, indices = select_375_to_uint16, validate_indices = gather_375_validate_indices_0, x = var_3862_shape_cast_fp16_to_uint16)[name = string("gather_375_cast_uint16")]; + string gather_375_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_375_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_376 = const()[name = string("gather_376"), val = int32(64)]; + tensor var_3869_axes_0 = const()[name = string("op_3869_axes_0"), val = tensor([2])]; + tensor var_3869_cast_fp16 = expand_dims(axes = var_3869_axes_0, x = var_3855_cast_fp16)[name = string("op_3869_cast_fp16")]; + tensor shape_417_cast_fp16 = shape(x = var_3869_cast_fp16)[name = string("shape_417_cast_fp16")]; + int32 concat_393_axis_0 = const()[name = string("concat_393_axis_0"), val = int32(0)]; + bool concat_393_interleave_0 = const()[name = string("concat_393_interleave_0"), val = bool(false)]; + int32 gather_375_cast_uint16_to_int32 = cast(dtype = gather_375_cast_uint16_to_int32_dtype_0, x = gather_375_cast_uint16)[name = string("cast_77")]; + tensor concat_393 = concat(axis = concat_393_axis_0, interleave = concat_393_interleave_0, values = (gather_373, gather_374, var_85, gather_375_cast_uint16_to_int32, gather_376))[name = string("concat_393")]; + tensor real_div_40 = real_div(x = concat_393, y = shape_417_cast_fp16)[name = string("real_div_40")]; + tensor hidden_states_611_cast_fp16 = tile(reps = real_div_40, x = var_3869_cast_fp16)[name = string("hidden_states_611_cast_fp16")]; + tensor concat_394x = const()[name = string("concat_394x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_83_cast_fp16 = reshape(shape = concat_394x, x = hidden_states_611_cast_fp16)[name = string("key_states_83_cast_fp16")]; + tensor var_3879_shape_cast_fp16 = shape(x = var_3860_cast_fp16)[name = string("op_3879_shape_cast_fp16")]; + int32 gather_377 = const()[name = string("gather_377"), val = int32(1)]; + int32 gather_378 = const()[name = string("gather_378"), val = int32(3)]; + int32 gather_379_axis_0 = const()[name = string("gather_379_axis_0"), val = int32(0)]; + int32 gather_379_batch_dims_0 = const()[name = string("gather_379_batch_dims_0"), val = int32(0)]; + bool gather_379_validate_indices_0 = const()[name = string("gather_379_validate_indices_0"), val = bool(false)]; + string var_3879_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3879_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_379_to_uint16 = const()[name = string("select_379_to_uint16"), val = uint16(2)]; + tensor var_3879_shape_cast_fp16_to_uint16 = cast(dtype = var_3879_shape_cast_fp16_to_uint16_dtype_0, x = var_3879_shape_cast_fp16)[name = string("cast_76")]; + uint16 gather_379_cast_uint16 = gather(axis = gather_379_axis_0, batch_dims = gather_379_batch_dims_0, indices = select_379_to_uint16, validate_indices = gather_379_validate_indices_0, x = var_3879_shape_cast_fp16_to_uint16)[name = string("gather_379_cast_uint16")]; + string gather_379_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_379_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_380 = const()[name = string("gather_380"), val = int32(64)]; + tensor var_3886_axes_0 = const()[name = string("op_3886_axes_0"), val = tensor([2])]; + tensor var_3886_cast_fp16 = expand_dims(axes = var_3886_axes_0, x = var_3860_cast_fp16)[name = string("op_3886_cast_fp16")]; + tensor shape_422_cast_fp16 = shape(x = var_3886_cast_fp16)[name = string("shape_422_cast_fp16")]; + int32 concat_395_axis_0 = const()[name = string("concat_395_axis_0"), val = int32(0)]; + bool concat_395_interleave_0 = const()[name = string("concat_395_interleave_0"), val = bool(false)]; + int32 gather_379_cast_uint16_to_int32 = cast(dtype = gather_379_cast_uint16_to_int32_dtype_0, x = gather_379_cast_uint16)[name = string("cast_75")]; + tensor concat_395 = concat(axis = concat_395_axis_0, interleave = concat_395_interleave_0, values = (gather_377, gather_378, var_85, gather_379_cast_uint16_to_int32, gather_380))[name = string("concat_395")]; + tensor real_div_41 = real_div(x = concat_395, y = shape_422_cast_fp16)[name = string("real_div_41")]; + tensor hidden_states_615_cast_fp16 = tile(reps = real_div_41, x = var_3886_cast_fp16)[name = string("hidden_states_615_cast_fp16")]; + tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_83_cast_fp16 = reshape(shape = concat_396x, x = hidden_states_615_cast_fp16)[name = string("value_states_83_cast_fp16")]; + tensor var_3896_shape_cast_fp16 = shape(x = key_states_83_cast_fp16)[name = string("op_3896_shape_cast_fp16")]; + int32 gather_381_axis_0 = const()[name = string("gather_381_axis_0"), val = int32(0)]; + int32 gather_381_batch_dims_0 = const()[name = string("gather_381_batch_dims_0"), val = int32(0)]; + bool gather_381_validate_indices_0 = const()[name = string("gather_381_validate_indices_0"), val = bool(false)]; + string var_3896_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3896_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_381_to_uint16 = const()[name = string("select_381_to_uint16"), val = uint16(2)]; + tensor var_3896_shape_cast_fp16_to_uint16 = cast(dtype = var_3896_shape_cast_fp16_to_uint16_dtype_0, x = var_3896_shape_cast_fp16)[name = string("cast_74")]; + uint16 gather_381_cast_uint16 = gather(axis = gather_381_axis_0, batch_dims = gather_381_batch_dims_0, indices = select_381_to_uint16, validate_indices = gather_381_validate_indices_0, x = var_3896_shape_cast_fp16_to_uint16)[name = string("gather_381_cast_uint16")]; + string gather_381_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_381_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_397_values0_0 = const()[name = string("concat_397_values0_0"), val = int32(1)]; + int32 concat_397_values1_0 = const()[name = string("concat_397_values1_0"), val = int32(1)]; + int32 concat_397_values2_0 = const()[name = string("concat_397_values2_0"), val = int32(0)]; + int32 concat_397_axis_0 = const()[name = string("concat_397_axis_0"), val = int32(0)]; + bool concat_397_interleave_0 = const()[name = string("concat_397_interleave_0"), val = bool(false)]; + int32 gather_381_cast_uint16_to_int32 = cast(dtype = gather_381_cast_uint16_to_int32_dtype_0, x = gather_381_cast_uint16)[name = string("cast_73")]; + tensor concat_397 = concat(axis = concat_397_axis_0, interleave = concat_397_interleave_0, values = (concat_397_values0_0, concat_397_values1_0, concat_397_values2_0, gather_381_cast_uint16_to_int32))[name = string("concat_397")]; + tensor causal_mask_43_begin_0 = const()[name = string("causal_mask_43_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_43_end_mask_0 = const()[name = string("causal_mask_43_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_43_cast_fp16 = slice_by_index(begin = causal_mask_43_begin_0, end = concat_397, end_mask = causal_mask_43_end_mask_0, x = causal_mask)[name = string("causal_mask_43_cast_fp16")]; + tensor attn_output_81_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_43_cast_fp16, key = key_states_83_cast_fp16, query = query_states_83_cast_fp16, value = value_states_83_cast_fp16)[name = string("attn_output_81_cast_fp16")]; + tensor var_3902_perm_0 = const()[name = string("op_3902_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_398_axis_0 = const()[name = string("concat_398_axis_0"), val = int32(0)]; + bool concat_398_interleave_0 = const()[name = string("concat_398_interleave_0"), val = bool(false)]; + int32 gather_365_cast_uint16_to_int32 = cast(dtype = gather_365_cast_uint16_to_int32_dtype_0, x = gather_365_cast_uint16)[name = string("cast_72")]; + tensor concat_398 = concat(axis = concat_398_axis_0, interleave = concat_398_interleave_0, values = (gather_364, gather_365_cast_uint16_to_int32, var_81))[name = string("concat_398")]; + tensor var_3902_cast_fp16 = transpose(perm = var_3902_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_36")]; + tensor input_161_cast_fp16 = reshape(shape = concat_398, x = var_3902_cast_fp16)[name = string("input_161_cast_fp16")]; + tensor model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56122688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56288640))))[name = string("model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_143_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = string("linear_143_cast_fp16")]; + tensor hidden_states_619_cast_fp16 = add(x = hidden_states_599_cast_fp16, y = linear_143_cast_fp16)[name = string("hidden_states_619_cast_fp16")]; + fp16 var_76_promoted_41_to_fp16 = const()[name = string("op_76_promoted_41_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3911_cast_fp16 = pow(x = hidden_states_619_cast_fp16, y = var_76_promoted_41_to_fp16)[name = string("op_3911_cast_fp16")]; + tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; + bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; + tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_3911_cast_fp16)[name = string("variance_83_cast_fp16")]; + fp16 var_3914_to_fp16 = const()[name = string("op_3914_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3915_cast_fp16 = add(x = variance_83_cast_fp16, y = var_3914_to_fp16)[name = string("op_3915_cast_fp16")]; + fp32 var_3916_epsilon_0 = const()[name = string("op_3916_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3916_cast_fp16 = rsqrt(epsilon = var_3916_epsilon_0, x = var_3915_cast_fp16)[name = string("op_3916_cast_fp16")]; + tensor hidden_states_623_cast_fp16 = mul(x = hidden_states_619_cast_fp16, y = var_3916_cast_fp16)[name = string("hidden_states_623_cast_fp16")]; + tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56309440)))]; + tensor input_163_cast_fp16 = mul(x = model_model_layers_20_post_attention_layernorm_weight_to_fp16, y = hidden_states_623_cast_fp16)[name = string("input_163_cast_fp16")]; + tensor model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56310656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56753088))))[name = string("model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_144_cast_fp16")]; + tensor var_3928_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_3928_cast_fp16")]; + tensor model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(56808448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57250880))))[name = string("model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_145_cast_fp16")]; + tensor input_167_cast_fp16 = mul(x = var_3928_cast_fp16, y = linear_145_cast_fp16)[name = string("input_167_cast_fp16")]; + tensor model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57306240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57748672))))[name = string("model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_146_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = string("linear_146_cast_fp16")]; + tensor hidden_states_629_cast_fp16 = add(x = hidden_states_619_cast_fp16, y = linear_146_cast_fp16)[name = string("hidden_states_629_cast_fp16")]; + fp16 var_76_promoted_42_to_fp16 = const()[name = string("op_76_promoted_42_to_fp16"), val = fp16(0x1p+1)]; + tensor var_3941_cast_fp16 = pow(x = hidden_states_629_cast_fp16, y = var_76_promoted_42_to_fp16)[name = string("op_3941_cast_fp16")]; + tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; + bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; + tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_3941_cast_fp16)[name = string("variance_85_cast_fp16")]; + fp16 var_3944_to_fp16 = const()[name = string("op_3944_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_3945_cast_fp16 = add(x = variance_85_cast_fp16, y = var_3944_to_fp16)[name = string("op_3945_cast_fp16")]; + fp32 var_3946_epsilon_0 = const()[name = string("op_3946_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_3946_cast_fp16 = rsqrt(epsilon = var_3946_epsilon_0, x = var_3945_cast_fp16)[name = string("op_3946_cast_fp16")]; + tensor hidden_states_633_cast_fp16 = mul(x = hidden_states_629_cast_fp16, y = var_3946_cast_fp16)[name = string("hidden_states_633_cast_fp16")]; + tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57804032)))]; + tensor hidden_states_637_cast_fp16 = mul(x = model_model_layers_21_input_layernorm_weight_to_fp16, y = hidden_states_633_cast_fp16)[name = string("hidden_states_637_cast_fp16")]; + tensor var_3957_shape_cast_fp16 = shape(x = hidden_states_637_cast_fp16)[name = string("op_3957_shape_cast_fp16")]; + int32 gather_382 = const()[name = string("gather_382"), val = int32(1)]; + int32 gather_383_axis_0 = const()[name = string("gather_383_axis_0"), val = int32(0)]; + int32 gather_383_batch_dims_0 = const()[name = string("gather_383_batch_dims_0"), val = int32(0)]; + bool gather_383_validate_indices_0 = const()[name = string("gather_383_validate_indices_0"), val = bool(false)]; + string var_3957_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3957_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_383_to_uint16 = const()[name = string("select_383_to_uint16"), val = uint16(1)]; + tensor var_3957_shape_cast_fp16_to_uint16 = cast(dtype = var_3957_shape_cast_fp16_to_uint16_dtype_0, x = var_3957_shape_cast_fp16)[name = string("cast_71")]; + uint16 gather_383_cast_uint16 = gather(axis = gather_383_axis_0, batch_dims = gather_383_batch_dims_0, indices = select_383_to_uint16, validate_indices = gather_383_validate_indices_0, x = var_3957_shape_cast_fp16_to_uint16)[name = string("gather_383_cast_uint16")]; + string gather_383_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_383_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57805248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57971200))))[name = string("model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_147_cast_fp16")]; + tensor model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(57992000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58047360))))[name = string("model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_148_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_148_cast_fp16")]; + tensor model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58054336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58109696))))[name = string("model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_149_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_149_cast_fp16")]; + tensor concat_399x = const()[name = string("concat_399x"), val = tensor([1, -1, 9, 64])]; + tensor var_3966_cast_fp16 = reshape(shape = concat_399x, x = linear_147_cast_fp16)[name = string("op_3966_cast_fp16")]; + tensor q_43_perm_0 = const()[name = string("q_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_400x = const()[name = string("concat_400x"), val = tensor([1, -1, 3, 64])]; + tensor var_3969_cast_fp16 = reshape(shape = concat_400x, x = linear_148_cast_fp16)[name = string("op_3969_cast_fp16")]; + tensor k_43_perm_0 = const()[name = string("k_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_401x = const()[name = string("concat_401x"), val = tensor([1, -1, 3, 64])]; + tensor var_3972_cast_fp16 = reshape(shape = concat_401x, x = linear_149_cast_fp16)[name = string("op_3972_cast_fp16")]; + tensor v_state_43_perm_0 = const()[name = string("v_state_43_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_43_cast_fp16 = transpose(perm = q_43_perm_0, x = var_3966_cast_fp16)[name = string("transpose_35")]; + tensor var_3976_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3976_cast_fp16")]; + tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; + tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; + fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_3987_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_3987_cast_fp16")]; + bool var_3989_interleave_0 = const()[name = string("op_3989_interleave_0"), val = bool(false)]; + tensor var_3989_cast_fp16 = concat(axis = var_81, interleave = var_3989_interleave_0, values = (var_3987_cast_fp16, x1_85_cast_fp16))[name = string("op_3989_cast_fp16")]; + tensor var_3990_cast_fp16 = mul(x = var_3989_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3990_cast_fp16")]; + tensor query_states_87_cast_fp16 = add(x = var_3976_cast_fp16, y = var_3990_cast_fp16)[name = string("query_states_87_cast_fp16")]; + tensor k_43_cast_fp16 = transpose(perm = k_43_perm_0, x = var_3969_cast_fp16)[name = string("transpose_34")]; + tensor var_3992_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3992_cast_fp16")]; + tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; + tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; + fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4003_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_4003_cast_fp16")]; + bool var_4005_interleave_0 = const()[name = string("op_4005_interleave_0"), val = bool(false)]; + tensor var_4005_cast_fp16 = concat(axis = var_81, interleave = var_4005_interleave_0, values = (var_4003_cast_fp16, x1_87_cast_fp16))[name = string("op_4005_cast_fp16")]; + tensor var_4006_cast_fp16 = mul(x = var_4005_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4006_cast_fp16")]; + tensor k_state_43_cast_fp16 = add(x = var_3992_cast_fp16, y = var_4006_cast_fp16)[name = string("k_state_43_cast_fp16")]; + tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([0])]; + tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; + tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; + tensor concat_404_values0_0 = const()[name = string("concat_404_values0_0"), val = tensor([21])]; + int32 concat_404_axis_0 = const()[name = string("concat_404_axis_0"), val = int32(0)]; + bool concat_404_interleave_0 = const()[name = string("concat_404_interleave_0"), val = bool(false)]; + tensor concat_404 = concat(axis = concat_404_axis_0, interleave = concat_404_interleave_0, values = (concat_404_values0_0, expand_dims_252, expand_dims_253, expand_dims_2, expand_dims_255))[name = string("concat_404")]; + tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_404, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = k_state_43_cast_fp16, x = coreml_update_state_100)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_102_write_state")]; + tensor coreml_update_state_102 = read_state(input = key_cache)[name = string("coreml_update_state_102")]; + tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_43_cast_fp16 = transpose(perm = v_state_43_perm_0, x = var_3972_cast_fp16)[name = string("transpose_33")]; + tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_404, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = v_state_43_cast_fp16, x = coreml_update_state_101)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_103_write_state")]; + tensor coreml_update_state_103 = read_state(input = value_cache)[name = string("coreml_update_state_103")]; + tensor var_4029_begin_0 = const()[name = string("op_4029_begin_0"), val = tensor([21, 0, 0, 0, 0])]; + tensor var_4029_end_0 = const()[name = string("op_4029_end_0"), val = tensor([22, 1, 3, 2048, 64])]; + tensor var_4029_end_mask_0 = const()[name = string("op_4029_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4029_squeeze_mask_0 = const()[name = string("op_4029_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4029_cast_fp16 = slice_by_index(begin = var_4029_begin_0, end = var_4029_end_0, end_mask = var_4029_end_mask_0, squeeze_mask = var_4029_squeeze_mask_0, x = coreml_update_state_102)[name = string("op_4029_cast_fp16")]; + tensor var_4032_begin_0 = const()[name = string("op_4032_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4032_end_mask_0 = const()[name = string("op_4032_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4032_cast_fp16 = slice_by_index(begin = var_4032_begin_0, end = concat_11, end_mask = var_4032_end_mask_0, x = var_4029_cast_fp16)[name = string("op_4032_cast_fp16")]; + tensor var_4034_begin_0 = const()[name = string("op_4034_begin_0"), val = tensor([21, 0, 0, 0, 0])]; + tensor var_4034_end_0 = const()[name = string("op_4034_end_0"), val = tensor([22, 1, 3, 2048, 64])]; + tensor var_4034_end_mask_0 = const()[name = string("op_4034_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4034_squeeze_mask_0 = const()[name = string("op_4034_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4034_cast_fp16 = slice_by_index(begin = var_4034_begin_0, end = var_4034_end_0, end_mask = var_4034_end_mask_0, squeeze_mask = var_4034_squeeze_mask_0, x = coreml_update_state_103)[name = string("op_4034_cast_fp16")]; + tensor var_4037_begin_0 = const()[name = string("op_4037_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4037_end_mask_0 = const()[name = string("op_4037_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4037_cast_fp16 = slice_by_index(begin = var_4037_begin_0, end = concat_11, end_mask = var_4037_end_mask_0, x = var_4034_cast_fp16)[name = string("op_4037_cast_fp16")]; + tensor var_4039_shape_cast_fp16 = shape(x = var_4032_cast_fp16)[name = string("op_4039_shape_cast_fp16")]; + int32 gather_391 = const()[name = string("gather_391"), val = int32(1)]; + int32 gather_392 = const()[name = string("gather_392"), val = int32(3)]; + int32 gather_393_axis_0 = const()[name = string("gather_393_axis_0"), val = int32(0)]; + int32 gather_393_batch_dims_0 = const()[name = string("gather_393_batch_dims_0"), val = int32(0)]; + bool gather_393_validate_indices_0 = const()[name = string("gather_393_validate_indices_0"), val = bool(false)]; + string var_4039_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4039_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_393_to_uint16 = const()[name = string("select_393_to_uint16"), val = uint16(2)]; + tensor var_4039_shape_cast_fp16_to_uint16 = cast(dtype = var_4039_shape_cast_fp16_to_uint16_dtype_0, x = var_4039_shape_cast_fp16)[name = string("cast_70")]; + uint16 gather_393_cast_uint16 = gather(axis = gather_393_axis_0, batch_dims = gather_393_batch_dims_0, indices = select_393_to_uint16, validate_indices = gather_393_validate_indices_0, x = var_4039_shape_cast_fp16_to_uint16)[name = string("gather_393_cast_uint16")]; + string gather_393_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_393_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_394 = const()[name = string("gather_394"), val = int32(64)]; + tensor var_4046_axes_0 = const()[name = string("op_4046_axes_0"), val = tensor([2])]; + tensor var_4046_cast_fp16 = expand_dims(axes = var_4046_axes_0, x = var_4032_cast_fp16)[name = string("op_4046_cast_fp16")]; + tensor shape_437_cast_fp16 = shape(x = var_4046_cast_fp16)[name = string("shape_437_cast_fp16")]; + int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; + bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; + int32 gather_393_cast_uint16_to_int32 = cast(dtype = gather_393_cast_uint16_to_int32_dtype_0, x = gather_393_cast_uint16)[name = string("cast_69")]; + tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (gather_391, gather_392, var_85, gather_393_cast_uint16_to_int32, gather_394))[name = string("concat_412")]; + tensor real_div_42 = real_div(x = concat_412, y = shape_437_cast_fp16)[name = string("real_div_42")]; + tensor hidden_states_641_cast_fp16 = tile(reps = real_div_42, x = var_4046_cast_fp16)[name = string("hidden_states_641_cast_fp16")]; + tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_87_cast_fp16 = reshape(shape = concat_413x, x = hidden_states_641_cast_fp16)[name = string("key_states_87_cast_fp16")]; + tensor var_4056_shape_cast_fp16 = shape(x = var_4037_cast_fp16)[name = string("op_4056_shape_cast_fp16")]; + int32 gather_395 = const()[name = string("gather_395"), val = int32(1)]; + int32 gather_396 = const()[name = string("gather_396"), val = int32(3)]; + int32 gather_397_axis_0 = const()[name = string("gather_397_axis_0"), val = int32(0)]; + int32 gather_397_batch_dims_0 = const()[name = string("gather_397_batch_dims_0"), val = int32(0)]; + bool gather_397_validate_indices_0 = const()[name = string("gather_397_validate_indices_0"), val = bool(false)]; + string var_4056_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4056_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_397_to_uint16 = const()[name = string("select_397_to_uint16"), val = uint16(2)]; + tensor var_4056_shape_cast_fp16_to_uint16 = cast(dtype = var_4056_shape_cast_fp16_to_uint16_dtype_0, x = var_4056_shape_cast_fp16)[name = string("cast_68")]; + uint16 gather_397_cast_uint16 = gather(axis = gather_397_axis_0, batch_dims = gather_397_batch_dims_0, indices = select_397_to_uint16, validate_indices = gather_397_validate_indices_0, x = var_4056_shape_cast_fp16_to_uint16)[name = string("gather_397_cast_uint16")]; + string gather_397_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_397_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_398 = const()[name = string("gather_398"), val = int32(64)]; + tensor var_4063_axes_0 = const()[name = string("op_4063_axes_0"), val = tensor([2])]; + tensor var_4063_cast_fp16 = expand_dims(axes = var_4063_axes_0, x = var_4037_cast_fp16)[name = string("op_4063_cast_fp16")]; + tensor shape_442_cast_fp16 = shape(x = var_4063_cast_fp16)[name = string("shape_442_cast_fp16")]; + int32 concat_414_axis_0 = const()[name = string("concat_414_axis_0"), val = int32(0)]; + bool concat_414_interleave_0 = const()[name = string("concat_414_interleave_0"), val = bool(false)]; + int32 gather_397_cast_uint16_to_int32 = cast(dtype = gather_397_cast_uint16_to_int32_dtype_0, x = gather_397_cast_uint16)[name = string("cast_67")]; + tensor concat_414 = concat(axis = concat_414_axis_0, interleave = concat_414_interleave_0, values = (gather_395, gather_396, var_85, gather_397_cast_uint16_to_int32, gather_398))[name = string("concat_414")]; + tensor real_div_43 = real_div(x = concat_414, y = shape_442_cast_fp16)[name = string("real_div_43")]; + tensor hidden_states_645_cast_fp16 = tile(reps = real_div_43, x = var_4063_cast_fp16)[name = string("hidden_states_645_cast_fp16")]; + tensor concat_415x = const()[name = string("concat_415x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_87_cast_fp16 = reshape(shape = concat_415x, x = hidden_states_645_cast_fp16)[name = string("value_states_87_cast_fp16")]; + tensor var_4073_shape_cast_fp16 = shape(x = key_states_87_cast_fp16)[name = string("op_4073_shape_cast_fp16")]; + int32 gather_399_axis_0 = const()[name = string("gather_399_axis_0"), val = int32(0)]; + int32 gather_399_batch_dims_0 = const()[name = string("gather_399_batch_dims_0"), val = int32(0)]; + bool gather_399_validate_indices_0 = const()[name = string("gather_399_validate_indices_0"), val = bool(false)]; + string var_4073_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4073_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_399_to_uint16 = const()[name = string("select_399_to_uint16"), val = uint16(2)]; + tensor var_4073_shape_cast_fp16_to_uint16 = cast(dtype = var_4073_shape_cast_fp16_to_uint16_dtype_0, x = var_4073_shape_cast_fp16)[name = string("cast_66")]; + uint16 gather_399_cast_uint16 = gather(axis = gather_399_axis_0, batch_dims = gather_399_batch_dims_0, indices = select_399_to_uint16, validate_indices = gather_399_validate_indices_0, x = var_4073_shape_cast_fp16_to_uint16)[name = string("gather_399_cast_uint16")]; + string gather_399_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_399_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_416_values0_0 = const()[name = string("concat_416_values0_0"), val = int32(1)]; + int32 concat_416_values1_0 = const()[name = string("concat_416_values1_0"), val = int32(1)]; + int32 concat_416_values2_0 = const()[name = string("concat_416_values2_0"), val = int32(0)]; + int32 concat_416_axis_0 = const()[name = string("concat_416_axis_0"), val = int32(0)]; + bool concat_416_interleave_0 = const()[name = string("concat_416_interleave_0"), val = bool(false)]; + int32 gather_399_cast_uint16_to_int32 = cast(dtype = gather_399_cast_uint16_to_int32_dtype_0, x = gather_399_cast_uint16)[name = string("cast_65")]; + tensor concat_416 = concat(axis = concat_416_axis_0, interleave = concat_416_interleave_0, values = (concat_416_values0_0, concat_416_values1_0, concat_416_values2_0, gather_399_cast_uint16_to_int32))[name = string("concat_416")]; + tensor causal_mask_45_begin_0 = const()[name = string("causal_mask_45_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_45_end_mask_0 = const()[name = string("causal_mask_45_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_45_cast_fp16 = slice_by_index(begin = causal_mask_45_begin_0, end = concat_416, end_mask = causal_mask_45_end_mask_0, x = causal_mask)[name = string("causal_mask_45_cast_fp16")]; + tensor attn_output_85_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_45_cast_fp16, key = key_states_87_cast_fp16, query = query_states_87_cast_fp16, value = value_states_87_cast_fp16)[name = string("attn_output_85_cast_fp16")]; + tensor var_4079_perm_0 = const()[name = string("op_4079_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_417_axis_0 = const()[name = string("concat_417_axis_0"), val = int32(0)]; + bool concat_417_interleave_0 = const()[name = string("concat_417_interleave_0"), val = bool(false)]; + int32 gather_383_cast_uint16_to_int32 = cast(dtype = gather_383_cast_uint16_to_int32_dtype_0, x = gather_383_cast_uint16)[name = string("cast_64")]; + tensor concat_417 = concat(axis = concat_417_axis_0, interleave = concat_417_interleave_0, values = (gather_382, gather_383_cast_uint16_to_int32, var_81))[name = string("concat_417")]; + tensor var_4079_cast_fp16 = transpose(perm = var_4079_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_32")]; + tensor input_169_cast_fp16 = reshape(shape = concat_417, x = var_4079_cast_fp16)[name = string("input_169_cast_fp16")]; + tensor model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58116672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58282624))))[name = string("model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_150_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized, x = input_169_cast_fp16)[name = string("linear_150_cast_fp16")]; + tensor hidden_states_649_cast_fp16 = add(x = hidden_states_629_cast_fp16, y = linear_150_cast_fp16)[name = string("hidden_states_649_cast_fp16")]; + fp16 var_76_promoted_43_to_fp16 = const()[name = string("op_76_promoted_43_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4088_cast_fp16 = pow(x = hidden_states_649_cast_fp16, y = var_76_promoted_43_to_fp16)[name = string("op_4088_cast_fp16")]; + tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; + bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; + tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_4088_cast_fp16)[name = string("variance_87_cast_fp16")]; + fp16 var_4091_to_fp16 = const()[name = string("op_4091_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4092_cast_fp16 = add(x = variance_87_cast_fp16, y = var_4091_to_fp16)[name = string("op_4092_cast_fp16")]; + fp32 var_4093_epsilon_0 = const()[name = string("op_4093_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4093_cast_fp16 = rsqrt(epsilon = var_4093_epsilon_0, x = var_4092_cast_fp16)[name = string("op_4093_cast_fp16")]; + tensor hidden_states_653_cast_fp16 = mul(x = hidden_states_649_cast_fp16, y = var_4093_cast_fp16)[name = string("hidden_states_653_cast_fp16")]; + tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58303424)))]; + tensor input_171_cast_fp16 = mul(x = model_model_layers_21_post_attention_layernorm_weight_to_fp16, y = hidden_states_653_cast_fp16)[name = string("input_171_cast_fp16")]; + tensor model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58304640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58747072))))[name = string("model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_151_cast_fp16")]; + tensor var_4105_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_4105_cast_fp16")]; + tensor model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(58802432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59244864))))[name = string("model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_152_cast_fp16")]; + tensor input_175_cast_fp16 = mul(x = var_4105_cast_fp16, y = linear_152_cast_fp16)[name = string("input_175_cast_fp16")]; + tensor model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59300224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59742656))))[name = string("model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_153_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized, x = input_175_cast_fp16)[name = string("linear_153_cast_fp16")]; + tensor hidden_states_659_cast_fp16 = add(x = hidden_states_649_cast_fp16, y = linear_153_cast_fp16)[name = string("hidden_states_659_cast_fp16")]; + fp16 var_76_promoted_44_to_fp16 = const()[name = string("op_76_promoted_44_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4118_cast_fp16 = pow(x = hidden_states_659_cast_fp16, y = var_76_promoted_44_to_fp16)[name = string("op_4118_cast_fp16")]; + tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; + bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; + tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_4118_cast_fp16)[name = string("variance_89_cast_fp16")]; + fp16 var_4121_to_fp16 = const()[name = string("op_4121_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4122_cast_fp16 = add(x = variance_89_cast_fp16, y = var_4121_to_fp16)[name = string("op_4122_cast_fp16")]; + fp32 var_4123_epsilon_0 = const()[name = string("op_4123_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4123_cast_fp16 = rsqrt(epsilon = var_4123_epsilon_0, x = var_4122_cast_fp16)[name = string("op_4123_cast_fp16")]; + tensor hidden_states_663_cast_fp16 = mul(x = hidden_states_659_cast_fp16, y = var_4123_cast_fp16)[name = string("hidden_states_663_cast_fp16")]; + tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59798016)))]; + tensor hidden_states_667_cast_fp16 = mul(x = model_model_layers_22_input_layernorm_weight_to_fp16, y = hidden_states_663_cast_fp16)[name = string("hidden_states_667_cast_fp16")]; + tensor var_4134_shape_cast_fp16 = shape(x = hidden_states_667_cast_fp16)[name = string("op_4134_shape_cast_fp16")]; + int32 gather_400 = const()[name = string("gather_400"), val = int32(1)]; + int32 gather_401_axis_0 = const()[name = string("gather_401_axis_0"), val = int32(0)]; + int32 gather_401_batch_dims_0 = const()[name = string("gather_401_batch_dims_0"), val = int32(0)]; + bool gather_401_validate_indices_0 = const()[name = string("gather_401_validate_indices_0"), val = bool(false)]; + string var_4134_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4134_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_401_to_uint16 = const()[name = string("select_401_to_uint16"), val = uint16(1)]; + tensor var_4134_shape_cast_fp16_to_uint16 = cast(dtype = var_4134_shape_cast_fp16_to_uint16_dtype_0, x = var_4134_shape_cast_fp16)[name = string("cast_63")]; + uint16 gather_401_cast_uint16 = gather(axis = gather_401_axis_0, batch_dims = gather_401_batch_dims_0, indices = select_401_to_uint16, validate_indices = gather_401_validate_indices_0, x = var_4134_shape_cast_fp16_to_uint16)[name = string("gather_401_cast_uint16")]; + string gather_401_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_401_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59799232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59965184))))[name = string("model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_154_cast_fp16")]; + tensor model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(59985984))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60041344))))[name = string("model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_155_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_155_cast_fp16")]; + tensor model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60048320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60103680))))[name = string("model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_156_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_156_cast_fp16")]; + tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 9, 64])]; + tensor var_4143_cast_fp16 = reshape(shape = concat_418x, x = linear_154_cast_fp16)[name = string("op_4143_cast_fp16")]; + tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 3, 64])]; + tensor var_4146_cast_fp16 = reshape(shape = concat_419x, x = linear_155_cast_fp16)[name = string("op_4146_cast_fp16")]; + tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_420x = const()[name = string("concat_420x"), val = tensor([1, -1, 3, 64])]; + tensor var_4149_cast_fp16 = reshape(shape = concat_420x, x = linear_156_cast_fp16)[name = string("op_4149_cast_fp16")]; + tensor v_state_45_perm_0 = const()[name = string("v_state_45_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_45_cast_fp16 = transpose(perm = q_45_perm_0, x = var_4143_cast_fp16)[name = string("transpose_31")]; + tensor var_4153_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4153_cast_fp16")]; + tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; + tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; + fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4164_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_4164_cast_fp16")]; + bool var_4166_interleave_0 = const()[name = string("op_4166_interleave_0"), val = bool(false)]; + tensor var_4166_cast_fp16 = concat(axis = var_81, interleave = var_4166_interleave_0, values = (var_4164_cast_fp16, x1_89_cast_fp16))[name = string("op_4166_cast_fp16")]; + tensor var_4167_cast_fp16 = mul(x = var_4166_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4167_cast_fp16")]; + tensor query_states_91_cast_fp16 = add(x = var_4153_cast_fp16, y = var_4167_cast_fp16)[name = string("query_states_91_cast_fp16")]; + tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = var_4146_cast_fp16)[name = string("transpose_30")]; + tensor var_4169_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4169_cast_fp16")]; + tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; + tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; + fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4180_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_4180_cast_fp16")]; + bool var_4182_interleave_0 = const()[name = string("op_4182_interleave_0"), val = bool(false)]; + tensor var_4182_cast_fp16 = concat(axis = var_81, interleave = var_4182_interleave_0, values = (var_4180_cast_fp16, x1_91_cast_fp16))[name = string("op_4182_cast_fp16")]; + tensor var_4183_cast_fp16 = mul(x = var_4182_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4183_cast_fp16")]; + tensor k_state_45_cast_fp16 = add(x = var_4169_cast_fp16, y = var_4183_cast_fp16)[name = string("k_state_45_cast_fp16")]; + tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([0])]; + tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; + tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; + tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([22])]; + int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; + bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; + tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, expand_dims_264, expand_dims_265, expand_dims_2, expand_dims_267))[name = string("concat_423")]; + tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_423, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = k_state_45_cast_fp16, x = coreml_update_state_102)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_104_write_state")]; + tensor coreml_update_state_104 = read_state(input = key_cache)[name = string("coreml_update_state_104")]; + tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_45_cast_fp16 = transpose(perm = v_state_45_perm_0, x = var_4149_cast_fp16)[name = string("transpose_29")]; + tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_423, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = v_state_45_cast_fp16, x = coreml_update_state_103)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_105_write_state")]; + tensor coreml_update_state_105 = read_state(input = value_cache)[name = string("coreml_update_state_105")]; + tensor var_4206_begin_0 = const()[name = string("op_4206_begin_0"), val = tensor([22, 0, 0, 0, 0])]; + tensor var_4206_end_0 = const()[name = string("op_4206_end_0"), val = tensor([23, 1, 3, 2048, 64])]; + tensor var_4206_end_mask_0 = const()[name = string("op_4206_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4206_squeeze_mask_0 = const()[name = string("op_4206_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4206_cast_fp16 = slice_by_index(begin = var_4206_begin_0, end = var_4206_end_0, end_mask = var_4206_end_mask_0, squeeze_mask = var_4206_squeeze_mask_0, x = coreml_update_state_104)[name = string("op_4206_cast_fp16")]; + tensor var_4209_begin_0 = const()[name = string("op_4209_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4209_end_mask_0 = const()[name = string("op_4209_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4209_cast_fp16 = slice_by_index(begin = var_4209_begin_0, end = concat_11, end_mask = var_4209_end_mask_0, x = var_4206_cast_fp16)[name = string("op_4209_cast_fp16")]; + tensor var_4211_begin_0 = const()[name = string("op_4211_begin_0"), val = tensor([22, 0, 0, 0, 0])]; + tensor var_4211_end_0 = const()[name = string("op_4211_end_0"), val = tensor([23, 1, 3, 2048, 64])]; + tensor var_4211_end_mask_0 = const()[name = string("op_4211_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4211_squeeze_mask_0 = const()[name = string("op_4211_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4211_cast_fp16 = slice_by_index(begin = var_4211_begin_0, end = var_4211_end_0, end_mask = var_4211_end_mask_0, squeeze_mask = var_4211_squeeze_mask_0, x = coreml_update_state_105)[name = string("op_4211_cast_fp16")]; + tensor var_4214_begin_0 = const()[name = string("op_4214_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4214_end_mask_0 = const()[name = string("op_4214_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4214_cast_fp16 = slice_by_index(begin = var_4214_begin_0, end = concat_11, end_mask = var_4214_end_mask_0, x = var_4211_cast_fp16)[name = string("op_4214_cast_fp16")]; + tensor var_4216_shape_cast_fp16 = shape(x = var_4209_cast_fp16)[name = string("op_4216_shape_cast_fp16")]; + int32 gather_409 = const()[name = string("gather_409"), val = int32(1)]; + int32 gather_410 = const()[name = string("gather_410"), val = int32(3)]; + int32 gather_411_axis_0 = const()[name = string("gather_411_axis_0"), val = int32(0)]; + int32 gather_411_batch_dims_0 = const()[name = string("gather_411_batch_dims_0"), val = int32(0)]; + bool gather_411_validate_indices_0 = const()[name = string("gather_411_validate_indices_0"), val = bool(false)]; + string var_4216_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4216_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_411_to_uint16 = const()[name = string("select_411_to_uint16"), val = uint16(2)]; + tensor var_4216_shape_cast_fp16_to_uint16 = cast(dtype = var_4216_shape_cast_fp16_to_uint16_dtype_0, x = var_4216_shape_cast_fp16)[name = string("cast_62")]; + uint16 gather_411_cast_uint16 = gather(axis = gather_411_axis_0, batch_dims = gather_411_batch_dims_0, indices = select_411_to_uint16, validate_indices = gather_411_validate_indices_0, x = var_4216_shape_cast_fp16_to_uint16)[name = string("gather_411_cast_uint16")]; + string gather_411_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_411_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_412 = const()[name = string("gather_412"), val = int32(64)]; + tensor var_4223_axes_0 = const()[name = string("op_4223_axes_0"), val = tensor([2])]; + tensor var_4223_cast_fp16 = expand_dims(axes = var_4223_axes_0, x = var_4209_cast_fp16)[name = string("op_4223_cast_fp16")]; + tensor shape_457_cast_fp16 = shape(x = var_4223_cast_fp16)[name = string("shape_457_cast_fp16")]; + int32 concat_431_axis_0 = const()[name = string("concat_431_axis_0"), val = int32(0)]; + bool concat_431_interleave_0 = const()[name = string("concat_431_interleave_0"), val = bool(false)]; + int32 gather_411_cast_uint16_to_int32 = cast(dtype = gather_411_cast_uint16_to_int32_dtype_0, x = gather_411_cast_uint16)[name = string("cast_61")]; + tensor concat_431 = concat(axis = concat_431_axis_0, interleave = concat_431_interleave_0, values = (gather_409, gather_410, var_85, gather_411_cast_uint16_to_int32, gather_412))[name = string("concat_431")]; + tensor real_div_44 = real_div(x = concat_431, y = shape_457_cast_fp16)[name = string("real_div_44")]; + tensor hidden_states_671_cast_fp16 = tile(reps = real_div_44, x = var_4223_cast_fp16)[name = string("hidden_states_671_cast_fp16")]; + tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_91_cast_fp16 = reshape(shape = concat_432x, x = hidden_states_671_cast_fp16)[name = string("key_states_91_cast_fp16")]; + tensor var_4233_shape_cast_fp16 = shape(x = var_4214_cast_fp16)[name = string("op_4233_shape_cast_fp16")]; + int32 gather_413 = const()[name = string("gather_413"), val = int32(1)]; + int32 gather_414 = const()[name = string("gather_414"), val = int32(3)]; + int32 gather_415_axis_0 = const()[name = string("gather_415_axis_0"), val = int32(0)]; + int32 gather_415_batch_dims_0 = const()[name = string("gather_415_batch_dims_0"), val = int32(0)]; + bool gather_415_validate_indices_0 = const()[name = string("gather_415_validate_indices_0"), val = bool(false)]; + string var_4233_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4233_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_415_to_uint16 = const()[name = string("select_415_to_uint16"), val = uint16(2)]; + tensor var_4233_shape_cast_fp16_to_uint16 = cast(dtype = var_4233_shape_cast_fp16_to_uint16_dtype_0, x = var_4233_shape_cast_fp16)[name = string("cast_60")]; + uint16 gather_415_cast_uint16 = gather(axis = gather_415_axis_0, batch_dims = gather_415_batch_dims_0, indices = select_415_to_uint16, validate_indices = gather_415_validate_indices_0, x = var_4233_shape_cast_fp16_to_uint16)[name = string("gather_415_cast_uint16")]; + string gather_415_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_415_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_416 = const()[name = string("gather_416"), val = int32(64)]; + tensor var_4240_axes_0 = const()[name = string("op_4240_axes_0"), val = tensor([2])]; + tensor var_4240_cast_fp16 = expand_dims(axes = var_4240_axes_0, x = var_4214_cast_fp16)[name = string("op_4240_cast_fp16")]; + tensor shape_462_cast_fp16 = shape(x = var_4240_cast_fp16)[name = string("shape_462_cast_fp16")]; + int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; + bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; + int32 gather_415_cast_uint16_to_int32 = cast(dtype = gather_415_cast_uint16_to_int32_dtype_0, x = gather_415_cast_uint16)[name = string("cast_59")]; + tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_413, gather_414, var_85, gather_415_cast_uint16_to_int32, gather_416))[name = string("concat_433")]; + tensor real_div_45 = real_div(x = concat_433, y = shape_462_cast_fp16)[name = string("real_div_45")]; + tensor hidden_states_675_cast_fp16 = tile(reps = real_div_45, x = var_4240_cast_fp16)[name = string("hidden_states_675_cast_fp16")]; + tensor concat_434x = const()[name = string("concat_434x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_91_cast_fp16 = reshape(shape = concat_434x, x = hidden_states_675_cast_fp16)[name = string("value_states_91_cast_fp16")]; + tensor var_4250_shape_cast_fp16 = shape(x = key_states_91_cast_fp16)[name = string("op_4250_shape_cast_fp16")]; + int32 gather_417_axis_0 = const()[name = string("gather_417_axis_0"), val = int32(0)]; + int32 gather_417_batch_dims_0 = const()[name = string("gather_417_batch_dims_0"), val = int32(0)]; + bool gather_417_validate_indices_0 = const()[name = string("gather_417_validate_indices_0"), val = bool(false)]; + string var_4250_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4250_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_417_to_uint16 = const()[name = string("select_417_to_uint16"), val = uint16(2)]; + tensor var_4250_shape_cast_fp16_to_uint16 = cast(dtype = var_4250_shape_cast_fp16_to_uint16_dtype_0, x = var_4250_shape_cast_fp16)[name = string("cast_58")]; + uint16 gather_417_cast_uint16 = gather(axis = gather_417_axis_0, batch_dims = gather_417_batch_dims_0, indices = select_417_to_uint16, validate_indices = gather_417_validate_indices_0, x = var_4250_shape_cast_fp16_to_uint16)[name = string("gather_417_cast_uint16")]; + string gather_417_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_417_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_435_values0_0 = const()[name = string("concat_435_values0_0"), val = int32(1)]; + int32 concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = int32(1)]; + int32 concat_435_values2_0 = const()[name = string("concat_435_values2_0"), val = int32(0)]; + int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; + bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; + int32 gather_417_cast_uint16_to_int32 = cast(dtype = gather_417_cast_uint16_to_int32_dtype_0, x = gather_417_cast_uint16)[name = string("cast_57")]; + tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (concat_435_values0_0, concat_435_values1_0, concat_435_values2_0, gather_417_cast_uint16_to_int32))[name = string("concat_435")]; + tensor causal_mask_47_begin_0 = const()[name = string("causal_mask_47_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_47_end_mask_0 = const()[name = string("causal_mask_47_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_47_cast_fp16 = slice_by_index(begin = causal_mask_47_begin_0, end = concat_435, end_mask = causal_mask_47_end_mask_0, x = causal_mask)[name = string("causal_mask_47_cast_fp16")]; + tensor attn_output_89_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_47_cast_fp16, key = key_states_91_cast_fp16, query = query_states_91_cast_fp16, value = value_states_91_cast_fp16)[name = string("attn_output_89_cast_fp16")]; + tensor var_4256_perm_0 = const()[name = string("op_4256_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_436_axis_0 = const()[name = string("concat_436_axis_0"), val = int32(0)]; + bool concat_436_interleave_0 = const()[name = string("concat_436_interleave_0"), val = bool(false)]; + int32 gather_401_cast_uint16_to_int32 = cast(dtype = gather_401_cast_uint16_to_int32_dtype_0, x = gather_401_cast_uint16)[name = string("cast_56")]; + tensor concat_436 = concat(axis = concat_436_axis_0, interleave = concat_436_interleave_0, values = (gather_400, gather_401_cast_uint16_to_int32, var_81))[name = string("concat_436")]; + tensor var_4256_cast_fp16 = transpose(perm = var_4256_perm_0, x = attn_output_89_cast_fp16)[name = string("transpose_28")]; + tensor input_177_cast_fp16 = reshape(shape = concat_436, x = var_4256_cast_fp16)[name = string("input_177_cast_fp16")]; + tensor model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60110656))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60276608))))[name = string("model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_157_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized, x = input_177_cast_fp16)[name = string("linear_157_cast_fp16")]; + tensor hidden_states_679_cast_fp16 = add(x = hidden_states_659_cast_fp16, y = linear_157_cast_fp16)[name = string("hidden_states_679_cast_fp16")]; + fp16 var_76_promoted_45_to_fp16 = const()[name = string("op_76_promoted_45_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4265_cast_fp16 = pow(x = hidden_states_679_cast_fp16, y = var_76_promoted_45_to_fp16)[name = string("op_4265_cast_fp16")]; + tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; + bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; + tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_4265_cast_fp16)[name = string("variance_91_cast_fp16")]; + fp16 var_4268_to_fp16 = const()[name = string("op_4268_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4269_cast_fp16 = add(x = variance_91_cast_fp16, y = var_4268_to_fp16)[name = string("op_4269_cast_fp16")]; + fp32 var_4270_epsilon_0 = const()[name = string("op_4270_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4270_cast_fp16 = rsqrt(epsilon = var_4270_epsilon_0, x = var_4269_cast_fp16)[name = string("op_4270_cast_fp16")]; + tensor hidden_states_683_cast_fp16 = mul(x = hidden_states_679_cast_fp16, y = var_4270_cast_fp16)[name = string("hidden_states_683_cast_fp16")]; + tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60297408)))]; + tensor input_179_cast_fp16 = mul(x = model_model_layers_22_post_attention_layernorm_weight_to_fp16, y = hidden_states_683_cast_fp16)[name = string("input_179_cast_fp16")]; + tensor model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60298624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60741056))))[name = string("model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_158_cast_fp16")]; + tensor var_4282_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_4282_cast_fp16")]; + tensor model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(60796416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61238848))))[name = string("model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_159_cast_fp16")]; + tensor input_183_cast_fp16 = mul(x = var_4282_cast_fp16, y = linear_159_cast_fp16)[name = string("input_183_cast_fp16")]; + tensor model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61294208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61736640))))[name = string("model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_160_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized, x = input_183_cast_fp16)[name = string("linear_160_cast_fp16")]; + tensor hidden_states_689_cast_fp16 = add(x = hidden_states_679_cast_fp16, y = linear_160_cast_fp16)[name = string("hidden_states_689_cast_fp16")]; + fp16 var_76_promoted_46_to_fp16 = const()[name = string("op_76_promoted_46_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4295_cast_fp16 = pow(x = hidden_states_689_cast_fp16, y = var_76_promoted_46_to_fp16)[name = string("op_4295_cast_fp16")]; + tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; + bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; + tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_4295_cast_fp16)[name = string("variance_93_cast_fp16")]; + fp16 var_4298_to_fp16 = const()[name = string("op_4298_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4299_cast_fp16 = add(x = variance_93_cast_fp16, y = var_4298_to_fp16)[name = string("op_4299_cast_fp16")]; + fp32 var_4300_epsilon_0 = const()[name = string("op_4300_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4300_cast_fp16 = rsqrt(epsilon = var_4300_epsilon_0, x = var_4299_cast_fp16)[name = string("op_4300_cast_fp16")]; + tensor hidden_states_693_cast_fp16 = mul(x = hidden_states_689_cast_fp16, y = var_4300_cast_fp16)[name = string("hidden_states_693_cast_fp16")]; + tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61792000)))]; + tensor hidden_states_697_cast_fp16 = mul(x = model_model_layers_23_input_layernorm_weight_to_fp16, y = hidden_states_693_cast_fp16)[name = string("hidden_states_697_cast_fp16")]; + tensor var_4311_shape_cast_fp16 = shape(x = hidden_states_697_cast_fp16)[name = string("op_4311_shape_cast_fp16")]; + int32 gather_418 = const()[name = string("gather_418"), val = int32(1)]; + int32 gather_419_axis_0 = const()[name = string("gather_419_axis_0"), val = int32(0)]; + int32 gather_419_batch_dims_0 = const()[name = string("gather_419_batch_dims_0"), val = int32(0)]; + bool gather_419_validate_indices_0 = const()[name = string("gather_419_validate_indices_0"), val = bool(false)]; + string var_4311_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4311_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_419_to_uint16 = const()[name = string("select_419_to_uint16"), val = uint16(1)]; + tensor var_4311_shape_cast_fp16_to_uint16 = cast(dtype = var_4311_shape_cast_fp16_to_uint16_dtype_0, x = var_4311_shape_cast_fp16)[name = string("cast_55")]; + uint16 gather_419_cast_uint16 = gather(axis = gather_419_axis_0, batch_dims = gather_419_batch_dims_0, indices = select_419_to_uint16, validate_indices = gather_419_validate_indices_0, x = var_4311_shape_cast_fp16_to_uint16)[name = string("gather_419_cast_uint16")]; + string gather_419_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_419_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61793216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61959168))))[name = string("model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_161_cast_fp16")]; + tensor model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(61979968))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62035328))))[name = string("model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_162_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_162_cast_fp16")]; + tensor model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62042304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62097664))))[name = string("model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_163_cast_fp16")]; + tensor concat_437x = const()[name = string("concat_437x"), val = tensor([1, -1, 9, 64])]; + tensor var_4320_cast_fp16 = reshape(shape = concat_437x, x = linear_161_cast_fp16)[name = string("op_4320_cast_fp16")]; + tensor q_47_perm_0 = const()[name = string("q_47_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_438x = const()[name = string("concat_438x"), val = tensor([1, -1, 3, 64])]; + tensor var_4323_cast_fp16 = reshape(shape = concat_438x, x = linear_162_cast_fp16)[name = string("op_4323_cast_fp16")]; + tensor k_47_perm_0 = const()[name = string("k_47_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_439x = const()[name = string("concat_439x"), val = tensor([1, -1, 3, 64])]; + tensor var_4326_cast_fp16 = reshape(shape = concat_439x, x = linear_163_cast_fp16)[name = string("op_4326_cast_fp16")]; + tensor v_state_47_perm_0 = const()[name = string("v_state_47_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_47_cast_fp16 = transpose(perm = q_47_perm_0, x = var_4320_cast_fp16)[name = string("transpose_27")]; + tensor var_4330_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4330_cast_fp16")]; + tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; + tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; + fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4341_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_4341_cast_fp16")]; + bool var_4343_interleave_0 = const()[name = string("op_4343_interleave_0"), val = bool(false)]; + tensor var_4343_cast_fp16 = concat(axis = var_81, interleave = var_4343_interleave_0, values = (var_4341_cast_fp16, x1_93_cast_fp16))[name = string("op_4343_cast_fp16")]; + tensor var_4344_cast_fp16 = mul(x = var_4343_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4344_cast_fp16")]; + tensor query_states_95_cast_fp16 = add(x = var_4330_cast_fp16, y = var_4344_cast_fp16)[name = string("query_states_95_cast_fp16")]; + tensor k_47_cast_fp16 = transpose(perm = k_47_perm_0, x = var_4323_cast_fp16)[name = string("transpose_26")]; + tensor var_4346_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4346_cast_fp16")]; + tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; + tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; + fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4357_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_4357_cast_fp16")]; + bool var_4359_interleave_0 = const()[name = string("op_4359_interleave_0"), val = bool(false)]; + tensor var_4359_cast_fp16 = concat(axis = var_81, interleave = var_4359_interleave_0, values = (var_4357_cast_fp16, x1_95_cast_fp16))[name = string("op_4359_cast_fp16")]; + tensor var_4360_cast_fp16 = mul(x = var_4359_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4360_cast_fp16")]; + tensor k_state_47_cast_fp16 = add(x = var_4346_cast_fp16, y = var_4360_cast_fp16)[name = string("k_state_47_cast_fp16")]; + tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([0])]; + tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; + tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; + tensor concat_442_values0_0 = const()[name = string("concat_442_values0_0"), val = tensor([23])]; + int32 concat_442_axis_0 = const()[name = string("concat_442_axis_0"), val = int32(0)]; + bool concat_442_interleave_0 = const()[name = string("concat_442_interleave_0"), val = bool(false)]; + tensor concat_442 = concat(axis = concat_442_axis_0, interleave = concat_442_interleave_0, values = (concat_442_values0_0, expand_dims_276, expand_dims_277, expand_dims_2, expand_dims_279))[name = string("concat_442")]; + tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_442, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = k_state_47_cast_fp16, x = coreml_update_state_104)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_106_write_state")]; + tensor coreml_update_state_106 = read_state(input = key_cache)[name = string("coreml_update_state_106")]; + tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_47_cast_fp16 = transpose(perm = v_state_47_perm_0, x = var_4326_cast_fp16)[name = string("transpose_25")]; + tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_442, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = v_state_47_cast_fp16, x = coreml_update_state_105)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_107_write_state")]; + tensor coreml_update_state_107 = read_state(input = value_cache)[name = string("coreml_update_state_107")]; + tensor var_4383_begin_0 = const()[name = string("op_4383_begin_0"), val = tensor([23, 0, 0, 0, 0])]; + tensor var_4383_end_0 = const()[name = string("op_4383_end_0"), val = tensor([24, 1, 3, 2048, 64])]; + tensor var_4383_end_mask_0 = const()[name = string("op_4383_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4383_squeeze_mask_0 = const()[name = string("op_4383_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4383_cast_fp16 = slice_by_index(begin = var_4383_begin_0, end = var_4383_end_0, end_mask = var_4383_end_mask_0, squeeze_mask = var_4383_squeeze_mask_0, x = coreml_update_state_106)[name = string("op_4383_cast_fp16")]; + tensor var_4386_begin_0 = const()[name = string("op_4386_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4386_end_mask_0 = const()[name = string("op_4386_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4386_cast_fp16 = slice_by_index(begin = var_4386_begin_0, end = concat_11, end_mask = var_4386_end_mask_0, x = var_4383_cast_fp16)[name = string("op_4386_cast_fp16")]; + tensor var_4388_begin_0 = const()[name = string("op_4388_begin_0"), val = tensor([23, 0, 0, 0, 0])]; + tensor var_4388_end_0 = const()[name = string("op_4388_end_0"), val = tensor([24, 1, 3, 2048, 64])]; + tensor var_4388_end_mask_0 = const()[name = string("op_4388_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4388_squeeze_mask_0 = const()[name = string("op_4388_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4388_cast_fp16 = slice_by_index(begin = var_4388_begin_0, end = var_4388_end_0, end_mask = var_4388_end_mask_0, squeeze_mask = var_4388_squeeze_mask_0, x = coreml_update_state_107)[name = string("op_4388_cast_fp16")]; + tensor var_4391_begin_0 = const()[name = string("op_4391_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4391_end_mask_0 = const()[name = string("op_4391_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4391_cast_fp16 = slice_by_index(begin = var_4391_begin_0, end = concat_11, end_mask = var_4391_end_mask_0, x = var_4388_cast_fp16)[name = string("op_4391_cast_fp16")]; + tensor var_4393_shape_cast_fp16 = shape(x = var_4386_cast_fp16)[name = string("op_4393_shape_cast_fp16")]; + int32 gather_427 = const()[name = string("gather_427"), val = int32(1)]; + int32 gather_428 = const()[name = string("gather_428"), val = int32(3)]; + int32 gather_429_axis_0 = const()[name = string("gather_429_axis_0"), val = int32(0)]; + int32 gather_429_batch_dims_0 = const()[name = string("gather_429_batch_dims_0"), val = int32(0)]; + bool gather_429_validate_indices_0 = const()[name = string("gather_429_validate_indices_0"), val = bool(false)]; + string var_4393_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4393_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_429_to_uint16 = const()[name = string("select_429_to_uint16"), val = uint16(2)]; + tensor var_4393_shape_cast_fp16_to_uint16 = cast(dtype = var_4393_shape_cast_fp16_to_uint16_dtype_0, x = var_4393_shape_cast_fp16)[name = string("cast_54")]; + uint16 gather_429_cast_uint16 = gather(axis = gather_429_axis_0, batch_dims = gather_429_batch_dims_0, indices = select_429_to_uint16, validate_indices = gather_429_validate_indices_0, x = var_4393_shape_cast_fp16_to_uint16)[name = string("gather_429_cast_uint16")]; + string gather_429_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_429_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_430 = const()[name = string("gather_430"), val = int32(64)]; + tensor var_4400_axes_0 = const()[name = string("op_4400_axes_0"), val = tensor([2])]; + tensor var_4400_cast_fp16 = expand_dims(axes = var_4400_axes_0, x = var_4386_cast_fp16)[name = string("op_4400_cast_fp16")]; + tensor shape_477_cast_fp16 = shape(x = var_4400_cast_fp16)[name = string("shape_477_cast_fp16")]; + int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; + bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; + int32 gather_429_cast_uint16_to_int32 = cast(dtype = gather_429_cast_uint16_to_int32_dtype_0, x = gather_429_cast_uint16)[name = string("cast_53")]; + tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (gather_427, gather_428, var_85, gather_429_cast_uint16_to_int32, gather_430))[name = string("concat_450")]; + tensor real_div_46 = real_div(x = concat_450, y = shape_477_cast_fp16)[name = string("real_div_46")]; + tensor hidden_states_701_cast_fp16 = tile(reps = real_div_46, x = var_4400_cast_fp16)[name = string("hidden_states_701_cast_fp16")]; + tensor concat_451x = const()[name = string("concat_451x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_95_cast_fp16 = reshape(shape = concat_451x, x = hidden_states_701_cast_fp16)[name = string("key_states_95_cast_fp16")]; + tensor var_4410_shape_cast_fp16 = shape(x = var_4391_cast_fp16)[name = string("op_4410_shape_cast_fp16")]; + int32 gather_431 = const()[name = string("gather_431"), val = int32(1)]; + int32 gather_432 = const()[name = string("gather_432"), val = int32(3)]; + int32 gather_433_axis_0 = const()[name = string("gather_433_axis_0"), val = int32(0)]; + int32 gather_433_batch_dims_0 = const()[name = string("gather_433_batch_dims_0"), val = int32(0)]; + bool gather_433_validate_indices_0 = const()[name = string("gather_433_validate_indices_0"), val = bool(false)]; + string var_4410_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4410_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_433_to_uint16 = const()[name = string("select_433_to_uint16"), val = uint16(2)]; + tensor var_4410_shape_cast_fp16_to_uint16 = cast(dtype = var_4410_shape_cast_fp16_to_uint16_dtype_0, x = var_4410_shape_cast_fp16)[name = string("cast_52")]; + uint16 gather_433_cast_uint16 = gather(axis = gather_433_axis_0, batch_dims = gather_433_batch_dims_0, indices = select_433_to_uint16, validate_indices = gather_433_validate_indices_0, x = var_4410_shape_cast_fp16_to_uint16)[name = string("gather_433_cast_uint16")]; + string gather_433_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_433_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_434 = const()[name = string("gather_434"), val = int32(64)]; + tensor var_4417_axes_0 = const()[name = string("op_4417_axes_0"), val = tensor([2])]; + tensor var_4417_cast_fp16 = expand_dims(axes = var_4417_axes_0, x = var_4391_cast_fp16)[name = string("op_4417_cast_fp16")]; + tensor shape_482_cast_fp16 = shape(x = var_4417_cast_fp16)[name = string("shape_482_cast_fp16")]; + int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; + bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; + int32 gather_433_cast_uint16_to_int32 = cast(dtype = gather_433_cast_uint16_to_int32_dtype_0, x = gather_433_cast_uint16)[name = string("cast_51")]; + tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (gather_431, gather_432, var_85, gather_433_cast_uint16_to_int32, gather_434))[name = string("concat_452")]; + tensor real_div_47 = real_div(x = concat_452, y = shape_482_cast_fp16)[name = string("real_div_47")]; + tensor hidden_states_705_cast_fp16 = tile(reps = real_div_47, x = var_4417_cast_fp16)[name = string("hidden_states_705_cast_fp16")]; + tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_95_cast_fp16 = reshape(shape = concat_453x, x = hidden_states_705_cast_fp16)[name = string("value_states_95_cast_fp16")]; + tensor var_4427_shape_cast_fp16 = shape(x = key_states_95_cast_fp16)[name = string("op_4427_shape_cast_fp16")]; + int32 gather_435_axis_0 = const()[name = string("gather_435_axis_0"), val = int32(0)]; + int32 gather_435_batch_dims_0 = const()[name = string("gather_435_batch_dims_0"), val = int32(0)]; + bool gather_435_validate_indices_0 = const()[name = string("gather_435_validate_indices_0"), val = bool(false)]; + string var_4427_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4427_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_435_to_uint16 = const()[name = string("select_435_to_uint16"), val = uint16(2)]; + tensor var_4427_shape_cast_fp16_to_uint16 = cast(dtype = var_4427_shape_cast_fp16_to_uint16_dtype_0, x = var_4427_shape_cast_fp16)[name = string("cast_50")]; + uint16 gather_435_cast_uint16 = gather(axis = gather_435_axis_0, batch_dims = gather_435_batch_dims_0, indices = select_435_to_uint16, validate_indices = gather_435_validate_indices_0, x = var_4427_shape_cast_fp16_to_uint16)[name = string("gather_435_cast_uint16")]; + string gather_435_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_435_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_454_values0_0 = const()[name = string("concat_454_values0_0"), val = int32(1)]; + int32 concat_454_values1_0 = const()[name = string("concat_454_values1_0"), val = int32(1)]; + int32 concat_454_values2_0 = const()[name = string("concat_454_values2_0"), val = int32(0)]; + int32 concat_454_axis_0 = const()[name = string("concat_454_axis_0"), val = int32(0)]; + bool concat_454_interleave_0 = const()[name = string("concat_454_interleave_0"), val = bool(false)]; + int32 gather_435_cast_uint16_to_int32 = cast(dtype = gather_435_cast_uint16_to_int32_dtype_0, x = gather_435_cast_uint16)[name = string("cast_49")]; + tensor concat_454 = concat(axis = concat_454_axis_0, interleave = concat_454_interleave_0, values = (concat_454_values0_0, concat_454_values1_0, concat_454_values2_0, gather_435_cast_uint16_to_int32))[name = string("concat_454")]; + tensor causal_mask_49_begin_0 = const()[name = string("causal_mask_49_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_49_end_mask_0 = const()[name = string("causal_mask_49_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_49_cast_fp16 = slice_by_index(begin = causal_mask_49_begin_0, end = concat_454, end_mask = causal_mask_49_end_mask_0, x = causal_mask)[name = string("causal_mask_49_cast_fp16")]; + tensor attn_output_93_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_49_cast_fp16, key = key_states_95_cast_fp16, query = query_states_95_cast_fp16, value = value_states_95_cast_fp16)[name = string("attn_output_93_cast_fp16")]; + tensor var_4433_perm_0 = const()[name = string("op_4433_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; + bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; + int32 gather_419_cast_uint16_to_int32 = cast(dtype = gather_419_cast_uint16_to_int32_dtype_0, x = gather_419_cast_uint16)[name = string("cast_48")]; + tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (gather_418, gather_419_cast_uint16_to_int32, var_81))[name = string("concat_455")]; + tensor var_4433_cast_fp16 = transpose(perm = var_4433_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_24")]; + tensor input_185_cast_fp16 = reshape(shape = concat_455, x = var_4433_cast_fp16)[name = string("input_185_cast_fp16")]; + tensor model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62104640))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62270592))))[name = string("model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_164_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized, x = input_185_cast_fp16)[name = string("linear_164_cast_fp16")]; + tensor hidden_states_709_cast_fp16 = add(x = hidden_states_689_cast_fp16, y = linear_164_cast_fp16)[name = string("hidden_states_709_cast_fp16")]; + fp16 var_76_promoted_47_to_fp16 = const()[name = string("op_76_promoted_47_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4442_cast_fp16 = pow(x = hidden_states_709_cast_fp16, y = var_76_promoted_47_to_fp16)[name = string("op_4442_cast_fp16")]; + tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; + bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; + tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_4442_cast_fp16)[name = string("variance_95_cast_fp16")]; + fp16 var_4445_to_fp16 = const()[name = string("op_4445_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4446_cast_fp16 = add(x = variance_95_cast_fp16, y = var_4445_to_fp16)[name = string("op_4446_cast_fp16")]; + fp32 var_4447_epsilon_0 = const()[name = string("op_4447_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4447_cast_fp16 = rsqrt(epsilon = var_4447_epsilon_0, x = var_4446_cast_fp16)[name = string("op_4447_cast_fp16")]; + tensor hidden_states_713_cast_fp16 = mul(x = hidden_states_709_cast_fp16, y = var_4447_cast_fp16)[name = string("hidden_states_713_cast_fp16")]; + tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62291392)))]; + tensor input_187_cast_fp16 = mul(x = model_model_layers_23_post_attention_layernorm_weight_to_fp16, y = hidden_states_713_cast_fp16)[name = string("input_187_cast_fp16")]; + tensor model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62292608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62735040))))[name = string("model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_165_cast_fp16")]; + tensor var_4459_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_4459_cast_fp16")]; + tensor model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(62790400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63232832))))[name = string("model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_166_cast_fp16")]; + tensor input_191_cast_fp16 = mul(x = var_4459_cast_fp16, y = linear_166_cast_fp16)[name = string("input_191_cast_fp16")]; + tensor model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63288192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63730624))))[name = string("model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_167_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized, x = input_191_cast_fp16)[name = string("linear_167_cast_fp16")]; + tensor hidden_states_719_cast_fp16 = add(x = hidden_states_709_cast_fp16, y = linear_167_cast_fp16)[name = string("hidden_states_719_cast_fp16")]; + fp16 var_76_promoted_48_to_fp16 = const()[name = string("op_76_promoted_48_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4472_cast_fp16 = pow(x = hidden_states_719_cast_fp16, y = var_76_promoted_48_to_fp16)[name = string("op_4472_cast_fp16")]; + tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([-1])]; + bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; + tensor variance_97_cast_fp16 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = var_4472_cast_fp16)[name = string("variance_97_cast_fp16")]; + fp16 var_4475_to_fp16 = const()[name = string("op_4475_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4476_cast_fp16 = add(x = variance_97_cast_fp16, y = var_4475_to_fp16)[name = string("op_4476_cast_fp16")]; + fp32 var_4477_epsilon_0 = const()[name = string("op_4477_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4477_cast_fp16 = rsqrt(epsilon = var_4477_epsilon_0, x = var_4476_cast_fp16)[name = string("op_4477_cast_fp16")]; + tensor hidden_states_723_cast_fp16 = mul(x = hidden_states_719_cast_fp16, y = var_4477_cast_fp16)[name = string("hidden_states_723_cast_fp16")]; + tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63785984)))]; + tensor hidden_states_727_cast_fp16 = mul(x = model_model_layers_24_input_layernorm_weight_to_fp16, y = hidden_states_723_cast_fp16)[name = string("hidden_states_727_cast_fp16")]; + tensor var_4488_shape_cast_fp16 = shape(x = hidden_states_727_cast_fp16)[name = string("op_4488_shape_cast_fp16")]; + int32 gather_436 = const()[name = string("gather_436"), val = int32(1)]; + int32 gather_437_axis_0 = const()[name = string("gather_437_axis_0"), val = int32(0)]; + int32 gather_437_batch_dims_0 = const()[name = string("gather_437_batch_dims_0"), val = int32(0)]; + bool gather_437_validate_indices_0 = const()[name = string("gather_437_validate_indices_0"), val = bool(false)]; + string var_4488_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4488_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_437_to_uint16 = const()[name = string("select_437_to_uint16"), val = uint16(1)]; + tensor var_4488_shape_cast_fp16_to_uint16 = cast(dtype = var_4488_shape_cast_fp16_to_uint16_dtype_0, x = var_4488_shape_cast_fp16)[name = string("cast_47")]; + uint16 gather_437_cast_uint16 = gather(axis = gather_437_axis_0, batch_dims = gather_437_batch_dims_0, indices = select_437_to_uint16, validate_indices = gather_437_validate_indices_0, x = var_4488_shape_cast_fp16_to_uint16)[name = string("gather_437_cast_uint16")]; + string gather_437_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_437_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_24_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63787200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63953152))))[name = string("model_model_layers_24_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_168_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_727_cast_fp16)[name = string("linear_168_cast_fp16")]; + tensor model_model_layers_24_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(63973952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64029312))))[name = string("model_model_layers_24_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_727_cast_fp16)[name = string("linear_169_cast_fp16")]; + tensor model_model_layers_24_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64036288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64091648))))[name = string("model_model_layers_24_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_727_cast_fp16)[name = string("linear_170_cast_fp16")]; + tensor concat_456x = const()[name = string("concat_456x"), val = tensor([1, -1, 9, 64])]; + tensor var_4497_cast_fp16 = reshape(shape = concat_456x, x = linear_168_cast_fp16)[name = string("op_4497_cast_fp16")]; + tensor q_49_perm_0 = const()[name = string("q_49_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 3, 64])]; + tensor var_4500_cast_fp16 = reshape(shape = concat_457x, x = linear_169_cast_fp16)[name = string("op_4500_cast_fp16")]; + tensor k_49_perm_0 = const()[name = string("k_49_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_458x = const()[name = string("concat_458x"), val = tensor([1, -1, 3, 64])]; + tensor var_4503_cast_fp16 = reshape(shape = concat_458x, x = linear_170_cast_fp16)[name = string("op_4503_cast_fp16")]; + tensor v_state_49_perm_0 = const()[name = string("v_state_49_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_49_cast_fp16 = transpose(perm = q_49_perm_0, x = var_4497_cast_fp16)[name = string("transpose_23")]; + tensor var_4507_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4507_cast_fp16")]; + tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; + tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; + fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4518_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_4518_cast_fp16")]; + bool var_4520_interleave_0 = const()[name = string("op_4520_interleave_0"), val = bool(false)]; + tensor var_4520_cast_fp16 = concat(axis = var_81, interleave = var_4520_interleave_0, values = (var_4518_cast_fp16, x1_97_cast_fp16))[name = string("op_4520_cast_fp16")]; + tensor var_4521_cast_fp16 = mul(x = var_4520_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4521_cast_fp16")]; + tensor query_states_99_cast_fp16 = add(x = var_4507_cast_fp16, y = var_4521_cast_fp16)[name = string("query_states_99_cast_fp16")]; + tensor k_49_cast_fp16 = transpose(perm = k_49_perm_0, x = var_4500_cast_fp16)[name = string("transpose_22")]; + tensor var_4523_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4523_cast_fp16")]; + tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; + tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; + fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4534_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_4534_cast_fp16")]; + bool var_4536_interleave_0 = const()[name = string("op_4536_interleave_0"), val = bool(false)]; + tensor var_4536_cast_fp16 = concat(axis = var_81, interleave = var_4536_interleave_0, values = (var_4534_cast_fp16, x1_99_cast_fp16))[name = string("op_4536_cast_fp16")]; + tensor var_4537_cast_fp16 = mul(x = var_4536_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4537_cast_fp16")]; + tensor k_state_49_cast_fp16 = add(x = var_4523_cast_fp16, y = var_4537_cast_fp16)[name = string("k_state_49_cast_fp16")]; + tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; + tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; + tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; + tensor concat_461_values0_0 = const()[name = string("concat_461_values0_0"), val = tensor([24])]; + int32 concat_461_axis_0 = const()[name = string("concat_461_axis_0"), val = int32(0)]; + bool concat_461_interleave_0 = const()[name = string("concat_461_interleave_0"), val = bool(false)]; + tensor concat_461 = concat(axis = concat_461_axis_0, interleave = concat_461_interleave_0, values = (concat_461_values0_0, expand_dims_288, expand_dims_289, expand_dims_2, expand_dims_291))[name = string("concat_461")]; + tensor key_cache_internal_tensor_assign_25_stride_0 = const()[name = string("key_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_461, begin_mask = key_cache_internal_tensor_assign_25_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_25_squeeze_mask_0, stride = key_cache_internal_tensor_assign_25_stride_0, update = k_state_49_cast_fp16, x = coreml_update_state_106)[name = string("key_cache_internal_tensor_assign_25_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_25_cast_fp16, input = key_cache)[name = string("coreml_update_state_108_write_state")]; + tensor coreml_update_state_108 = read_state(input = key_cache)[name = string("coreml_update_state_108")]; + tensor value_cache_internal_tensor_assign_25_stride_0 = const()[name = string("value_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_49_cast_fp16 = transpose(perm = v_state_49_perm_0, x = var_4503_cast_fp16)[name = string("transpose_21")]; + tensor value_cache_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_461, begin_mask = value_cache_internal_tensor_assign_25_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_25_squeeze_mask_0, stride = value_cache_internal_tensor_assign_25_stride_0, update = v_state_49_cast_fp16, x = coreml_update_state_107)[name = string("value_cache_internal_tensor_assign_25_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_25_cast_fp16, input = value_cache)[name = string("coreml_update_state_109_write_state")]; + tensor coreml_update_state_109 = read_state(input = value_cache)[name = string("coreml_update_state_109")]; + tensor var_4560_begin_0 = const()[name = string("op_4560_begin_0"), val = tensor([24, 0, 0, 0, 0])]; + tensor var_4560_end_0 = const()[name = string("op_4560_end_0"), val = tensor([25, 1, 3, 2048, 64])]; + tensor var_4560_end_mask_0 = const()[name = string("op_4560_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4560_squeeze_mask_0 = const()[name = string("op_4560_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4560_cast_fp16 = slice_by_index(begin = var_4560_begin_0, end = var_4560_end_0, end_mask = var_4560_end_mask_0, squeeze_mask = var_4560_squeeze_mask_0, x = coreml_update_state_108)[name = string("op_4560_cast_fp16")]; + tensor var_4563_begin_0 = const()[name = string("op_4563_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4563_end_mask_0 = const()[name = string("op_4563_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4563_cast_fp16 = slice_by_index(begin = var_4563_begin_0, end = concat_11, end_mask = var_4563_end_mask_0, x = var_4560_cast_fp16)[name = string("op_4563_cast_fp16")]; + tensor var_4565_begin_0 = const()[name = string("op_4565_begin_0"), val = tensor([24, 0, 0, 0, 0])]; + tensor var_4565_end_0 = const()[name = string("op_4565_end_0"), val = tensor([25, 1, 3, 2048, 64])]; + tensor var_4565_end_mask_0 = const()[name = string("op_4565_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4565_squeeze_mask_0 = const()[name = string("op_4565_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4565_cast_fp16 = slice_by_index(begin = var_4565_begin_0, end = var_4565_end_0, end_mask = var_4565_end_mask_0, squeeze_mask = var_4565_squeeze_mask_0, x = coreml_update_state_109)[name = string("op_4565_cast_fp16")]; + tensor var_4568_begin_0 = const()[name = string("op_4568_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4568_end_mask_0 = const()[name = string("op_4568_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4568_cast_fp16 = slice_by_index(begin = var_4568_begin_0, end = concat_11, end_mask = var_4568_end_mask_0, x = var_4565_cast_fp16)[name = string("op_4568_cast_fp16")]; + tensor var_4570_shape_cast_fp16 = shape(x = var_4563_cast_fp16)[name = string("op_4570_shape_cast_fp16")]; + int32 gather_445 = const()[name = string("gather_445"), val = int32(1)]; + int32 gather_446 = const()[name = string("gather_446"), val = int32(3)]; + int32 gather_447_axis_0 = const()[name = string("gather_447_axis_0"), val = int32(0)]; + int32 gather_447_batch_dims_0 = const()[name = string("gather_447_batch_dims_0"), val = int32(0)]; + bool gather_447_validate_indices_0 = const()[name = string("gather_447_validate_indices_0"), val = bool(false)]; + string var_4570_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4570_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_447_to_uint16 = const()[name = string("select_447_to_uint16"), val = uint16(2)]; + tensor var_4570_shape_cast_fp16_to_uint16 = cast(dtype = var_4570_shape_cast_fp16_to_uint16_dtype_0, x = var_4570_shape_cast_fp16)[name = string("cast_46")]; + uint16 gather_447_cast_uint16 = gather(axis = gather_447_axis_0, batch_dims = gather_447_batch_dims_0, indices = select_447_to_uint16, validate_indices = gather_447_validate_indices_0, x = var_4570_shape_cast_fp16_to_uint16)[name = string("gather_447_cast_uint16")]; + string gather_447_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_447_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_448 = const()[name = string("gather_448"), val = int32(64)]; + tensor var_4577_axes_0 = const()[name = string("op_4577_axes_0"), val = tensor([2])]; + tensor var_4577_cast_fp16 = expand_dims(axes = var_4577_axes_0, x = var_4563_cast_fp16)[name = string("op_4577_cast_fp16")]; + tensor shape_497_cast_fp16 = shape(x = var_4577_cast_fp16)[name = string("shape_497_cast_fp16")]; + int32 concat_469_axis_0 = const()[name = string("concat_469_axis_0"), val = int32(0)]; + bool concat_469_interleave_0 = const()[name = string("concat_469_interleave_0"), val = bool(false)]; + int32 gather_447_cast_uint16_to_int32 = cast(dtype = gather_447_cast_uint16_to_int32_dtype_0, x = gather_447_cast_uint16)[name = string("cast_45")]; + tensor concat_469 = concat(axis = concat_469_axis_0, interleave = concat_469_interleave_0, values = (gather_445, gather_446, var_85, gather_447_cast_uint16_to_int32, gather_448))[name = string("concat_469")]; + tensor real_div_48 = real_div(x = concat_469, y = shape_497_cast_fp16)[name = string("real_div_48")]; + tensor hidden_states_731_cast_fp16 = tile(reps = real_div_48, x = var_4577_cast_fp16)[name = string("hidden_states_731_cast_fp16")]; + tensor concat_470x = const()[name = string("concat_470x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_99_cast_fp16 = reshape(shape = concat_470x, x = hidden_states_731_cast_fp16)[name = string("key_states_99_cast_fp16")]; + tensor var_4587_shape_cast_fp16 = shape(x = var_4568_cast_fp16)[name = string("op_4587_shape_cast_fp16")]; + int32 gather_449 = const()[name = string("gather_449"), val = int32(1)]; + int32 gather_450 = const()[name = string("gather_450"), val = int32(3)]; + int32 gather_451_axis_0 = const()[name = string("gather_451_axis_0"), val = int32(0)]; + int32 gather_451_batch_dims_0 = const()[name = string("gather_451_batch_dims_0"), val = int32(0)]; + bool gather_451_validate_indices_0 = const()[name = string("gather_451_validate_indices_0"), val = bool(false)]; + string var_4587_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4587_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_451_to_uint16 = const()[name = string("select_451_to_uint16"), val = uint16(2)]; + tensor var_4587_shape_cast_fp16_to_uint16 = cast(dtype = var_4587_shape_cast_fp16_to_uint16_dtype_0, x = var_4587_shape_cast_fp16)[name = string("cast_44")]; + uint16 gather_451_cast_uint16 = gather(axis = gather_451_axis_0, batch_dims = gather_451_batch_dims_0, indices = select_451_to_uint16, validate_indices = gather_451_validate_indices_0, x = var_4587_shape_cast_fp16_to_uint16)[name = string("gather_451_cast_uint16")]; + string gather_451_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_451_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_452 = const()[name = string("gather_452"), val = int32(64)]; + tensor var_4594_axes_0 = const()[name = string("op_4594_axes_0"), val = tensor([2])]; + tensor var_4594_cast_fp16 = expand_dims(axes = var_4594_axes_0, x = var_4568_cast_fp16)[name = string("op_4594_cast_fp16")]; + tensor shape_502_cast_fp16 = shape(x = var_4594_cast_fp16)[name = string("shape_502_cast_fp16")]; + int32 concat_471_axis_0 = const()[name = string("concat_471_axis_0"), val = int32(0)]; + bool concat_471_interleave_0 = const()[name = string("concat_471_interleave_0"), val = bool(false)]; + int32 gather_451_cast_uint16_to_int32 = cast(dtype = gather_451_cast_uint16_to_int32_dtype_0, x = gather_451_cast_uint16)[name = string("cast_43")]; + tensor concat_471 = concat(axis = concat_471_axis_0, interleave = concat_471_interleave_0, values = (gather_449, gather_450, var_85, gather_451_cast_uint16_to_int32, gather_452))[name = string("concat_471")]; + tensor real_div_49 = real_div(x = concat_471, y = shape_502_cast_fp16)[name = string("real_div_49")]; + tensor hidden_states_735_cast_fp16 = tile(reps = real_div_49, x = var_4594_cast_fp16)[name = string("hidden_states_735_cast_fp16")]; + tensor concat_472x = const()[name = string("concat_472x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_99_cast_fp16 = reshape(shape = concat_472x, x = hidden_states_735_cast_fp16)[name = string("value_states_99_cast_fp16")]; + tensor var_4604_shape_cast_fp16 = shape(x = key_states_99_cast_fp16)[name = string("op_4604_shape_cast_fp16")]; + int32 gather_453_axis_0 = const()[name = string("gather_453_axis_0"), val = int32(0)]; + int32 gather_453_batch_dims_0 = const()[name = string("gather_453_batch_dims_0"), val = int32(0)]; + bool gather_453_validate_indices_0 = const()[name = string("gather_453_validate_indices_0"), val = bool(false)]; + string var_4604_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4604_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_453_to_uint16 = const()[name = string("select_453_to_uint16"), val = uint16(2)]; + tensor var_4604_shape_cast_fp16_to_uint16 = cast(dtype = var_4604_shape_cast_fp16_to_uint16_dtype_0, x = var_4604_shape_cast_fp16)[name = string("cast_42")]; + uint16 gather_453_cast_uint16 = gather(axis = gather_453_axis_0, batch_dims = gather_453_batch_dims_0, indices = select_453_to_uint16, validate_indices = gather_453_validate_indices_0, x = var_4604_shape_cast_fp16_to_uint16)[name = string("gather_453_cast_uint16")]; + string gather_453_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_453_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_473_values0_0 = const()[name = string("concat_473_values0_0"), val = int32(1)]; + int32 concat_473_values1_0 = const()[name = string("concat_473_values1_0"), val = int32(1)]; + int32 concat_473_values2_0 = const()[name = string("concat_473_values2_0"), val = int32(0)]; + int32 concat_473_axis_0 = const()[name = string("concat_473_axis_0"), val = int32(0)]; + bool concat_473_interleave_0 = const()[name = string("concat_473_interleave_0"), val = bool(false)]; + int32 gather_453_cast_uint16_to_int32 = cast(dtype = gather_453_cast_uint16_to_int32_dtype_0, x = gather_453_cast_uint16)[name = string("cast_41")]; + tensor concat_473 = concat(axis = concat_473_axis_0, interleave = concat_473_interleave_0, values = (concat_473_values0_0, concat_473_values1_0, concat_473_values2_0, gather_453_cast_uint16_to_int32))[name = string("concat_473")]; + tensor causal_mask_51_begin_0 = const()[name = string("causal_mask_51_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_51_end_mask_0 = const()[name = string("causal_mask_51_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_51_cast_fp16 = slice_by_index(begin = causal_mask_51_begin_0, end = concat_473, end_mask = causal_mask_51_end_mask_0, x = causal_mask)[name = string("causal_mask_51_cast_fp16")]; + tensor attn_output_97_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_51_cast_fp16, key = key_states_99_cast_fp16, query = query_states_99_cast_fp16, value = value_states_99_cast_fp16)[name = string("attn_output_97_cast_fp16")]; + tensor var_4610_perm_0 = const()[name = string("op_4610_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_474_axis_0 = const()[name = string("concat_474_axis_0"), val = int32(0)]; + bool concat_474_interleave_0 = const()[name = string("concat_474_interleave_0"), val = bool(false)]; + int32 gather_437_cast_uint16_to_int32 = cast(dtype = gather_437_cast_uint16_to_int32_dtype_0, x = gather_437_cast_uint16)[name = string("cast_40")]; + tensor concat_474 = concat(axis = concat_474_axis_0, interleave = concat_474_interleave_0, values = (gather_436, gather_437_cast_uint16_to_int32, var_81))[name = string("concat_474")]; + tensor var_4610_cast_fp16 = transpose(perm = var_4610_perm_0, x = attn_output_97_cast_fp16)[name = string("transpose_20")]; + tensor input_193_cast_fp16 = reshape(shape = concat_474, x = var_4610_cast_fp16)[name = string("input_193_cast_fp16")]; + tensor model_model_layers_24_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64098624))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64264576))))[name = string("model_model_layers_24_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_171_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_self_attn_o_proj_weight_to_fp16_quantized, x = input_193_cast_fp16)[name = string("linear_171_cast_fp16")]; + tensor hidden_states_739_cast_fp16 = add(x = hidden_states_719_cast_fp16, y = linear_171_cast_fp16)[name = string("hidden_states_739_cast_fp16")]; + fp16 var_76_promoted_49_to_fp16 = const()[name = string("op_76_promoted_49_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4619_cast_fp16 = pow(x = hidden_states_739_cast_fp16, y = var_76_promoted_49_to_fp16)[name = string("op_4619_cast_fp16")]; + tensor variance_99_axes_0 = const()[name = string("variance_99_axes_0"), val = tensor([-1])]; + bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; + tensor variance_99_cast_fp16 = reduce_mean(axes = variance_99_axes_0, keep_dims = variance_99_keep_dims_0, x = var_4619_cast_fp16)[name = string("variance_99_cast_fp16")]; + fp16 var_4622_to_fp16 = const()[name = string("op_4622_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4623_cast_fp16 = add(x = variance_99_cast_fp16, y = var_4622_to_fp16)[name = string("op_4623_cast_fp16")]; + fp32 var_4624_epsilon_0 = const()[name = string("op_4624_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4624_cast_fp16 = rsqrt(epsilon = var_4624_epsilon_0, x = var_4623_cast_fp16)[name = string("op_4624_cast_fp16")]; + tensor hidden_states_743_cast_fp16 = mul(x = hidden_states_739_cast_fp16, y = var_4624_cast_fp16)[name = string("hidden_states_743_cast_fp16")]; + tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64285376)))]; + tensor input_195_cast_fp16 = mul(x = model_model_layers_24_post_attention_layernorm_weight_to_fp16, y = hidden_states_743_cast_fp16)[name = string("input_195_cast_fp16")]; + tensor model_model_layers_24_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64286592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64729024))))[name = string("model_model_layers_24_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_172_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_24_mlp_gate_proj_weight_to_fp16_quantized, x = input_195_cast_fp16)[name = string("linear_172_cast_fp16")]; + tensor var_4636_cast_fp16 = silu(x = linear_172_cast_fp16)[name = string("op_4636_cast_fp16")]; + tensor model_model_layers_24_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64784384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65226816))))[name = string("model_model_layers_24_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_173_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_24_mlp_up_proj_weight_to_fp16_quantized, x = input_195_cast_fp16)[name = string("linear_173_cast_fp16")]; + tensor input_199_cast_fp16 = mul(x = var_4636_cast_fp16, y = linear_173_cast_fp16)[name = string("input_199_cast_fp16")]; + tensor model_model_layers_24_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65282176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65724608))))[name = string("model_model_layers_24_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_174_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_mlp_down_proj_weight_to_fp16_quantized, x = input_199_cast_fp16)[name = string("linear_174_cast_fp16")]; + tensor hidden_states_749_cast_fp16 = add(x = hidden_states_739_cast_fp16, y = linear_174_cast_fp16)[name = string("hidden_states_749_cast_fp16")]; + fp16 var_76_promoted_50_to_fp16 = const()[name = string("op_76_promoted_50_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4649_cast_fp16 = pow(x = hidden_states_749_cast_fp16, y = var_76_promoted_50_to_fp16)[name = string("op_4649_cast_fp16")]; + tensor variance_101_axes_0 = const()[name = string("variance_101_axes_0"), val = tensor([-1])]; + bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; + tensor variance_101_cast_fp16 = reduce_mean(axes = variance_101_axes_0, keep_dims = variance_101_keep_dims_0, x = var_4649_cast_fp16)[name = string("variance_101_cast_fp16")]; + fp16 var_4652_to_fp16 = const()[name = string("op_4652_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4653_cast_fp16 = add(x = variance_101_cast_fp16, y = var_4652_to_fp16)[name = string("op_4653_cast_fp16")]; + fp32 var_4654_epsilon_0 = const()[name = string("op_4654_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4654_cast_fp16 = rsqrt(epsilon = var_4654_epsilon_0, x = var_4653_cast_fp16)[name = string("op_4654_cast_fp16")]; + tensor hidden_states_753_cast_fp16 = mul(x = hidden_states_749_cast_fp16, y = var_4654_cast_fp16)[name = string("hidden_states_753_cast_fp16")]; + tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65779968)))]; + tensor hidden_states_757_cast_fp16 = mul(x = model_model_layers_25_input_layernorm_weight_to_fp16, y = hidden_states_753_cast_fp16)[name = string("hidden_states_757_cast_fp16")]; + tensor var_4665_shape_cast_fp16 = shape(x = hidden_states_757_cast_fp16)[name = string("op_4665_shape_cast_fp16")]; + int32 gather_454 = const()[name = string("gather_454"), val = int32(1)]; + int32 gather_455_axis_0 = const()[name = string("gather_455_axis_0"), val = int32(0)]; + int32 gather_455_batch_dims_0 = const()[name = string("gather_455_batch_dims_0"), val = int32(0)]; + bool gather_455_validate_indices_0 = const()[name = string("gather_455_validate_indices_0"), val = bool(false)]; + string var_4665_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4665_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_455_to_uint16 = const()[name = string("select_455_to_uint16"), val = uint16(1)]; + tensor var_4665_shape_cast_fp16_to_uint16 = cast(dtype = var_4665_shape_cast_fp16_to_uint16_dtype_0, x = var_4665_shape_cast_fp16)[name = string("cast_39")]; + uint16 gather_455_cast_uint16 = gather(axis = gather_455_axis_0, batch_dims = gather_455_batch_dims_0, indices = select_455_to_uint16, validate_indices = gather_455_validate_indices_0, x = var_4665_shape_cast_fp16_to_uint16)[name = string("gather_455_cast_uint16")]; + string gather_455_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_455_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_25_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65781184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65947136))))[name = string("model_model_layers_25_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_175_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_757_cast_fp16)[name = string("linear_175_cast_fp16")]; + tensor model_model_layers_25_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(65967936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66023296))))[name = string("model_model_layers_25_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_176_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_757_cast_fp16)[name = string("linear_176_cast_fp16")]; + tensor model_model_layers_25_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66030272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66085632))))[name = string("model_model_layers_25_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_757_cast_fp16)[name = string("linear_177_cast_fp16")]; + tensor concat_475x = const()[name = string("concat_475x"), val = tensor([1, -1, 9, 64])]; + tensor var_4674_cast_fp16 = reshape(shape = concat_475x, x = linear_175_cast_fp16)[name = string("op_4674_cast_fp16")]; + tensor q_51_perm_0 = const()[name = string("q_51_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 3, 64])]; + tensor var_4677_cast_fp16 = reshape(shape = concat_476x, x = linear_176_cast_fp16)[name = string("op_4677_cast_fp16")]; + tensor k_51_perm_0 = const()[name = string("k_51_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_477x = const()[name = string("concat_477x"), val = tensor([1, -1, 3, 64])]; + tensor var_4680_cast_fp16 = reshape(shape = concat_477x, x = linear_177_cast_fp16)[name = string("op_4680_cast_fp16")]; + tensor v_state_51_perm_0 = const()[name = string("v_state_51_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_51_cast_fp16 = transpose(perm = q_51_perm_0, x = var_4674_cast_fp16)[name = string("transpose_19")]; + tensor var_4684_cast_fp16 = mul(x = q_51_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4684_cast_fp16")]; + tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_51_cast_fp16)[name = string("x1_101_cast_fp16")]; + tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_51_cast_fp16)[name = string("x2_101_cast_fp16")]; + fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4695_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_4695_cast_fp16")]; + bool var_4697_interleave_0 = const()[name = string("op_4697_interleave_0"), val = bool(false)]; + tensor var_4697_cast_fp16 = concat(axis = var_81, interleave = var_4697_interleave_0, values = (var_4695_cast_fp16, x1_101_cast_fp16))[name = string("op_4697_cast_fp16")]; + tensor var_4698_cast_fp16 = mul(x = var_4697_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4698_cast_fp16")]; + tensor query_states_103_cast_fp16 = add(x = var_4684_cast_fp16, y = var_4698_cast_fp16)[name = string("query_states_103_cast_fp16")]; + tensor k_51_cast_fp16 = transpose(perm = k_51_perm_0, x = var_4677_cast_fp16)[name = string("transpose_18")]; + tensor var_4700_cast_fp16 = mul(x = k_51_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4700_cast_fp16")]; + tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_51_cast_fp16)[name = string("x1_103_cast_fp16")]; + tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_51_cast_fp16)[name = string("x2_103_cast_fp16")]; + fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4711_cast_fp16 = mul(x = x2_103_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_4711_cast_fp16")]; + bool var_4713_interleave_0 = const()[name = string("op_4713_interleave_0"), val = bool(false)]; + tensor var_4713_cast_fp16 = concat(axis = var_81, interleave = var_4713_interleave_0, values = (var_4711_cast_fp16, x1_103_cast_fp16))[name = string("op_4713_cast_fp16")]; + tensor var_4714_cast_fp16 = mul(x = var_4713_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4714_cast_fp16")]; + tensor k_state_51_cast_fp16 = add(x = var_4700_cast_fp16, y = var_4714_cast_fp16)[name = string("k_state_51_cast_fp16")]; + tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([0])]; + tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; + tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; + tensor concat_480_values0_0 = const()[name = string("concat_480_values0_0"), val = tensor([25])]; + int32 concat_480_axis_0 = const()[name = string("concat_480_axis_0"), val = int32(0)]; + bool concat_480_interleave_0 = const()[name = string("concat_480_interleave_0"), val = bool(false)]; + tensor concat_480 = concat(axis = concat_480_axis_0, interleave = concat_480_interleave_0, values = (concat_480_values0_0, expand_dims_300, expand_dims_301, expand_dims_2, expand_dims_303))[name = string("concat_480")]; + tensor key_cache_internal_tensor_assign_26_stride_0 = const()[name = string("key_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_480, begin_mask = key_cache_internal_tensor_assign_26_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_26_squeeze_mask_0, stride = key_cache_internal_tensor_assign_26_stride_0, update = k_state_51_cast_fp16, x = coreml_update_state_108)[name = string("key_cache_internal_tensor_assign_26_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_26_cast_fp16, input = key_cache)[name = string("coreml_update_state_110_write_state")]; + tensor coreml_update_state_110 = read_state(input = key_cache)[name = string("coreml_update_state_110")]; + tensor value_cache_internal_tensor_assign_26_stride_0 = const()[name = string("value_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_51_cast_fp16 = transpose(perm = v_state_51_perm_0, x = var_4680_cast_fp16)[name = string("transpose_17")]; + tensor value_cache_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_480, begin_mask = value_cache_internal_tensor_assign_26_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_26_squeeze_mask_0, stride = value_cache_internal_tensor_assign_26_stride_0, update = v_state_51_cast_fp16, x = coreml_update_state_109)[name = string("value_cache_internal_tensor_assign_26_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_26_cast_fp16, input = value_cache)[name = string("coreml_update_state_111_write_state")]; + tensor coreml_update_state_111 = read_state(input = value_cache)[name = string("coreml_update_state_111")]; + tensor var_4737_begin_0 = const()[name = string("op_4737_begin_0"), val = tensor([25, 0, 0, 0, 0])]; + tensor var_4737_end_0 = const()[name = string("op_4737_end_0"), val = tensor([26, 1, 3, 2048, 64])]; + tensor var_4737_end_mask_0 = const()[name = string("op_4737_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4737_squeeze_mask_0 = const()[name = string("op_4737_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4737_cast_fp16 = slice_by_index(begin = var_4737_begin_0, end = var_4737_end_0, end_mask = var_4737_end_mask_0, squeeze_mask = var_4737_squeeze_mask_0, x = coreml_update_state_110)[name = string("op_4737_cast_fp16")]; + tensor var_4740_begin_0 = const()[name = string("op_4740_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4740_end_mask_0 = const()[name = string("op_4740_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4740_cast_fp16 = slice_by_index(begin = var_4740_begin_0, end = concat_11, end_mask = var_4740_end_mask_0, x = var_4737_cast_fp16)[name = string("op_4740_cast_fp16")]; + tensor var_4742_begin_0 = const()[name = string("op_4742_begin_0"), val = tensor([25, 0, 0, 0, 0])]; + tensor var_4742_end_0 = const()[name = string("op_4742_end_0"), val = tensor([26, 1, 3, 2048, 64])]; + tensor var_4742_end_mask_0 = const()[name = string("op_4742_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4742_squeeze_mask_0 = const()[name = string("op_4742_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4742_cast_fp16 = slice_by_index(begin = var_4742_begin_0, end = var_4742_end_0, end_mask = var_4742_end_mask_0, squeeze_mask = var_4742_squeeze_mask_0, x = coreml_update_state_111)[name = string("op_4742_cast_fp16")]; + tensor var_4745_begin_0 = const()[name = string("op_4745_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4745_end_mask_0 = const()[name = string("op_4745_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4745_cast_fp16 = slice_by_index(begin = var_4745_begin_0, end = concat_11, end_mask = var_4745_end_mask_0, x = var_4742_cast_fp16)[name = string("op_4745_cast_fp16")]; + tensor var_4747_shape_cast_fp16 = shape(x = var_4740_cast_fp16)[name = string("op_4747_shape_cast_fp16")]; + int32 gather_463 = const()[name = string("gather_463"), val = int32(1)]; + int32 gather_464 = const()[name = string("gather_464"), val = int32(3)]; + int32 gather_465_axis_0 = const()[name = string("gather_465_axis_0"), val = int32(0)]; + int32 gather_465_batch_dims_0 = const()[name = string("gather_465_batch_dims_0"), val = int32(0)]; + bool gather_465_validate_indices_0 = const()[name = string("gather_465_validate_indices_0"), val = bool(false)]; + string var_4747_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4747_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_465_to_uint16 = const()[name = string("select_465_to_uint16"), val = uint16(2)]; + tensor var_4747_shape_cast_fp16_to_uint16 = cast(dtype = var_4747_shape_cast_fp16_to_uint16_dtype_0, x = var_4747_shape_cast_fp16)[name = string("cast_38")]; + uint16 gather_465_cast_uint16 = gather(axis = gather_465_axis_0, batch_dims = gather_465_batch_dims_0, indices = select_465_to_uint16, validate_indices = gather_465_validate_indices_0, x = var_4747_shape_cast_fp16_to_uint16)[name = string("gather_465_cast_uint16")]; + string gather_465_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_465_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_466 = const()[name = string("gather_466"), val = int32(64)]; + tensor var_4754_axes_0 = const()[name = string("op_4754_axes_0"), val = tensor([2])]; + tensor var_4754_cast_fp16 = expand_dims(axes = var_4754_axes_0, x = var_4740_cast_fp16)[name = string("op_4754_cast_fp16")]; + tensor shape_517_cast_fp16 = shape(x = var_4754_cast_fp16)[name = string("shape_517_cast_fp16")]; + int32 concat_488_axis_0 = const()[name = string("concat_488_axis_0"), val = int32(0)]; + bool concat_488_interleave_0 = const()[name = string("concat_488_interleave_0"), val = bool(false)]; + int32 gather_465_cast_uint16_to_int32 = cast(dtype = gather_465_cast_uint16_to_int32_dtype_0, x = gather_465_cast_uint16)[name = string("cast_37")]; + tensor concat_488 = concat(axis = concat_488_axis_0, interleave = concat_488_interleave_0, values = (gather_463, gather_464, var_85, gather_465_cast_uint16_to_int32, gather_466))[name = string("concat_488")]; + tensor real_div_50 = real_div(x = concat_488, y = shape_517_cast_fp16)[name = string("real_div_50")]; + tensor hidden_states_761_cast_fp16 = tile(reps = real_div_50, x = var_4754_cast_fp16)[name = string("hidden_states_761_cast_fp16")]; + tensor concat_489x = const()[name = string("concat_489x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_103_cast_fp16 = reshape(shape = concat_489x, x = hidden_states_761_cast_fp16)[name = string("key_states_103_cast_fp16")]; + tensor var_4764_shape_cast_fp16 = shape(x = var_4745_cast_fp16)[name = string("op_4764_shape_cast_fp16")]; + int32 gather_467 = const()[name = string("gather_467"), val = int32(1)]; + int32 gather_468 = const()[name = string("gather_468"), val = int32(3)]; + int32 gather_469_axis_0 = const()[name = string("gather_469_axis_0"), val = int32(0)]; + int32 gather_469_batch_dims_0 = const()[name = string("gather_469_batch_dims_0"), val = int32(0)]; + bool gather_469_validate_indices_0 = const()[name = string("gather_469_validate_indices_0"), val = bool(false)]; + string var_4764_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4764_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_469_to_uint16 = const()[name = string("select_469_to_uint16"), val = uint16(2)]; + tensor var_4764_shape_cast_fp16_to_uint16 = cast(dtype = var_4764_shape_cast_fp16_to_uint16_dtype_0, x = var_4764_shape_cast_fp16)[name = string("cast_36")]; + uint16 gather_469_cast_uint16 = gather(axis = gather_469_axis_0, batch_dims = gather_469_batch_dims_0, indices = select_469_to_uint16, validate_indices = gather_469_validate_indices_0, x = var_4764_shape_cast_fp16_to_uint16)[name = string("gather_469_cast_uint16")]; + string gather_469_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_469_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_470 = const()[name = string("gather_470"), val = int32(64)]; + tensor var_4771_axes_0 = const()[name = string("op_4771_axes_0"), val = tensor([2])]; + tensor var_4771_cast_fp16 = expand_dims(axes = var_4771_axes_0, x = var_4745_cast_fp16)[name = string("op_4771_cast_fp16")]; + tensor shape_522_cast_fp16 = shape(x = var_4771_cast_fp16)[name = string("shape_522_cast_fp16")]; + int32 concat_490_axis_0 = const()[name = string("concat_490_axis_0"), val = int32(0)]; + bool concat_490_interleave_0 = const()[name = string("concat_490_interleave_0"), val = bool(false)]; + int32 gather_469_cast_uint16_to_int32 = cast(dtype = gather_469_cast_uint16_to_int32_dtype_0, x = gather_469_cast_uint16)[name = string("cast_35")]; + tensor concat_490 = concat(axis = concat_490_axis_0, interleave = concat_490_interleave_0, values = (gather_467, gather_468, var_85, gather_469_cast_uint16_to_int32, gather_470))[name = string("concat_490")]; + tensor real_div_51 = real_div(x = concat_490, y = shape_522_cast_fp16)[name = string("real_div_51")]; + tensor hidden_states_765_cast_fp16 = tile(reps = real_div_51, x = var_4771_cast_fp16)[name = string("hidden_states_765_cast_fp16")]; + tensor concat_491x = const()[name = string("concat_491x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_103_cast_fp16 = reshape(shape = concat_491x, x = hidden_states_765_cast_fp16)[name = string("value_states_103_cast_fp16")]; + tensor var_4781_shape_cast_fp16 = shape(x = key_states_103_cast_fp16)[name = string("op_4781_shape_cast_fp16")]; + int32 gather_471_axis_0 = const()[name = string("gather_471_axis_0"), val = int32(0)]; + int32 gather_471_batch_dims_0 = const()[name = string("gather_471_batch_dims_0"), val = int32(0)]; + bool gather_471_validate_indices_0 = const()[name = string("gather_471_validate_indices_0"), val = bool(false)]; + string var_4781_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4781_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_471_to_uint16 = const()[name = string("select_471_to_uint16"), val = uint16(2)]; + tensor var_4781_shape_cast_fp16_to_uint16 = cast(dtype = var_4781_shape_cast_fp16_to_uint16_dtype_0, x = var_4781_shape_cast_fp16)[name = string("cast_34")]; + uint16 gather_471_cast_uint16 = gather(axis = gather_471_axis_0, batch_dims = gather_471_batch_dims_0, indices = select_471_to_uint16, validate_indices = gather_471_validate_indices_0, x = var_4781_shape_cast_fp16_to_uint16)[name = string("gather_471_cast_uint16")]; + string gather_471_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_471_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_492_values0_0 = const()[name = string("concat_492_values0_0"), val = int32(1)]; + int32 concat_492_values1_0 = const()[name = string("concat_492_values1_0"), val = int32(1)]; + int32 concat_492_values2_0 = const()[name = string("concat_492_values2_0"), val = int32(0)]; + int32 concat_492_axis_0 = const()[name = string("concat_492_axis_0"), val = int32(0)]; + bool concat_492_interleave_0 = const()[name = string("concat_492_interleave_0"), val = bool(false)]; + int32 gather_471_cast_uint16_to_int32 = cast(dtype = gather_471_cast_uint16_to_int32_dtype_0, x = gather_471_cast_uint16)[name = string("cast_33")]; + tensor concat_492 = concat(axis = concat_492_axis_0, interleave = concat_492_interleave_0, values = (concat_492_values0_0, concat_492_values1_0, concat_492_values2_0, gather_471_cast_uint16_to_int32))[name = string("concat_492")]; + tensor causal_mask_53_begin_0 = const()[name = string("causal_mask_53_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_53_end_mask_0 = const()[name = string("causal_mask_53_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_53_cast_fp16 = slice_by_index(begin = causal_mask_53_begin_0, end = concat_492, end_mask = causal_mask_53_end_mask_0, x = causal_mask)[name = string("causal_mask_53_cast_fp16")]; + tensor attn_output_101_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_53_cast_fp16, key = key_states_103_cast_fp16, query = query_states_103_cast_fp16, value = value_states_103_cast_fp16)[name = string("attn_output_101_cast_fp16")]; + tensor var_4787_perm_0 = const()[name = string("op_4787_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_493_axis_0 = const()[name = string("concat_493_axis_0"), val = int32(0)]; + bool concat_493_interleave_0 = const()[name = string("concat_493_interleave_0"), val = bool(false)]; + int32 gather_455_cast_uint16_to_int32 = cast(dtype = gather_455_cast_uint16_to_int32_dtype_0, x = gather_455_cast_uint16)[name = string("cast_32")]; + tensor concat_493 = concat(axis = concat_493_axis_0, interleave = concat_493_interleave_0, values = (gather_454, gather_455_cast_uint16_to_int32, var_81))[name = string("concat_493")]; + tensor var_4787_cast_fp16 = transpose(perm = var_4787_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_16")]; + tensor input_201_cast_fp16 = reshape(shape = concat_493, x = var_4787_cast_fp16)[name = string("input_201_cast_fp16")]; + tensor model_model_layers_25_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66092608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66258560))))[name = string("model_model_layers_25_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_178_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_self_attn_o_proj_weight_to_fp16_quantized, x = input_201_cast_fp16)[name = string("linear_178_cast_fp16")]; + tensor hidden_states_769_cast_fp16 = add(x = hidden_states_749_cast_fp16, y = linear_178_cast_fp16)[name = string("hidden_states_769_cast_fp16")]; + fp16 var_76_promoted_51_to_fp16 = const()[name = string("op_76_promoted_51_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4796_cast_fp16 = pow(x = hidden_states_769_cast_fp16, y = var_76_promoted_51_to_fp16)[name = string("op_4796_cast_fp16")]; + tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([-1])]; + bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; + tensor variance_103_cast_fp16 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = var_4796_cast_fp16)[name = string("variance_103_cast_fp16")]; + fp16 var_4799_to_fp16 = const()[name = string("op_4799_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4800_cast_fp16 = add(x = variance_103_cast_fp16, y = var_4799_to_fp16)[name = string("op_4800_cast_fp16")]; + fp32 var_4801_epsilon_0 = const()[name = string("op_4801_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4801_cast_fp16 = rsqrt(epsilon = var_4801_epsilon_0, x = var_4800_cast_fp16)[name = string("op_4801_cast_fp16")]; + tensor hidden_states_773_cast_fp16 = mul(x = hidden_states_769_cast_fp16, y = var_4801_cast_fp16)[name = string("hidden_states_773_cast_fp16")]; + tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66279360)))]; + tensor input_203_cast_fp16 = mul(x = model_model_layers_25_post_attention_layernorm_weight_to_fp16, y = hidden_states_773_cast_fp16)[name = string("input_203_cast_fp16")]; + tensor model_model_layers_25_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66280576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66723008))))[name = string("model_model_layers_25_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_179_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_25_mlp_gate_proj_weight_to_fp16_quantized, x = input_203_cast_fp16)[name = string("linear_179_cast_fp16")]; + tensor var_4813_cast_fp16 = silu(x = linear_179_cast_fp16)[name = string("op_4813_cast_fp16")]; + tensor model_model_layers_25_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(66778368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67220800))))[name = string("model_model_layers_25_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_180_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_25_mlp_up_proj_weight_to_fp16_quantized, x = input_203_cast_fp16)[name = string("linear_180_cast_fp16")]; + tensor input_207_cast_fp16 = mul(x = var_4813_cast_fp16, y = linear_180_cast_fp16)[name = string("input_207_cast_fp16")]; + tensor model_model_layers_25_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67276160))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67718592))))[name = string("model_model_layers_25_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_181_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_mlp_down_proj_weight_to_fp16_quantized, x = input_207_cast_fp16)[name = string("linear_181_cast_fp16")]; + tensor hidden_states_779_cast_fp16 = add(x = hidden_states_769_cast_fp16, y = linear_181_cast_fp16)[name = string("hidden_states_779_cast_fp16")]; + fp16 var_76_promoted_52_to_fp16 = const()[name = string("op_76_promoted_52_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4826_cast_fp16 = pow(x = hidden_states_779_cast_fp16, y = var_76_promoted_52_to_fp16)[name = string("op_4826_cast_fp16")]; + tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([-1])]; + bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; + tensor variance_105_cast_fp16 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = var_4826_cast_fp16)[name = string("variance_105_cast_fp16")]; + fp16 var_4829_to_fp16 = const()[name = string("op_4829_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4830_cast_fp16 = add(x = variance_105_cast_fp16, y = var_4829_to_fp16)[name = string("op_4830_cast_fp16")]; + fp32 var_4831_epsilon_0 = const()[name = string("op_4831_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4831_cast_fp16 = rsqrt(epsilon = var_4831_epsilon_0, x = var_4830_cast_fp16)[name = string("op_4831_cast_fp16")]; + tensor hidden_states_783_cast_fp16 = mul(x = hidden_states_779_cast_fp16, y = var_4831_cast_fp16)[name = string("hidden_states_783_cast_fp16")]; + tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67773952)))]; + tensor hidden_states_787_cast_fp16 = mul(x = model_model_layers_26_input_layernorm_weight_to_fp16, y = hidden_states_783_cast_fp16)[name = string("hidden_states_787_cast_fp16")]; + tensor var_4842_shape_cast_fp16 = shape(x = hidden_states_787_cast_fp16)[name = string("op_4842_shape_cast_fp16")]; + int32 gather_472 = const()[name = string("gather_472"), val = int32(1)]; + int32 gather_473_axis_0 = const()[name = string("gather_473_axis_0"), val = int32(0)]; + int32 gather_473_batch_dims_0 = const()[name = string("gather_473_batch_dims_0"), val = int32(0)]; + bool gather_473_validate_indices_0 = const()[name = string("gather_473_validate_indices_0"), val = bool(false)]; + string var_4842_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4842_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_473_to_uint16 = const()[name = string("select_473_to_uint16"), val = uint16(1)]; + tensor var_4842_shape_cast_fp16_to_uint16 = cast(dtype = var_4842_shape_cast_fp16_to_uint16_dtype_0, x = var_4842_shape_cast_fp16)[name = string("cast_31")]; + uint16 gather_473_cast_uint16 = gather(axis = gather_473_axis_0, batch_dims = gather_473_batch_dims_0, indices = select_473_to_uint16, validate_indices = gather_473_validate_indices_0, x = var_4842_shape_cast_fp16_to_uint16)[name = string("gather_473_cast_uint16")]; + string gather_473_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_473_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_26_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67775168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67941120))))[name = string("model_model_layers_26_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_182_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_787_cast_fp16)[name = string("linear_182_cast_fp16")]; + tensor model_model_layers_26_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(67961920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68017280))))[name = string("model_model_layers_26_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_183_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_787_cast_fp16)[name = string("linear_183_cast_fp16")]; + tensor model_model_layers_26_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68024256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68079616))))[name = string("model_model_layers_26_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_184_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_787_cast_fp16)[name = string("linear_184_cast_fp16")]; + tensor concat_494x = const()[name = string("concat_494x"), val = tensor([1, -1, 9, 64])]; + tensor var_4851_cast_fp16 = reshape(shape = concat_494x, x = linear_182_cast_fp16)[name = string("op_4851_cast_fp16")]; + tensor q_53_perm_0 = const()[name = string("q_53_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_495x = const()[name = string("concat_495x"), val = tensor([1, -1, 3, 64])]; + tensor var_4854_cast_fp16 = reshape(shape = concat_495x, x = linear_183_cast_fp16)[name = string("op_4854_cast_fp16")]; + tensor k_53_perm_0 = const()[name = string("k_53_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 3, 64])]; + tensor var_4857_cast_fp16 = reshape(shape = concat_496x, x = linear_184_cast_fp16)[name = string("op_4857_cast_fp16")]; + tensor v_state_53_perm_0 = const()[name = string("v_state_53_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_53_cast_fp16 = transpose(perm = q_53_perm_0, x = var_4851_cast_fp16)[name = string("transpose_15")]; + tensor var_4861_cast_fp16 = mul(x = q_53_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4861_cast_fp16")]; + tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_53_cast_fp16)[name = string("x1_105_cast_fp16")]; + tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_53_cast_fp16)[name = string("x2_105_cast_fp16")]; + fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4872_cast_fp16 = mul(x = x2_105_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_4872_cast_fp16")]; + bool var_4874_interleave_0 = const()[name = string("op_4874_interleave_0"), val = bool(false)]; + tensor var_4874_cast_fp16 = concat(axis = var_81, interleave = var_4874_interleave_0, values = (var_4872_cast_fp16, x1_105_cast_fp16))[name = string("op_4874_cast_fp16")]; + tensor var_4875_cast_fp16 = mul(x = var_4874_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4875_cast_fp16")]; + tensor query_states_107_cast_fp16 = add(x = var_4861_cast_fp16, y = var_4875_cast_fp16)[name = string("query_states_107_cast_fp16")]; + tensor k_53_cast_fp16 = transpose(perm = k_53_perm_0, x = var_4854_cast_fp16)[name = string("transpose_14")]; + tensor var_4877_cast_fp16 = mul(x = k_53_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4877_cast_fp16")]; + tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_53_cast_fp16)[name = string("x1_107_cast_fp16")]; + tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_53_cast_fp16)[name = string("x2_107_cast_fp16")]; + fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_4888_cast_fp16 = mul(x = x2_107_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_4888_cast_fp16")]; + bool var_4890_interleave_0 = const()[name = string("op_4890_interleave_0"), val = bool(false)]; + tensor var_4890_cast_fp16 = concat(axis = var_81, interleave = var_4890_interleave_0, values = (var_4888_cast_fp16, x1_107_cast_fp16))[name = string("op_4890_cast_fp16")]; + tensor var_4891_cast_fp16 = mul(x = var_4890_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4891_cast_fp16")]; + tensor k_state_53_cast_fp16 = add(x = var_4877_cast_fp16, y = var_4891_cast_fp16)[name = string("k_state_53_cast_fp16")]; + tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([0])]; + tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; + tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; + tensor concat_499_values0_0 = const()[name = string("concat_499_values0_0"), val = tensor([26])]; + int32 concat_499_axis_0 = const()[name = string("concat_499_axis_0"), val = int32(0)]; + bool concat_499_interleave_0 = const()[name = string("concat_499_interleave_0"), val = bool(false)]; + tensor concat_499 = concat(axis = concat_499_axis_0, interleave = concat_499_interleave_0, values = (concat_499_values0_0, expand_dims_312, expand_dims_313, expand_dims_2, expand_dims_315))[name = string("concat_499")]; + tensor key_cache_internal_tensor_assign_27_stride_0 = const()[name = string("key_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_499, begin_mask = key_cache_internal_tensor_assign_27_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_27_squeeze_mask_0, stride = key_cache_internal_tensor_assign_27_stride_0, update = k_state_53_cast_fp16, x = coreml_update_state_110)[name = string("key_cache_internal_tensor_assign_27_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_27_cast_fp16, input = key_cache)[name = string("coreml_update_state_112_write_state")]; + tensor coreml_update_state_112 = read_state(input = key_cache)[name = string("coreml_update_state_112")]; + tensor value_cache_internal_tensor_assign_27_stride_0 = const()[name = string("value_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_53_cast_fp16 = transpose(perm = v_state_53_perm_0, x = var_4857_cast_fp16)[name = string("transpose_13")]; + tensor value_cache_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_499, begin_mask = value_cache_internal_tensor_assign_27_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_27_squeeze_mask_0, stride = value_cache_internal_tensor_assign_27_stride_0, update = v_state_53_cast_fp16, x = coreml_update_state_111)[name = string("value_cache_internal_tensor_assign_27_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_27_cast_fp16, input = value_cache)[name = string("coreml_update_state_113_write_state")]; + tensor coreml_update_state_113 = read_state(input = value_cache)[name = string("coreml_update_state_113")]; + tensor var_4914_begin_0 = const()[name = string("op_4914_begin_0"), val = tensor([26, 0, 0, 0, 0])]; + tensor var_4914_end_0 = const()[name = string("op_4914_end_0"), val = tensor([27, 1, 3, 2048, 64])]; + tensor var_4914_end_mask_0 = const()[name = string("op_4914_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4914_squeeze_mask_0 = const()[name = string("op_4914_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4914_cast_fp16 = slice_by_index(begin = var_4914_begin_0, end = var_4914_end_0, end_mask = var_4914_end_mask_0, squeeze_mask = var_4914_squeeze_mask_0, x = coreml_update_state_112)[name = string("op_4914_cast_fp16")]; + tensor var_4917_begin_0 = const()[name = string("op_4917_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4917_end_mask_0 = const()[name = string("op_4917_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4917_cast_fp16 = slice_by_index(begin = var_4917_begin_0, end = concat_11, end_mask = var_4917_end_mask_0, x = var_4914_cast_fp16)[name = string("op_4917_cast_fp16")]; + tensor var_4919_begin_0 = const()[name = string("op_4919_begin_0"), val = tensor([26, 0, 0, 0, 0])]; + tensor var_4919_end_0 = const()[name = string("op_4919_end_0"), val = tensor([27, 1, 3, 2048, 64])]; + tensor var_4919_end_mask_0 = const()[name = string("op_4919_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_4919_squeeze_mask_0 = const()[name = string("op_4919_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_4919_cast_fp16 = slice_by_index(begin = var_4919_begin_0, end = var_4919_end_0, end_mask = var_4919_end_mask_0, squeeze_mask = var_4919_squeeze_mask_0, x = coreml_update_state_113)[name = string("op_4919_cast_fp16")]; + tensor var_4922_begin_0 = const()[name = string("op_4922_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4922_end_mask_0 = const()[name = string("op_4922_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_4922_cast_fp16 = slice_by_index(begin = var_4922_begin_0, end = concat_11, end_mask = var_4922_end_mask_0, x = var_4919_cast_fp16)[name = string("op_4922_cast_fp16")]; + tensor var_4924_shape_cast_fp16 = shape(x = var_4917_cast_fp16)[name = string("op_4924_shape_cast_fp16")]; + int32 gather_481 = const()[name = string("gather_481"), val = int32(1)]; + int32 gather_482 = const()[name = string("gather_482"), val = int32(3)]; + int32 gather_483_axis_0 = const()[name = string("gather_483_axis_0"), val = int32(0)]; + int32 gather_483_batch_dims_0 = const()[name = string("gather_483_batch_dims_0"), val = int32(0)]; + bool gather_483_validate_indices_0 = const()[name = string("gather_483_validate_indices_0"), val = bool(false)]; + string var_4924_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4924_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_483_to_uint16 = const()[name = string("select_483_to_uint16"), val = uint16(2)]; + tensor var_4924_shape_cast_fp16_to_uint16 = cast(dtype = var_4924_shape_cast_fp16_to_uint16_dtype_0, x = var_4924_shape_cast_fp16)[name = string("cast_30")]; + uint16 gather_483_cast_uint16 = gather(axis = gather_483_axis_0, batch_dims = gather_483_batch_dims_0, indices = select_483_to_uint16, validate_indices = gather_483_validate_indices_0, x = var_4924_shape_cast_fp16_to_uint16)[name = string("gather_483_cast_uint16")]; + string gather_483_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_483_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_484 = const()[name = string("gather_484"), val = int32(64)]; + tensor var_4931_axes_0 = const()[name = string("op_4931_axes_0"), val = tensor([2])]; + tensor var_4931_cast_fp16 = expand_dims(axes = var_4931_axes_0, x = var_4917_cast_fp16)[name = string("op_4931_cast_fp16")]; + tensor shape_537_cast_fp16 = shape(x = var_4931_cast_fp16)[name = string("shape_537_cast_fp16")]; + int32 concat_507_axis_0 = const()[name = string("concat_507_axis_0"), val = int32(0)]; + bool concat_507_interleave_0 = const()[name = string("concat_507_interleave_0"), val = bool(false)]; + int32 gather_483_cast_uint16_to_int32 = cast(dtype = gather_483_cast_uint16_to_int32_dtype_0, x = gather_483_cast_uint16)[name = string("cast_29")]; + tensor concat_507 = concat(axis = concat_507_axis_0, interleave = concat_507_interleave_0, values = (gather_481, gather_482, var_85, gather_483_cast_uint16_to_int32, gather_484))[name = string("concat_507")]; + tensor real_div_52 = real_div(x = concat_507, y = shape_537_cast_fp16)[name = string("real_div_52")]; + tensor hidden_states_791_cast_fp16 = tile(reps = real_div_52, x = var_4931_cast_fp16)[name = string("hidden_states_791_cast_fp16")]; + tensor concat_508x = const()[name = string("concat_508x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_107_cast_fp16 = reshape(shape = concat_508x, x = hidden_states_791_cast_fp16)[name = string("key_states_107_cast_fp16")]; + tensor var_4941_shape_cast_fp16 = shape(x = var_4922_cast_fp16)[name = string("op_4941_shape_cast_fp16")]; + int32 gather_485 = const()[name = string("gather_485"), val = int32(1)]; + int32 gather_486 = const()[name = string("gather_486"), val = int32(3)]; + int32 gather_487_axis_0 = const()[name = string("gather_487_axis_0"), val = int32(0)]; + int32 gather_487_batch_dims_0 = const()[name = string("gather_487_batch_dims_0"), val = int32(0)]; + bool gather_487_validate_indices_0 = const()[name = string("gather_487_validate_indices_0"), val = bool(false)]; + string var_4941_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4941_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_487_to_uint16 = const()[name = string("select_487_to_uint16"), val = uint16(2)]; + tensor var_4941_shape_cast_fp16_to_uint16 = cast(dtype = var_4941_shape_cast_fp16_to_uint16_dtype_0, x = var_4941_shape_cast_fp16)[name = string("cast_28")]; + uint16 gather_487_cast_uint16 = gather(axis = gather_487_axis_0, batch_dims = gather_487_batch_dims_0, indices = select_487_to_uint16, validate_indices = gather_487_validate_indices_0, x = var_4941_shape_cast_fp16_to_uint16)[name = string("gather_487_cast_uint16")]; + string gather_487_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_487_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_488 = const()[name = string("gather_488"), val = int32(64)]; + tensor var_4948_axes_0 = const()[name = string("op_4948_axes_0"), val = tensor([2])]; + tensor var_4948_cast_fp16 = expand_dims(axes = var_4948_axes_0, x = var_4922_cast_fp16)[name = string("op_4948_cast_fp16")]; + tensor shape_542_cast_fp16 = shape(x = var_4948_cast_fp16)[name = string("shape_542_cast_fp16")]; + int32 concat_509_axis_0 = const()[name = string("concat_509_axis_0"), val = int32(0)]; + bool concat_509_interleave_0 = const()[name = string("concat_509_interleave_0"), val = bool(false)]; + int32 gather_487_cast_uint16_to_int32 = cast(dtype = gather_487_cast_uint16_to_int32_dtype_0, x = gather_487_cast_uint16)[name = string("cast_27")]; + tensor concat_509 = concat(axis = concat_509_axis_0, interleave = concat_509_interleave_0, values = (gather_485, gather_486, var_85, gather_487_cast_uint16_to_int32, gather_488))[name = string("concat_509")]; + tensor real_div_53 = real_div(x = concat_509, y = shape_542_cast_fp16)[name = string("real_div_53")]; + tensor hidden_states_795_cast_fp16 = tile(reps = real_div_53, x = var_4948_cast_fp16)[name = string("hidden_states_795_cast_fp16")]; + tensor concat_510x = const()[name = string("concat_510x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_107_cast_fp16 = reshape(shape = concat_510x, x = hidden_states_795_cast_fp16)[name = string("value_states_107_cast_fp16")]; + tensor var_4958_shape_cast_fp16 = shape(x = key_states_107_cast_fp16)[name = string("op_4958_shape_cast_fp16")]; + int32 gather_489_axis_0 = const()[name = string("gather_489_axis_0"), val = int32(0)]; + int32 gather_489_batch_dims_0 = const()[name = string("gather_489_batch_dims_0"), val = int32(0)]; + bool gather_489_validate_indices_0 = const()[name = string("gather_489_validate_indices_0"), val = bool(false)]; + string var_4958_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4958_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_489_to_uint16 = const()[name = string("select_489_to_uint16"), val = uint16(2)]; + tensor var_4958_shape_cast_fp16_to_uint16 = cast(dtype = var_4958_shape_cast_fp16_to_uint16_dtype_0, x = var_4958_shape_cast_fp16)[name = string("cast_26")]; + uint16 gather_489_cast_uint16 = gather(axis = gather_489_axis_0, batch_dims = gather_489_batch_dims_0, indices = select_489_to_uint16, validate_indices = gather_489_validate_indices_0, x = var_4958_shape_cast_fp16_to_uint16)[name = string("gather_489_cast_uint16")]; + string gather_489_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_489_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_511_values0_0 = const()[name = string("concat_511_values0_0"), val = int32(1)]; + int32 concat_511_values1_0 = const()[name = string("concat_511_values1_0"), val = int32(1)]; + int32 concat_511_values2_0 = const()[name = string("concat_511_values2_0"), val = int32(0)]; + int32 concat_511_axis_0 = const()[name = string("concat_511_axis_0"), val = int32(0)]; + bool concat_511_interleave_0 = const()[name = string("concat_511_interleave_0"), val = bool(false)]; + int32 gather_489_cast_uint16_to_int32 = cast(dtype = gather_489_cast_uint16_to_int32_dtype_0, x = gather_489_cast_uint16)[name = string("cast_25")]; + tensor concat_511 = concat(axis = concat_511_axis_0, interleave = concat_511_interleave_0, values = (concat_511_values0_0, concat_511_values1_0, concat_511_values2_0, gather_489_cast_uint16_to_int32))[name = string("concat_511")]; + tensor causal_mask_55_begin_0 = const()[name = string("causal_mask_55_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_55_end_mask_0 = const()[name = string("causal_mask_55_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_55_cast_fp16 = slice_by_index(begin = causal_mask_55_begin_0, end = concat_511, end_mask = causal_mask_55_end_mask_0, x = causal_mask)[name = string("causal_mask_55_cast_fp16")]; + tensor attn_output_105_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_55_cast_fp16, key = key_states_107_cast_fp16, query = query_states_107_cast_fp16, value = value_states_107_cast_fp16)[name = string("attn_output_105_cast_fp16")]; + tensor var_4964_perm_0 = const()[name = string("op_4964_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_512_axis_0 = const()[name = string("concat_512_axis_0"), val = int32(0)]; + bool concat_512_interleave_0 = const()[name = string("concat_512_interleave_0"), val = bool(false)]; + int32 gather_473_cast_uint16_to_int32 = cast(dtype = gather_473_cast_uint16_to_int32_dtype_0, x = gather_473_cast_uint16)[name = string("cast_24")]; + tensor concat_512 = concat(axis = concat_512_axis_0, interleave = concat_512_interleave_0, values = (gather_472, gather_473_cast_uint16_to_int32, var_81))[name = string("concat_512")]; + tensor var_4964_cast_fp16 = transpose(perm = var_4964_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_12")]; + tensor input_209_cast_fp16 = reshape(shape = concat_512, x = var_4964_cast_fp16)[name = string("input_209_cast_fp16")]; + tensor model_model_layers_26_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68086592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68252544))))[name = string("model_model_layers_26_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_185_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_self_attn_o_proj_weight_to_fp16_quantized, x = input_209_cast_fp16)[name = string("linear_185_cast_fp16")]; + tensor hidden_states_799_cast_fp16 = add(x = hidden_states_779_cast_fp16, y = linear_185_cast_fp16)[name = string("hidden_states_799_cast_fp16")]; + fp16 var_76_promoted_53_to_fp16 = const()[name = string("op_76_promoted_53_to_fp16"), val = fp16(0x1p+1)]; + tensor var_4973_cast_fp16 = pow(x = hidden_states_799_cast_fp16, y = var_76_promoted_53_to_fp16)[name = string("op_4973_cast_fp16")]; + tensor variance_107_axes_0 = const()[name = string("variance_107_axes_0"), val = tensor([-1])]; + bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; + tensor variance_107_cast_fp16 = reduce_mean(axes = variance_107_axes_0, keep_dims = variance_107_keep_dims_0, x = var_4973_cast_fp16)[name = string("variance_107_cast_fp16")]; + fp16 var_4976_to_fp16 = const()[name = string("op_4976_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_4977_cast_fp16 = add(x = variance_107_cast_fp16, y = var_4976_to_fp16)[name = string("op_4977_cast_fp16")]; + fp32 var_4978_epsilon_0 = const()[name = string("op_4978_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_4978_cast_fp16 = rsqrt(epsilon = var_4978_epsilon_0, x = var_4977_cast_fp16)[name = string("op_4978_cast_fp16")]; + tensor hidden_states_803_cast_fp16 = mul(x = hidden_states_799_cast_fp16, y = var_4978_cast_fp16)[name = string("hidden_states_803_cast_fp16")]; + tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68273344)))]; + tensor input_211_cast_fp16 = mul(x = model_model_layers_26_post_attention_layernorm_weight_to_fp16, y = hidden_states_803_cast_fp16)[name = string("input_211_cast_fp16")]; + tensor model_model_layers_26_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68274560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68716992))))[name = string("model_model_layers_26_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_186_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_26_mlp_gate_proj_weight_to_fp16_quantized, x = input_211_cast_fp16)[name = string("linear_186_cast_fp16")]; + tensor var_4990_cast_fp16 = silu(x = linear_186_cast_fp16)[name = string("op_4990_cast_fp16")]; + tensor model_model_layers_26_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68772352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69214784))))[name = string("model_model_layers_26_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_187_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_26_mlp_up_proj_weight_to_fp16_quantized, x = input_211_cast_fp16)[name = string("linear_187_cast_fp16")]; + tensor input_215_cast_fp16 = mul(x = var_4990_cast_fp16, y = linear_187_cast_fp16)[name = string("input_215_cast_fp16")]; + tensor model_model_layers_26_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69270144))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69712576))))[name = string("model_model_layers_26_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_188_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_mlp_down_proj_weight_to_fp16_quantized, x = input_215_cast_fp16)[name = string("linear_188_cast_fp16")]; + tensor hidden_states_809_cast_fp16 = add(x = hidden_states_799_cast_fp16, y = linear_188_cast_fp16)[name = string("hidden_states_809_cast_fp16")]; + fp16 var_76_promoted_54_to_fp16 = const()[name = string("op_76_promoted_54_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5003_cast_fp16 = pow(x = hidden_states_809_cast_fp16, y = var_76_promoted_54_to_fp16)[name = string("op_5003_cast_fp16")]; + tensor variance_109_axes_0 = const()[name = string("variance_109_axes_0"), val = tensor([-1])]; + bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; + tensor variance_109_cast_fp16 = reduce_mean(axes = variance_109_axes_0, keep_dims = variance_109_keep_dims_0, x = var_5003_cast_fp16)[name = string("variance_109_cast_fp16")]; + fp16 var_5006_to_fp16 = const()[name = string("op_5006_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5007_cast_fp16 = add(x = variance_109_cast_fp16, y = var_5006_to_fp16)[name = string("op_5007_cast_fp16")]; + fp32 var_5008_epsilon_0 = const()[name = string("op_5008_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5008_cast_fp16 = rsqrt(epsilon = var_5008_epsilon_0, x = var_5007_cast_fp16)[name = string("op_5008_cast_fp16")]; + tensor hidden_states_813_cast_fp16 = mul(x = hidden_states_809_cast_fp16, y = var_5008_cast_fp16)[name = string("hidden_states_813_cast_fp16")]; + tensor model_model_layers_27_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69767936)))]; + tensor hidden_states_817_cast_fp16 = mul(x = model_model_layers_27_input_layernorm_weight_to_fp16, y = hidden_states_813_cast_fp16)[name = string("hidden_states_817_cast_fp16")]; + tensor var_5019_shape_cast_fp16 = shape(x = hidden_states_817_cast_fp16)[name = string("op_5019_shape_cast_fp16")]; + int32 gather_490 = const()[name = string("gather_490"), val = int32(1)]; + int32 gather_491_axis_0 = const()[name = string("gather_491_axis_0"), val = int32(0)]; + int32 gather_491_batch_dims_0 = const()[name = string("gather_491_batch_dims_0"), val = int32(0)]; + bool gather_491_validate_indices_0 = const()[name = string("gather_491_validate_indices_0"), val = bool(false)]; + string var_5019_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5019_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_491_to_uint16 = const()[name = string("select_491_to_uint16"), val = uint16(1)]; + tensor var_5019_shape_cast_fp16_to_uint16 = cast(dtype = var_5019_shape_cast_fp16_to_uint16_dtype_0, x = var_5019_shape_cast_fp16)[name = string("cast_23")]; + uint16 gather_491_cast_uint16 = gather(axis = gather_491_axis_0, batch_dims = gather_491_batch_dims_0, indices = select_491_to_uint16, validate_indices = gather_491_validate_indices_0, x = var_5019_shape_cast_fp16_to_uint16)[name = string("gather_491_cast_uint16")]; + string gather_491_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_491_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_27_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69769152))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69935104))))[name = string("model_model_layers_27_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_189_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_27_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_817_cast_fp16)[name = string("linear_189_cast_fp16")]; + tensor model_model_layers_27_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(69955904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70011264))))[name = string("model_model_layers_27_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_817_cast_fp16)[name = string("linear_190_cast_fp16")]; + tensor model_model_layers_27_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70018240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70073600))))[name = string("model_model_layers_27_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_191_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_817_cast_fp16)[name = string("linear_191_cast_fp16")]; + tensor concat_513x = const()[name = string("concat_513x"), val = tensor([1, -1, 9, 64])]; + tensor var_5028_cast_fp16 = reshape(shape = concat_513x, x = linear_189_cast_fp16)[name = string("op_5028_cast_fp16")]; + tensor q_55_perm_0 = const()[name = string("q_55_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_514x = const()[name = string("concat_514x"), val = tensor([1, -1, 3, 64])]; + tensor var_5031_cast_fp16 = reshape(shape = concat_514x, x = linear_190_cast_fp16)[name = string("op_5031_cast_fp16")]; + tensor k_55_perm_0 = const()[name = string("k_55_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_515x = const()[name = string("concat_515x"), val = tensor([1, -1, 3, 64])]; + tensor var_5034_cast_fp16 = reshape(shape = concat_515x, x = linear_191_cast_fp16)[name = string("op_5034_cast_fp16")]; + tensor v_state_55_perm_0 = const()[name = string("v_state_55_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_55_cast_fp16 = transpose(perm = q_55_perm_0, x = var_5028_cast_fp16)[name = string("transpose_11")]; + tensor var_5038_cast_fp16 = mul(x = q_55_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5038_cast_fp16")]; + tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q_55_cast_fp16)[name = string("x1_109_cast_fp16")]; + tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q_55_cast_fp16)[name = string("x2_109_cast_fp16")]; + fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5049_cast_fp16 = mul(x = x2_109_cast_fp16, y = const_57_promoted_to_fp16)[name = string("op_5049_cast_fp16")]; + bool var_5051_interleave_0 = const()[name = string("op_5051_interleave_0"), val = bool(false)]; + tensor var_5051_cast_fp16 = concat(axis = var_81, interleave = var_5051_interleave_0, values = (var_5049_cast_fp16, x1_109_cast_fp16))[name = string("op_5051_cast_fp16")]; + tensor var_5052_cast_fp16 = mul(x = var_5051_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5052_cast_fp16")]; + tensor query_states_111_cast_fp16 = add(x = var_5038_cast_fp16, y = var_5052_cast_fp16)[name = string("query_states_111_cast_fp16")]; + tensor k_55_cast_fp16 = transpose(perm = k_55_perm_0, x = var_5031_cast_fp16)[name = string("transpose_10")]; + tensor var_5054_cast_fp16 = mul(x = k_55_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5054_cast_fp16")]; + tensor x1_111_begin_0 = const()[name = string("x1_111_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_111_end_0 = const()[name = string("x1_111_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_111_end_mask_0 = const()[name = string("x1_111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_111_cast_fp16 = slice_by_index(begin = x1_111_begin_0, end = x1_111_end_0, end_mask = x1_111_end_mask_0, x = k_55_cast_fp16)[name = string("x1_111_cast_fp16")]; + tensor x2_111_begin_0 = const()[name = string("x2_111_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_111_end_0 = const()[name = string("x2_111_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_111_end_mask_0 = const()[name = string("x2_111_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_111_cast_fp16 = slice_by_index(begin = x2_111_begin_0, end = x2_111_end_0, end_mask = x2_111_end_mask_0, x = k_55_cast_fp16)[name = string("x2_111_cast_fp16")]; + fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5065_cast_fp16 = mul(x = x2_111_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_5065_cast_fp16")]; + bool var_5067_interleave_0 = const()[name = string("op_5067_interleave_0"), val = bool(false)]; + tensor var_5067_cast_fp16 = concat(axis = var_81, interleave = var_5067_interleave_0, values = (var_5065_cast_fp16, x1_111_cast_fp16))[name = string("op_5067_cast_fp16")]; + tensor var_5068_cast_fp16 = mul(x = var_5067_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5068_cast_fp16")]; + tensor k_state_55_cast_fp16 = add(x = var_5054_cast_fp16, y = var_5068_cast_fp16)[name = string("k_state_55_cast_fp16")]; + tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([0])]; + tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; + tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; + tensor concat_518_values0_0 = const()[name = string("concat_518_values0_0"), val = tensor([27])]; + int32 concat_518_axis_0 = const()[name = string("concat_518_axis_0"), val = int32(0)]; + bool concat_518_interleave_0 = const()[name = string("concat_518_interleave_0"), val = bool(false)]; + tensor concat_518 = concat(axis = concat_518_axis_0, interleave = concat_518_interleave_0, values = (concat_518_values0_0, expand_dims_324, expand_dims_325, expand_dims_2, expand_dims_327))[name = string("concat_518")]; + tensor key_cache_internal_tensor_assign_28_stride_0 = const()[name = string("key_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_518, begin_mask = key_cache_internal_tensor_assign_28_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_28_squeeze_mask_0, stride = key_cache_internal_tensor_assign_28_stride_0, update = k_state_55_cast_fp16, x = coreml_update_state_112)[name = string("key_cache_internal_tensor_assign_28_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_28_cast_fp16, input = key_cache)[name = string("coreml_update_state_114_write_state")]; + tensor coreml_update_state_114 = read_state(input = key_cache)[name = string("coreml_update_state_114")]; + tensor value_cache_internal_tensor_assign_28_stride_0 = const()[name = string("value_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_55_cast_fp16 = transpose(perm = v_state_55_perm_0, x = var_5034_cast_fp16)[name = string("transpose_9")]; + tensor value_cache_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_518, begin_mask = value_cache_internal_tensor_assign_28_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_28_squeeze_mask_0, stride = value_cache_internal_tensor_assign_28_stride_0, update = v_state_55_cast_fp16, x = coreml_update_state_113)[name = string("value_cache_internal_tensor_assign_28_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_28_cast_fp16, input = value_cache)[name = string("coreml_update_state_115_write_state")]; + tensor coreml_update_state_115 = read_state(input = value_cache)[name = string("coreml_update_state_115")]; + tensor var_5091_begin_0 = const()[name = string("op_5091_begin_0"), val = tensor([27, 0, 0, 0, 0])]; + tensor var_5091_end_0 = const()[name = string("op_5091_end_0"), val = tensor([28, 1, 3, 2048, 64])]; + tensor var_5091_end_mask_0 = const()[name = string("op_5091_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5091_squeeze_mask_0 = const()[name = string("op_5091_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5091_cast_fp16 = slice_by_index(begin = var_5091_begin_0, end = var_5091_end_0, end_mask = var_5091_end_mask_0, squeeze_mask = var_5091_squeeze_mask_0, x = coreml_update_state_114)[name = string("op_5091_cast_fp16")]; + tensor var_5094_begin_0 = const()[name = string("op_5094_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5094_end_mask_0 = const()[name = string("op_5094_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5094_cast_fp16 = slice_by_index(begin = var_5094_begin_0, end = concat_11, end_mask = var_5094_end_mask_0, x = var_5091_cast_fp16)[name = string("op_5094_cast_fp16")]; + tensor var_5096_begin_0 = const()[name = string("op_5096_begin_0"), val = tensor([27, 0, 0, 0, 0])]; + tensor var_5096_end_0 = const()[name = string("op_5096_end_0"), val = tensor([28, 1, 3, 2048, 64])]; + tensor var_5096_end_mask_0 = const()[name = string("op_5096_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5096_squeeze_mask_0 = const()[name = string("op_5096_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5096_cast_fp16 = slice_by_index(begin = var_5096_begin_0, end = var_5096_end_0, end_mask = var_5096_end_mask_0, squeeze_mask = var_5096_squeeze_mask_0, x = coreml_update_state_115)[name = string("op_5096_cast_fp16")]; + tensor var_5099_begin_0 = const()[name = string("op_5099_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5099_end_mask_0 = const()[name = string("op_5099_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5099_cast_fp16 = slice_by_index(begin = var_5099_begin_0, end = concat_11, end_mask = var_5099_end_mask_0, x = var_5096_cast_fp16)[name = string("op_5099_cast_fp16")]; + tensor var_5101_shape_cast_fp16 = shape(x = var_5094_cast_fp16)[name = string("op_5101_shape_cast_fp16")]; + int32 gather_499 = const()[name = string("gather_499"), val = int32(1)]; + int32 gather_500 = const()[name = string("gather_500"), val = int32(3)]; + int32 gather_501_axis_0 = const()[name = string("gather_501_axis_0"), val = int32(0)]; + int32 gather_501_batch_dims_0 = const()[name = string("gather_501_batch_dims_0"), val = int32(0)]; + bool gather_501_validate_indices_0 = const()[name = string("gather_501_validate_indices_0"), val = bool(false)]; + string var_5101_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5101_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_501_to_uint16 = const()[name = string("select_501_to_uint16"), val = uint16(2)]; + tensor var_5101_shape_cast_fp16_to_uint16 = cast(dtype = var_5101_shape_cast_fp16_to_uint16_dtype_0, x = var_5101_shape_cast_fp16)[name = string("cast_22")]; + uint16 gather_501_cast_uint16 = gather(axis = gather_501_axis_0, batch_dims = gather_501_batch_dims_0, indices = select_501_to_uint16, validate_indices = gather_501_validate_indices_0, x = var_5101_shape_cast_fp16_to_uint16)[name = string("gather_501_cast_uint16")]; + string gather_501_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_501_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_502 = const()[name = string("gather_502"), val = int32(64)]; + tensor var_5108_axes_0 = const()[name = string("op_5108_axes_0"), val = tensor([2])]; + tensor var_5108_cast_fp16 = expand_dims(axes = var_5108_axes_0, x = var_5094_cast_fp16)[name = string("op_5108_cast_fp16")]; + tensor shape_557_cast_fp16 = shape(x = var_5108_cast_fp16)[name = string("shape_557_cast_fp16")]; + int32 concat_526_axis_0 = const()[name = string("concat_526_axis_0"), val = int32(0)]; + bool concat_526_interleave_0 = const()[name = string("concat_526_interleave_0"), val = bool(false)]; + int32 gather_501_cast_uint16_to_int32 = cast(dtype = gather_501_cast_uint16_to_int32_dtype_0, x = gather_501_cast_uint16)[name = string("cast_21")]; + tensor concat_526 = concat(axis = concat_526_axis_0, interleave = concat_526_interleave_0, values = (gather_499, gather_500, var_85, gather_501_cast_uint16_to_int32, gather_502))[name = string("concat_526")]; + tensor real_div_54 = real_div(x = concat_526, y = shape_557_cast_fp16)[name = string("real_div_54")]; + tensor hidden_states_821_cast_fp16 = tile(reps = real_div_54, x = var_5108_cast_fp16)[name = string("hidden_states_821_cast_fp16")]; + tensor concat_527x = const()[name = string("concat_527x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_111_cast_fp16 = reshape(shape = concat_527x, x = hidden_states_821_cast_fp16)[name = string("key_states_111_cast_fp16")]; + tensor var_5118_shape_cast_fp16 = shape(x = var_5099_cast_fp16)[name = string("op_5118_shape_cast_fp16")]; + int32 gather_503 = const()[name = string("gather_503"), val = int32(1)]; + int32 gather_504 = const()[name = string("gather_504"), val = int32(3)]; + int32 gather_505_axis_0 = const()[name = string("gather_505_axis_0"), val = int32(0)]; + int32 gather_505_batch_dims_0 = const()[name = string("gather_505_batch_dims_0"), val = int32(0)]; + bool gather_505_validate_indices_0 = const()[name = string("gather_505_validate_indices_0"), val = bool(false)]; + string var_5118_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5118_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_505_to_uint16 = const()[name = string("select_505_to_uint16"), val = uint16(2)]; + tensor var_5118_shape_cast_fp16_to_uint16 = cast(dtype = var_5118_shape_cast_fp16_to_uint16_dtype_0, x = var_5118_shape_cast_fp16)[name = string("cast_20")]; + uint16 gather_505_cast_uint16 = gather(axis = gather_505_axis_0, batch_dims = gather_505_batch_dims_0, indices = select_505_to_uint16, validate_indices = gather_505_validate_indices_0, x = var_5118_shape_cast_fp16_to_uint16)[name = string("gather_505_cast_uint16")]; + string gather_505_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_505_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_506 = const()[name = string("gather_506"), val = int32(64)]; + tensor var_5125_axes_0 = const()[name = string("op_5125_axes_0"), val = tensor([2])]; + tensor var_5125_cast_fp16 = expand_dims(axes = var_5125_axes_0, x = var_5099_cast_fp16)[name = string("op_5125_cast_fp16")]; + tensor shape_562_cast_fp16 = shape(x = var_5125_cast_fp16)[name = string("shape_562_cast_fp16")]; + int32 concat_528_axis_0 = const()[name = string("concat_528_axis_0"), val = int32(0)]; + bool concat_528_interleave_0 = const()[name = string("concat_528_interleave_0"), val = bool(false)]; + int32 gather_505_cast_uint16_to_int32 = cast(dtype = gather_505_cast_uint16_to_int32_dtype_0, x = gather_505_cast_uint16)[name = string("cast_19")]; + tensor concat_528 = concat(axis = concat_528_axis_0, interleave = concat_528_interleave_0, values = (gather_503, gather_504, var_85, gather_505_cast_uint16_to_int32, gather_506))[name = string("concat_528")]; + tensor real_div_55 = real_div(x = concat_528, y = shape_562_cast_fp16)[name = string("real_div_55")]; + tensor hidden_states_825_cast_fp16 = tile(reps = real_div_55, x = var_5125_cast_fp16)[name = string("hidden_states_825_cast_fp16")]; + tensor concat_529x = const()[name = string("concat_529x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_111_cast_fp16 = reshape(shape = concat_529x, x = hidden_states_825_cast_fp16)[name = string("value_states_111_cast_fp16")]; + tensor var_5135_shape_cast_fp16 = shape(x = key_states_111_cast_fp16)[name = string("op_5135_shape_cast_fp16")]; + int32 gather_507_axis_0 = const()[name = string("gather_507_axis_0"), val = int32(0)]; + int32 gather_507_batch_dims_0 = const()[name = string("gather_507_batch_dims_0"), val = int32(0)]; + bool gather_507_validate_indices_0 = const()[name = string("gather_507_validate_indices_0"), val = bool(false)]; + string var_5135_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5135_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_507_to_uint16 = const()[name = string("select_507_to_uint16"), val = uint16(2)]; + tensor var_5135_shape_cast_fp16_to_uint16 = cast(dtype = var_5135_shape_cast_fp16_to_uint16_dtype_0, x = var_5135_shape_cast_fp16)[name = string("cast_18")]; + uint16 gather_507_cast_uint16 = gather(axis = gather_507_axis_0, batch_dims = gather_507_batch_dims_0, indices = select_507_to_uint16, validate_indices = gather_507_validate_indices_0, x = var_5135_shape_cast_fp16_to_uint16)[name = string("gather_507_cast_uint16")]; + string gather_507_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_507_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_530_values0_0 = const()[name = string("concat_530_values0_0"), val = int32(1)]; + int32 concat_530_values1_0 = const()[name = string("concat_530_values1_0"), val = int32(1)]; + int32 concat_530_values2_0 = const()[name = string("concat_530_values2_0"), val = int32(0)]; + int32 concat_530_axis_0 = const()[name = string("concat_530_axis_0"), val = int32(0)]; + bool concat_530_interleave_0 = const()[name = string("concat_530_interleave_0"), val = bool(false)]; + int32 gather_507_cast_uint16_to_int32 = cast(dtype = gather_507_cast_uint16_to_int32_dtype_0, x = gather_507_cast_uint16)[name = string("cast_17")]; + tensor concat_530 = concat(axis = concat_530_axis_0, interleave = concat_530_interleave_0, values = (concat_530_values0_0, concat_530_values1_0, concat_530_values2_0, gather_507_cast_uint16_to_int32))[name = string("concat_530")]; + tensor causal_mask_57_begin_0 = const()[name = string("causal_mask_57_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_57_end_mask_0 = const()[name = string("causal_mask_57_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_57_cast_fp16 = slice_by_index(begin = causal_mask_57_begin_0, end = concat_530, end_mask = causal_mask_57_end_mask_0, x = causal_mask)[name = string("causal_mask_57_cast_fp16")]; + tensor attn_output_109_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_57_cast_fp16, key = key_states_111_cast_fp16, query = query_states_111_cast_fp16, value = value_states_111_cast_fp16)[name = string("attn_output_109_cast_fp16")]; + tensor var_5141_perm_0 = const()[name = string("op_5141_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_531_axis_0 = const()[name = string("concat_531_axis_0"), val = int32(0)]; + bool concat_531_interleave_0 = const()[name = string("concat_531_interleave_0"), val = bool(false)]; + int32 gather_491_cast_uint16_to_int32 = cast(dtype = gather_491_cast_uint16_to_int32_dtype_0, x = gather_491_cast_uint16)[name = string("cast_16")]; + tensor concat_531 = concat(axis = concat_531_axis_0, interleave = concat_531_interleave_0, values = (gather_490, gather_491_cast_uint16_to_int32, var_81))[name = string("concat_531")]; + tensor var_5141_cast_fp16 = transpose(perm = var_5141_perm_0, x = attn_output_109_cast_fp16)[name = string("transpose_8")]; + tensor input_217_cast_fp16 = reshape(shape = concat_531, x = var_5141_cast_fp16)[name = string("input_217_cast_fp16")]; + tensor model_model_layers_27_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70080576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70246528))))[name = string("model_model_layers_27_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_192_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_27_self_attn_o_proj_weight_to_fp16_quantized, x = input_217_cast_fp16)[name = string("linear_192_cast_fp16")]; + tensor hidden_states_829_cast_fp16 = add(x = hidden_states_809_cast_fp16, y = linear_192_cast_fp16)[name = string("hidden_states_829_cast_fp16")]; + fp16 var_76_promoted_55_to_fp16 = const()[name = string("op_76_promoted_55_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5150_cast_fp16 = pow(x = hidden_states_829_cast_fp16, y = var_76_promoted_55_to_fp16)[name = string("op_5150_cast_fp16")]; + tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([-1])]; + bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; + tensor variance_111_cast_fp16 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = var_5150_cast_fp16)[name = string("variance_111_cast_fp16")]; + fp16 var_5153_to_fp16 = const()[name = string("op_5153_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5154_cast_fp16 = add(x = variance_111_cast_fp16, y = var_5153_to_fp16)[name = string("op_5154_cast_fp16")]; + fp32 var_5155_epsilon_0 = const()[name = string("op_5155_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5155_cast_fp16 = rsqrt(epsilon = var_5155_epsilon_0, x = var_5154_cast_fp16)[name = string("op_5155_cast_fp16")]; + tensor hidden_states_833_cast_fp16 = mul(x = hidden_states_829_cast_fp16, y = var_5155_cast_fp16)[name = string("hidden_states_833_cast_fp16")]; + tensor model_model_layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70267328)))]; + tensor input_219_cast_fp16 = mul(x = model_model_layers_27_post_attention_layernorm_weight_to_fp16, y = hidden_states_833_cast_fp16)[name = string("input_219_cast_fp16")]; + tensor model_model_layers_27_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70268544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70710976))))[name = string("model_model_layers_27_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_193_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_27_mlp_gate_proj_weight_to_fp16_quantized, x = input_219_cast_fp16)[name = string("linear_193_cast_fp16")]; + tensor var_5167_cast_fp16 = silu(x = linear_193_cast_fp16)[name = string("op_5167_cast_fp16")]; + tensor model_model_layers_27_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(70766336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71208768))))[name = string("model_model_layers_27_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_194_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_27_mlp_up_proj_weight_to_fp16_quantized, x = input_219_cast_fp16)[name = string("linear_194_cast_fp16")]; + tensor input_223_cast_fp16 = mul(x = var_5167_cast_fp16, y = linear_194_cast_fp16)[name = string("input_223_cast_fp16")]; + tensor model_model_layers_27_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71264128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71706560))))[name = string("model_model_layers_27_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_195_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_27_mlp_down_proj_weight_to_fp16_quantized, x = input_223_cast_fp16)[name = string("linear_195_cast_fp16")]; + tensor hidden_states_839_cast_fp16 = add(x = hidden_states_829_cast_fp16, y = linear_195_cast_fp16)[name = string("hidden_states_839_cast_fp16")]; + fp16 var_76_promoted_56_to_fp16 = const()[name = string("op_76_promoted_56_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5180_cast_fp16 = pow(x = hidden_states_839_cast_fp16, y = var_76_promoted_56_to_fp16)[name = string("op_5180_cast_fp16")]; + tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([-1])]; + bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; + tensor variance_113_cast_fp16 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = var_5180_cast_fp16)[name = string("variance_113_cast_fp16")]; + fp16 var_5183_to_fp16 = const()[name = string("op_5183_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5184_cast_fp16 = add(x = variance_113_cast_fp16, y = var_5183_to_fp16)[name = string("op_5184_cast_fp16")]; + fp32 var_5185_epsilon_0 = const()[name = string("op_5185_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5185_cast_fp16 = rsqrt(epsilon = var_5185_epsilon_0, x = var_5184_cast_fp16)[name = string("op_5185_cast_fp16")]; + tensor hidden_states_843_cast_fp16 = mul(x = hidden_states_839_cast_fp16, y = var_5185_cast_fp16)[name = string("hidden_states_843_cast_fp16")]; + tensor model_model_layers_28_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71761920)))]; + tensor hidden_states_847_cast_fp16 = mul(x = model_model_layers_28_input_layernorm_weight_to_fp16, y = hidden_states_843_cast_fp16)[name = string("hidden_states_847_cast_fp16")]; + tensor var_5196_shape_cast_fp16 = shape(x = hidden_states_847_cast_fp16)[name = string("op_5196_shape_cast_fp16")]; + int32 gather_508 = const()[name = string("gather_508"), val = int32(1)]; + int32 gather_509_axis_0 = const()[name = string("gather_509_axis_0"), val = int32(0)]; + int32 gather_509_batch_dims_0 = const()[name = string("gather_509_batch_dims_0"), val = int32(0)]; + bool gather_509_validate_indices_0 = const()[name = string("gather_509_validate_indices_0"), val = bool(false)]; + string var_5196_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5196_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_509_to_uint16 = const()[name = string("select_509_to_uint16"), val = uint16(1)]; + tensor var_5196_shape_cast_fp16_to_uint16 = cast(dtype = var_5196_shape_cast_fp16_to_uint16_dtype_0, x = var_5196_shape_cast_fp16)[name = string("cast_15")]; + uint16 gather_509_cast_uint16 = gather(axis = gather_509_axis_0, batch_dims = gather_509_batch_dims_0, indices = select_509_to_uint16, validate_indices = gather_509_validate_indices_0, x = var_5196_shape_cast_fp16_to_uint16)[name = string("gather_509_cast_uint16")]; + string gather_509_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_509_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_28_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71763136))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71929088))))[name = string("model_model_layers_28_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_196_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_28_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_847_cast_fp16)[name = string("linear_196_cast_fp16")]; + tensor model_model_layers_28_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(71949888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72005248))))[name = string("model_model_layers_28_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_197_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_28_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_847_cast_fp16)[name = string("linear_197_cast_fp16")]; + tensor model_model_layers_28_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72012224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72067584))))[name = string("model_model_layers_28_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_198_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_28_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_847_cast_fp16)[name = string("linear_198_cast_fp16")]; + tensor concat_532x = const()[name = string("concat_532x"), val = tensor([1, -1, 9, 64])]; + tensor var_5205_cast_fp16 = reshape(shape = concat_532x, x = linear_196_cast_fp16)[name = string("op_5205_cast_fp16")]; + tensor q_57_perm_0 = const()[name = string("q_57_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_533x = const()[name = string("concat_533x"), val = tensor([1, -1, 3, 64])]; + tensor var_5208_cast_fp16 = reshape(shape = concat_533x, x = linear_197_cast_fp16)[name = string("op_5208_cast_fp16")]; + tensor k_57_perm_0 = const()[name = string("k_57_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_534x = const()[name = string("concat_534x"), val = tensor([1, -1, 3, 64])]; + tensor var_5211_cast_fp16 = reshape(shape = concat_534x, x = linear_198_cast_fp16)[name = string("op_5211_cast_fp16")]; + tensor v_state_57_perm_0 = const()[name = string("v_state_57_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_57_cast_fp16 = transpose(perm = q_57_perm_0, x = var_5205_cast_fp16)[name = string("transpose_7")]; + tensor var_5215_cast_fp16 = mul(x = q_57_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5215_cast_fp16")]; + tensor x1_113_begin_0 = const()[name = string("x1_113_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_113_end_0 = const()[name = string("x1_113_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_113_end_mask_0 = const()[name = string("x1_113_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_113_cast_fp16 = slice_by_index(begin = x1_113_begin_0, end = x1_113_end_0, end_mask = x1_113_end_mask_0, x = q_57_cast_fp16)[name = string("x1_113_cast_fp16")]; + tensor x2_113_begin_0 = const()[name = string("x2_113_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_113_end_0 = const()[name = string("x2_113_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_113_end_mask_0 = const()[name = string("x2_113_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_113_cast_fp16 = slice_by_index(begin = x2_113_begin_0, end = x2_113_end_0, end_mask = x2_113_end_mask_0, x = q_57_cast_fp16)[name = string("x2_113_cast_fp16")]; + fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5226_cast_fp16 = mul(x = x2_113_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_5226_cast_fp16")]; + bool var_5228_interleave_0 = const()[name = string("op_5228_interleave_0"), val = bool(false)]; + tensor var_5228_cast_fp16 = concat(axis = var_81, interleave = var_5228_interleave_0, values = (var_5226_cast_fp16, x1_113_cast_fp16))[name = string("op_5228_cast_fp16")]; + tensor var_5229_cast_fp16 = mul(x = var_5228_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5229_cast_fp16")]; + tensor query_states_115_cast_fp16 = add(x = var_5215_cast_fp16, y = var_5229_cast_fp16)[name = string("query_states_115_cast_fp16")]; + tensor k_57_cast_fp16 = transpose(perm = k_57_perm_0, x = var_5208_cast_fp16)[name = string("transpose_6")]; + tensor var_5231_cast_fp16 = mul(x = k_57_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5231_cast_fp16")]; + tensor x1_115_begin_0 = const()[name = string("x1_115_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_115_end_0 = const()[name = string("x1_115_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_115_end_mask_0 = const()[name = string("x1_115_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_115_cast_fp16 = slice_by_index(begin = x1_115_begin_0, end = x1_115_end_0, end_mask = x1_115_end_mask_0, x = k_57_cast_fp16)[name = string("x1_115_cast_fp16")]; + tensor x2_115_begin_0 = const()[name = string("x2_115_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_115_end_0 = const()[name = string("x2_115_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_115_end_mask_0 = const()[name = string("x2_115_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_115_cast_fp16 = slice_by_index(begin = x2_115_begin_0, end = x2_115_end_0, end_mask = x2_115_end_mask_0, x = k_57_cast_fp16)[name = string("x2_115_cast_fp16")]; + fp16 const_60_promoted_to_fp16 = const()[name = string("const_60_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5242_cast_fp16 = mul(x = x2_115_cast_fp16, y = const_60_promoted_to_fp16)[name = string("op_5242_cast_fp16")]; + bool var_5244_interleave_0 = const()[name = string("op_5244_interleave_0"), val = bool(false)]; + tensor var_5244_cast_fp16 = concat(axis = var_81, interleave = var_5244_interleave_0, values = (var_5242_cast_fp16, x1_115_cast_fp16))[name = string("op_5244_cast_fp16")]; + tensor var_5245_cast_fp16 = mul(x = var_5244_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5245_cast_fp16")]; + tensor k_state_57_cast_fp16 = add(x = var_5231_cast_fp16, y = var_5245_cast_fp16)[name = string("k_state_57_cast_fp16")]; + tensor expand_dims_336 = const()[name = string("expand_dims_336"), val = tensor([0])]; + tensor expand_dims_337 = const()[name = string("expand_dims_337"), val = tensor([0])]; + tensor expand_dims_339 = const()[name = string("expand_dims_339"), val = tensor([0])]; + tensor concat_537_values0_0 = const()[name = string("concat_537_values0_0"), val = tensor([28])]; + int32 concat_537_axis_0 = const()[name = string("concat_537_axis_0"), val = int32(0)]; + bool concat_537_interleave_0 = const()[name = string("concat_537_interleave_0"), val = bool(false)]; + tensor concat_537 = concat(axis = concat_537_axis_0, interleave = concat_537_interleave_0, values = (concat_537_values0_0, expand_dims_336, expand_dims_337, expand_dims_2, expand_dims_339))[name = string("concat_537")]; + tensor key_cache_internal_tensor_assign_29_stride_0 = const()[name = string("key_cache_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_29_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_29_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_537, begin_mask = key_cache_internal_tensor_assign_29_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_29_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_29_squeeze_mask_0, stride = key_cache_internal_tensor_assign_29_stride_0, update = k_state_57_cast_fp16, x = coreml_update_state_114)[name = string("key_cache_internal_tensor_assign_29_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_29_cast_fp16, input = key_cache)[name = string("coreml_update_state_116_write_state")]; + tensor coreml_update_state_116 = read_state(input = key_cache)[name = string("coreml_update_state_116")]; + tensor value_cache_internal_tensor_assign_29_stride_0 = const()[name = string("value_cache_internal_tensor_assign_29_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_29_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_29_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_29_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_29_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_57_cast_fp16 = transpose(perm = v_state_57_perm_0, x = var_5211_cast_fp16)[name = string("transpose_5")]; + tensor value_cache_internal_tensor_assign_29_cast_fp16 = slice_update(begin = concat_537, begin_mask = value_cache_internal_tensor_assign_29_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_29_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_29_squeeze_mask_0, stride = value_cache_internal_tensor_assign_29_stride_0, update = v_state_57_cast_fp16, x = coreml_update_state_115)[name = string("value_cache_internal_tensor_assign_29_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_29_cast_fp16, input = value_cache)[name = string("coreml_update_state_117_write_state")]; + tensor coreml_update_state_117 = read_state(input = value_cache)[name = string("coreml_update_state_117")]; + tensor var_5268_begin_0 = const()[name = string("op_5268_begin_0"), val = tensor([28, 0, 0, 0, 0])]; + tensor var_5268_end_0 = const()[name = string("op_5268_end_0"), val = tensor([29, 1, 3, 2048, 64])]; + tensor var_5268_end_mask_0 = const()[name = string("op_5268_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5268_squeeze_mask_0 = const()[name = string("op_5268_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5268_cast_fp16 = slice_by_index(begin = var_5268_begin_0, end = var_5268_end_0, end_mask = var_5268_end_mask_0, squeeze_mask = var_5268_squeeze_mask_0, x = coreml_update_state_116)[name = string("op_5268_cast_fp16")]; + tensor var_5271_begin_0 = const()[name = string("op_5271_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5271_end_mask_0 = const()[name = string("op_5271_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5271_cast_fp16 = slice_by_index(begin = var_5271_begin_0, end = concat_11, end_mask = var_5271_end_mask_0, x = var_5268_cast_fp16)[name = string("op_5271_cast_fp16")]; + tensor var_5273_begin_0 = const()[name = string("op_5273_begin_0"), val = tensor([28, 0, 0, 0, 0])]; + tensor var_5273_end_0 = const()[name = string("op_5273_end_0"), val = tensor([29, 1, 3, 2048, 64])]; + tensor var_5273_end_mask_0 = const()[name = string("op_5273_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5273_squeeze_mask_0 = const()[name = string("op_5273_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5273_cast_fp16 = slice_by_index(begin = var_5273_begin_0, end = var_5273_end_0, end_mask = var_5273_end_mask_0, squeeze_mask = var_5273_squeeze_mask_0, x = coreml_update_state_117)[name = string("op_5273_cast_fp16")]; + tensor var_5276_begin_0 = const()[name = string("op_5276_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5276_end_mask_0 = const()[name = string("op_5276_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5276_cast_fp16 = slice_by_index(begin = var_5276_begin_0, end = concat_11, end_mask = var_5276_end_mask_0, x = var_5273_cast_fp16)[name = string("op_5276_cast_fp16")]; + tensor var_5278_shape_cast_fp16 = shape(x = var_5271_cast_fp16)[name = string("op_5278_shape_cast_fp16")]; + int32 gather_517 = const()[name = string("gather_517"), val = int32(1)]; + int32 gather_518 = const()[name = string("gather_518"), val = int32(3)]; + int32 gather_519_axis_0 = const()[name = string("gather_519_axis_0"), val = int32(0)]; + int32 gather_519_batch_dims_0 = const()[name = string("gather_519_batch_dims_0"), val = int32(0)]; + bool gather_519_validate_indices_0 = const()[name = string("gather_519_validate_indices_0"), val = bool(false)]; + string var_5278_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5278_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_519_to_uint16 = const()[name = string("select_519_to_uint16"), val = uint16(2)]; + tensor var_5278_shape_cast_fp16_to_uint16 = cast(dtype = var_5278_shape_cast_fp16_to_uint16_dtype_0, x = var_5278_shape_cast_fp16)[name = string("cast_14")]; + uint16 gather_519_cast_uint16 = gather(axis = gather_519_axis_0, batch_dims = gather_519_batch_dims_0, indices = select_519_to_uint16, validate_indices = gather_519_validate_indices_0, x = var_5278_shape_cast_fp16_to_uint16)[name = string("gather_519_cast_uint16")]; + string gather_519_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_519_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_520 = const()[name = string("gather_520"), val = int32(64)]; + tensor var_5285_axes_0 = const()[name = string("op_5285_axes_0"), val = tensor([2])]; + tensor var_5285_cast_fp16 = expand_dims(axes = var_5285_axes_0, x = var_5271_cast_fp16)[name = string("op_5285_cast_fp16")]; + tensor shape_577_cast_fp16 = shape(x = var_5285_cast_fp16)[name = string("shape_577_cast_fp16")]; + int32 concat_545_axis_0 = const()[name = string("concat_545_axis_0"), val = int32(0)]; + bool concat_545_interleave_0 = const()[name = string("concat_545_interleave_0"), val = bool(false)]; + int32 gather_519_cast_uint16_to_int32 = cast(dtype = gather_519_cast_uint16_to_int32_dtype_0, x = gather_519_cast_uint16)[name = string("cast_13")]; + tensor concat_545 = concat(axis = concat_545_axis_0, interleave = concat_545_interleave_0, values = (gather_517, gather_518, var_85, gather_519_cast_uint16_to_int32, gather_520))[name = string("concat_545")]; + tensor real_div_56 = real_div(x = concat_545, y = shape_577_cast_fp16)[name = string("real_div_56")]; + tensor hidden_states_851_cast_fp16 = tile(reps = real_div_56, x = var_5285_cast_fp16)[name = string("hidden_states_851_cast_fp16")]; + tensor concat_546x = const()[name = string("concat_546x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_115_cast_fp16 = reshape(shape = concat_546x, x = hidden_states_851_cast_fp16)[name = string("key_states_115_cast_fp16")]; + tensor var_5295_shape_cast_fp16 = shape(x = var_5276_cast_fp16)[name = string("op_5295_shape_cast_fp16")]; + int32 gather_521 = const()[name = string("gather_521"), val = int32(1)]; + int32 gather_522 = const()[name = string("gather_522"), val = int32(3)]; + int32 gather_523_axis_0 = const()[name = string("gather_523_axis_0"), val = int32(0)]; + int32 gather_523_batch_dims_0 = const()[name = string("gather_523_batch_dims_0"), val = int32(0)]; + bool gather_523_validate_indices_0 = const()[name = string("gather_523_validate_indices_0"), val = bool(false)]; + string var_5295_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5295_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_523_to_uint16 = const()[name = string("select_523_to_uint16"), val = uint16(2)]; + tensor var_5295_shape_cast_fp16_to_uint16 = cast(dtype = var_5295_shape_cast_fp16_to_uint16_dtype_0, x = var_5295_shape_cast_fp16)[name = string("cast_12")]; + uint16 gather_523_cast_uint16 = gather(axis = gather_523_axis_0, batch_dims = gather_523_batch_dims_0, indices = select_523_to_uint16, validate_indices = gather_523_validate_indices_0, x = var_5295_shape_cast_fp16_to_uint16)[name = string("gather_523_cast_uint16")]; + string gather_523_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_523_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_524 = const()[name = string("gather_524"), val = int32(64)]; + tensor var_5302_axes_0 = const()[name = string("op_5302_axes_0"), val = tensor([2])]; + tensor var_5302_cast_fp16 = expand_dims(axes = var_5302_axes_0, x = var_5276_cast_fp16)[name = string("op_5302_cast_fp16")]; + tensor shape_582_cast_fp16 = shape(x = var_5302_cast_fp16)[name = string("shape_582_cast_fp16")]; + int32 concat_547_axis_0 = const()[name = string("concat_547_axis_0"), val = int32(0)]; + bool concat_547_interleave_0 = const()[name = string("concat_547_interleave_0"), val = bool(false)]; + int32 gather_523_cast_uint16_to_int32 = cast(dtype = gather_523_cast_uint16_to_int32_dtype_0, x = gather_523_cast_uint16)[name = string("cast_11")]; + tensor concat_547 = concat(axis = concat_547_axis_0, interleave = concat_547_interleave_0, values = (gather_521, gather_522, var_85, gather_523_cast_uint16_to_int32, gather_524))[name = string("concat_547")]; + tensor real_div_57 = real_div(x = concat_547, y = shape_582_cast_fp16)[name = string("real_div_57")]; + tensor hidden_states_855_cast_fp16 = tile(reps = real_div_57, x = var_5302_cast_fp16)[name = string("hidden_states_855_cast_fp16")]; + tensor concat_548x = const()[name = string("concat_548x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_115_cast_fp16 = reshape(shape = concat_548x, x = hidden_states_855_cast_fp16)[name = string("value_states_115_cast_fp16")]; + tensor var_5312_shape_cast_fp16 = shape(x = key_states_115_cast_fp16)[name = string("op_5312_shape_cast_fp16")]; + int32 gather_525_axis_0 = const()[name = string("gather_525_axis_0"), val = int32(0)]; + int32 gather_525_batch_dims_0 = const()[name = string("gather_525_batch_dims_0"), val = int32(0)]; + bool gather_525_validate_indices_0 = const()[name = string("gather_525_validate_indices_0"), val = bool(false)]; + string var_5312_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5312_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_525_to_uint16 = const()[name = string("select_525_to_uint16"), val = uint16(2)]; + tensor var_5312_shape_cast_fp16_to_uint16 = cast(dtype = var_5312_shape_cast_fp16_to_uint16_dtype_0, x = var_5312_shape_cast_fp16)[name = string("cast_10")]; + uint16 gather_525_cast_uint16 = gather(axis = gather_525_axis_0, batch_dims = gather_525_batch_dims_0, indices = select_525_to_uint16, validate_indices = gather_525_validate_indices_0, x = var_5312_shape_cast_fp16_to_uint16)[name = string("gather_525_cast_uint16")]; + string gather_525_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_525_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_549_values0_0 = const()[name = string("concat_549_values0_0"), val = int32(1)]; + int32 concat_549_values1_0 = const()[name = string("concat_549_values1_0"), val = int32(1)]; + int32 concat_549_values2_0 = const()[name = string("concat_549_values2_0"), val = int32(0)]; + int32 concat_549_axis_0 = const()[name = string("concat_549_axis_0"), val = int32(0)]; + bool concat_549_interleave_0 = const()[name = string("concat_549_interleave_0"), val = bool(false)]; + int32 gather_525_cast_uint16_to_int32 = cast(dtype = gather_525_cast_uint16_to_int32_dtype_0, x = gather_525_cast_uint16)[name = string("cast_9")]; + tensor concat_549 = concat(axis = concat_549_axis_0, interleave = concat_549_interleave_0, values = (concat_549_values0_0, concat_549_values1_0, concat_549_values2_0, gather_525_cast_uint16_to_int32))[name = string("concat_549")]; + tensor causal_mask_59_begin_0 = const()[name = string("causal_mask_59_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_59_end_mask_0 = const()[name = string("causal_mask_59_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_59_cast_fp16 = slice_by_index(begin = causal_mask_59_begin_0, end = concat_549, end_mask = causal_mask_59_end_mask_0, x = causal_mask)[name = string("causal_mask_59_cast_fp16")]; + tensor attn_output_113_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_59_cast_fp16, key = key_states_115_cast_fp16, query = query_states_115_cast_fp16, value = value_states_115_cast_fp16)[name = string("attn_output_113_cast_fp16")]; + tensor var_5318_perm_0 = const()[name = string("op_5318_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_550_axis_0 = const()[name = string("concat_550_axis_0"), val = int32(0)]; + bool concat_550_interleave_0 = const()[name = string("concat_550_interleave_0"), val = bool(false)]; + int32 gather_509_cast_uint16_to_int32 = cast(dtype = gather_509_cast_uint16_to_int32_dtype_0, x = gather_509_cast_uint16)[name = string("cast_8")]; + tensor concat_550 = concat(axis = concat_550_axis_0, interleave = concat_550_interleave_0, values = (gather_508, gather_509_cast_uint16_to_int32, var_81))[name = string("concat_550")]; + tensor var_5318_cast_fp16 = transpose(perm = var_5318_perm_0, x = attn_output_113_cast_fp16)[name = string("transpose_4")]; + tensor input_225_cast_fp16 = reshape(shape = concat_550, x = var_5318_cast_fp16)[name = string("input_225_cast_fp16")]; + tensor model_model_layers_28_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72074560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72240512))))[name = string("model_model_layers_28_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_199_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_28_self_attn_o_proj_weight_to_fp16_quantized, x = input_225_cast_fp16)[name = string("linear_199_cast_fp16")]; + tensor hidden_states_859_cast_fp16 = add(x = hidden_states_839_cast_fp16, y = linear_199_cast_fp16)[name = string("hidden_states_859_cast_fp16")]; + fp16 var_76_promoted_57_to_fp16 = const()[name = string("op_76_promoted_57_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5327_cast_fp16 = pow(x = hidden_states_859_cast_fp16, y = var_76_promoted_57_to_fp16)[name = string("op_5327_cast_fp16")]; + tensor variance_115_axes_0 = const()[name = string("variance_115_axes_0"), val = tensor([-1])]; + bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; + tensor variance_115_cast_fp16 = reduce_mean(axes = variance_115_axes_0, keep_dims = variance_115_keep_dims_0, x = var_5327_cast_fp16)[name = string("variance_115_cast_fp16")]; + fp16 var_5330_to_fp16 = const()[name = string("op_5330_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5331_cast_fp16 = add(x = variance_115_cast_fp16, y = var_5330_to_fp16)[name = string("op_5331_cast_fp16")]; + fp32 var_5332_epsilon_0 = const()[name = string("op_5332_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5332_cast_fp16 = rsqrt(epsilon = var_5332_epsilon_0, x = var_5331_cast_fp16)[name = string("op_5332_cast_fp16")]; + tensor hidden_states_863_cast_fp16 = mul(x = hidden_states_859_cast_fp16, y = var_5332_cast_fp16)[name = string("hidden_states_863_cast_fp16")]; + tensor model_model_layers_28_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_28_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72261312)))]; + tensor input_227_cast_fp16 = mul(x = model_model_layers_28_post_attention_layernorm_weight_to_fp16, y = hidden_states_863_cast_fp16)[name = string("input_227_cast_fp16")]; + tensor model_model_layers_28_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72262528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72704960))))[name = string("model_model_layers_28_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_200_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_28_mlp_gate_proj_weight_to_fp16_quantized, x = input_227_cast_fp16)[name = string("linear_200_cast_fp16")]; + tensor var_5344_cast_fp16 = silu(x = linear_200_cast_fp16)[name = string("op_5344_cast_fp16")]; + tensor model_model_layers_28_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(72760320))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73202752))))[name = string("model_model_layers_28_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_201_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_28_mlp_up_proj_weight_to_fp16_quantized, x = input_227_cast_fp16)[name = string("linear_201_cast_fp16")]; + tensor input_231_cast_fp16 = mul(x = var_5344_cast_fp16, y = linear_201_cast_fp16)[name = string("input_231_cast_fp16")]; + tensor model_model_layers_28_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73258112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73700544))))[name = string("model_model_layers_28_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_202_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_28_mlp_down_proj_weight_to_fp16_quantized, x = input_231_cast_fp16)[name = string("linear_202_cast_fp16")]; + tensor hidden_states_869_cast_fp16 = add(x = hidden_states_859_cast_fp16, y = linear_202_cast_fp16)[name = string("hidden_states_869_cast_fp16")]; + fp16 var_76_promoted_58_to_fp16 = const()[name = string("op_76_promoted_58_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5357_cast_fp16 = pow(x = hidden_states_869_cast_fp16, y = var_76_promoted_58_to_fp16)[name = string("op_5357_cast_fp16")]; + tensor variance_117_axes_0 = const()[name = string("variance_117_axes_0"), val = tensor([-1])]; + bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; + tensor variance_117_cast_fp16 = reduce_mean(axes = variance_117_axes_0, keep_dims = variance_117_keep_dims_0, x = var_5357_cast_fp16)[name = string("variance_117_cast_fp16")]; + fp16 var_5360_to_fp16 = const()[name = string("op_5360_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5361_cast_fp16 = add(x = variance_117_cast_fp16, y = var_5360_to_fp16)[name = string("op_5361_cast_fp16")]; + fp32 var_5362_epsilon_0 = const()[name = string("op_5362_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5362_cast_fp16 = rsqrt(epsilon = var_5362_epsilon_0, x = var_5361_cast_fp16)[name = string("op_5362_cast_fp16")]; + tensor hidden_states_873_cast_fp16 = mul(x = hidden_states_869_cast_fp16, y = var_5362_cast_fp16)[name = string("hidden_states_873_cast_fp16")]; + tensor model_model_layers_29_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73755904)))]; + tensor hidden_states_877_cast_fp16 = mul(x = model_model_layers_29_input_layernorm_weight_to_fp16, y = hidden_states_873_cast_fp16)[name = string("hidden_states_877_cast_fp16")]; + tensor var_5373_shape_cast_fp16 = shape(x = hidden_states_877_cast_fp16)[name = string("op_5373_shape_cast_fp16")]; + int32 gather_526 = const()[name = string("gather_526"), val = int32(1)]; + int32 gather_527_axis_0 = const()[name = string("gather_527_axis_0"), val = int32(0)]; + int32 gather_527_batch_dims_0 = const()[name = string("gather_527_batch_dims_0"), val = int32(0)]; + bool gather_527_validate_indices_0 = const()[name = string("gather_527_validate_indices_0"), val = bool(false)]; + string var_5373_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5373_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_527_to_uint16 = const()[name = string("select_527_to_uint16"), val = uint16(1)]; + tensor var_5373_shape_cast_fp16_to_uint16 = cast(dtype = var_5373_shape_cast_fp16_to_uint16_dtype_0, x = var_5373_shape_cast_fp16)[name = string("cast_7")]; + uint16 gather_527_cast_uint16 = gather(axis = gather_527_axis_0, batch_dims = gather_527_batch_dims_0, indices = select_527_to_uint16, validate_indices = gather_527_validate_indices_0, x = var_5373_shape_cast_fp16_to_uint16)[name = string("gather_527_cast_uint16")]; + string gather_527_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_527_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + tensor model_model_layers_29_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73757120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73923072))))[name = string("model_model_layers_29_self_attn_q_proj_weight_to_fp16_quantized")]; + tensor linear_203_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_29_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_877_cast_fp16)[name = string("linear_203_cast_fp16")]; + tensor model_model_layers_29_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73943872))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(73999232))))[name = string("model_model_layers_29_self_attn_k_proj_weight_to_fp16_quantized")]; + tensor linear_204_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_29_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_877_cast_fp16)[name = string("linear_204_cast_fp16")]; + tensor model_model_layers_29_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74006208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74061568))))[name = string("model_model_layers_29_self_attn_v_proj_weight_to_fp16_quantized")]; + tensor linear_205_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_29_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_877_cast_fp16)[name = string("linear_205_cast_fp16")]; + tensor concat_551x = const()[name = string("concat_551x"), val = tensor([1, -1, 9, 64])]; + tensor var_5382_cast_fp16 = reshape(shape = concat_551x, x = linear_203_cast_fp16)[name = string("op_5382_cast_fp16")]; + tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_552x = const()[name = string("concat_552x"), val = tensor([1, -1, 3, 64])]; + tensor var_5385_cast_fp16 = reshape(shape = concat_552x, x = linear_204_cast_fp16)[name = string("op_5385_cast_fp16")]; + tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor concat_553x = const()[name = string("concat_553x"), val = tensor([1, -1, 3, 64])]; + tensor var_5388_cast_fp16 = reshape(shape = concat_553x, x = linear_205_cast_fp16)[name = string("op_5388_cast_fp16")]; + tensor v_state_perm_0 = const()[name = string("v_state_perm_0"), val = tensor([0, 2, 1, 3])]; + tensor q_cast_fp16 = transpose(perm = q_perm_0, x = var_5382_cast_fp16)[name = string("transpose_3")]; + tensor var_5392_cast_fp16 = mul(x = q_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5392_cast_fp16")]; + tensor x1_117_begin_0 = const()[name = string("x1_117_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_117_end_0 = const()[name = string("x1_117_end_0"), val = tensor([1, 9, 0, 32])]; + tensor x1_117_end_mask_0 = const()[name = string("x1_117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_117_cast_fp16 = slice_by_index(begin = x1_117_begin_0, end = x1_117_end_0, end_mask = x1_117_end_mask_0, x = q_cast_fp16)[name = string("x1_117_cast_fp16")]; + tensor x2_117_begin_0 = const()[name = string("x2_117_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_117_end_0 = const()[name = string("x2_117_end_0"), val = tensor([1, 9, 0, 64])]; + tensor x2_117_end_mask_0 = const()[name = string("x2_117_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_117_cast_fp16 = slice_by_index(begin = x2_117_begin_0, end = x2_117_end_0, end_mask = x2_117_end_mask_0, x = q_cast_fp16)[name = string("x2_117_cast_fp16")]; + fp16 const_61_promoted_to_fp16 = const()[name = string("const_61_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5403_cast_fp16 = mul(x = x2_117_cast_fp16, y = const_61_promoted_to_fp16)[name = string("op_5403_cast_fp16")]; + bool var_5405_interleave_0 = const()[name = string("op_5405_interleave_0"), val = bool(false)]; + tensor var_5405_cast_fp16 = concat(axis = var_81, interleave = var_5405_interleave_0, values = (var_5403_cast_fp16, x1_117_cast_fp16))[name = string("op_5405_cast_fp16")]; + tensor var_5406_cast_fp16 = mul(x = var_5405_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5406_cast_fp16")]; + tensor query_states_cast_fp16 = add(x = var_5392_cast_fp16, y = var_5406_cast_fp16)[name = string("query_states_cast_fp16")]; + tensor k_cast_fp16 = transpose(perm = k_perm_0, x = var_5385_cast_fp16)[name = string("transpose_2")]; + tensor var_5408_cast_fp16 = mul(x = k_cast_fp16, y = cos_7_cast_fp16)[name = string("op_5408_cast_fp16")]; + tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 3, 0, 32])]; + tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; + tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; + tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; + tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 3, 0, 64])]; + tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; + tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; + fp16 const_62_promoted_to_fp16 = const()[name = string("const_62_promoted_to_fp16"), val = fp16(-0x1p+0)]; + tensor var_5419_cast_fp16 = mul(x = x2_cast_fp16, y = const_62_promoted_to_fp16)[name = string("op_5419_cast_fp16")]; + bool var_5421_interleave_0 = const()[name = string("op_5421_interleave_0"), val = bool(false)]; + tensor var_5421_cast_fp16 = concat(axis = var_81, interleave = var_5421_interleave_0, values = (var_5419_cast_fp16, x1_cast_fp16))[name = string("op_5421_cast_fp16")]; + tensor var_5422_cast_fp16 = mul(x = var_5421_cast_fp16, y = sin_7_cast_fp16)[name = string("op_5422_cast_fp16")]; + tensor k_state_cast_fp16 = add(x = var_5408_cast_fp16, y = var_5422_cast_fp16)[name = string("k_state_cast_fp16")]; + tensor expand_dims_348 = const()[name = string("expand_dims_348"), val = tensor([0])]; + tensor expand_dims_349 = const()[name = string("expand_dims_349"), val = tensor([0])]; + tensor expand_dims_351 = const()[name = string("expand_dims_351"), val = tensor([0])]; + tensor concat_556_values0_0 = const()[name = string("concat_556_values0_0"), val = tensor([29])]; + int32 concat_556_axis_0 = const()[name = string("concat_556_axis_0"), val = int32(0)]; + bool concat_556_interleave_0 = const()[name = string("concat_556_interleave_0"), val = bool(false)]; + tensor concat_556 = concat(axis = concat_556_axis_0, interleave = concat_556_interleave_0, values = (concat_556_values0_0, expand_dims_348, expand_dims_349, expand_dims_2, expand_dims_351))[name = string("concat_556")]; + tensor key_cache_internal_tensor_assign_30_stride_0 = const()[name = string("key_cache_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor key_cache_internal_tensor_assign_30_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_30_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor key_cache_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor key_cache_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_556, begin_mask = key_cache_internal_tensor_assign_30_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_30_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_30_squeeze_mask_0, stride = key_cache_internal_tensor_assign_30_stride_0, update = k_state_cast_fp16, x = coreml_update_state_116)[name = string("key_cache_internal_tensor_assign_30_cast_fp16")]; + write_state(data = key_cache_internal_tensor_assign_30_cast_fp16, input = key_cache)[name = string("coreml_update_state_118_write_state")]; + tensor coreml_update_state_118 = read_state(input = key_cache)[name = string("coreml_update_state_118")]; + tensor value_cache_internal_tensor_assign_30_stride_0 = const()[name = string("value_cache_internal_tensor_assign_30_stride_0"), val = tensor([1, 1, 1, 1, 1])]; + tensor value_cache_internal_tensor_assign_30_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_begin_mask_0"), val = tensor([false, false, false, false, false])]; + tensor value_cache_internal_tensor_assign_30_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_end_mask_0"), val = tensor([false, true, false, false, true])]; + tensor value_cache_internal_tensor_assign_30_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_30_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor v_state_cast_fp16 = transpose(perm = v_state_perm_0, x = var_5388_cast_fp16)[name = string("transpose_1")]; + tensor value_cache_internal_tensor_assign_30_cast_fp16 = slice_update(begin = concat_556, begin_mask = value_cache_internal_tensor_assign_30_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_30_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_30_squeeze_mask_0, stride = value_cache_internal_tensor_assign_30_stride_0, update = v_state_cast_fp16, x = coreml_update_state_117)[name = string("value_cache_internal_tensor_assign_30_cast_fp16")]; + write_state(data = value_cache_internal_tensor_assign_30_cast_fp16, input = value_cache)[name = string("coreml_update_state_119_write_state")]; + tensor coreml_update_state_119 = read_state(input = value_cache)[name = string("coreml_update_state_119")]; + tensor var_5445_begin_0 = const()[name = string("op_5445_begin_0"), val = tensor([29, 0, 0, 0, 0])]; + tensor var_5445_end_0 = const()[name = string("op_5445_end_0"), val = tensor([30, 1, 3, 2048, 64])]; + tensor var_5445_end_mask_0 = const()[name = string("op_5445_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5445_squeeze_mask_0 = const()[name = string("op_5445_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5445_cast_fp16 = slice_by_index(begin = var_5445_begin_0, end = var_5445_end_0, end_mask = var_5445_end_mask_0, squeeze_mask = var_5445_squeeze_mask_0, x = coreml_update_state_118)[name = string("op_5445_cast_fp16")]; + tensor var_5448_begin_0 = const()[name = string("op_5448_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5448_end_mask_0 = const()[name = string("op_5448_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5448_cast_fp16 = slice_by_index(begin = var_5448_begin_0, end = concat_11, end_mask = var_5448_end_mask_0, x = var_5445_cast_fp16)[name = string("op_5448_cast_fp16")]; + tensor var_5450_begin_0 = const()[name = string("op_5450_begin_0"), val = tensor([29, 0, 0, 0, 0])]; + tensor var_5450_end_0 = const()[name = string("op_5450_end_0"), val = tensor([30, 1, 3, 2048, 64])]; + tensor var_5450_end_mask_0 = const()[name = string("op_5450_end_mask_0"), val = tensor([false, true, true, true, true])]; + tensor var_5450_squeeze_mask_0 = const()[name = string("op_5450_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; + tensor var_5450_cast_fp16 = slice_by_index(begin = var_5450_begin_0, end = var_5450_end_0, end_mask = var_5450_end_mask_0, squeeze_mask = var_5450_squeeze_mask_0, x = coreml_update_state_119)[name = string("op_5450_cast_fp16")]; + tensor var_5453_begin_0 = const()[name = string("op_5453_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5453_end_mask_0 = const()[name = string("op_5453_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_5453_cast_fp16 = slice_by_index(begin = var_5453_begin_0, end = concat_11, end_mask = var_5453_end_mask_0, x = var_5450_cast_fp16)[name = string("op_5453_cast_fp16")]; + tensor var_5455_shape_cast_fp16 = shape(x = var_5448_cast_fp16)[name = string("op_5455_shape_cast_fp16")]; + int32 gather_535 = const()[name = string("gather_535"), val = int32(1)]; + int32 gather_536 = const()[name = string("gather_536"), val = int32(3)]; + int32 gather_537_axis_0 = const()[name = string("gather_537_axis_0"), val = int32(0)]; + int32 gather_537_batch_dims_0 = const()[name = string("gather_537_batch_dims_0"), val = int32(0)]; + bool gather_537_validate_indices_0 = const()[name = string("gather_537_validate_indices_0"), val = bool(false)]; + string var_5455_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5455_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_537_to_uint16 = const()[name = string("select_537_to_uint16"), val = uint16(2)]; + tensor var_5455_shape_cast_fp16_to_uint16 = cast(dtype = var_5455_shape_cast_fp16_to_uint16_dtype_0, x = var_5455_shape_cast_fp16)[name = string("cast_6")]; + uint16 gather_537_cast_uint16 = gather(axis = gather_537_axis_0, batch_dims = gather_537_batch_dims_0, indices = select_537_to_uint16, validate_indices = gather_537_validate_indices_0, x = var_5455_shape_cast_fp16_to_uint16)[name = string("gather_537_cast_uint16")]; + string gather_537_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_537_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_538 = const()[name = string("gather_538"), val = int32(64)]; + tensor var_5462_axes_0 = const()[name = string("op_5462_axes_0"), val = tensor([2])]; + tensor var_5462_cast_fp16 = expand_dims(axes = var_5462_axes_0, x = var_5448_cast_fp16)[name = string("op_5462_cast_fp16")]; + tensor shape_597_cast_fp16 = shape(x = var_5462_cast_fp16)[name = string("shape_597_cast_fp16")]; + int32 concat_564_axis_0 = const()[name = string("concat_564_axis_0"), val = int32(0)]; + bool concat_564_interleave_0 = const()[name = string("concat_564_interleave_0"), val = bool(false)]; + int32 gather_537_cast_uint16_to_int32 = cast(dtype = gather_537_cast_uint16_to_int32_dtype_0, x = gather_537_cast_uint16)[name = string("cast_5")]; + tensor concat_564 = concat(axis = concat_564_axis_0, interleave = concat_564_interleave_0, values = (gather_535, gather_536, var_85, gather_537_cast_uint16_to_int32, gather_538))[name = string("concat_564")]; + tensor real_div_58 = real_div(x = concat_564, y = shape_597_cast_fp16)[name = string("real_div_58")]; + tensor hidden_states_881_cast_fp16 = tile(reps = real_div_58, x = var_5462_cast_fp16)[name = string("hidden_states_881_cast_fp16")]; + tensor concat_565x = const()[name = string("concat_565x"), val = tensor([1, 9, -1, 64])]; + tensor key_states_cast_fp16 = reshape(shape = concat_565x, x = hidden_states_881_cast_fp16)[name = string("key_states_cast_fp16")]; + tensor var_5472_shape_cast_fp16 = shape(x = var_5453_cast_fp16)[name = string("op_5472_shape_cast_fp16")]; + int32 gather_539 = const()[name = string("gather_539"), val = int32(1)]; + int32 gather_540 = const()[name = string("gather_540"), val = int32(3)]; + int32 gather_541_axis_0 = const()[name = string("gather_541_axis_0"), val = int32(0)]; + int32 gather_541_batch_dims_0 = const()[name = string("gather_541_batch_dims_0"), val = int32(0)]; + bool gather_541_validate_indices_0 = const()[name = string("gather_541_validate_indices_0"), val = bool(false)]; + string var_5472_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5472_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_541_to_uint16 = const()[name = string("select_541_to_uint16"), val = uint16(2)]; + tensor var_5472_shape_cast_fp16_to_uint16 = cast(dtype = var_5472_shape_cast_fp16_to_uint16_dtype_0, x = var_5472_shape_cast_fp16)[name = string("cast_4")]; + uint16 gather_541_cast_uint16 = gather(axis = gather_541_axis_0, batch_dims = gather_541_batch_dims_0, indices = select_541_to_uint16, validate_indices = gather_541_validate_indices_0, x = var_5472_shape_cast_fp16_to_uint16)[name = string("gather_541_cast_uint16")]; + string gather_541_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_541_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 gather_542 = const()[name = string("gather_542"), val = int32(64)]; + tensor var_5479_axes_0 = const()[name = string("op_5479_axes_0"), val = tensor([2])]; + tensor var_5479_cast_fp16 = expand_dims(axes = var_5479_axes_0, x = var_5453_cast_fp16)[name = string("op_5479_cast_fp16")]; + tensor shape_602_cast_fp16 = shape(x = var_5479_cast_fp16)[name = string("shape_602_cast_fp16")]; + int32 concat_566_axis_0 = const()[name = string("concat_566_axis_0"), val = int32(0)]; + bool concat_566_interleave_0 = const()[name = string("concat_566_interleave_0"), val = bool(false)]; + int32 gather_541_cast_uint16_to_int32 = cast(dtype = gather_541_cast_uint16_to_int32_dtype_0, x = gather_541_cast_uint16)[name = string("cast_3")]; + tensor concat_566 = concat(axis = concat_566_axis_0, interleave = concat_566_interleave_0, values = (gather_539, gather_540, var_85, gather_541_cast_uint16_to_int32, gather_542))[name = string("concat_566")]; + tensor real_div_59 = real_div(x = concat_566, y = shape_602_cast_fp16)[name = string("real_div_59")]; + tensor hidden_states_885_cast_fp16 = tile(reps = real_div_59, x = var_5479_cast_fp16)[name = string("hidden_states_885_cast_fp16")]; + tensor concat_567x = const()[name = string("concat_567x"), val = tensor([1, 9, -1, 64])]; + tensor value_states_cast_fp16 = reshape(shape = concat_567x, x = hidden_states_885_cast_fp16)[name = string("value_states_cast_fp16")]; + tensor var_5489_shape_cast_fp16 = shape(x = key_states_cast_fp16)[name = string("op_5489_shape_cast_fp16")]; + int32 gather_543_axis_0 = const()[name = string("gather_543_axis_0"), val = int32(0)]; + int32 gather_543_batch_dims_0 = const()[name = string("gather_543_batch_dims_0"), val = int32(0)]; + bool gather_543_validate_indices_0 = const()[name = string("gather_543_validate_indices_0"), val = bool(false)]; + string var_5489_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5489_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; + uint16 select_543_to_uint16 = const()[name = string("select_543_to_uint16"), val = uint16(2)]; + tensor var_5489_shape_cast_fp16_to_uint16 = cast(dtype = var_5489_shape_cast_fp16_to_uint16_dtype_0, x = var_5489_shape_cast_fp16)[name = string("cast_2")]; + uint16 gather_543_cast_uint16 = gather(axis = gather_543_axis_0, batch_dims = gather_543_batch_dims_0, indices = select_543_to_uint16, validate_indices = gather_543_validate_indices_0, x = var_5489_shape_cast_fp16_to_uint16)[name = string("gather_543_cast_uint16")]; + string gather_543_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_543_cast_uint16_to_int32_dtype_0"), val = string("int32")]; + int32 concat_568_values0_0 = const()[name = string("concat_568_values0_0"), val = int32(1)]; + int32 concat_568_values1_0 = const()[name = string("concat_568_values1_0"), val = int32(1)]; + int32 concat_568_values2_0 = const()[name = string("concat_568_values2_0"), val = int32(0)]; + int32 concat_568_axis_0 = const()[name = string("concat_568_axis_0"), val = int32(0)]; + bool concat_568_interleave_0 = const()[name = string("concat_568_interleave_0"), val = bool(false)]; + int32 gather_543_cast_uint16_to_int32 = cast(dtype = gather_543_cast_uint16_to_int32_dtype_0, x = gather_543_cast_uint16)[name = string("cast_1")]; + tensor concat_568 = concat(axis = concat_568_axis_0, interleave = concat_568_interleave_0, values = (concat_568_values0_0, concat_568_values1_0, concat_568_values2_0, gather_543_cast_uint16_to_int32))[name = string("concat_568")]; + tensor causal_mask_begin_0 = const()[name = string("causal_mask_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor causal_mask_end_mask_0 = const()[name = string("causal_mask_end_mask_0"), val = tensor([true, true, true, false])]; + tensor causal_mask_cast_fp16 = slice_by_index(begin = causal_mask_begin_0, end = concat_568, end_mask = causal_mask_end_mask_0, x = causal_mask)[name = string("causal_mask_cast_fp16")]; + tensor attn_output_117_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_cast_fp16, key = key_states_cast_fp16, query = query_states_cast_fp16, value = value_states_cast_fp16)[name = string("attn_output_117_cast_fp16")]; + tensor var_5495_perm_0 = const()[name = string("op_5495_perm_0"), val = tensor([0, 2, 1, 3])]; + int32 concat_569_axis_0 = const()[name = string("concat_569_axis_0"), val = int32(0)]; + bool concat_569_interleave_0 = const()[name = string("concat_569_interleave_0"), val = bool(false)]; + int32 gather_527_cast_uint16_to_int32 = cast(dtype = gather_527_cast_uint16_to_int32_dtype_0, x = gather_527_cast_uint16)[name = string("cast_0")]; + tensor concat_569 = concat(axis = concat_569_axis_0, interleave = concat_569_interleave_0, values = (gather_526, gather_527_cast_uint16_to_int32, var_81))[name = string("concat_569")]; + tensor var_5495_cast_fp16 = transpose(perm = var_5495_perm_0, x = attn_output_117_cast_fp16)[name = string("transpose_0")]; + tensor input_233_cast_fp16 = reshape(shape = concat_569, x = var_5495_cast_fp16)[name = string("input_233_cast_fp16")]; + tensor model_model_layers_29_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74068544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74234496))))[name = string("model_model_layers_29_self_attn_o_proj_weight_to_fp16_quantized")]; + tensor linear_206_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_29_self_attn_o_proj_weight_to_fp16_quantized, x = input_233_cast_fp16)[name = string("linear_206_cast_fp16")]; + tensor hidden_states_889_cast_fp16 = add(x = hidden_states_869_cast_fp16, y = linear_206_cast_fp16)[name = string("hidden_states_889_cast_fp16")]; + fp16 var_76_promoted_59_to_fp16 = const()[name = string("op_76_promoted_59_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5504_cast_fp16 = pow(x = hidden_states_889_cast_fp16, y = var_76_promoted_59_to_fp16)[name = string("op_5504_cast_fp16")]; + tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([-1])]; + bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; + tensor variance_119_cast_fp16 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = var_5504_cast_fp16)[name = string("variance_119_cast_fp16")]; + fp16 var_5507_to_fp16 = const()[name = string("op_5507_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5508_cast_fp16 = add(x = variance_119_cast_fp16, y = var_5507_to_fp16)[name = string("op_5508_cast_fp16")]; + fp32 var_5509_epsilon_0 = const()[name = string("op_5509_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5509_cast_fp16 = rsqrt(epsilon = var_5509_epsilon_0, x = var_5508_cast_fp16)[name = string("op_5509_cast_fp16")]; + tensor hidden_states_893_cast_fp16 = mul(x = hidden_states_889_cast_fp16, y = var_5509_cast_fp16)[name = string("hidden_states_893_cast_fp16")]; + tensor model_model_layers_29_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_29_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74255296)))]; + tensor input_235_cast_fp16 = mul(x = model_model_layers_29_post_attention_layernorm_weight_to_fp16, y = hidden_states_893_cast_fp16)[name = string("input_235_cast_fp16")]; + tensor model_model_layers_29_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74256512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74698944))))[name = string("model_model_layers_29_mlp_gate_proj_weight_to_fp16_quantized")]; + tensor linear_207_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_29_mlp_gate_proj_weight_to_fp16_quantized, x = input_235_cast_fp16)[name = string("linear_207_cast_fp16")]; + tensor var_5521_cast_fp16 = silu(x = linear_207_cast_fp16)[name = string("op_5521_cast_fp16")]; + tensor model_model_layers_29_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(74754304))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75196736))))[name = string("model_model_layers_29_mlp_up_proj_weight_to_fp16_quantized")]; + tensor linear_208_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_29_mlp_up_proj_weight_to_fp16_quantized, x = input_235_cast_fp16)[name = string("linear_208_cast_fp16")]; + tensor input_239_cast_fp16 = mul(x = var_5521_cast_fp16, y = linear_208_cast_fp16)[name = string("input_239_cast_fp16")]; + tensor model_model_layers_29_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75252096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75694528))))[name = string("model_model_layers_29_mlp_down_proj_weight_to_fp16_quantized")]; + tensor linear_209_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_29_mlp_down_proj_weight_to_fp16_quantized, x = input_239_cast_fp16)[name = string("linear_209_cast_fp16")]; + tensor hidden_states_899_cast_fp16 = add(x = hidden_states_889_cast_fp16, y = linear_209_cast_fp16)[name = string("hidden_states_899_cast_fp16")]; + fp16 var_76_promoted_60_to_fp16 = const()[name = string("op_76_promoted_60_to_fp16"), val = fp16(0x1p+1)]; + tensor var_5530_cast_fp16 = pow(x = hidden_states_899_cast_fp16, y = var_76_promoted_60_to_fp16)[name = string("op_5530_cast_fp16")]; + tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; + bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; + tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_5530_cast_fp16)[name = string("variance_cast_fp16")]; + fp16 var_5533_to_fp16 = const()[name = string("op_5533_to_fp16"), val = fp16(0x1.5p-17)]; + tensor var_5534_cast_fp16 = add(x = variance_cast_fp16, y = var_5533_to_fp16)[name = string("op_5534_cast_fp16")]; + fp32 var_5535_epsilon_0 = const()[name = string("op_5535_epsilon_0"), val = fp32(0x1.197998p-40)]; + tensor var_5535_cast_fp16 = rsqrt(epsilon = var_5535_epsilon_0, x = var_5534_cast_fp16)[name = string("op_5535_cast_fp16")]; + tensor hidden_states_903_cast_fp16 = mul(x = hidden_states_899_cast_fp16, y = var_5535_cast_fp16)[name = string("hidden_states_903_cast_fp16")]; + tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75749888)))]; + tensor hidden_states_cast_fp16 = mul(x = model_model_norm_weight_to_fp16, y = hidden_states_903_cast_fp16)[name = string("hidden_states_cast_fp16")]; + tensor linear_210_bias_0_to_fp16 = const()[name = string("linear_210_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(75751104)))]; + tensor logits = linear(bias = linear_210_bias_0_to_fp16, weight = model_model_embed_tokens_weight_to_fp16_quantized, x = hidden_states_cast_fp16)[name = string("linear_210_cast_fp16")]; + } -> (logits); +} \ No newline at end of file