program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3402.3.2"}, {"coremlc-version", "3402.4.1"}})] { func main(tensor causal_mask, tensor input_ids, state> key_cache, state> value_cache) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1, 1]}, {"input_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 1], [1, 2048], [1, 2048]]}, {"input_ids", [[1, 1], [1, 2048]]}})))] { tensor var_7_shape_cast_fp16 = shape(x = causal_mask)[name = string("op_7_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_7_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_7_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(3)]; tensor var_7_shape_cast_fp16_to_int16 = cast(dtype = var_7_shape_cast_fp16_to_int16_dtype_0, x = var_7_shape_cast_fp16)[name = string("cast_152")]; int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_7_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor var_10_shape = shape(x = input_ids)[name = string("op_10_shape")]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; string var_10_shape_to_uint16_dtype_0 = const()[name = string("op_10_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; tensor var_10_shape_to_uint16 = cast(dtype = var_10_shape_to_uint16_dtype_0, x = var_10_shape)[name = string("cast_151")]; uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_10_shape_to_uint16)[name = string("gather_1_cast_uint16")]; string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_149")]; int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_150")]; int32 past_seen_tokens = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("past_seen_tokens")]; int32 var_64 = const()[name = string("op_64"), val = int32(-1)]; int32 var_76 = const()[name = string("op_76"), val = int32(7)]; int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)]; int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)]; bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)]; tensor model_model_embed_tokens_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(68067456))))[name = string("model_model_embed_tokens_weight_to_fp16_quantized")]; tensor inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_to_fp16_quantized)[name = string("inputs_embeds_cast_fp16")]; tensor var_154_shape_cast_fp16 = shape(x = inputs_embeds_cast_fp16)[name = string("op_154_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; string var_154_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_154_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; tensor var_154_shape_cast_fp16_to_uint16 = cast(dtype = var_154_shape_cast_fp16_to_uint16_dtype_0, x = var_154_shape_cast_fp16)[name = string("cast_148")]; uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_154_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_147")]; int32 var_156 = add(x = past_seen_tokens, y = gather_2_cast_uint16_to_int32)[name = string("op_156")]; int32 const_0 = const()[name = string("const_0"), val = int32(1)]; tensor cache_position = range_1d(end = var_156, start = past_seen_tokens, step = const_0)[name = string("cache_position")]; tensor position_ids_axes_0 = const()[name = string("position_ids_axes_0"), val = tensor([0])]; tensor position_ids = expand_dims(axes = position_ids_axes_0, x = cache_position)[name = string("position_ids")]; tensor var_169_axes_0 = const()[name = string("op_169_axes_0"), val = tensor([1])]; tensor var_169 = expand_dims(axes = var_169_axes_0, x = position_ids)[name = string("op_169")]; bool var_174_transpose_x_0 = const()[name = string("op_174_transpose_x_0"), val = bool(false)]; bool var_174_transpose_y_0 = const()[name = string("op_174_transpose_y_0"), val = bool(false)]; tensor const_2_to_fp16 = const()[name = string("const_2_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76575936)))]; string cast_2_to_fp16_dtype_0 = const()[name = string("cast_2_to_fp16_dtype_0"), val = string("fp16")]; tensor var_169_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = var_169)[name = string("cast_146")]; tensor var_174_cast_fp16 = matmul(transpose_x = var_174_transpose_x_0, transpose_y = var_174_transpose_y_0, x = const_2_to_fp16, y = var_169_to_fp16)[name = string("op_174_cast_fp16")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_174_cast_fp16)[name = string("transpose_96")]; tensor emb_cast_fp16 = concat(axis = var_64, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; tensor cos_1_cast_fp16 = cos(x = emb_cast_fp16)[name = string("cos_1_cast_fp16")]; tensor sin_1_cast_fp16 = sin(x = emb_cast_fp16)[name = string("sin_1_cast_fp16")]; fp16 var_70_promoted_to_fp16 = const()[name = string("op_70_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_195_cast_fp16 = pow(x = inputs_embeds_cast_fp16, y = var_70_promoted_to_fp16)[name = string("op_195_cast_fp16")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_195_cast_fp16)[name = string("variance_1_cast_fp16")]; fp16 var_198_to_fp16 = const()[name = string("op_198_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_199_cast_fp16 = add(x = variance_1_cast_fp16, y = var_198_to_fp16)[name = string("op_199_cast_fp16")]; fp32 var_200_epsilon_0 = const()[name = string("op_200_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_200_cast_fp16 = rsqrt(epsilon = var_200_epsilon_0, x = var_199_cast_fp16)[name = string("op_200_cast_fp16")]; tensor hidden_states_3_cast_fp16 = mul(x = inputs_embeds_cast_fp16, y = var_200_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76576064)))]; tensor hidden_states_7_cast_fp16 = mul(x = model_model_layers_0_input_layernorm_weight_to_fp16, y = hidden_states_3_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; tensor model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76577920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(76979392))))[name = string("model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77029632)))]; tensor linear_0_cast_fp16 = linear(bias = model_model_layers_0_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_0_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77031488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77088896))))[name = string("model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_0_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_0_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77096128)))]; tensor linear_1_cast_fp16 = linear(bias = model_model_layers_0_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_0_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77096448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77153856))))[name = string("model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77161088)))]; tensor linear_2_cast_fp16 = linear(bias = model_model_layers_0_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_0_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_7_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor concat_0x = const()[name = string("concat_0x"), val = tensor([1, -1, 14, 64])]; tensor var_223_cast_fp16 = reshape(shape = concat_0x, x = linear_0_cast_fp16)[name = string("op_223_cast_fp16")]; tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_1x = const()[name = string("concat_1x"), val = tensor([1, -1, 2, 64])]; tensor var_226_cast_fp16 = reshape(shape = concat_1x, x = linear_1_cast_fp16)[name = string("op_226_cast_fp16")]; tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 2, 64])]; tensor var_229_cast_fp16 = reshape(shape = concat_2x, x = linear_2_cast_fp16)[name = string("op_229_cast_fp16")]; tensor v_state_1_perm_0 = const()[name = string("v_state_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_7_axes_0 = const()[name = string("cos_7_axes_0"), val = tensor([1])]; tensor cos_7_cast_fp16 = expand_dims(axes = cos_7_axes_0, x = cos_1_cast_fp16)[name = string("cos_7_cast_fp16")]; tensor sin_7_axes_0 = const()[name = string("sin_7_axes_0"), val = tensor([1])]; tensor sin_7_cast_fp16 = expand_dims(axes = sin_7_axes_0, x = sin_1_cast_fp16)[name = string("sin_7_cast_fp16")]; tensor q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_223_cast_fp16)[name = string("transpose_95")]; tensor var_233_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_233_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; fp16 const_3_promoted_to_fp16 = const()[name = string("const_3_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_244_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_3_promoted_to_fp16)[name = string("op_244_cast_fp16")]; bool var_246_interleave_0 = const()[name = string("op_246_interleave_0"), val = bool(false)]; tensor var_246_cast_fp16 = concat(axis = var_64, interleave = var_246_interleave_0, values = (var_244_cast_fp16, x1_1_cast_fp16))[name = string("op_246_cast_fp16")]; tensor var_247_cast_fp16 = mul(x = var_246_cast_fp16, y = sin_7_cast_fp16)[name = string("op_247_cast_fp16")]; tensor query_states_3_cast_fp16 = add(x = var_233_cast_fp16, y = var_247_cast_fp16)[name = string("query_states_3_cast_fp16")]; tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_226_cast_fp16)[name = string("transpose_94")]; tensor var_249_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_7_cast_fp16)[name = string("op_249_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_260_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_260_cast_fp16")]; bool var_262_interleave_0 = const()[name = string("op_262_interleave_0"), val = bool(false)]; tensor var_262_cast_fp16 = concat(axis = var_64, interleave = var_262_interleave_0, values = (var_260_cast_fp16, x1_3_cast_fp16))[name = string("op_262_cast_fp16")]; tensor var_263_cast_fp16 = mul(x = var_262_cast_fp16, y = sin_7_cast_fp16)[name = string("op_263_cast_fp16")]; tensor k_state_1_cast_fp16 = add(x = var_249_cast_fp16, y = var_263_cast_fp16)[name = string("k_state_1_cast_fp16")]; tensor var_265_shape = shape(x = cache_position)[name = string("op_265_shape")]; int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; string var_265_shape_to_uint16_dtype_0 = const()[name = string("op_265_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(0)]; tensor var_265_shape_to_uint16 = cast(dtype = var_265_shape_to_uint16_dtype_0, x = var_265_shape)[name = string("cast_145")]; uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_265_shape_to_uint16)[name = string("gather_10_cast_uint16")]; string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_144")]; int32 end_1 = add(x = past_seen_tokens, y = gather_10_cast_uint16_to_int32)[name = string("end_1")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2_axes_0 = const()[name = string("expand_dims_2_axes_0"), val = tensor([0])]; tensor expand_dims_2 = expand_dims(axes = expand_dims_2_axes_0, x = past_seen_tokens)[name = string("expand_dims_2")]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([2])]; tensor expand_dims_5_axes_0 = const()[name = string("expand_dims_5_axes_0"), val = tensor([0])]; tensor expand_dims_5 = expand_dims(axes = expand_dims_5_axes_0, x = end_1)[name = string("expand_dims_5")]; tensor concat_5_values0_0 = const()[name = string("concat_5_values0_0"), val = tensor([0])]; int32 concat_5_axis_0 = const()[name = string("concat_5_axis_0"), val = int32(0)]; bool concat_5_interleave_0 = const()[name = string("concat_5_interleave_0"), val = bool(false)]; tensor concat_5 = concat(axis = concat_5_axis_0, interleave = concat_5_interleave_0, values = (concat_5_values0_0, expand_dims_0, expand_dims_1, expand_dims_2, expand_dims_3))[name = string("concat_5")]; tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; tensor concat_6_values1_0 = const()[name = string("concat_6_values1_0"), val = tensor([0])]; tensor concat_6_values4_0 = const()[name = string("concat_6_values4_0"), val = tensor([0])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, concat_6_values1_0, expand_dims_4, expand_dims_5, concat_6_values4_0))[name = string("concat_6")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_48_write_state")]; tensor coreml_update_state_48 = read_state(input = key_cache)[name = string("coreml_update_state_48")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_1_cast_fp16 = transpose(perm = v_state_1_perm_0, x = var_229_cast_fp16)[name = string("transpose_93")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_5, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_49_write_state")]; tensor coreml_update_state_49 = read_state(input = value_cache)[name = string("coreml_update_state_49")]; tensor var_286_begin_0 = const()[name = string("op_286_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_286_end_0 = const()[name = string("op_286_end_0"), val = tensor([1, 1, 2, 2048, 64])]; tensor var_286_end_mask_0 = const()[name = string("op_286_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_286_squeeze_mask_0 = const()[name = string("op_286_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, squeeze_mask = var_286_squeeze_mask_0, x = coreml_update_state_48)[name = string("op_286_cast_fp16")]; int32 concat_11_values0_0 = const()[name = string("concat_11_values0_0"), val = int32(1)]; int32 concat_11_values1_0 = const()[name = string("concat_11_values1_0"), val = int32(2)]; int32 concat_11_values3_0 = const()[name = string("concat_11_values3_0"), val = int32(64)]; int32 concat_11_axis_0 = const()[name = string("concat_11_axis_0"), val = int32(0)]; bool concat_11_interleave_0 = const()[name = string("concat_11_interleave_0"), val = bool(false)]; tensor concat_11 = concat(axis = concat_11_axis_0, interleave = concat_11_interleave_0, values = (concat_11_values0_0, concat_11_values1_0, end_1, concat_11_values3_0))[name = string("concat_11")]; tensor var_289_begin_0 = const()[name = string("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_289_end_mask_0 = const()[name = string("op_289_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = concat_11, end_mask = var_289_end_mask_0, x = var_286_cast_fp16)[name = string("op_289_cast_fp16")]; tensor var_291_begin_0 = const()[name = string("op_291_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_291_end_0 = const()[name = string("op_291_end_0"), val = tensor([1, 1, 2, 2048, 64])]; tensor var_291_end_mask_0 = const()[name = string("op_291_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_291_squeeze_mask_0 = const()[name = string("op_291_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_291_cast_fp16 = slice_by_index(begin = var_291_begin_0, end = var_291_end_0, end_mask = var_291_end_mask_0, squeeze_mask = var_291_squeeze_mask_0, x = coreml_update_state_49)[name = string("op_291_cast_fp16")]; tensor var_294_begin_0 = const()[name = string("op_294_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_294_end_mask_0 = const()[name = string("op_294_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = concat_11, end_mask = var_294_end_mask_0, x = var_291_cast_fp16)[name = string("op_294_cast_fp16")]; tensor var_296_shape_cast_fp16 = shape(x = var_289_cast_fp16)[name = string("op_296_shape_cast_fp16")]; int32 gather_13 = const()[name = string("gather_13"), val = int32(1)]; int32 gather_14 = const()[name = string("gather_14"), val = int32(2)]; int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; string var_296_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_296_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(2)]; tensor var_296_shape_cast_fp16_to_uint16 = cast(dtype = var_296_shape_cast_fp16_to_uint16_dtype_0, x = var_296_shape_cast_fp16)[name = string("cast_143")]; uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_296_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_16 = const()[name = string("gather_16"), val = int32(64)]; tensor var_303_axes_0 = const()[name = string("op_303_axes_0"), val = tensor([2])]; tensor var_303_cast_fp16 = expand_dims(axes = var_303_axes_0, x = var_289_cast_fp16)[name = string("op_303_cast_fp16")]; tensor shape_17_cast_fp16 = shape(x = var_303_cast_fp16)[name = string("shape_17_cast_fp16")]; int32 concat_13_axis_0 = const()[name = string("concat_13_axis_0"), val = int32(0)]; bool concat_13_interleave_0 = const()[name = string("concat_13_interleave_0"), val = bool(false)]; int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_142")]; tensor concat_13 = concat(axis = concat_13_axis_0, interleave = concat_13_interleave_0, values = (gather_13, gather_14, var_76, gather_15_cast_uint16_to_int32, gather_16))[name = string("concat_13")]; tensor real_div_0 = real_div(x = concat_13, y = shape_17_cast_fp16)[name = string("real_div_0")]; tensor hidden_states_11_cast_fp16 = tile(reps = real_div_0, x = var_303_cast_fp16)[name = string("hidden_states_11_cast_fp16")]; tensor concat_14x = const()[name = string("concat_14x"), val = tensor([1, 14, -1, 64])]; tensor key_states_3_cast_fp16 = reshape(shape = concat_14x, x = hidden_states_11_cast_fp16)[name = string("key_states_3_cast_fp16")]; tensor var_313_shape_cast_fp16 = shape(x = var_294_cast_fp16)[name = string("op_313_shape_cast_fp16")]; int32 gather_17 = const()[name = string("gather_17"), val = int32(1)]; int32 gather_18 = const()[name = string("gather_18"), val = int32(2)]; int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; string var_313_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_313_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(2)]; tensor var_313_shape_cast_fp16_to_uint16 = cast(dtype = var_313_shape_cast_fp16_to_uint16_dtype_0, x = var_313_shape_cast_fp16)[name = string("cast_141")]; uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_313_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_20 = const()[name = string("gather_20"), val = int32(64)]; tensor var_320_axes_0 = const()[name = string("op_320_axes_0"), val = tensor([2])]; tensor var_320_cast_fp16 = expand_dims(axes = var_320_axes_0, x = var_294_cast_fp16)[name = string("op_320_cast_fp16")]; tensor shape_22_cast_fp16 = shape(x = var_320_cast_fp16)[name = string("shape_22_cast_fp16")]; int32 concat_15_axis_0 = const()[name = string("concat_15_axis_0"), val = int32(0)]; bool concat_15_interleave_0 = const()[name = string("concat_15_interleave_0"), val = bool(false)]; int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_140")]; tensor concat_15 = concat(axis = concat_15_axis_0, interleave = concat_15_interleave_0, values = (gather_17, gather_18, var_76, gather_19_cast_uint16_to_int32, gather_20))[name = string("concat_15")]; tensor real_div_1 = real_div(x = concat_15, y = shape_22_cast_fp16)[name = string("real_div_1")]; tensor hidden_states_15_cast_fp16 = tile(reps = real_div_1, x = var_320_cast_fp16)[name = string("hidden_states_15_cast_fp16")]; tensor concat_16x = const()[name = string("concat_16x"), val = tensor([1, 14, -1, 64])]; tensor value_states_3_cast_fp16 = reshape(shape = concat_16x, x = hidden_states_15_cast_fp16)[name = string("value_states_3_cast_fp16")]; tensor var_330_shape_cast_fp16 = shape(x = key_states_3_cast_fp16)[name = string("op_330_shape_cast_fp16")]; int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; string var_330_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_330_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(2)]; tensor var_330_shape_cast_fp16_to_uint16 = cast(dtype = var_330_shape_cast_fp16_to_uint16_dtype_0, x = var_330_shape_cast_fp16)[name = string("cast_139")]; uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_330_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_17_values0_0 = const()[name = string("concat_17_values0_0"), val = int32(1)]; int32 concat_17_values1_0 = const()[name = string("concat_17_values1_0"), val = int32(1)]; int32 concat_17_values2_0 = const()[name = string("concat_17_values2_0"), val = int32(0)]; int32 concat_17_axis_0 = const()[name = string("concat_17_axis_0"), val = int32(0)]; bool concat_17_interleave_0 = const()[name = string("concat_17_interleave_0"), val = bool(false)]; int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_138")]; tensor concat_17 = concat(axis = concat_17_axis_0, interleave = concat_17_interleave_0, values = (concat_17_values0_0, concat_17_values1_0, concat_17_values2_0, gather_21_cast_uint16_to_int32))[name = string("concat_17")]; tensor causal_mask_3_begin_0 = const()[name = string("causal_mask_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_3_end_mask_0 = const()[name = string("causal_mask_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_3_cast_fp16 = slice_by_index(begin = causal_mask_3_begin_0, end = concat_17, end_mask = causal_mask_3_end_mask_0, x = causal_mask)[name = string("causal_mask_3_cast_fp16")]; tensor attn_output_1_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_3_cast_fp16, key = key_states_3_cast_fp16, query = query_states_3_cast_fp16, value = value_states_3_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_336_perm_0 = const()[name = string("op_336_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_18x = const()[name = string("concat_18x"), val = tensor([1, -1, 896])]; tensor var_336_cast_fp16 = transpose(perm = var_336_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_92")]; tensor input_1_cast_fp16 = reshape(shape = concat_18x, x = var_336_cast_fp16)[name = string("input_1_cast_fp16")]; tensor model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77161408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77562880))))[name = string("model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_3_bias_0_to_fp16 = const()[name = string("linear_3_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77613120)))]; tensor linear_3_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_to_fp16_quantized, x = input_1_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_19_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_19_cast_fp16")]; fp16 var_70_promoted_1_to_fp16 = const()[name = string("op_70_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_345_cast_fp16 = pow(x = hidden_states_19_cast_fp16, y = var_70_promoted_1_to_fp16)[name = string("op_345_cast_fp16")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_345_cast_fp16)[name = string("variance_3_cast_fp16")]; fp16 var_348_to_fp16 = const()[name = string("op_348_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_349_cast_fp16 = add(x = variance_3_cast_fp16, y = var_348_to_fp16)[name = string("op_349_cast_fp16")]; fp32 var_350_epsilon_0 = const()[name = string("op_350_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_350_cast_fp16 = rsqrt(epsilon = var_350_epsilon_0, x = var_349_cast_fp16)[name = string("op_350_cast_fp16")]; tensor hidden_states_23_cast_fp16 = mul(x = hidden_states_19_cast_fp16, y = var_350_cast_fp16)[name = string("hidden_states_23_cast_fp16")]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77614976)))]; tensor input_3_cast_fp16 = mul(x = model_model_layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_23_cast_fp16)[name = string("input_3_cast_fp16")]; tensor model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(77616832))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(79795968))))[name = string("model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80068416)))]; tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_gate_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_362_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_362_cast_fp16")]; tensor model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(80078208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82257344))))[name = string("model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_up_proj_weight_to_fp16_quantized, x = input_3_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_7_cast_fp16 = mul(x = var_362_cast_fp16, y = linear_5_cast_fp16)[name = string("input_7_cast_fp16")]; tensor model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(82529792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84708928))))[name = string("model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_6_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_0_mlp_down_proj_weight_to_fp16_quantized, x = input_7_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_29_cast_fp16 = add(x = hidden_states_19_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_29_cast_fp16")]; fp16 var_70_promoted_2_to_fp16 = const()[name = string("op_70_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_375_cast_fp16 = pow(x = hidden_states_29_cast_fp16, y = var_70_promoted_2_to_fp16)[name = string("op_375_cast_fp16")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_375_cast_fp16)[name = string("variance_5_cast_fp16")]; fp16 var_378_to_fp16 = const()[name = string("op_378_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_379_cast_fp16 = add(x = variance_5_cast_fp16, y = var_378_to_fp16)[name = string("op_379_cast_fp16")]; fp32 var_380_epsilon_0 = const()[name = string("op_380_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_380_cast_fp16 = rsqrt(epsilon = var_380_epsilon_0, x = var_379_cast_fp16)[name = string("op_380_cast_fp16")]; tensor hidden_states_33_cast_fp16 = mul(x = hidden_states_29_cast_fp16, y = var_380_cast_fp16)[name = string("hidden_states_33_cast_fp16")]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84981376)))]; tensor hidden_states_37_cast_fp16 = mul(x = model_model_layers_1_input_layernorm_weight_to_fp16, y = hidden_states_33_cast_fp16)[name = string("hidden_states_37_cast_fp16")]; tensor model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(84983232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85384704))))[name = string("model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85434944)))]; tensor linear_7_cast_fp16 = linear(bias = model_model_layers_1_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_1_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85436800))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85494208))))[name = string("model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_1_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_1_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85501440)))]; tensor linear_8_cast_fp16 = linear(bias = model_model_layers_1_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_1_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85501760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85559168))))[name = string("model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85566400)))]; tensor linear_9_cast_fp16 = linear(bias = model_model_layers_1_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_1_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_37_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor concat_19x = const()[name = string("concat_19x"), val = tensor([1, -1, 14, 64])]; tensor var_403_cast_fp16 = reshape(shape = concat_19x, x = linear_7_cast_fp16)[name = string("op_403_cast_fp16")]; tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_20x = const()[name = string("concat_20x"), val = tensor([1, -1, 2, 64])]; tensor var_406_cast_fp16 = reshape(shape = concat_20x, x = linear_8_cast_fp16)[name = string("op_406_cast_fp16")]; tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_21x = const()[name = string("concat_21x"), val = tensor([1, -1, 2, 64])]; tensor var_409_cast_fp16 = reshape(shape = concat_21x, x = linear_9_cast_fp16)[name = string("op_409_cast_fp16")]; tensor v_state_3_perm_0 = const()[name = string("v_state_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_3_cast_fp16 = transpose(perm = q_3_perm_0, x = var_403_cast_fp16)[name = string("transpose_91")]; tensor var_413_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_413_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_424_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_424_cast_fp16")]; bool var_426_interleave_0 = const()[name = string("op_426_interleave_0"), val = bool(false)]; tensor var_426_cast_fp16 = concat(axis = var_64, interleave = var_426_interleave_0, values = (var_424_cast_fp16, x1_5_cast_fp16))[name = string("op_426_cast_fp16")]; tensor var_427_cast_fp16 = mul(x = var_426_cast_fp16, y = sin_7_cast_fp16)[name = string("op_427_cast_fp16")]; tensor query_states_7_cast_fp16 = add(x = var_413_cast_fp16, y = var_427_cast_fp16)[name = string("query_states_7_cast_fp16")]; tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = var_406_cast_fp16)[name = string("transpose_90")]; tensor var_429_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_7_cast_fp16)[name = string("op_429_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_440_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_440_cast_fp16")]; bool var_442_interleave_0 = const()[name = string("op_442_interleave_0"), val = bool(false)]; tensor var_442_cast_fp16 = concat(axis = var_64, interleave = var_442_interleave_0, values = (var_440_cast_fp16, x1_7_cast_fp16))[name = string("op_442_cast_fp16")]; tensor var_443_cast_fp16 = mul(x = var_442_cast_fp16, y = sin_7_cast_fp16)[name = string("op_443_cast_fp16")]; tensor k_state_3_cast_fp16 = add(x = var_429_cast_fp16, y = var_443_cast_fp16)[name = string("k_state_3_cast_fp16")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor concat_24_values0_0 = const()[name = string("concat_24_values0_0"), val = tensor([1])]; int32 concat_24_axis_0 = const()[name = string("concat_24_axis_0"), val = int32(0)]; bool concat_24_interleave_0 = const()[name = string("concat_24_interleave_0"), val = bool(false)]; tensor concat_24 = concat(axis = concat_24_axis_0, interleave = concat_24_interleave_0, values = (concat_24_values0_0, expand_dims_12, expand_dims_13, expand_dims_2, expand_dims_15))[name = string("concat_24")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3_cast_fp16, x = coreml_update_state_48)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_50_write_state")]; tensor coreml_update_state_50 = read_state(input = key_cache)[name = string("coreml_update_state_50")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_3_cast_fp16 = transpose(perm = v_state_3_perm_0, x = var_409_cast_fp16)[name = string("transpose_89")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_24, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3_cast_fp16, x = coreml_update_state_49)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_51_write_state")]; tensor coreml_update_state_51 = read_state(input = value_cache)[name = string("coreml_update_state_51")]; tensor var_466_begin_0 = const()[name = string("op_466_begin_0"), val = tensor([1, 0, 0, 0, 0])]; tensor var_466_end_0 = const()[name = string("op_466_end_0"), val = tensor([2, 1, 2, 2048, 64])]; tensor var_466_end_mask_0 = const()[name = string("op_466_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_466_squeeze_mask_0 = const()[name = string("op_466_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_466_cast_fp16 = slice_by_index(begin = var_466_begin_0, end = var_466_end_0, end_mask = var_466_end_mask_0, squeeze_mask = var_466_squeeze_mask_0, x = coreml_update_state_50)[name = string("op_466_cast_fp16")]; tensor var_469_begin_0 = const()[name = string("op_469_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_469_end_mask_0 = const()[name = string("op_469_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_469_cast_fp16 = slice_by_index(begin = var_469_begin_0, end = concat_11, end_mask = var_469_end_mask_0, x = var_466_cast_fp16)[name = string("op_469_cast_fp16")]; tensor var_471_begin_0 = const()[name = string("op_471_begin_0"), val = tensor([1, 0, 0, 0, 0])]; tensor var_471_end_0 = const()[name = string("op_471_end_0"), val = tensor([2, 1, 2, 2048, 64])]; tensor var_471_end_mask_0 = const()[name = string("op_471_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_471_squeeze_mask_0 = const()[name = string("op_471_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_471_cast_fp16 = slice_by_index(begin = var_471_begin_0, end = var_471_end_0, end_mask = var_471_end_mask_0, squeeze_mask = var_471_squeeze_mask_0, x = coreml_update_state_51)[name = string("op_471_cast_fp16")]; tensor var_474_begin_0 = const()[name = string("op_474_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_474_end_mask_0 = const()[name = string("op_474_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_474_cast_fp16 = slice_by_index(begin = var_474_begin_0, end = concat_11, end_mask = var_474_end_mask_0, x = var_471_cast_fp16)[name = string("op_474_cast_fp16")]; tensor var_476_shape_cast_fp16 = shape(x = var_469_cast_fp16)[name = string("op_476_shape_cast_fp16")]; int32 gather_31 = const()[name = string("gather_31"), val = int32(1)]; int32 gather_32 = const()[name = string("gather_32"), val = int32(2)]; int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; string var_476_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_476_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(2)]; tensor var_476_shape_cast_fp16_to_uint16 = cast(dtype = var_476_shape_cast_fp16_to_uint16_dtype_0, x = var_476_shape_cast_fp16)[name = string("cast_137")]; uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_476_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_34 = const()[name = string("gather_34"), val = int32(64)]; tensor var_483_axes_0 = const()[name = string("op_483_axes_0"), val = tensor([2])]; tensor var_483_cast_fp16 = expand_dims(axes = var_483_axes_0, x = var_469_cast_fp16)[name = string("op_483_cast_fp16")]; tensor shape_37_cast_fp16 = shape(x = var_483_cast_fp16)[name = string("shape_37_cast_fp16")]; int32 concat_32_axis_0 = const()[name = string("concat_32_axis_0"), val = int32(0)]; bool concat_32_interleave_0 = const()[name = string("concat_32_interleave_0"), val = bool(false)]; int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_136")]; tensor concat_32 = concat(axis = concat_32_axis_0, interleave = concat_32_interleave_0, values = (gather_31, gather_32, var_76, gather_33_cast_uint16_to_int32, gather_34))[name = string("concat_32")]; tensor real_div_2 = real_div(x = concat_32, y = shape_37_cast_fp16)[name = string("real_div_2")]; tensor hidden_states_41_cast_fp16 = tile(reps = real_div_2, x = var_483_cast_fp16)[name = string("hidden_states_41_cast_fp16")]; tensor concat_33x = const()[name = string("concat_33x"), val = tensor([1, 14, -1, 64])]; tensor key_states_7_cast_fp16 = reshape(shape = concat_33x, x = hidden_states_41_cast_fp16)[name = string("key_states_7_cast_fp16")]; tensor var_493_shape_cast_fp16 = shape(x = var_474_cast_fp16)[name = string("op_493_shape_cast_fp16")]; int32 gather_35 = const()[name = string("gather_35"), val = int32(1)]; int32 gather_36 = const()[name = string("gather_36"), val = int32(2)]; int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; string var_493_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_493_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(2)]; tensor var_493_shape_cast_fp16_to_uint16 = cast(dtype = var_493_shape_cast_fp16_to_uint16_dtype_0, x = var_493_shape_cast_fp16)[name = string("cast_135")]; uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_493_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_38 = const()[name = string("gather_38"), val = int32(64)]; tensor var_500_axes_0 = const()[name = string("op_500_axes_0"), val = tensor([2])]; tensor var_500_cast_fp16 = expand_dims(axes = var_500_axes_0, x = var_474_cast_fp16)[name = string("op_500_cast_fp16")]; tensor shape_42_cast_fp16 = shape(x = var_500_cast_fp16)[name = string("shape_42_cast_fp16")]; int32 concat_34_axis_0 = const()[name = string("concat_34_axis_0"), val = int32(0)]; bool concat_34_interleave_0 = const()[name = string("concat_34_interleave_0"), val = bool(false)]; int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_134")]; tensor concat_34 = concat(axis = concat_34_axis_0, interleave = concat_34_interleave_0, values = (gather_35, gather_36, var_76, gather_37_cast_uint16_to_int32, gather_38))[name = string("concat_34")]; tensor real_div_3 = real_div(x = concat_34, y = shape_42_cast_fp16)[name = string("real_div_3")]; tensor hidden_states_45_cast_fp16 = tile(reps = real_div_3, x = var_500_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; tensor concat_35x = const()[name = string("concat_35x"), val = tensor([1, 14, -1, 64])]; tensor value_states_7_cast_fp16 = reshape(shape = concat_35x, x = hidden_states_45_cast_fp16)[name = string("value_states_7_cast_fp16")]; tensor var_510_shape_cast_fp16 = shape(x = key_states_7_cast_fp16)[name = string("op_510_shape_cast_fp16")]; int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; string var_510_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_510_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(2)]; tensor var_510_shape_cast_fp16_to_uint16 = cast(dtype = var_510_shape_cast_fp16_to_uint16_dtype_0, x = var_510_shape_cast_fp16)[name = string("cast_133")]; uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_510_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_36_values0_0 = const()[name = string("concat_36_values0_0"), val = int32(1)]; int32 concat_36_values1_0 = const()[name = string("concat_36_values1_0"), val = int32(1)]; int32 concat_36_values2_0 = const()[name = string("concat_36_values2_0"), val = int32(0)]; int32 concat_36_axis_0 = const()[name = string("concat_36_axis_0"), val = int32(0)]; bool concat_36_interleave_0 = const()[name = string("concat_36_interleave_0"), val = bool(false)]; int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_132")]; tensor concat_36 = concat(axis = concat_36_axis_0, interleave = concat_36_interleave_0, values = (concat_36_values0_0, concat_36_values1_0, concat_36_values2_0, gather_39_cast_uint16_to_int32))[name = string("concat_36")]; tensor causal_mask_5_begin_0 = const()[name = string("causal_mask_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_5_end_mask_0 = const()[name = string("causal_mask_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_5_cast_fp16 = slice_by_index(begin = causal_mask_5_begin_0, end = concat_36, end_mask = causal_mask_5_end_mask_0, x = causal_mask)[name = string("causal_mask_5_cast_fp16")]; tensor attn_output_5_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_5_cast_fp16, key = key_states_7_cast_fp16, query = query_states_7_cast_fp16, value = value_states_7_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_516_perm_0 = const()[name = string("op_516_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_37x = const()[name = string("concat_37x"), val = tensor([1, -1, 896])]; tensor var_516_cast_fp16 = transpose(perm = var_516_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_88")]; tensor input_9_cast_fp16 = reshape(shape = concat_37x, x = var_516_cast_fp16)[name = string("input_9_cast_fp16")]; tensor model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85566720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(85968192))))[name = string("model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_10_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_to_fp16_quantized, x = input_9_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor hidden_states_49_cast_fp16 = add(x = hidden_states_29_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; fp16 var_70_promoted_3_to_fp16 = const()[name = string("op_70_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_525_cast_fp16 = pow(x = hidden_states_49_cast_fp16, y = var_70_promoted_3_to_fp16)[name = string("op_525_cast_fp16")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_525_cast_fp16)[name = string("variance_7_cast_fp16")]; fp16 var_528_to_fp16 = const()[name = string("op_528_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_529_cast_fp16 = add(x = variance_7_cast_fp16, y = var_528_to_fp16)[name = string("op_529_cast_fp16")]; fp32 var_530_epsilon_0 = const()[name = string("op_530_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_530_cast_fp16 = rsqrt(epsilon = var_530_epsilon_0, x = var_529_cast_fp16)[name = string("op_530_cast_fp16")]; tensor hidden_states_53_cast_fp16 = mul(x = hidden_states_49_cast_fp16, y = var_530_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86018432)))]; tensor input_11_cast_fp16 = mul(x = model_model_layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_53_cast_fp16)[name = string("input_11_cast_fp16")]; tensor model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(86020288))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88199424))))[name = string("model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_gate_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor var_542_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_542_cast_fp16")]; tensor model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(88471872))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90651008))))[name = string("model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_up_proj_weight_to_fp16_quantized, x = input_11_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor input_15_cast_fp16 = mul(x = var_542_cast_fp16, y = linear_12_cast_fp16)[name = string("input_15_cast_fp16")]; tensor model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(90923456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93102592))))[name = string("model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_13_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_1_mlp_down_proj_weight_to_fp16_quantized, x = input_15_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor hidden_states_59_cast_fp16 = add(x = hidden_states_49_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; fp16 var_70_promoted_4_to_fp16 = const()[name = string("op_70_promoted_4_to_fp16"), val = fp16(0x1p+1)]; tensor var_555_cast_fp16 = pow(x = hidden_states_59_cast_fp16, y = var_70_promoted_4_to_fp16)[name = string("op_555_cast_fp16")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_555_cast_fp16)[name = string("variance_9_cast_fp16")]; fp16 var_558_to_fp16 = const()[name = string("op_558_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_559_cast_fp16 = add(x = variance_9_cast_fp16, y = var_558_to_fp16)[name = string("op_559_cast_fp16")]; fp32 var_560_epsilon_0 = const()[name = string("op_560_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_560_cast_fp16 = rsqrt(epsilon = var_560_epsilon_0, x = var_559_cast_fp16)[name = string("op_560_cast_fp16")]; tensor hidden_states_63_cast_fp16 = mul(x = hidden_states_59_cast_fp16, y = var_560_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93375040)))]; tensor hidden_states_67_cast_fp16 = mul(x = model_model_layers_2_input_layernorm_weight_to_fp16, y = hidden_states_63_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; tensor model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93376896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93778368))))[name = string("model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93828608)))]; tensor linear_14_cast_fp16 = linear(bias = model_model_layers_2_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_2_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93830464))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93887872))))[name = string("model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_2_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_2_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93895104)))]; tensor linear_15_cast_fp16 = linear(bias = model_model_layers_2_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_2_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93895424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93952832))))[name = string("model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93960064)))]; tensor linear_16_cast_fp16 = linear(bias = model_model_layers_2_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_2_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_67_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor concat_38x = const()[name = string("concat_38x"), val = tensor([1, -1, 14, 64])]; tensor var_583_cast_fp16 = reshape(shape = concat_38x, x = linear_14_cast_fp16)[name = string("op_583_cast_fp16")]; tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 2, 64])]; tensor var_586_cast_fp16 = reshape(shape = concat_39x, x = linear_15_cast_fp16)[name = string("op_586_cast_fp16")]; tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_40x = const()[name = string("concat_40x"), val = tensor([1, -1, 2, 64])]; tensor var_589_cast_fp16 = reshape(shape = concat_40x, x = linear_16_cast_fp16)[name = string("op_589_cast_fp16")]; tensor v_state_5_perm_0 = const()[name = string("v_state_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = var_583_cast_fp16)[name = string("transpose_87")]; tensor var_593_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_593_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_604_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_604_cast_fp16")]; bool var_606_interleave_0 = const()[name = string("op_606_interleave_0"), val = bool(false)]; tensor var_606_cast_fp16 = concat(axis = var_64, interleave = var_606_interleave_0, values = (var_604_cast_fp16, x1_9_cast_fp16))[name = string("op_606_cast_fp16")]; tensor var_607_cast_fp16 = mul(x = var_606_cast_fp16, y = sin_7_cast_fp16)[name = string("op_607_cast_fp16")]; tensor query_states_11_cast_fp16 = add(x = var_593_cast_fp16, y = var_607_cast_fp16)[name = string("query_states_11_cast_fp16")]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = var_586_cast_fp16)[name = string("transpose_86")]; tensor var_609_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_7_cast_fp16)[name = string("op_609_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_620_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_620_cast_fp16")]; bool var_622_interleave_0 = const()[name = string("op_622_interleave_0"), val = bool(false)]; tensor var_622_cast_fp16 = concat(axis = var_64, interleave = var_622_interleave_0, values = (var_620_cast_fp16, x1_11_cast_fp16))[name = string("op_622_cast_fp16")]; tensor var_623_cast_fp16 = mul(x = var_622_cast_fp16, y = sin_7_cast_fp16)[name = string("op_623_cast_fp16")]; tensor k_state_5_cast_fp16 = add(x = var_609_cast_fp16, y = var_623_cast_fp16)[name = string("k_state_5_cast_fp16")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([0])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor concat_43_values0_0 = const()[name = string("concat_43_values0_0"), val = tensor([2])]; int32 concat_43_axis_0 = const()[name = string("concat_43_axis_0"), val = int32(0)]; bool concat_43_interleave_0 = const()[name = string("concat_43_interleave_0"), val = bool(false)]; tensor concat_43 = concat(axis = concat_43_axis_0, interleave = concat_43_interleave_0, values = (concat_43_values0_0, expand_dims_24, expand_dims_25, expand_dims_2, expand_dims_27))[name = string("concat_43")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5_cast_fp16, x = coreml_update_state_50)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_52_write_state")]; tensor coreml_update_state_52 = read_state(input = key_cache)[name = string("coreml_update_state_52")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_5_cast_fp16 = transpose(perm = v_state_5_perm_0, x = var_589_cast_fp16)[name = string("transpose_85")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_43, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5_cast_fp16, x = coreml_update_state_51)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_53_write_state")]; tensor coreml_update_state_53 = read_state(input = value_cache)[name = string("coreml_update_state_53")]; tensor var_646_begin_0 = const()[name = string("op_646_begin_0"), val = tensor([2, 0, 0, 0, 0])]; tensor var_646_end_0 = const()[name = string("op_646_end_0"), val = tensor([3, 1, 2, 2048, 64])]; tensor var_646_end_mask_0 = const()[name = string("op_646_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_646_squeeze_mask_0 = const()[name = string("op_646_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_646_cast_fp16 = slice_by_index(begin = var_646_begin_0, end = var_646_end_0, end_mask = var_646_end_mask_0, squeeze_mask = var_646_squeeze_mask_0, x = coreml_update_state_52)[name = string("op_646_cast_fp16")]; tensor var_649_begin_0 = const()[name = string("op_649_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_649_end_mask_0 = const()[name = string("op_649_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_649_cast_fp16 = slice_by_index(begin = var_649_begin_0, end = concat_11, end_mask = var_649_end_mask_0, x = var_646_cast_fp16)[name = string("op_649_cast_fp16")]; tensor var_651_begin_0 = const()[name = string("op_651_begin_0"), val = tensor([2, 0, 0, 0, 0])]; tensor var_651_end_0 = const()[name = string("op_651_end_0"), val = tensor([3, 1, 2, 2048, 64])]; tensor var_651_end_mask_0 = const()[name = string("op_651_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_651_squeeze_mask_0 = const()[name = string("op_651_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_651_cast_fp16 = slice_by_index(begin = var_651_begin_0, end = var_651_end_0, end_mask = var_651_end_mask_0, squeeze_mask = var_651_squeeze_mask_0, x = coreml_update_state_53)[name = string("op_651_cast_fp16")]; tensor var_654_begin_0 = const()[name = string("op_654_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_654_end_mask_0 = const()[name = string("op_654_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_654_cast_fp16 = slice_by_index(begin = var_654_begin_0, end = concat_11, end_mask = var_654_end_mask_0, x = var_651_cast_fp16)[name = string("op_654_cast_fp16")]; tensor var_656_shape_cast_fp16 = shape(x = var_649_cast_fp16)[name = string("op_656_shape_cast_fp16")]; int32 gather_49 = const()[name = string("gather_49"), val = int32(1)]; int32 gather_50 = const()[name = string("gather_50"), val = int32(2)]; int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; string var_656_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_656_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(2)]; tensor var_656_shape_cast_fp16_to_uint16 = cast(dtype = var_656_shape_cast_fp16_to_uint16_dtype_0, x = var_656_shape_cast_fp16)[name = string("cast_131")]; uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_656_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")]; string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_52 = const()[name = string("gather_52"), val = int32(64)]; tensor var_663_axes_0 = const()[name = string("op_663_axes_0"), val = tensor([2])]; tensor var_663_cast_fp16 = expand_dims(axes = var_663_axes_0, x = var_649_cast_fp16)[name = string("op_663_cast_fp16")]; tensor shape_57_cast_fp16 = shape(x = var_663_cast_fp16)[name = string("shape_57_cast_fp16")]; int32 concat_51_axis_0 = const()[name = string("concat_51_axis_0"), val = int32(0)]; bool concat_51_interleave_0 = const()[name = string("concat_51_interleave_0"), val = bool(false)]; int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_130")]; tensor concat_51 = concat(axis = concat_51_axis_0, interleave = concat_51_interleave_0, values = (gather_49, gather_50, var_76, gather_51_cast_uint16_to_int32, gather_52))[name = string("concat_51")]; tensor real_div_4 = real_div(x = concat_51, y = shape_57_cast_fp16)[name = string("real_div_4")]; tensor hidden_states_71_cast_fp16 = tile(reps = real_div_4, x = var_663_cast_fp16)[name = string("hidden_states_71_cast_fp16")]; tensor concat_52x = const()[name = string("concat_52x"), val = tensor([1, 14, -1, 64])]; tensor key_states_11_cast_fp16 = reshape(shape = concat_52x, x = hidden_states_71_cast_fp16)[name = string("key_states_11_cast_fp16")]; tensor var_673_shape_cast_fp16 = shape(x = var_654_cast_fp16)[name = string("op_673_shape_cast_fp16")]; int32 gather_53 = const()[name = string("gather_53"), val = int32(1)]; int32 gather_54 = const()[name = string("gather_54"), val = int32(2)]; int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; string var_673_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_673_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(2)]; tensor var_673_shape_cast_fp16_to_uint16 = cast(dtype = var_673_shape_cast_fp16_to_uint16_dtype_0, x = var_673_shape_cast_fp16)[name = string("cast_129")]; uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_673_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_56 = const()[name = string("gather_56"), val = int32(64)]; tensor var_680_axes_0 = const()[name = string("op_680_axes_0"), val = tensor([2])]; tensor var_680_cast_fp16 = expand_dims(axes = var_680_axes_0, x = var_654_cast_fp16)[name = string("op_680_cast_fp16")]; tensor shape_62_cast_fp16 = shape(x = var_680_cast_fp16)[name = string("shape_62_cast_fp16")]; int32 concat_53_axis_0 = const()[name = string("concat_53_axis_0"), val = int32(0)]; bool concat_53_interleave_0 = const()[name = string("concat_53_interleave_0"), val = bool(false)]; int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_128")]; tensor concat_53 = concat(axis = concat_53_axis_0, interleave = concat_53_interleave_0, values = (gather_53, gather_54, var_76, gather_55_cast_uint16_to_int32, gather_56))[name = string("concat_53")]; tensor real_div_5 = real_div(x = concat_53, y = shape_62_cast_fp16)[name = string("real_div_5")]; tensor hidden_states_75_cast_fp16 = tile(reps = real_div_5, x = var_680_cast_fp16)[name = string("hidden_states_75_cast_fp16")]; tensor concat_54x = const()[name = string("concat_54x"), val = tensor([1, 14, -1, 64])]; tensor value_states_11_cast_fp16 = reshape(shape = concat_54x, x = hidden_states_75_cast_fp16)[name = string("value_states_11_cast_fp16")]; tensor var_690_shape_cast_fp16 = shape(x = key_states_11_cast_fp16)[name = string("op_690_shape_cast_fp16")]; int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; string var_690_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_690_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(2)]; tensor var_690_shape_cast_fp16_to_uint16 = cast(dtype = var_690_shape_cast_fp16_to_uint16_dtype_0, x = var_690_shape_cast_fp16)[name = string("cast_127")]; uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_690_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")]; string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_55_values0_0 = const()[name = string("concat_55_values0_0"), val = int32(1)]; int32 concat_55_values1_0 = const()[name = string("concat_55_values1_0"), val = int32(1)]; int32 concat_55_values2_0 = const()[name = string("concat_55_values2_0"), val = int32(0)]; int32 concat_55_axis_0 = const()[name = string("concat_55_axis_0"), val = int32(0)]; bool concat_55_interleave_0 = const()[name = string("concat_55_interleave_0"), val = bool(false)]; int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_126")]; tensor concat_55 = concat(axis = concat_55_axis_0, interleave = concat_55_interleave_0, values = (concat_55_values0_0, concat_55_values1_0, concat_55_values2_0, gather_57_cast_uint16_to_int32))[name = string("concat_55")]; tensor causal_mask_7_begin_0 = const()[name = string("causal_mask_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_7_end_mask_0 = const()[name = string("causal_mask_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_7_cast_fp16 = slice_by_index(begin = causal_mask_7_begin_0, end = concat_55, end_mask = causal_mask_7_end_mask_0, x = causal_mask)[name = string("causal_mask_7_cast_fp16")]; tensor attn_output_9_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_7_cast_fp16, key = key_states_11_cast_fp16, query = query_states_11_cast_fp16, value = value_states_11_cast_fp16)[name = string("attn_output_9_cast_fp16")]; tensor var_696_perm_0 = const()[name = string("op_696_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_56x = const()[name = string("concat_56x"), val = tensor([1, -1, 896])]; tensor var_696_cast_fp16 = transpose(perm = var_696_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_84")]; tensor input_17_cast_fp16 = reshape(shape = concat_56x, x = var_696_cast_fp16)[name = string("input_17_cast_fp16")]; tensor model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(93960384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94361856))))[name = string("model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_17_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_to_fp16_quantized, x = input_17_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor hidden_states_79_cast_fp16 = add(x = hidden_states_59_cast_fp16, y = linear_17_cast_fp16)[name = string("hidden_states_79_cast_fp16")]; fp16 var_70_promoted_5_to_fp16 = const()[name = string("op_70_promoted_5_to_fp16"), val = fp16(0x1p+1)]; tensor var_705_cast_fp16 = pow(x = hidden_states_79_cast_fp16, y = var_70_promoted_5_to_fp16)[name = string("op_705_cast_fp16")]; tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_705_cast_fp16)[name = string("variance_11_cast_fp16")]; fp16 var_708_to_fp16 = const()[name = string("op_708_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_709_cast_fp16 = add(x = variance_11_cast_fp16, y = var_708_to_fp16)[name = string("op_709_cast_fp16")]; fp32 var_710_epsilon_0 = const()[name = string("op_710_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_710_cast_fp16 = rsqrt(epsilon = var_710_epsilon_0, x = var_709_cast_fp16)[name = string("op_710_cast_fp16")]; tensor hidden_states_83_cast_fp16 = mul(x = hidden_states_79_cast_fp16, y = var_710_cast_fp16)[name = string("hidden_states_83_cast_fp16")]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94412096)))]; tensor input_19_cast_fp16 = mul(x = model_model_layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_83_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(94413952))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96593088))))[name = string("model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_gate_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_722_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_722_cast_fp16")]; tensor model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(96865536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99044672))))[name = string("model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_up_proj_weight_to_fp16_quantized, x = input_19_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_23_cast_fp16 = mul(x = var_722_cast_fp16, y = linear_19_cast_fp16)[name = string("input_23_cast_fp16")]; tensor model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(99317120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101496256))))[name = string("model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_20_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_2_mlp_down_proj_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor hidden_states_89_cast_fp16 = add(x = hidden_states_79_cast_fp16, y = linear_20_cast_fp16)[name = string("hidden_states_89_cast_fp16")]; fp16 var_70_promoted_6_to_fp16 = const()[name = string("op_70_promoted_6_to_fp16"), val = fp16(0x1p+1)]; tensor var_735_cast_fp16 = pow(x = hidden_states_89_cast_fp16, y = var_70_promoted_6_to_fp16)[name = string("op_735_cast_fp16")]; tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_735_cast_fp16)[name = string("variance_13_cast_fp16")]; fp16 var_738_to_fp16 = const()[name = string("op_738_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_739_cast_fp16 = add(x = variance_13_cast_fp16, y = var_738_to_fp16)[name = string("op_739_cast_fp16")]; fp32 var_740_epsilon_0 = const()[name = string("op_740_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_740_cast_fp16 = rsqrt(epsilon = var_740_epsilon_0, x = var_739_cast_fp16)[name = string("op_740_cast_fp16")]; tensor hidden_states_93_cast_fp16 = mul(x = hidden_states_89_cast_fp16, y = var_740_cast_fp16)[name = string("hidden_states_93_cast_fp16")]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101768704)))]; tensor hidden_states_97_cast_fp16 = mul(x = model_model_layers_3_input_layernorm_weight_to_fp16, y = hidden_states_93_cast_fp16)[name = string("hidden_states_97_cast_fp16")]; tensor model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(101770560))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102172032))))[name = string("model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102222272)))]; tensor linear_21_cast_fp16 = linear(bias = model_model_layers_3_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_3_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102224128))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102281536))))[name = string("model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_3_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_3_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102288768)))]; tensor linear_22_cast_fp16 = linear(bias = model_model_layers_3_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_3_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102289088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102346496))))[name = string("model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102353728)))]; tensor linear_23_cast_fp16 = linear(bias = model_model_layers_3_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_3_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_97_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor concat_57x = const()[name = string("concat_57x"), val = tensor([1, -1, 14, 64])]; tensor var_763_cast_fp16 = reshape(shape = concat_57x, x = linear_21_cast_fp16)[name = string("op_763_cast_fp16")]; tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 2, 64])]; tensor var_766_cast_fp16 = reshape(shape = concat_58x, x = linear_22_cast_fp16)[name = string("op_766_cast_fp16")]; tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_59x = const()[name = string("concat_59x"), val = tensor([1, -1, 2, 64])]; tensor var_769_cast_fp16 = reshape(shape = concat_59x, x = linear_23_cast_fp16)[name = string("op_769_cast_fp16")]; tensor v_state_7_perm_0 = const()[name = string("v_state_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_7_cast_fp16 = transpose(perm = q_7_perm_0, x = var_763_cast_fp16)[name = string("transpose_83")]; tensor var_773_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_773_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_784_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_784_cast_fp16")]; bool var_786_interleave_0 = const()[name = string("op_786_interleave_0"), val = bool(false)]; tensor var_786_cast_fp16 = concat(axis = var_64, interleave = var_786_interleave_0, values = (var_784_cast_fp16, x1_13_cast_fp16))[name = string("op_786_cast_fp16")]; tensor var_787_cast_fp16 = mul(x = var_786_cast_fp16, y = sin_7_cast_fp16)[name = string("op_787_cast_fp16")]; tensor query_states_15_cast_fp16 = add(x = var_773_cast_fp16, y = var_787_cast_fp16)[name = string("query_states_15_cast_fp16")]; tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = var_766_cast_fp16)[name = string("transpose_82")]; tensor var_789_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_7_cast_fp16)[name = string("op_789_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_800_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_800_cast_fp16")]; bool var_802_interleave_0 = const()[name = string("op_802_interleave_0"), val = bool(false)]; tensor var_802_cast_fp16 = concat(axis = var_64, interleave = var_802_interleave_0, values = (var_800_cast_fp16, x1_15_cast_fp16))[name = string("op_802_cast_fp16")]; tensor var_803_cast_fp16 = mul(x = var_802_cast_fp16, y = sin_7_cast_fp16)[name = string("op_803_cast_fp16")]; tensor k_state_7_cast_fp16 = add(x = var_789_cast_fp16, y = var_803_cast_fp16)[name = string("k_state_7_cast_fp16")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([0])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor concat_62_values0_0 = const()[name = string("concat_62_values0_0"), val = tensor([3])]; int32 concat_62_axis_0 = const()[name = string("concat_62_axis_0"), val = int32(0)]; bool concat_62_interleave_0 = const()[name = string("concat_62_interleave_0"), val = bool(false)]; tensor concat_62 = concat(axis = concat_62_axis_0, interleave = concat_62_interleave_0, values = (concat_62_values0_0, expand_dims_36, expand_dims_37, expand_dims_2, expand_dims_39))[name = string("concat_62")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7_cast_fp16, x = coreml_update_state_52)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_54_write_state")]; tensor coreml_update_state_54 = read_state(input = key_cache)[name = string("coreml_update_state_54")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_7_cast_fp16 = transpose(perm = v_state_7_perm_0, x = var_769_cast_fp16)[name = string("transpose_81")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_62, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7_cast_fp16, x = coreml_update_state_53)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_55_write_state")]; tensor coreml_update_state_55 = read_state(input = value_cache)[name = string("coreml_update_state_55")]; tensor var_826_begin_0 = const()[name = string("op_826_begin_0"), val = tensor([3, 0, 0, 0, 0])]; tensor var_826_end_0 = const()[name = string("op_826_end_0"), val = tensor([4, 1, 2, 2048, 64])]; tensor var_826_end_mask_0 = const()[name = string("op_826_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_826_squeeze_mask_0 = const()[name = string("op_826_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_826_cast_fp16 = slice_by_index(begin = var_826_begin_0, end = var_826_end_0, end_mask = var_826_end_mask_0, squeeze_mask = var_826_squeeze_mask_0, x = coreml_update_state_54)[name = string("op_826_cast_fp16")]; tensor var_829_begin_0 = const()[name = string("op_829_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_829_end_mask_0 = const()[name = string("op_829_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_829_cast_fp16 = slice_by_index(begin = var_829_begin_0, end = concat_11, end_mask = var_829_end_mask_0, x = var_826_cast_fp16)[name = string("op_829_cast_fp16")]; tensor var_831_begin_0 = const()[name = string("op_831_begin_0"), val = tensor([3, 0, 0, 0, 0])]; tensor var_831_end_0 = const()[name = string("op_831_end_0"), val = tensor([4, 1, 2, 2048, 64])]; tensor var_831_end_mask_0 = const()[name = string("op_831_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_831_squeeze_mask_0 = const()[name = string("op_831_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_831_cast_fp16 = slice_by_index(begin = var_831_begin_0, end = var_831_end_0, end_mask = var_831_end_mask_0, squeeze_mask = var_831_squeeze_mask_0, x = coreml_update_state_55)[name = string("op_831_cast_fp16")]; tensor var_834_begin_0 = const()[name = string("op_834_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_834_end_mask_0 = const()[name = string("op_834_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_834_cast_fp16 = slice_by_index(begin = var_834_begin_0, end = concat_11, end_mask = var_834_end_mask_0, x = var_831_cast_fp16)[name = string("op_834_cast_fp16")]; tensor var_836_shape_cast_fp16 = shape(x = var_829_cast_fp16)[name = string("op_836_shape_cast_fp16")]; int32 gather_67 = const()[name = string("gather_67"), val = int32(1)]; int32 gather_68 = const()[name = string("gather_68"), val = int32(2)]; int32 gather_69_axis_0 = const()[name = string("gather_69_axis_0"), val = int32(0)]; int32 gather_69_batch_dims_0 = const()[name = string("gather_69_batch_dims_0"), val = int32(0)]; bool gather_69_validate_indices_0 = const()[name = string("gather_69_validate_indices_0"), val = bool(false)]; string var_836_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_836_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_69_to_uint16 = const()[name = string("select_69_to_uint16"), val = uint16(2)]; tensor var_836_shape_cast_fp16_to_uint16 = cast(dtype = var_836_shape_cast_fp16_to_uint16_dtype_0, x = var_836_shape_cast_fp16)[name = string("cast_125")]; uint16 gather_69_cast_uint16 = gather(axis = gather_69_axis_0, batch_dims = gather_69_batch_dims_0, indices = select_69_to_uint16, validate_indices = gather_69_validate_indices_0, x = var_836_shape_cast_fp16_to_uint16)[name = string("gather_69_cast_uint16")]; string gather_69_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_69_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_70 = const()[name = string("gather_70"), val = int32(64)]; tensor var_843_axes_0 = const()[name = string("op_843_axes_0"), val = tensor([2])]; tensor var_843_cast_fp16 = expand_dims(axes = var_843_axes_0, x = var_829_cast_fp16)[name = string("op_843_cast_fp16")]; tensor shape_77_cast_fp16 = shape(x = var_843_cast_fp16)[name = string("shape_77_cast_fp16")]; int32 concat_70_axis_0 = const()[name = string("concat_70_axis_0"), val = int32(0)]; bool concat_70_interleave_0 = const()[name = string("concat_70_interleave_0"), val = bool(false)]; int32 gather_69_cast_uint16_to_int32 = cast(dtype = gather_69_cast_uint16_to_int32_dtype_0, x = gather_69_cast_uint16)[name = string("cast_124")]; tensor concat_70 = concat(axis = concat_70_axis_0, interleave = concat_70_interleave_0, values = (gather_67, gather_68, var_76, gather_69_cast_uint16_to_int32, gather_70))[name = string("concat_70")]; tensor real_div_6 = real_div(x = concat_70, y = shape_77_cast_fp16)[name = string("real_div_6")]; tensor hidden_states_101_cast_fp16 = tile(reps = real_div_6, x = var_843_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; tensor concat_71x = const()[name = string("concat_71x"), val = tensor([1, 14, -1, 64])]; tensor key_states_15_cast_fp16 = reshape(shape = concat_71x, x = hidden_states_101_cast_fp16)[name = string("key_states_15_cast_fp16")]; tensor var_853_shape_cast_fp16 = shape(x = var_834_cast_fp16)[name = string("op_853_shape_cast_fp16")]; int32 gather_71 = const()[name = string("gather_71"), val = int32(1)]; int32 gather_72 = const()[name = string("gather_72"), val = int32(2)]; int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; string var_853_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_853_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_73_to_uint16 = const()[name = string("select_73_to_uint16"), val = uint16(2)]; tensor var_853_shape_cast_fp16_to_uint16 = cast(dtype = var_853_shape_cast_fp16_to_uint16_dtype_0, x = var_853_shape_cast_fp16)[name = string("cast_123")]; uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_853_shape_cast_fp16_to_uint16)[name = string("gather_73_cast_uint16")]; string gather_73_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_73_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_74 = const()[name = string("gather_74"), val = int32(64)]; tensor var_860_axes_0 = const()[name = string("op_860_axes_0"), val = tensor([2])]; tensor var_860_cast_fp16 = expand_dims(axes = var_860_axes_0, x = var_834_cast_fp16)[name = string("op_860_cast_fp16")]; tensor shape_82_cast_fp16 = shape(x = var_860_cast_fp16)[name = string("shape_82_cast_fp16")]; int32 concat_72_axis_0 = const()[name = string("concat_72_axis_0"), val = int32(0)]; bool concat_72_interleave_0 = const()[name = string("concat_72_interleave_0"), val = bool(false)]; int32 gather_73_cast_uint16_to_int32 = cast(dtype = gather_73_cast_uint16_to_int32_dtype_0, x = gather_73_cast_uint16)[name = string("cast_122")]; tensor concat_72 = concat(axis = concat_72_axis_0, interleave = concat_72_interleave_0, values = (gather_71, gather_72, var_76, gather_73_cast_uint16_to_int32, gather_74))[name = string("concat_72")]; tensor real_div_7 = real_div(x = concat_72, y = shape_82_cast_fp16)[name = string("real_div_7")]; tensor hidden_states_105_cast_fp16 = tile(reps = real_div_7, x = var_860_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor concat_73x = const()[name = string("concat_73x"), val = tensor([1, 14, -1, 64])]; tensor value_states_15_cast_fp16 = reshape(shape = concat_73x, x = hidden_states_105_cast_fp16)[name = string("value_states_15_cast_fp16")]; tensor var_870_shape_cast_fp16 = shape(x = key_states_15_cast_fp16)[name = string("op_870_shape_cast_fp16")]; int32 gather_75_axis_0 = const()[name = string("gather_75_axis_0"), val = int32(0)]; int32 gather_75_batch_dims_0 = const()[name = string("gather_75_batch_dims_0"), val = int32(0)]; bool gather_75_validate_indices_0 = const()[name = string("gather_75_validate_indices_0"), val = bool(false)]; string var_870_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_870_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_75_to_uint16 = const()[name = string("select_75_to_uint16"), val = uint16(2)]; tensor var_870_shape_cast_fp16_to_uint16 = cast(dtype = var_870_shape_cast_fp16_to_uint16_dtype_0, x = var_870_shape_cast_fp16)[name = string("cast_121")]; uint16 gather_75_cast_uint16 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75_to_uint16, validate_indices = gather_75_validate_indices_0, x = var_870_shape_cast_fp16_to_uint16)[name = string("gather_75_cast_uint16")]; string gather_75_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_75_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_74_values0_0 = const()[name = string("concat_74_values0_0"), val = int32(1)]; int32 concat_74_values1_0 = const()[name = string("concat_74_values1_0"), val = int32(1)]; int32 concat_74_values2_0 = const()[name = string("concat_74_values2_0"), val = int32(0)]; int32 concat_74_axis_0 = const()[name = string("concat_74_axis_0"), val = int32(0)]; bool concat_74_interleave_0 = const()[name = string("concat_74_interleave_0"), val = bool(false)]; int32 gather_75_cast_uint16_to_int32 = cast(dtype = gather_75_cast_uint16_to_int32_dtype_0, x = gather_75_cast_uint16)[name = string("cast_120")]; tensor concat_74 = concat(axis = concat_74_axis_0, interleave = concat_74_interleave_0, values = (concat_74_values0_0, concat_74_values1_0, concat_74_values2_0, gather_75_cast_uint16_to_int32))[name = string("concat_74")]; tensor causal_mask_9_begin_0 = const()[name = string("causal_mask_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_9_end_mask_0 = const()[name = string("causal_mask_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_9_cast_fp16 = slice_by_index(begin = causal_mask_9_begin_0, end = concat_74, end_mask = causal_mask_9_end_mask_0, x = causal_mask)[name = string("causal_mask_9_cast_fp16")]; tensor attn_output_13_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_9_cast_fp16, key = key_states_15_cast_fp16, query = query_states_15_cast_fp16, value = value_states_15_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_876_perm_0 = const()[name = string("op_876_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_75x = const()[name = string("concat_75x"), val = tensor([1, -1, 896])]; tensor var_876_cast_fp16 = transpose(perm = var_876_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_80")]; tensor input_25_cast_fp16 = reshape(shape = concat_75x, x = var_876_cast_fp16)[name = string("input_25_cast_fp16")]; tensor model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102354048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102755520))))[name = string("model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_24_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_to_fp16_quantized, x = input_25_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor hidden_states_109_cast_fp16 = add(x = hidden_states_89_cast_fp16, y = linear_24_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; fp16 var_70_promoted_7_to_fp16 = const()[name = string("op_70_promoted_7_to_fp16"), val = fp16(0x1p+1)]; tensor var_885_cast_fp16 = pow(x = hidden_states_109_cast_fp16, y = var_70_promoted_7_to_fp16)[name = string("op_885_cast_fp16")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_885_cast_fp16)[name = string("variance_15_cast_fp16")]; fp16 var_888_to_fp16 = const()[name = string("op_888_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_889_cast_fp16 = add(x = variance_15_cast_fp16, y = var_888_to_fp16)[name = string("op_889_cast_fp16")]; fp32 var_890_epsilon_0 = const()[name = string("op_890_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_890_cast_fp16 = rsqrt(epsilon = var_890_epsilon_0, x = var_889_cast_fp16)[name = string("op_890_cast_fp16")]; tensor hidden_states_113_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = var_890_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102805760)))]; tensor input_27_cast_fp16 = mul(x = model_model_layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_113_cast_fp16)[name = string("input_27_cast_fp16")]; tensor model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(102807616))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(104986752))))[name = string("model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_gate_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_902_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_902_cast_fp16")]; tensor model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(105259200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107438336))))[name = string("model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_up_proj_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor input_31_cast_fp16 = mul(x = var_902_cast_fp16, y = linear_26_cast_fp16)[name = string("input_31_cast_fp16")]; tensor model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(107710784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(109889920))))[name = string("model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_27_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_3_mlp_down_proj_weight_to_fp16_quantized, x = input_31_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor hidden_states_119_cast_fp16 = add(x = hidden_states_109_cast_fp16, y = linear_27_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; fp16 var_70_promoted_8_to_fp16 = const()[name = string("op_70_promoted_8_to_fp16"), val = fp16(0x1p+1)]; tensor var_915_cast_fp16 = pow(x = hidden_states_119_cast_fp16, y = var_70_promoted_8_to_fp16)[name = string("op_915_cast_fp16")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_915_cast_fp16)[name = string("variance_17_cast_fp16")]; fp16 var_918_to_fp16 = const()[name = string("op_918_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_919_cast_fp16 = add(x = variance_17_cast_fp16, y = var_918_to_fp16)[name = string("op_919_cast_fp16")]; fp32 var_920_epsilon_0 = const()[name = string("op_920_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_920_cast_fp16 = rsqrt(epsilon = var_920_epsilon_0, x = var_919_cast_fp16)[name = string("op_920_cast_fp16")]; tensor hidden_states_123_cast_fp16 = mul(x = hidden_states_119_cast_fp16, y = var_920_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110162368)))]; tensor hidden_states_127_cast_fp16 = mul(x = model_model_layers_4_input_layernorm_weight_to_fp16, y = hidden_states_123_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; tensor model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110164224))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110565696))))[name = string("model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110615936)))]; tensor linear_28_cast_fp16 = linear(bias = model_model_layers_4_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_4_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110617792))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110675200))))[name = string("model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_4_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_4_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110682432)))]; tensor linear_29_cast_fp16 = linear(bias = model_model_layers_4_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_4_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110682752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110740160))))[name = string("model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110747392)))]; tensor linear_30_cast_fp16 = linear(bias = model_model_layers_4_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_4_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_127_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor concat_76x = const()[name = string("concat_76x"), val = tensor([1, -1, 14, 64])]; tensor var_943_cast_fp16 = reshape(shape = concat_76x, x = linear_28_cast_fp16)[name = string("op_943_cast_fp16")]; tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_77x = const()[name = string("concat_77x"), val = tensor([1, -1, 2, 64])]; tensor var_946_cast_fp16 = reshape(shape = concat_77x, x = linear_29_cast_fp16)[name = string("op_946_cast_fp16")]; tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 2, 64])]; tensor var_949_cast_fp16 = reshape(shape = concat_78x, x = linear_30_cast_fp16)[name = string("op_949_cast_fp16")]; tensor v_state_9_perm_0 = const()[name = string("v_state_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_943_cast_fp16)[name = string("transpose_79")]; tensor var_953_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_953_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_964_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_964_cast_fp16")]; bool var_966_interleave_0 = const()[name = string("op_966_interleave_0"), val = bool(false)]; tensor var_966_cast_fp16 = concat(axis = var_64, interleave = var_966_interleave_0, values = (var_964_cast_fp16, x1_17_cast_fp16))[name = string("op_966_cast_fp16")]; tensor var_967_cast_fp16 = mul(x = var_966_cast_fp16, y = sin_7_cast_fp16)[name = string("op_967_cast_fp16")]; tensor query_states_19_cast_fp16 = add(x = var_953_cast_fp16, y = var_967_cast_fp16)[name = string("query_states_19_cast_fp16")]; tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_946_cast_fp16)[name = string("transpose_78")]; tensor var_969_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_7_cast_fp16)[name = string("op_969_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_980_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_980_cast_fp16")]; bool var_982_interleave_0 = const()[name = string("op_982_interleave_0"), val = bool(false)]; tensor var_982_cast_fp16 = concat(axis = var_64, interleave = var_982_interleave_0, values = (var_980_cast_fp16, x1_19_cast_fp16))[name = string("op_982_cast_fp16")]; tensor var_983_cast_fp16 = mul(x = var_982_cast_fp16, y = sin_7_cast_fp16)[name = string("op_983_cast_fp16")]; tensor k_state_9_cast_fp16 = add(x = var_969_cast_fp16, y = var_983_cast_fp16)[name = string("k_state_9_cast_fp16")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor concat_81_values0_0 = const()[name = string("concat_81_values0_0"), val = tensor([4])]; int32 concat_81_axis_0 = const()[name = string("concat_81_axis_0"), val = int32(0)]; bool concat_81_interleave_0 = const()[name = string("concat_81_interleave_0"), val = bool(false)]; tensor concat_81 = concat(axis = concat_81_axis_0, interleave = concat_81_interleave_0, values = (concat_81_values0_0, expand_dims_48, expand_dims_49, expand_dims_2, expand_dims_51))[name = string("concat_81")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9_cast_fp16, x = coreml_update_state_54)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_56")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_9_cast_fp16 = transpose(perm = v_state_9_perm_0, x = var_949_cast_fp16)[name = string("transpose_77")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_81, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9_cast_fp16, x = coreml_update_state_55)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_57")]; tensor var_1006_begin_0 = const()[name = string("op_1006_begin_0"), val = tensor([4, 0, 0, 0, 0])]; tensor var_1006_end_0 = const()[name = string("op_1006_end_0"), val = tensor([5, 1, 2, 2048, 64])]; tensor var_1006_end_mask_0 = const()[name = string("op_1006_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1006_squeeze_mask_0 = const()[name = string("op_1006_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1006_cast_fp16 = slice_by_index(begin = var_1006_begin_0, end = var_1006_end_0, end_mask = var_1006_end_mask_0, squeeze_mask = var_1006_squeeze_mask_0, x = coreml_update_state_56)[name = string("op_1006_cast_fp16")]; tensor var_1009_begin_0 = const()[name = string("op_1009_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1009_end_mask_0 = const()[name = string("op_1009_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1009_cast_fp16 = slice_by_index(begin = var_1009_begin_0, end = concat_11, end_mask = var_1009_end_mask_0, x = var_1006_cast_fp16)[name = string("op_1009_cast_fp16")]; tensor var_1011_begin_0 = const()[name = string("op_1011_begin_0"), val = tensor([4, 0, 0, 0, 0])]; tensor var_1011_end_0 = const()[name = string("op_1011_end_0"), val = tensor([5, 1, 2, 2048, 64])]; tensor var_1011_end_mask_0 = const()[name = string("op_1011_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1011_squeeze_mask_0 = const()[name = string("op_1011_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1011_cast_fp16 = slice_by_index(begin = var_1011_begin_0, end = var_1011_end_0, end_mask = var_1011_end_mask_0, squeeze_mask = var_1011_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_1011_cast_fp16")]; tensor var_1014_begin_0 = const()[name = string("op_1014_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1014_end_mask_0 = const()[name = string("op_1014_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1014_cast_fp16 = slice_by_index(begin = var_1014_begin_0, end = concat_11, end_mask = var_1014_end_mask_0, x = var_1011_cast_fp16)[name = string("op_1014_cast_fp16")]; tensor var_1016_shape_cast_fp16 = shape(x = var_1009_cast_fp16)[name = string("op_1016_shape_cast_fp16")]; int32 gather_85 = const()[name = string("gather_85"), val = int32(1)]; int32 gather_86 = const()[name = string("gather_86"), val = int32(2)]; int32 gather_87_axis_0 = const()[name = string("gather_87_axis_0"), val = int32(0)]; int32 gather_87_batch_dims_0 = const()[name = string("gather_87_batch_dims_0"), val = int32(0)]; bool gather_87_validate_indices_0 = const()[name = string("gather_87_validate_indices_0"), val = bool(false)]; string var_1016_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1016_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_87_to_uint16 = const()[name = string("select_87_to_uint16"), val = uint16(2)]; tensor var_1016_shape_cast_fp16_to_uint16 = cast(dtype = var_1016_shape_cast_fp16_to_uint16_dtype_0, x = var_1016_shape_cast_fp16)[name = string("cast_119")]; uint16 gather_87_cast_uint16 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = select_87_to_uint16, validate_indices = gather_87_validate_indices_0, x = var_1016_shape_cast_fp16_to_uint16)[name = string("gather_87_cast_uint16")]; string gather_87_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_87_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_88 = const()[name = string("gather_88"), val = int32(64)]; tensor var_1023_axes_0 = const()[name = string("op_1023_axes_0"), val = tensor([2])]; tensor var_1023_cast_fp16 = expand_dims(axes = var_1023_axes_0, x = var_1009_cast_fp16)[name = string("op_1023_cast_fp16")]; tensor shape_97_cast_fp16 = shape(x = var_1023_cast_fp16)[name = string("shape_97_cast_fp16")]; int32 concat_89_axis_0 = const()[name = string("concat_89_axis_0"), val = int32(0)]; bool concat_89_interleave_0 = const()[name = string("concat_89_interleave_0"), val = bool(false)]; int32 gather_87_cast_uint16_to_int32 = cast(dtype = gather_87_cast_uint16_to_int32_dtype_0, x = gather_87_cast_uint16)[name = string("cast_118")]; tensor concat_89 = concat(axis = concat_89_axis_0, interleave = concat_89_interleave_0, values = (gather_85, gather_86, var_76, gather_87_cast_uint16_to_int32, gather_88))[name = string("concat_89")]; tensor real_div_8 = real_div(x = concat_89, y = shape_97_cast_fp16)[name = string("real_div_8")]; tensor hidden_states_131_cast_fp16 = tile(reps = real_div_8, x = var_1023_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor concat_90x = const()[name = string("concat_90x"), val = tensor([1, 14, -1, 64])]; tensor key_states_19_cast_fp16 = reshape(shape = concat_90x, x = hidden_states_131_cast_fp16)[name = string("key_states_19_cast_fp16")]; tensor var_1033_shape_cast_fp16 = shape(x = var_1014_cast_fp16)[name = string("op_1033_shape_cast_fp16")]; int32 gather_89 = const()[name = string("gather_89"), val = int32(1)]; int32 gather_90 = const()[name = string("gather_90"), val = int32(2)]; int32 gather_91_axis_0 = const()[name = string("gather_91_axis_0"), val = int32(0)]; int32 gather_91_batch_dims_0 = const()[name = string("gather_91_batch_dims_0"), val = int32(0)]; bool gather_91_validate_indices_0 = const()[name = string("gather_91_validate_indices_0"), val = bool(false)]; string var_1033_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1033_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_91_to_uint16 = const()[name = string("select_91_to_uint16"), val = uint16(2)]; tensor var_1033_shape_cast_fp16_to_uint16 = cast(dtype = var_1033_shape_cast_fp16_to_uint16_dtype_0, x = var_1033_shape_cast_fp16)[name = string("cast_117")]; uint16 gather_91_cast_uint16 = gather(axis = gather_91_axis_0, batch_dims = gather_91_batch_dims_0, indices = select_91_to_uint16, validate_indices = gather_91_validate_indices_0, x = var_1033_shape_cast_fp16_to_uint16)[name = string("gather_91_cast_uint16")]; string gather_91_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_91_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_92 = const()[name = string("gather_92"), val = int32(64)]; tensor var_1040_axes_0 = const()[name = string("op_1040_axes_0"), val = tensor([2])]; tensor var_1040_cast_fp16 = expand_dims(axes = var_1040_axes_0, x = var_1014_cast_fp16)[name = string("op_1040_cast_fp16")]; tensor shape_102_cast_fp16 = shape(x = var_1040_cast_fp16)[name = string("shape_102_cast_fp16")]; int32 concat_91_axis_0 = const()[name = string("concat_91_axis_0"), val = int32(0)]; bool concat_91_interleave_0 = const()[name = string("concat_91_interleave_0"), val = bool(false)]; int32 gather_91_cast_uint16_to_int32 = cast(dtype = gather_91_cast_uint16_to_int32_dtype_0, x = gather_91_cast_uint16)[name = string("cast_116")]; tensor concat_91 = concat(axis = concat_91_axis_0, interleave = concat_91_interleave_0, values = (gather_89, gather_90, var_76, gather_91_cast_uint16_to_int32, gather_92))[name = string("concat_91")]; tensor real_div_9 = real_div(x = concat_91, y = shape_102_cast_fp16)[name = string("real_div_9")]; tensor hidden_states_135_cast_fp16 = tile(reps = real_div_9, x = var_1040_cast_fp16)[name = string("hidden_states_135_cast_fp16")]; tensor concat_92x = const()[name = string("concat_92x"), val = tensor([1, 14, -1, 64])]; tensor value_states_19_cast_fp16 = reshape(shape = concat_92x, x = hidden_states_135_cast_fp16)[name = string("value_states_19_cast_fp16")]; tensor var_1050_shape_cast_fp16 = shape(x = key_states_19_cast_fp16)[name = string("op_1050_shape_cast_fp16")]; int32 gather_93_axis_0 = const()[name = string("gather_93_axis_0"), val = int32(0)]; int32 gather_93_batch_dims_0 = const()[name = string("gather_93_batch_dims_0"), val = int32(0)]; bool gather_93_validate_indices_0 = const()[name = string("gather_93_validate_indices_0"), val = bool(false)]; string var_1050_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1050_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_93_to_uint16 = const()[name = string("select_93_to_uint16"), val = uint16(2)]; tensor var_1050_shape_cast_fp16_to_uint16 = cast(dtype = var_1050_shape_cast_fp16_to_uint16_dtype_0, x = var_1050_shape_cast_fp16)[name = string("cast_115")]; uint16 gather_93_cast_uint16 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93_to_uint16, validate_indices = gather_93_validate_indices_0, x = var_1050_shape_cast_fp16_to_uint16)[name = string("gather_93_cast_uint16")]; string gather_93_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_93_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_93_values0_0 = const()[name = string("concat_93_values0_0"), val = int32(1)]; int32 concat_93_values1_0 = const()[name = string("concat_93_values1_0"), val = int32(1)]; int32 concat_93_values2_0 = const()[name = string("concat_93_values2_0"), val = int32(0)]; int32 concat_93_axis_0 = const()[name = string("concat_93_axis_0"), val = int32(0)]; bool concat_93_interleave_0 = const()[name = string("concat_93_interleave_0"), val = bool(false)]; int32 gather_93_cast_uint16_to_int32 = cast(dtype = gather_93_cast_uint16_to_int32_dtype_0, x = gather_93_cast_uint16)[name = string("cast_114")]; tensor concat_93 = concat(axis = concat_93_axis_0, interleave = concat_93_interleave_0, values = (concat_93_values0_0, concat_93_values1_0, concat_93_values2_0, gather_93_cast_uint16_to_int32))[name = string("concat_93")]; tensor causal_mask_11_begin_0 = const()[name = string("causal_mask_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_11_end_mask_0 = const()[name = string("causal_mask_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_11_cast_fp16 = slice_by_index(begin = causal_mask_11_begin_0, end = concat_93, end_mask = causal_mask_11_end_mask_0, x = causal_mask)[name = string("causal_mask_11_cast_fp16")]; tensor attn_output_17_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_11_cast_fp16, key = key_states_19_cast_fp16, query = query_states_19_cast_fp16, value = value_states_19_cast_fp16)[name = string("attn_output_17_cast_fp16")]; tensor var_1056_perm_0 = const()[name = string("op_1056_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_94x = const()[name = string("concat_94x"), val = tensor([1, -1, 896])]; tensor var_1056_cast_fp16 = transpose(perm = var_1056_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_76")]; tensor input_33_cast_fp16 = reshape(shape = concat_94x, x = var_1056_cast_fp16)[name = string("input_33_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(110747712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111149184))))[name = string("model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_31_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor hidden_states_139_cast_fp16 = add(x = hidden_states_119_cast_fp16, y = linear_31_cast_fp16)[name = string("hidden_states_139_cast_fp16")]; fp16 var_70_promoted_9_to_fp16 = const()[name = string("op_70_promoted_9_to_fp16"), val = fp16(0x1p+1)]; tensor var_1065_cast_fp16 = pow(x = hidden_states_139_cast_fp16, y = var_70_promoted_9_to_fp16)[name = string("op_1065_cast_fp16")]; tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_1065_cast_fp16)[name = string("variance_19_cast_fp16")]; fp16 var_1068_to_fp16 = const()[name = string("op_1068_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1069_cast_fp16 = add(x = variance_19_cast_fp16, y = var_1068_to_fp16)[name = string("op_1069_cast_fp16")]; fp32 var_1070_epsilon_0 = const()[name = string("op_1070_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1070_cast_fp16 = rsqrt(epsilon = var_1070_epsilon_0, x = var_1069_cast_fp16)[name = string("op_1070_cast_fp16")]; tensor hidden_states_143_cast_fp16 = mul(x = hidden_states_139_cast_fp16, y = var_1070_cast_fp16)[name = string("hidden_states_143_cast_fp16")]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111199424)))]; tensor input_35_cast_fp16 = mul(x = model_model_layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_143_cast_fp16)[name = string("input_35_cast_fp16")]; tensor model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(111201280))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113380416))))[name = string("model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_gate_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_1082_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1082_cast_fp16")]; tensor model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(113652864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(115832000))))[name = string("model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_up_proj_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_39_cast_fp16 = mul(x = var_1082_cast_fp16, y = linear_33_cast_fp16)[name = string("input_39_cast_fp16")]; tensor model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(116104448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118283584))))[name = string("model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_34_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_4_mlp_down_proj_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor hidden_states_149_cast_fp16 = add(x = hidden_states_139_cast_fp16, y = linear_34_cast_fp16)[name = string("hidden_states_149_cast_fp16")]; fp16 var_70_promoted_10_to_fp16 = const()[name = string("op_70_promoted_10_to_fp16"), val = fp16(0x1p+1)]; tensor var_1095_cast_fp16 = pow(x = hidden_states_149_cast_fp16, y = var_70_promoted_10_to_fp16)[name = string("op_1095_cast_fp16")]; tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_1095_cast_fp16)[name = string("variance_21_cast_fp16")]; fp16 var_1098_to_fp16 = const()[name = string("op_1098_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1099_cast_fp16 = add(x = variance_21_cast_fp16, y = var_1098_to_fp16)[name = string("op_1099_cast_fp16")]; fp32 var_1100_epsilon_0 = const()[name = string("op_1100_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1100_cast_fp16 = rsqrt(epsilon = var_1100_epsilon_0, x = var_1099_cast_fp16)[name = string("op_1100_cast_fp16")]; tensor hidden_states_153_cast_fp16 = mul(x = hidden_states_149_cast_fp16, y = var_1100_cast_fp16)[name = string("hidden_states_153_cast_fp16")]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118556032)))]; tensor hidden_states_157_cast_fp16 = mul(x = model_model_layers_5_input_layernorm_weight_to_fp16, y = hidden_states_153_cast_fp16)[name = string("hidden_states_157_cast_fp16")]; tensor model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118557888))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(118959360))))[name = string("model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119009600)))]; tensor linear_35_cast_fp16 = linear(bias = model_model_layers_5_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_5_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119011456))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119068864))))[name = string("model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_5_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_5_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119076096)))]; tensor linear_36_cast_fp16 = linear(bias = model_model_layers_5_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_5_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119076416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119133824))))[name = string("model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119141056)))]; tensor linear_37_cast_fp16 = linear(bias = model_model_layers_5_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_5_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_157_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor concat_95x = const()[name = string("concat_95x"), val = tensor([1, -1, 14, 64])]; tensor var_1123_cast_fp16 = reshape(shape = concat_95x, x = linear_35_cast_fp16)[name = string("op_1123_cast_fp16")]; tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_96x = const()[name = string("concat_96x"), val = tensor([1, -1, 2, 64])]; tensor var_1126_cast_fp16 = reshape(shape = concat_96x, x = linear_36_cast_fp16)[name = string("op_1126_cast_fp16")]; tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_97x = const()[name = string("concat_97x"), val = tensor([1, -1, 2, 64])]; tensor var_1129_cast_fp16 = reshape(shape = concat_97x, x = linear_37_cast_fp16)[name = string("op_1129_cast_fp16")]; tensor v_state_11_perm_0 = const()[name = string("v_state_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_11_cast_fp16 = transpose(perm = q_11_perm_0, x = var_1123_cast_fp16)[name = string("transpose_75")]; tensor var_1133_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1133_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1144_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1144_cast_fp16")]; bool var_1146_interleave_0 = const()[name = string("op_1146_interleave_0"), val = bool(false)]; tensor var_1146_cast_fp16 = concat(axis = var_64, interleave = var_1146_interleave_0, values = (var_1144_cast_fp16, x1_21_cast_fp16))[name = string("op_1146_cast_fp16")]; tensor var_1147_cast_fp16 = mul(x = var_1146_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1147_cast_fp16")]; tensor query_states_23_cast_fp16 = add(x = var_1133_cast_fp16, y = var_1147_cast_fp16)[name = string("query_states_23_cast_fp16")]; tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = var_1126_cast_fp16)[name = string("transpose_74")]; tensor var_1149_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1149_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1160_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1160_cast_fp16")]; bool var_1162_interleave_0 = const()[name = string("op_1162_interleave_0"), val = bool(false)]; tensor var_1162_cast_fp16 = concat(axis = var_64, interleave = var_1162_interleave_0, values = (var_1160_cast_fp16, x1_23_cast_fp16))[name = string("op_1162_cast_fp16")]; tensor var_1163_cast_fp16 = mul(x = var_1162_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1163_cast_fp16")]; tensor k_state_11_cast_fp16 = add(x = var_1149_cast_fp16, y = var_1163_cast_fp16)[name = string("k_state_11_cast_fp16")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([0])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor concat_100_values0_0 = const()[name = string("concat_100_values0_0"), val = tensor([5])]; int32 concat_100_axis_0 = const()[name = string("concat_100_axis_0"), val = int32(0)]; bool concat_100_interleave_0 = const()[name = string("concat_100_interleave_0"), val = bool(false)]; tensor concat_100 = concat(axis = concat_100_axis_0, interleave = concat_100_interleave_0, values = (concat_100_values0_0, expand_dims_60, expand_dims_61, expand_dims_2, expand_dims_63))[name = string("concat_100")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_58")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_11_cast_fp16 = transpose(perm = v_state_11_perm_0, x = var_1129_cast_fp16)[name = string("transpose_73")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_100, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_59")]; tensor var_1186_begin_0 = const()[name = string("op_1186_begin_0"), val = tensor([5, 0, 0, 0, 0])]; tensor var_1186_end_0 = const()[name = string("op_1186_end_0"), val = tensor([6, 1, 2, 2048, 64])]; tensor var_1186_end_mask_0 = const()[name = string("op_1186_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1186_squeeze_mask_0 = const()[name = string("op_1186_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1186_cast_fp16 = slice_by_index(begin = var_1186_begin_0, end = var_1186_end_0, end_mask = var_1186_end_mask_0, squeeze_mask = var_1186_squeeze_mask_0, x = coreml_update_state_58)[name = string("op_1186_cast_fp16")]; tensor var_1189_begin_0 = const()[name = string("op_1189_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1189_end_mask_0 = const()[name = string("op_1189_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1189_cast_fp16 = slice_by_index(begin = var_1189_begin_0, end = concat_11, end_mask = var_1189_end_mask_0, x = var_1186_cast_fp16)[name = string("op_1189_cast_fp16")]; tensor var_1191_begin_0 = const()[name = string("op_1191_begin_0"), val = tensor([5, 0, 0, 0, 0])]; tensor var_1191_end_0 = const()[name = string("op_1191_end_0"), val = tensor([6, 1, 2, 2048, 64])]; tensor var_1191_end_mask_0 = const()[name = string("op_1191_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1191_squeeze_mask_0 = const()[name = string("op_1191_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1191_cast_fp16 = slice_by_index(begin = var_1191_begin_0, end = var_1191_end_0, end_mask = var_1191_end_mask_0, squeeze_mask = var_1191_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_1191_cast_fp16")]; tensor var_1194_begin_0 = const()[name = string("op_1194_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1194_end_mask_0 = const()[name = string("op_1194_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1194_cast_fp16 = slice_by_index(begin = var_1194_begin_0, end = concat_11, end_mask = var_1194_end_mask_0, x = var_1191_cast_fp16)[name = string("op_1194_cast_fp16")]; tensor var_1196_shape_cast_fp16 = shape(x = var_1189_cast_fp16)[name = string("op_1196_shape_cast_fp16")]; int32 gather_103 = const()[name = string("gather_103"), val = int32(1)]; int32 gather_104 = const()[name = string("gather_104"), val = int32(2)]; int32 gather_105_axis_0 = const()[name = string("gather_105_axis_0"), val = int32(0)]; int32 gather_105_batch_dims_0 = const()[name = string("gather_105_batch_dims_0"), val = int32(0)]; bool gather_105_validate_indices_0 = const()[name = string("gather_105_validate_indices_0"), val = bool(false)]; string var_1196_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1196_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_105_to_uint16 = const()[name = string("select_105_to_uint16"), val = uint16(2)]; tensor var_1196_shape_cast_fp16_to_uint16 = cast(dtype = var_1196_shape_cast_fp16_to_uint16_dtype_0, x = var_1196_shape_cast_fp16)[name = string("cast_113")]; uint16 gather_105_cast_uint16 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105_to_uint16, validate_indices = gather_105_validate_indices_0, x = var_1196_shape_cast_fp16_to_uint16)[name = string("gather_105_cast_uint16")]; string gather_105_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_105_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_106 = const()[name = string("gather_106"), val = int32(64)]; tensor var_1203_axes_0 = const()[name = string("op_1203_axes_0"), val = tensor([2])]; tensor var_1203_cast_fp16 = expand_dims(axes = var_1203_axes_0, x = var_1189_cast_fp16)[name = string("op_1203_cast_fp16")]; tensor shape_117_cast_fp16 = shape(x = var_1203_cast_fp16)[name = string("shape_117_cast_fp16")]; int32 concat_108_axis_0 = const()[name = string("concat_108_axis_0"), val = int32(0)]; bool concat_108_interleave_0 = const()[name = string("concat_108_interleave_0"), val = bool(false)]; int32 gather_105_cast_uint16_to_int32 = cast(dtype = gather_105_cast_uint16_to_int32_dtype_0, x = gather_105_cast_uint16)[name = string("cast_112")]; tensor concat_108 = concat(axis = concat_108_axis_0, interleave = concat_108_interleave_0, values = (gather_103, gather_104, var_76, gather_105_cast_uint16_to_int32, gather_106))[name = string("concat_108")]; tensor real_div_10 = real_div(x = concat_108, y = shape_117_cast_fp16)[name = string("real_div_10")]; tensor hidden_states_161_cast_fp16 = tile(reps = real_div_10, x = var_1203_cast_fp16)[name = string("hidden_states_161_cast_fp16")]; tensor concat_109x = const()[name = string("concat_109x"), val = tensor([1, 14, -1, 64])]; tensor key_states_23_cast_fp16 = reshape(shape = concat_109x, x = hidden_states_161_cast_fp16)[name = string("key_states_23_cast_fp16")]; tensor var_1213_shape_cast_fp16 = shape(x = var_1194_cast_fp16)[name = string("op_1213_shape_cast_fp16")]; int32 gather_107 = const()[name = string("gather_107"), val = int32(1)]; int32 gather_108 = const()[name = string("gather_108"), val = int32(2)]; int32 gather_109_axis_0 = const()[name = string("gather_109_axis_0"), val = int32(0)]; int32 gather_109_batch_dims_0 = const()[name = string("gather_109_batch_dims_0"), val = int32(0)]; bool gather_109_validate_indices_0 = const()[name = string("gather_109_validate_indices_0"), val = bool(false)]; string var_1213_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1213_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_109_to_uint16 = const()[name = string("select_109_to_uint16"), val = uint16(2)]; tensor var_1213_shape_cast_fp16_to_uint16 = cast(dtype = var_1213_shape_cast_fp16_to_uint16_dtype_0, x = var_1213_shape_cast_fp16)[name = string("cast_111")]; uint16 gather_109_cast_uint16 = gather(axis = gather_109_axis_0, batch_dims = gather_109_batch_dims_0, indices = select_109_to_uint16, validate_indices = gather_109_validate_indices_0, x = var_1213_shape_cast_fp16_to_uint16)[name = string("gather_109_cast_uint16")]; string gather_109_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_109_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_110 = const()[name = string("gather_110"), val = int32(64)]; tensor var_1220_axes_0 = const()[name = string("op_1220_axes_0"), val = tensor([2])]; tensor var_1220_cast_fp16 = expand_dims(axes = var_1220_axes_0, x = var_1194_cast_fp16)[name = string("op_1220_cast_fp16")]; tensor shape_122_cast_fp16 = shape(x = var_1220_cast_fp16)[name = string("shape_122_cast_fp16")]; int32 concat_110_axis_0 = const()[name = string("concat_110_axis_0"), val = int32(0)]; bool concat_110_interleave_0 = const()[name = string("concat_110_interleave_0"), val = bool(false)]; int32 gather_109_cast_uint16_to_int32 = cast(dtype = gather_109_cast_uint16_to_int32_dtype_0, x = gather_109_cast_uint16)[name = string("cast_110")]; tensor concat_110 = concat(axis = concat_110_axis_0, interleave = concat_110_interleave_0, values = (gather_107, gather_108, var_76, gather_109_cast_uint16_to_int32, gather_110))[name = string("concat_110")]; tensor real_div_11 = real_div(x = concat_110, y = shape_122_cast_fp16)[name = string("real_div_11")]; tensor hidden_states_165_cast_fp16 = tile(reps = real_div_11, x = var_1220_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; tensor concat_111x = const()[name = string("concat_111x"), val = tensor([1, 14, -1, 64])]; tensor value_states_23_cast_fp16 = reshape(shape = concat_111x, x = hidden_states_165_cast_fp16)[name = string("value_states_23_cast_fp16")]; tensor var_1230_shape_cast_fp16 = shape(x = key_states_23_cast_fp16)[name = string("op_1230_shape_cast_fp16")]; int32 gather_111_axis_0 = const()[name = string("gather_111_axis_0"), val = int32(0)]; int32 gather_111_batch_dims_0 = const()[name = string("gather_111_batch_dims_0"), val = int32(0)]; bool gather_111_validate_indices_0 = const()[name = string("gather_111_validate_indices_0"), val = bool(false)]; string var_1230_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1230_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_111_to_uint16 = const()[name = string("select_111_to_uint16"), val = uint16(2)]; tensor var_1230_shape_cast_fp16_to_uint16 = cast(dtype = var_1230_shape_cast_fp16_to_uint16_dtype_0, x = var_1230_shape_cast_fp16)[name = string("cast_109")]; uint16 gather_111_cast_uint16 = gather(axis = gather_111_axis_0, batch_dims = gather_111_batch_dims_0, indices = select_111_to_uint16, validate_indices = gather_111_validate_indices_0, x = var_1230_shape_cast_fp16_to_uint16)[name = string("gather_111_cast_uint16")]; string gather_111_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_111_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_112_values0_0 = const()[name = string("concat_112_values0_0"), val = int32(1)]; int32 concat_112_values1_0 = const()[name = string("concat_112_values1_0"), val = int32(1)]; int32 concat_112_values2_0 = const()[name = string("concat_112_values2_0"), val = int32(0)]; int32 concat_112_axis_0 = const()[name = string("concat_112_axis_0"), val = int32(0)]; bool concat_112_interleave_0 = const()[name = string("concat_112_interleave_0"), val = bool(false)]; int32 gather_111_cast_uint16_to_int32 = cast(dtype = gather_111_cast_uint16_to_int32_dtype_0, x = gather_111_cast_uint16)[name = string("cast_108")]; tensor concat_112 = concat(axis = concat_112_axis_0, interleave = concat_112_interleave_0, values = (concat_112_values0_0, concat_112_values1_0, concat_112_values2_0, gather_111_cast_uint16_to_int32))[name = string("concat_112")]; tensor causal_mask_13_begin_0 = const()[name = string("causal_mask_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_13_end_mask_0 = const()[name = string("causal_mask_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_13_cast_fp16 = slice_by_index(begin = causal_mask_13_begin_0, end = concat_112, end_mask = causal_mask_13_end_mask_0, x = causal_mask)[name = string("causal_mask_13_cast_fp16")]; tensor attn_output_21_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_13_cast_fp16, key = key_states_23_cast_fp16, query = query_states_23_cast_fp16, value = value_states_23_cast_fp16)[name = string("attn_output_21_cast_fp16")]; tensor var_1236_perm_0 = const()[name = string("op_1236_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_113x = const()[name = string("concat_113x"), val = tensor([1, -1, 896])]; tensor var_1236_cast_fp16 = transpose(perm = var_1236_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_72")]; tensor input_41_cast_fp16 = reshape(shape = concat_113x, x = var_1236_cast_fp16)[name = string("input_41_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119141376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119542848))))[name = string("model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_38_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_to_fp16_quantized, x = input_41_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor hidden_states_169_cast_fp16 = add(x = hidden_states_149_cast_fp16, y = linear_38_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; fp16 var_70_promoted_11_to_fp16 = const()[name = string("op_70_promoted_11_to_fp16"), val = fp16(0x1p+1)]; tensor var_1245_cast_fp16 = pow(x = hidden_states_169_cast_fp16, y = var_70_promoted_11_to_fp16)[name = string("op_1245_cast_fp16")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_1245_cast_fp16)[name = string("variance_23_cast_fp16")]; fp16 var_1248_to_fp16 = const()[name = string("op_1248_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1249_cast_fp16 = add(x = variance_23_cast_fp16, y = var_1248_to_fp16)[name = string("op_1249_cast_fp16")]; fp32 var_1250_epsilon_0 = const()[name = string("op_1250_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1250_cast_fp16 = rsqrt(epsilon = var_1250_epsilon_0, x = var_1249_cast_fp16)[name = string("op_1250_cast_fp16")]; tensor hidden_states_173_cast_fp16 = mul(x = hidden_states_169_cast_fp16, y = var_1250_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119593088)))]; tensor input_43_cast_fp16 = mul(x = model_model_layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_173_cast_fp16)[name = string("input_43_cast_fp16")]; tensor model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(119594944))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(121774080))))[name = string("model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_gate_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor var_1262_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1262_cast_fp16")]; tensor model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(122046528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124225664))))[name = string("model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_up_proj_weight_to_fp16_quantized, x = input_43_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor input_47_cast_fp16 = mul(x = var_1262_cast_fp16, y = linear_40_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(124498112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126677248))))[name = string("model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_41_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_5_mlp_down_proj_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor hidden_states_179_cast_fp16 = add(x = hidden_states_169_cast_fp16, y = linear_41_cast_fp16)[name = string("hidden_states_179_cast_fp16")]; fp16 var_70_promoted_12_to_fp16 = const()[name = string("op_70_promoted_12_to_fp16"), val = fp16(0x1p+1)]; tensor var_1275_cast_fp16 = pow(x = hidden_states_179_cast_fp16, y = var_70_promoted_12_to_fp16)[name = string("op_1275_cast_fp16")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_1275_cast_fp16)[name = string("variance_25_cast_fp16")]; fp16 var_1278_to_fp16 = const()[name = string("op_1278_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1279_cast_fp16 = add(x = variance_25_cast_fp16, y = var_1278_to_fp16)[name = string("op_1279_cast_fp16")]; fp32 var_1280_epsilon_0 = const()[name = string("op_1280_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1280_cast_fp16 = rsqrt(epsilon = var_1280_epsilon_0, x = var_1279_cast_fp16)[name = string("op_1280_cast_fp16")]; tensor hidden_states_183_cast_fp16 = mul(x = hidden_states_179_cast_fp16, y = var_1280_cast_fp16)[name = string("hidden_states_183_cast_fp16")]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126949696)))]; tensor hidden_states_187_cast_fp16 = mul(x = model_model_layers_6_input_layernorm_weight_to_fp16, y = hidden_states_183_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; tensor model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(126951552))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127353024))))[name = string("model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127403264)))]; tensor linear_42_cast_fp16 = linear(bias = model_model_layers_6_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_6_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127405120))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127462528))))[name = string("model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_6_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_6_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127469760)))]; tensor linear_43_cast_fp16 = linear(bias = model_model_layers_6_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_6_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127470080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127527488))))[name = string("model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127534720)))]; tensor linear_44_cast_fp16 = linear(bias = model_model_layers_6_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_6_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_187_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_114x = const()[name = string("concat_114x"), val = tensor([1, -1, 14, 64])]; tensor var_1303_cast_fp16 = reshape(shape = concat_114x, x = linear_42_cast_fp16)[name = string("op_1303_cast_fp16")]; tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_115x = const()[name = string("concat_115x"), val = tensor([1, -1, 2, 64])]; tensor var_1306_cast_fp16 = reshape(shape = concat_115x, x = linear_43_cast_fp16)[name = string("op_1306_cast_fp16")]; tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_116x = const()[name = string("concat_116x"), val = tensor([1, -1, 2, 64])]; tensor var_1309_cast_fp16 = reshape(shape = concat_116x, x = linear_44_cast_fp16)[name = string("op_1309_cast_fp16")]; tensor v_state_13_perm_0 = const()[name = string("v_state_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = var_1303_cast_fp16)[name = string("transpose_71")]; tensor var_1313_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1313_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1324_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_15_promoted_to_fp16)[name = string("op_1324_cast_fp16")]; bool var_1326_interleave_0 = const()[name = string("op_1326_interleave_0"), val = bool(false)]; tensor var_1326_cast_fp16 = concat(axis = var_64, interleave = var_1326_interleave_0, values = (var_1324_cast_fp16, x1_25_cast_fp16))[name = string("op_1326_cast_fp16")]; tensor var_1327_cast_fp16 = mul(x = var_1326_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1327_cast_fp16")]; tensor query_states_27_cast_fp16 = add(x = var_1313_cast_fp16, y = var_1327_cast_fp16)[name = string("query_states_27_cast_fp16")]; tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = var_1306_cast_fp16)[name = string("transpose_70")]; tensor var_1329_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1329_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1340_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1340_cast_fp16")]; bool var_1342_interleave_0 = const()[name = string("op_1342_interleave_0"), val = bool(false)]; tensor var_1342_cast_fp16 = concat(axis = var_64, interleave = var_1342_interleave_0, values = (var_1340_cast_fp16, x1_27_cast_fp16))[name = string("op_1342_cast_fp16")]; tensor var_1343_cast_fp16 = mul(x = var_1342_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1343_cast_fp16")]; tensor k_state_13_cast_fp16 = add(x = var_1329_cast_fp16, y = var_1343_cast_fp16)[name = string("k_state_13_cast_fp16")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor concat_119_values0_0 = const()[name = string("concat_119_values0_0"), val = tensor([6])]; int32 concat_119_axis_0 = const()[name = string("concat_119_axis_0"), val = int32(0)]; bool concat_119_interleave_0 = const()[name = string("concat_119_interleave_0"), val = bool(false)]; tensor concat_119 = concat(axis = concat_119_axis_0, interleave = concat_119_interleave_0, values = (concat_119_values0_0, expand_dims_72, expand_dims_73, expand_dims_2, expand_dims_75))[name = string("concat_119")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_60")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_13_cast_fp16 = transpose(perm = v_state_13_perm_0, x = var_1309_cast_fp16)[name = string("transpose_69")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_119, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_61")]; tensor var_1366_begin_0 = const()[name = string("op_1366_begin_0"), val = tensor([6, 0, 0, 0, 0])]; tensor var_1366_end_0 = const()[name = string("op_1366_end_0"), val = tensor([7, 1, 2, 2048, 64])]; tensor var_1366_end_mask_0 = const()[name = string("op_1366_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1366_squeeze_mask_0 = const()[name = string("op_1366_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1366_cast_fp16 = slice_by_index(begin = var_1366_begin_0, end = var_1366_end_0, end_mask = var_1366_end_mask_0, squeeze_mask = var_1366_squeeze_mask_0, x = coreml_update_state_60)[name = string("op_1366_cast_fp16")]; tensor var_1369_begin_0 = const()[name = string("op_1369_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1369_end_mask_0 = const()[name = string("op_1369_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1369_cast_fp16 = slice_by_index(begin = var_1369_begin_0, end = concat_11, end_mask = var_1369_end_mask_0, x = var_1366_cast_fp16)[name = string("op_1369_cast_fp16")]; tensor var_1371_begin_0 = const()[name = string("op_1371_begin_0"), val = tensor([6, 0, 0, 0, 0])]; tensor var_1371_end_0 = const()[name = string("op_1371_end_0"), val = tensor([7, 1, 2, 2048, 64])]; tensor var_1371_end_mask_0 = const()[name = string("op_1371_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1371_squeeze_mask_0 = const()[name = string("op_1371_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1371_cast_fp16 = slice_by_index(begin = var_1371_begin_0, end = var_1371_end_0, end_mask = var_1371_end_mask_0, squeeze_mask = var_1371_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_1371_cast_fp16")]; tensor var_1374_begin_0 = const()[name = string("op_1374_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1374_end_mask_0 = const()[name = string("op_1374_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1374_cast_fp16 = slice_by_index(begin = var_1374_begin_0, end = concat_11, end_mask = var_1374_end_mask_0, x = var_1371_cast_fp16)[name = string("op_1374_cast_fp16")]; tensor var_1376_shape_cast_fp16 = shape(x = var_1369_cast_fp16)[name = string("op_1376_shape_cast_fp16")]; int32 gather_121 = const()[name = string("gather_121"), val = int32(1)]; int32 gather_122 = const()[name = string("gather_122"), val = int32(2)]; int32 gather_123_axis_0 = const()[name = string("gather_123_axis_0"), val = int32(0)]; int32 gather_123_batch_dims_0 = const()[name = string("gather_123_batch_dims_0"), val = int32(0)]; bool gather_123_validate_indices_0 = const()[name = string("gather_123_validate_indices_0"), val = bool(false)]; string var_1376_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1376_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_123_to_uint16 = const()[name = string("select_123_to_uint16"), val = uint16(2)]; tensor var_1376_shape_cast_fp16_to_uint16 = cast(dtype = var_1376_shape_cast_fp16_to_uint16_dtype_0, x = var_1376_shape_cast_fp16)[name = string("cast_107")]; uint16 gather_123_cast_uint16 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123_to_uint16, validate_indices = gather_123_validate_indices_0, x = var_1376_shape_cast_fp16_to_uint16)[name = string("gather_123_cast_uint16")]; string gather_123_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_123_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_124 = const()[name = string("gather_124"), val = int32(64)]; tensor var_1383_axes_0 = const()[name = string("op_1383_axes_0"), val = tensor([2])]; tensor var_1383_cast_fp16 = expand_dims(axes = var_1383_axes_0, x = var_1369_cast_fp16)[name = string("op_1383_cast_fp16")]; tensor shape_137_cast_fp16 = shape(x = var_1383_cast_fp16)[name = string("shape_137_cast_fp16")]; int32 concat_127_axis_0 = const()[name = string("concat_127_axis_0"), val = int32(0)]; bool concat_127_interleave_0 = const()[name = string("concat_127_interleave_0"), val = bool(false)]; int32 gather_123_cast_uint16_to_int32 = cast(dtype = gather_123_cast_uint16_to_int32_dtype_0, x = gather_123_cast_uint16)[name = string("cast_106")]; tensor concat_127 = concat(axis = concat_127_axis_0, interleave = concat_127_interleave_0, values = (gather_121, gather_122, var_76, gather_123_cast_uint16_to_int32, gather_124))[name = string("concat_127")]; tensor real_div_12 = real_div(x = concat_127, y = shape_137_cast_fp16)[name = string("real_div_12")]; tensor hidden_states_191_cast_fp16 = tile(reps = real_div_12, x = var_1383_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; tensor concat_128x = const()[name = string("concat_128x"), val = tensor([1, 14, -1, 64])]; tensor key_states_27_cast_fp16 = reshape(shape = concat_128x, x = hidden_states_191_cast_fp16)[name = string("key_states_27_cast_fp16")]; tensor var_1393_shape_cast_fp16 = shape(x = var_1374_cast_fp16)[name = string("op_1393_shape_cast_fp16")]; int32 gather_125 = const()[name = string("gather_125"), val = int32(1)]; int32 gather_126 = const()[name = string("gather_126"), val = int32(2)]; int32 gather_127_axis_0 = const()[name = string("gather_127_axis_0"), val = int32(0)]; int32 gather_127_batch_dims_0 = const()[name = string("gather_127_batch_dims_0"), val = int32(0)]; bool gather_127_validate_indices_0 = const()[name = string("gather_127_validate_indices_0"), val = bool(false)]; string var_1393_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1393_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_127_to_uint16 = const()[name = string("select_127_to_uint16"), val = uint16(2)]; tensor var_1393_shape_cast_fp16_to_uint16 = cast(dtype = var_1393_shape_cast_fp16_to_uint16_dtype_0, x = var_1393_shape_cast_fp16)[name = string("cast_105")]; uint16 gather_127_cast_uint16 = gather(axis = gather_127_axis_0, batch_dims = gather_127_batch_dims_0, indices = select_127_to_uint16, validate_indices = gather_127_validate_indices_0, x = var_1393_shape_cast_fp16_to_uint16)[name = string("gather_127_cast_uint16")]; string gather_127_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_127_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_128 = const()[name = string("gather_128"), val = int32(64)]; tensor var_1400_axes_0 = const()[name = string("op_1400_axes_0"), val = tensor([2])]; tensor var_1400_cast_fp16 = expand_dims(axes = var_1400_axes_0, x = var_1374_cast_fp16)[name = string("op_1400_cast_fp16")]; tensor shape_142_cast_fp16 = shape(x = var_1400_cast_fp16)[name = string("shape_142_cast_fp16")]; int32 concat_129_axis_0 = const()[name = string("concat_129_axis_0"), val = int32(0)]; bool concat_129_interleave_0 = const()[name = string("concat_129_interleave_0"), val = bool(false)]; int32 gather_127_cast_uint16_to_int32 = cast(dtype = gather_127_cast_uint16_to_int32_dtype_0, x = gather_127_cast_uint16)[name = string("cast_104")]; tensor concat_129 = concat(axis = concat_129_axis_0, interleave = concat_129_interleave_0, values = (gather_125, gather_126, var_76, gather_127_cast_uint16_to_int32, gather_128))[name = string("concat_129")]; tensor real_div_13 = real_div(x = concat_129, y = shape_142_cast_fp16)[name = string("real_div_13")]; tensor hidden_states_195_cast_fp16 = tile(reps = real_div_13, x = var_1400_cast_fp16)[name = string("hidden_states_195_cast_fp16")]; tensor concat_130x = const()[name = string("concat_130x"), val = tensor([1, 14, -1, 64])]; tensor value_states_27_cast_fp16 = reshape(shape = concat_130x, x = hidden_states_195_cast_fp16)[name = string("value_states_27_cast_fp16")]; tensor var_1410_shape_cast_fp16 = shape(x = key_states_27_cast_fp16)[name = string("op_1410_shape_cast_fp16")]; int32 gather_129_axis_0 = const()[name = string("gather_129_axis_0"), val = int32(0)]; int32 gather_129_batch_dims_0 = const()[name = string("gather_129_batch_dims_0"), val = int32(0)]; bool gather_129_validate_indices_0 = const()[name = string("gather_129_validate_indices_0"), val = bool(false)]; string var_1410_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1410_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_129_to_uint16 = const()[name = string("select_129_to_uint16"), val = uint16(2)]; tensor var_1410_shape_cast_fp16_to_uint16 = cast(dtype = var_1410_shape_cast_fp16_to_uint16_dtype_0, x = var_1410_shape_cast_fp16)[name = string("cast_103")]; uint16 gather_129_cast_uint16 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = select_129_to_uint16, validate_indices = gather_129_validate_indices_0, x = var_1410_shape_cast_fp16_to_uint16)[name = string("gather_129_cast_uint16")]; string gather_129_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_129_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_131_values0_0 = const()[name = string("concat_131_values0_0"), val = int32(1)]; int32 concat_131_values1_0 = const()[name = string("concat_131_values1_0"), val = int32(1)]; int32 concat_131_values2_0 = const()[name = string("concat_131_values2_0"), val = int32(0)]; int32 concat_131_axis_0 = const()[name = string("concat_131_axis_0"), val = int32(0)]; bool concat_131_interleave_0 = const()[name = string("concat_131_interleave_0"), val = bool(false)]; int32 gather_129_cast_uint16_to_int32 = cast(dtype = gather_129_cast_uint16_to_int32_dtype_0, x = gather_129_cast_uint16)[name = string("cast_102")]; tensor concat_131 = concat(axis = concat_131_axis_0, interleave = concat_131_interleave_0, values = (concat_131_values0_0, concat_131_values1_0, concat_131_values2_0, gather_129_cast_uint16_to_int32))[name = string("concat_131")]; tensor causal_mask_15_begin_0 = const()[name = string("causal_mask_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_15_end_mask_0 = const()[name = string("causal_mask_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_15_cast_fp16 = slice_by_index(begin = causal_mask_15_begin_0, end = concat_131, end_mask = causal_mask_15_end_mask_0, x = causal_mask)[name = string("causal_mask_15_cast_fp16")]; tensor attn_output_25_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_15_cast_fp16, key = key_states_27_cast_fp16, query = query_states_27_cast_fp16, value = value_states_27_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_1416_perm_0 = const()[name = string("op_1416_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_132x = const()[name = string("concat_132x"), val = tensor([1, -1, 896])]; tensor var_1416_cast_fp16 = transpose(perm = var_1416_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_68")]; tensor input_49_cast_fp16 = reshape(shape = concat_132x, x = var_1416_cast_fp16)[name = string("input_49_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127535040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127936512))))[name = string("model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_45_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_to_fp16_quantized, x = input_49_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor hidden_states_199_cast_fp16 = add(x = hidden_states_179_cast_fp16, y = linear_45_cast_fp16)[name = string("hidden_states_199_cast_fp16")]; fp16 var_70_promoted_13_to_fp16 = const()[name = string("op_70_promoted_13_to_fp16"), val = fp16(0x1p+1)]; tensor var_1425_cast_fp16 = pow(x = hidden_states_199_cast_fp16, y = var_70_promoted_13_to_fp16)[name = string("op_1425_cast_fp16")]; tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_1425_cast_fp16)[name = string("variance_27_cast_fp16")]; fp16 var_1428_to_fp16 = const()[name = string("op_1428_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1429_cast_fp16 = add(x = variance_27_cast_fp16, y = var_1428_to_fp16)[name = string("op_1429_cast_fp16")]; fp32 var_1430_epsilon_0 = const()[name = string("op_1430_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1430_cast_fp16 = rsqrt(epsilon = var_1430_epsilon_0, x = var_1429_cast_fp16)[name = string("op_1430_cast_fp16")]; tensor hidden_states_203_cast_fp16 = mul(x = hidden_states_199_cast_fp16, y = var_1430_cast_fp16)[name = string("hidden_states_203_cast_fp16")]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127986752)))]; tensor input_51_cast_fp16 = mul(x = model_model_layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_203_cast_fp16)[name = string("input_51_cast_fp16")]; tensor model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(127988608))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130167744))))[name = string("model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_gate_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_1442_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1442_cast_fp16")]; tensor model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(130440192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132619328))))[name = string("model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_up_proj_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_55_cast_fp16 = mul(x = var_1442_cast_fp16, y = linear_47_cast_fp16)[name = string("input_55_cast_fp16")]; tensor model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(132891776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135070912))))[name = string("model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_48_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_6_mlp_down_proj_weight_to_fp16_quantized, x = input_55_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor hidden_states_209_cast_fp16 = add(x = hidden_states_199_cast_fp16, y = linear_48_cast_fp16)[name = string("hidden_states_209_cast_fp16")]; fp16 var_70_promoted_14_to_fp16 = const()[name = string("op_70_promoted_14_to_fp16"), val = fp16(0x1p+1)]; tensor var_1455_cast_fp16 = pow(x = hidden_states_209_cast_fp16, y = var_70_promoted_14_to_fp16)[name = string("op_1455_cast_fp16")]; tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_1455_cast_fp16)[name = string("variance_29_cast_fp16")]; fp16 var_1458_to_fp16 = const()[name = string("op_1458_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1459_cast_fp16 = add(x = variance_29_cast_fp16, y = var_1458_to_fp16)[name = string("op_1459_cast_fp16")]; fp32 var_1460_epsilon_0 = const()[name = string("op_1460_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1460_cast_fp16 = rsqrt(epsilon = var_1460_epsilon_0, x = var_1459_cast_fp16)[name = string("op_1460_cast_fp16")]; tensor hidden_states_213_cast_fp16 = mul(x = hidden_states_209_cast_fp16, y = var_1460_cast_fp16)[name = string("hidden_states_213_cast_fp16")]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135343360)))]; tensor hidden_states_217_cast_fp16 = mul(x = model_model_layers_7_input_layernorm_weight_to_fp16, y = hidden_states_213_cast_fp16)[name = string("hidden_states_217_cast_fp16")]; tensor model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135345216))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135746688))))[name = string("model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135796928)))]; tensor linear_49_cast_fp16 = linear(bias = model_model_layers_7_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_7_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135798784))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135856192))))[name = string("model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_7_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_7_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135863424)))]; tensor linear_50_cast_fp16 = linear(bias = model_model_layers_7_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_7_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135863744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135921152))))[name = string("model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135928384)))]; tensor linear_51_cast_fp16 = linear(bias = model_model_layers_7_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_7_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_217_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor concat_133x = const()[name = string("concat_133x"), val = tensor([1, -1, 14, 64])]; tensor var_1483_cast_fp16 = reshape(shape = concat_133x, x = linear_49_cast_fp16)[name = string("op_1483_cast_fp16")]; tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_134x = const()[name = string("concat_134x"), val = tensor([1, -1, 2, 64])]; tensor var_1486_cast_fp16 = reshape(shape = concat_134x, x = linear_50_cast_fp16)[name = string("op_1486_cast_fp16")]; tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_135x = const()[name = string("concat_135x"), val = tensor([1, -1, 2, 64])]; tensor var_1489_cast_fp16 = reshape(shape = concat_135x, x = linear_51_cast_fp16)[name = string("op_1489_cast_fp16")]; tensor v_state_15_perm_0 = const()[name = string("v_state_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_15_cast_fp16 = transpose(perm = q_15_perm_0, x = var_1483_cast_fp16)[name = string("transpose_67")]; tensor var_1493_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1493_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1504_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1504_cast_fp16")]; bool var_1506_interleave_0 = const()[name = string("op_1506_interleave_0"), val = bool(false)]; tensor var_1506_cast_fp16 = concat(axis = var_64, interleave = var_1506_interleave_0, values = (var_1504_cast_fp16, x1_29_cast_fp16))[name = string("op_1506_cast_fp16")]; tensor var_1507_cast_fp16 = mul(x = var_1506_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1507_cast_fp16")]; tensor query_states_31_cast_fp16 = add(x = var_1493_cast_fp16, y = var_1507_cast_fp16)[name = string("query_states_31_cast_fp16")]; tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = var_1486_cast_fp16)[name = string("transpose_66")]; tensor var_1509_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1509_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1520_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1520_cast_fp16")]; bool var_1522_interleave_0 = const()[name = string("op_1522_interleave_0"), val = bool(false)]; tensor var_1522_cast_fp16 = concat(axis = var_64, interleave = var_1522_interleave_0, values = (var_1520_cast_fp16, x1_31_cast_fp16))[name = string("op_1522_cast_fp16")]; tensor var_1523_cast_fp16 = mul(x = var_1522_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1523_cast_fp16")]; tensor k_state_15_cast_fp16 = add(x = var_1509_cast_fp16, y = var_1523_cast_fp16)[name = string("k_state_15_cast_fp16")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([0])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor concat_138_values0_0 = const()[name = string("concat_138_values0_0"), val = tensor([7])]; int32 concat_138_axis_0 = const()[name = string("concat_138_axis_0"), val = int32(0)]; bool concat_138_interleave_0 = const()[name = string("concat_138_interleave_0"), val = bool(false)]; tensor concat_138 = concat(axis = concat_138_axis_0, interleave = concat_138_interleave_0, values = (concat_138_values0_0, expand_dims_84, expand_dims_85, expand_dims_2, expand_dims_87))[name = string("concat_138")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_62")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_15_cast_fp16 = transpose(perm = v_state_15_perm_0, x = var_1489_cast_fp16)[name = string("transpose_65")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_138, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_63")]; tensor var_1546_begin_0 = const()[name = string("op_1546_begin_0"), val = tensor([7, 0, 0, 0, 0])]; tensor var_1546_end_0 = const()[name = string("op_1546_end_0"), val = tensor([8, 1, 2, 2048, 64])]; tensor var_1546_end_mask_0 = const()[name = string("op_1546_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1546_squeeze_mask_0 = const()[name = string("op_1546_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1546_cast_fp16 = slice_by_index(begin = var_1546_begin_0, end = var_1546_end_0, end_mask = var_1546_end_mask_0, squeeze_mask = var_1546_squeeze_mask_0, x = coreml_update_state_62)[name = string("op_1546_cast_fp16")]; tensor var_1549_begin_0 = const()[name = string("op_1549_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1549_end_mask_0 = const()[name = string("op_1549_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1549_cast_fp16 = slice_by_index(begin = var_1549_begin_0, end = concat_11, end_mask = var_1549_end_mask_0, x = var_1546_cast_fp16)[name = string("op_1549_cast_fp16")]; tensor var_1551_begin_0 = const()[name = string("op_1551_begin_0"), val = tensor([7, 0, 0, 0, 0])]; tensor var_1551_end_0 = const()[name = string("op_1551_end_0"), val = tensor([8, 1, 2, 2048, 64])]; tensor var_1551_end_mask_0 = const()[name = string("op_1551_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1551_squeeze_mask_0 = const()[name = string("op_1551_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1551_cast_fp16 = slice_by_index(begin = var_1551_begin_0, end = var_1551_end_0, end_mask = var_1551_end_mask_0, squeeze_mask = var_1551_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_1551_cast_fp16")]; tensor var_1554_begin_0 = const()[name = string("op_1554_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1554_end_mask_0 = const()[name = string("op_1554_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1554_cast_fp16 = slice_by_index(begin = var_1554_begin_0, end = concat_11, end_mask = var_1554_end_mask_0, x = var_1551_cast_fp16)[name = string("op_1554_cast_fp16")]; tensor var_1556_shape_cast_fp16 = shape(x = var_1549_cast_fp16)[name = string("op_1556_shape_cast_fp16")]; int32 gather_139 = const()[name = string("gather_139"), val = int32(1)]; int32 gather_140 = const()[name = string("gather_140"), val = int32(2)]; int32 gather_141_axis_0 = const()[name = string("gather_141_axis_0"), val = int32(0)]; int32 gather_141_batch_dims_0 = const()[name = string("gather_141_batch_dims_0"), val = int32(0)]; bool gather_141_validate_indices_0 = const()[name = string("gather_141_validate_indices_0"), val = bool(false)]; string var_1556_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1556_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_141_to_uint16 = const()[name = string("select_141_to_uint16"), val = uint16(2)]; tensor var_1556_shape_cast_fp16_to_uint16 = cast(dtype = var_1556_shape_cast_fp16_to_uint16_dtype_0, x = var_1556_shape_cast_fp16)[name = string("cast_101")]; uint16 gather_141_cast_uint16 = gather(axis = gather_141_axis_0, batch_dims = gather_141_batch_dims_0, indices = select_141_to_uint16, validate_indices = gather_141_validate_indices_0, x = var_1556_shape_cast_fp16_to_uint16)[name = string("gather_141_cast_uint16")]; string gather_141_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_141_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_142 = const()[name = string("gather_142"), val = int32(64)]; tensor var_1563_axes_0 = const()[name = string("op_1563_axes_0"), val = tensor([2])]; tensor var_1563_cast_fp16 = expand_dims(axes = var_1563_axes_0, x = var_1549_cast_fp16)[name = string("op_1563_cast_fp16")]; tensor shape_157_cast_fp16 = shape(x = var_1563_cast_fp16)[name = string("shape_157_cast_fp16")]; int32 concat_146_axis_0 = const()[name = string("concat_146_axis_0"), val = int32(0)]; bool concat_146_interleave_0 = const()[name = string("concat_146_interleave_0"), val = bool(false)]; int32 gather_141_cast_uint16_to_int32 = cast(dtype = gather_141_cast_uint16_to_int32_dtype_0, x = gather_141_cast_uint16)[name = string("cast_100")]; tensor concat_146 = concat(axis = concat_146_axis_0, interleave = concat_146_interleave_0, values = (gather_139, gather_140, var_76, gather_141_cast_uint16_to_int32, gather_142))[name = string("concat_146")]; tensor real_div_14 = real_div(x = concat_146, y = shape_157_cast_fp16)[name = string("real_div_14")]; tensor hidden_states_221_cast_fp16 = tile(reps = real_div_14, x = var_1563_cast_fp16)[name = string("hidden_states_221_cast_fp16")]; tensor concat_147x = const()[name = string("concat_147x"), val = tensor([1, 14, -1, 64])]; tensor key_states_31_cast_fp16 = reshape(shape = concat_147x, x = hidden_states_221_cast_fp16)[name = string("key_states_31_cast_fp16")]; tensor var_1573_shape_cast_fp16 = shape(x = var_1554_cast_fp16)[name = string("op_1573_shape_cast_fp16")]; int32 gather_143 = const()[name = string("gather_143"), val = int32(1)]; int32 gather_144 = const()[name = string("gather_144"), val = int32(2)]; int32 gather_145_axis_0 = const()[name = string("gather_145_axis_0"), val = int32(0)]; int32 gather_145_batch_dims_0 = const()[name = string("gather_145_batch_dims_0"), val = int32(0)]; bool gather_145_validate_indices_0 = const()[name = string("gather_145_validate_indices_0"), val = bool(false)]; string var_1573_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1573_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_145_to_uint16 = const()[name = string("select_145_to_uint16"), val = uint16(2)]; tensor var_1573_shape_cast_fp16_to_uint16 = cast(dtype = var_1573_shape_cast_fp16_to_uint16_dtype_0, x = var_1573_shape_cast_fp16)[name = string("cast_99")]; uint16 gather_145_cast_uint16 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145_to_uint16, validate_indices = gather_145_validate_indices_0, x = var_1573_shape_cast_fp16_to_uint16)[name = string("gather_145_cast_uint16")]; string gather_145_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_145_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_146 = const()[name = string("gather_146"), val = int32(64)]; tensor var_1580_axes_0 = const()[name = string("op_1580_axes_0"), val = tensor([2])]; tensor var_1580_cast_fp16 = expand_dims(axes = var_1580_axes_0, x = var_1554_cast_fp16)[name = string("op_1580_cast_fp16")]; tensor shape_162_cast_fp16 = shape(x = var_1580_cast_fp16)[name = string("shape_162_cast_fp16")]; int32 concat_148_axis_0 = const()[name = string("concat_148_axis_0"), val = int32(0)]; bool concat_148_interleave_0 = const()[name = string("concat_148_interleave_0"), val = bool(false)]; int32 gather_145_cast_uint16_to_int32 = cast(dtype = gather_145_cast_uint16_to_int32_dtype_0, x = gather_145_cast_uint16)[name = string("cast_98")]; tensor concat_148 = concat(axis = concat_148_axis_0, interleave = concat_148_interleave_0, values = (gather_143, gather_144, var_76, gather_145_cast_uint16_to_int32, gather_146))[name = string("concat_148")]; tensor real_div_15 = real_div(x = concat_148, y = shape_162_cast_fp16)[name = string("real_div_15")]; tensor hidden_states_225_cast_fp16 = tile(reps = real_div_15, x = var_1580_cast_fp16)[name = string("hidden_states_225_cast_fp16")]; tensor concat_149x = const()[name = string("concat_149x"), val = tensor([1, 14, -1, 64])]; tensor value_states_31_cast_fp16 = reshape(shape = concat_149x, x = hidden_states_225_cast_fp16)[name = string("value_states_31_cast_fp16")]; tensor var_1590_shape_cast_fp16 = shape(x = key_states_31_cast_fp16)[name = string("op_1590_shape_cast_fp16")]; int32 gather_147_axis_0 = const()[name = string("gather_147_axis_0"), val = int32(0)]; int32 gather_147_batch_dims_0 = const()[name = string("gather_147_batch_dims_0"), val = int32(0)]; bool gather_147_validate_indices_0 = const()[name = string("gather_147_validate_indices_0"), val = bool(false)]; string var_1590_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1590_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_147_to_uint16 = const()[name = string("select_147_to_uint16"), val = uint16(2)]; tensor var_1590_shape_cast_fp16_to_uint16 = cast(dtype = var_1590_shape_cast_fp16_to_uint16_dtype_0, x = var_1590_shape_cast_fp16)[name = string("cast_97")]; uint16 gather_147_cast_uint16 = gather(axis = gather_147_axis_0, batch_dims = gather_147_batch_dims_0, indices = select_147_to_uint16, validate_indices = gather_147_validate_indices_0, x = var_1590_shape_cast_fp16_to_uint16)[name = string("gather_147_cast_uint16")]; string gather_147_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_147_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_150_values0_0 = const()[name = string("concat_150_values0_0"), val = int32(1)]; int32 concat_150_values1_0 = const()[name = string("concat_150_values1_0"), val = int32(1)]; int32 concat_150_values2_0 = const()[name = string("concat_150_values2_0"), val = int32(0)]; int32 concat_150_axis_0 = const()[name = string("concat_150_axis_0"), val = int32(0)]; bool concat_150_interleave_0 = const()[name = string("concat_150_interleave_0"), val = bool(false)]; int32 gather_147_cast_uint16_to_int32 = cast(dtype = gather_147_cast_uint16_to_int32_dtype_0, x = gather_147_cast_uint16)[name = string("cast_96")]; tensor concat_150 = concat(axis = concat_150_axis_0, interleave = concat_150_interleave_0, values = (concat_150_values0_0, concat_150_values1_0, concat_150_values2_0, gather_147_cast_uint16_to_int32))[name = string("concat_150")]; tensor causal_mask_17_begin_0 = const()[name = string("causal_mask_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_17_end_mask_0 = const()[name = string("causal_mask_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_17_cast_fp16 = slice_by_index(begin = causal_mask_17_begin_0, end = concat_150, end_mask = causal_mask_17_end_mask_0, x = causal_mask)[name = string("causal_mask_17_cast_fp16")]; tensor attn_output_29_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_17_cast_fp16, key = key_states_31_cast_fp16, query = query_states_31_cast_fp16, value = value_states_31_cast_fp16)[name = string("attn_output_29_cast_fp16")]; tensor var_1596_perm_0 = const()[name = string("op_1596_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_151x = const()[name = string("concat_151x"), val = tensor([1, -1, 896])]; tensor var_1596_cast_fp16 = transpose(perm = var_1596_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_64")]; tensor input_57_cast_fp16 = reshape(shape = concat_151x, x = var_1596_cast_fp16)[name = string("input_57_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(135928704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136330176))))[name = string("model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_52_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor hidden_states_229_cast_fp16 = add(x = hidden_states_209_cast_fp16, y = linear_52_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; fp16 var_70_promoted_15_to_fp16 = const()[name = string("op_70_promoted_15_to_fp16"), val = fp16(0x1p+1)]; tensor var_1605_cast_fp16 = pow(x = hidden_states_229_cast_fp16, y = var_70_promoted_15_to_fp16)[name = string("op_1605_cast_fp16")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1605_cast_fp16)[name = string("variance_31_cast_fp16")]; fp16 var_1608_to_fp16 = const()[name = string("op_1608_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1609_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1608_to_fp16)[name = string("op_1609_cast_fp16")]; fp32 var_1610_epsilon_0 = const()[name = string("op_1610_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1610_cast_fp16 = rsqrt(epsilon = var_1610_epsilon_0, x = var_1609_cast_fp16)[name = string("op_1610_cast_fp16")]; tensor hidden_states_233_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = var_1610_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136380416)))]; tensor input_59_cast_fp16 = mul(x = model_model_layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_233_cast_fp16)[name = string("input_59_cast_fp16")]; tensor model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(136382272))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138561408))))[name = string("model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_gate_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor var_1622_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1622_cast_fp16")]; tensor model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(138833856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141012992))))[name = string("model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_up_proj_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = string("linear_54_cast_fp16")]; tensor input_63_cast_fp16 = mul(x = var_1622_cast_fp16, y = linear_54_cast_fp16)[name = string("input_63_cast_fp16")]; tensor model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(141285440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143464576))))[name = string("model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_55_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_7_mlp_down_proj_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor hidden_states_239_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = linear_55_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; fp16 var_70_promoted_16_to_fp16 = const()[name = string("op_70_promoted_16_to_fp16"), val = fp16(0x1p+1)]; tensor var_1635_cast_fp16 = pow(x = hidden_states_239_cast_fp16, y = var_70_promoted_16_to_fp16)[name = string("op_1635_cast_fp16")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1635_cast_fp16)[name = string("variance_33_cast_fp16")]; fp16 var_1638_to_fp16 = const()[name = string("op_1638_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1639_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1638_to_fp16)[name = string("op_1639_cast_fp16")]; fp32 var_1640_epsilon_0 = const()[name = string("op_1640_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1640_cast_fp16 = rsqrt(epsilon = var_1640_epsilon_0, x = var_1639_cast_fp16)[name = string("op_1640_cast_fp16")]; tensor hidden_states_243_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = var_1640_cast_fp16)[name = string("hidden_states_243_cast_fp16")]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143737024)))]; tensor hidden_states_247_cast_fp16 = mul(x = model_model_layers_8_input_layernorm_weight_to_fp16, y = hidden_states_243_cast_fp16)[name = string("hidden_states_247_cast_fp16")]; tensor model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(143738880))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144140352))))[name = string("model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144190592)))]; tensor linear_56_cast_fp16 = linear(bias = model_model_layers_8_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_8_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144192448))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144249856))))[name = string("model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_8_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_8_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144257088)))]; tensor linear_57_cast_fp16 = linear(bias = model_model_layers_8_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_8_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144257408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144314816))))[name = string("model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144322048)))]; tensor linear_58_cast_fp16 = linear(bias = model_model_layers_8_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_8_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_247_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor concat_152x = const()[name = string("concat_152x"), val = tensor([1, -1, 14, 64])]; tensor var_1663_cast_fp16 = reshape(shape = concat_152x, x = linear_56_cast_fp16)[name = string("op_1663_cast_fp16")]; tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_153x = const()[name = string("concat_153x"), val = tensor([1, -1, 2, 64])]; tensor var_1666_cast_fp16 = reshape(shape = concat_153x, x = linear_57_cast_fp16)[name = string("op_1666_cast_fp16")]; tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 2, 64])]; tensor var_1669_cast_fp16 = reshape(shape = concat_154x, x = linear_58_cast_fp16)[name = string("op_1669_cast_fp16")]; tensor v_state_17_perm_0 = const()[name = string("v_state_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_1663_cast_fp16)[name = string("transpose_63")]; tensor var_1673_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1673_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1684_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1684_cast_fp16")]; bool var_1686_interleave_0 = const()[name = string("op_1686_interleave_0"), val = bool(false)]; tensor var_1686_cast_fp16 = concat(axis = var_64, interleave = var_1686_interleave_0, values = (var_1684_cast_fp16, x1_33_cast_fp16))[name = string("op_1686_cast_fp16")]; tensor var_1687_cast_fp16 = mul(x = var_1686_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1687_cast_fp16")]; tensor query_states_35_cast_fp16 = add(x = var_1673_cast_fp16, y = var_1687_cast_fp16)[name = string("query_states_35_cast_fp16")]; tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_1666_cast_fp16)[name = string("transpose_62")]; tensor var_1689_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1689_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1700_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1700_cast_fp16")]; bool var_1702_interleave_0 = const()[name = string("op_1702_interleave_0"), val = bool(false)]; tensor var_1702_cast_fp16 = concat(axis = var_64, interleave = var_1702_interleave_0, values = (var_1700_cast_fp16, x1_35_cast_fp16))[name = string("op_1702_cast_fp16")]; tensor var_1703_cast_fp16 = mul(x = var_1702_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1703_cast_fp16")]; tensor k_state_17_cast_fp16 = add(x = var_1689_cast_fp16, y = var_1703_cast_fp16)[name = string("k_state_17_cast_fp16")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor concat_157_values0_0 = const()[name = string("concat_157_values0_0"), val = tensor([8])]; int32 concat_157_axis_0 = const()[name = string("concat_157_axis_0"), val = int32(0)]; bool concat_157_interleave_0 = const()[name = string("concat_157_interleave_0"), val = bool(false)]; tensor concat_157 = concat(axis = concat_157_axis_0, interleave = concat_157_interleave_0, values = (concat_157_values0_0, expand_dims_96, expand_dims_97, expand_dims_2, expand_dims_99))[name = string("concat_157")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_64")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_17_cast_fp16 = transpose(perm = v_state_17_perm_0, x = var_1669_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_157, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_65")]; tensor var_1726_begin_0 = const()[name = string("op_1726_begin_0"), val = tensor([8, 0, 0, 0, 0])]; tensor var_1726_end_0 = const()[name = string("op_1726_end_0"), val = tensor([9, 1, 2, 2048, 64])]; tensor var_1726_end_mask_0 = const()[name = string("op_1726_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1726_squeeze_mask_0 = const()[name = string("op_1726_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1726_cast_fp16 = slice_by_index(begin = var_1726_begin_0, end = var_1726_end_0, end_mask = var_1726_end_mask_0, squeeze_mask = var_1726_squeeze_mask_0, x = coreml_update_state_64)[name = string("op_1726_cast_fp16")]; tensor var_1729_begin_0 = const()[name = string("op_1729_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1729_end_mask_0 = const()[name = string("op_1729_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1729_cast_fp16 = slice_by_index(begin = var_1729_begin_0, end = concat_11, end_mask = var_1729_end_mask_0, x = var_1726_cast_fp16)[name = string("op_1729_cast_fp16")]; tensor var_1731_begin_0 = const()[name = string("op_1731_begin_0"), val = tensor([8, 0, 0, 0, 0])]; tensor var_1731_end_0 = const()[name = string("op_1731_end_0"), val = tensor([9, 1, 2, 2048, 64])]; tensor var_1731_end_mask_0 = const()[name = string("op_1731_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1731_squeeze_mask_0 = const()[name = string("op_1731_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1731_cast_fp16 = slice_by_index(begin = var_1731_begin_0, end = var_1731_end_0, end_mask = var_1731_end_mask_0, squeeze_mask = var_1731_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_1731_cast_fp16")]; tensor var_1734_begin_0 = const()[name = string("op_1734_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1734_end_mask_0 = const()[name = string("op_1734_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1734_cast_fp16 = slice_by_index(begin = var_1734_begin_0, end = concat_11, end_mask = var_1734_end_mask_0, x = var_1731_cast_fp16)[name = string("op_1734_cast_fp16")]; tensor var_1736_shape_cast_fp16 = shape(x = var_1729_cast_fp16)[name = string("op_1736_shape_cast_fp16")]; int32 gather_157 = const()[name = string("gather_157"), val = int32(1)]; int32 gather_158 = const()[name = string("gather_158"), val = int32(2)]; int32 gather_159_axis_0 = const()[name = string("gather_159_axis_0"), val = int32(0)]; int32 gather_159_batch_dims_0 = const()[name = string("gather_159_batch_dims_0"), val = int32(0)]; bool gather_159_validate_indices_0 = const()[name = string("gather_159_validate_indices_0"), val = bool(false)]; string var_1736_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1736_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_159_to_uint16 = const()[name = string("select_159_to_uint16"), val = uint16(2)]; tensor var_1736_shape_cast_fp16_to_uint16 = cast(dtype = var_1736_shape_cast_fp16_to_uint16_dtype_0, x = var_1736_shape_cast_fp16)[name = string("cast_95")]; uint16 gather_159_cast_uint16 = gather(axis = gather_159_axis_0, batch_dims = gather_159_batch_dims_0, indices = select_159_to_uint16, validate_indices = gather_159_validate_indices_0, x = var_1736_shape_cast_fp16_to_uint16)[name = string("gather_159_cast_uint16")]; string gather_159_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_159_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_160 = const()[name = string("gather_160"), val = int32(64)]; tensor var_1743_axes_0 = const()[name = string("op_1743_axes_0"), val = tensor([2])]; tensor var_1743_cast_fp16 = expand_dims(axes = var_1743_axes_0, x = var_1729_cast_fp16)[name = string("op_1743_cast_fp16")]; tensor shape_177_cast_fp16 = shape(x = var_1743_cast_fp16)[name = string("shape_177_cast_fp16")]; int32 concat_165_axis_0 = const()[name = string("concat_165_axis_0"), val = int32(0)]; bool concat_165_interleave_0 = const()[name = string("concat_165_interleave_0"), val = bool(false)]; int32 gather_159_cast_uint16_to_int32 = cast(dtype = gather_159_cast_uint16_to_int32_dtype_0, x = gather_159_cast_uint16)[name = string("cast_94")]; tensor concat_165 = concat(axis = concat_165_axis_0, interleave = concat_165_interleave_0, values = (gather_157, gather_158, var_76, gather_159_cast_uint16_to_int32, gather_160))[name = string("concat_165")]; tensor real_div_16 = real_div(x = concat_165, y = shape_177_cast_fp16)[name = string("real_div_16")]; tensor hidden_states_251_cast_fp16 = tile(reps = real_div_16, x = var_1743_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; tensor concat_166x = const()[name = string("concat_166x"), val = tensor([1, 14, -1, 64])]; tensor key_states_35_cast_fp16 = reshape(shape = concat_166x, x = hidden_states_251_cast_fp16)[name = string("key_states_35_cast_fp16")]; tensor var_1753_shape_cast_fp16 = shape(x = var_1734_cast_fp16)[name = string("op_1753_shape_cast_fp16")]; int32 gather_161 = const()[name = string("gather_161"), val = int32(1)]; int32 gather_162 = const()[name = string("gather_162"), val = int32(2)]; int32 gather_163_axis_0 = const()[name = string("gather_163_axis_0"), val = int32(0)]; int32 gather_163_batch_dims_0 = const()[name = string("gather_163_batch_dims_0"), val = int32(0)]; bool gather_163_validate_indices_0 = const()[name = string("gather_163_validate_indices_0"), val = bool(false)]; string var_1753_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1753_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_163_to_uint16 = const()[name = string("select_163_to_uint16"), val = uint16(2)]; tensor var_1753_shape_cast_fp16_to_uint16 = cast(dtype = var_1753_shape_cast_fp16_to_uint16_dtype_0, x = var_1753_shape_cast_fp16)[name = string("cast_93")]; uint16 gather_163_cast_uint16 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163_to_uint16, validate_indices = gather_163_validate_indices_0, x = var_1753_shape_cast_fp16_to_uint16)[name = string("gather_163_cast_uint16")]; string gather_163_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_163_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_164 = const()[name = string("gather_164"), val = int32(64)]; tensor var_1760_axes_0 = const()[name = string("op_1760_axes_0"), val = tensor([2])]; tensor var_1760_cast_fp16 = expand_dims(axes = var_1760_axes_0, x = var_1734_cast_fp16)[name = string("op_1760_cast_fp16")]; tensor shape_182_cast_fp16 = shape(x = var_1760_cast_fp16)[name = string("shape_182_cast_fp16")]; int32 concat_167_axis_0 = const()[name = string("concat_167_axis_0"), val = int32(0)]; bool concat_167_interleave_0 = const()[name = string("concat_167_interleave_0"), val = bool(false)]; int32 gather_163_cast_uint16_to_int32 = cast(dtype = gather_163_cast_uint16_to_int32_dtype_0, x = gather_163_cast_uint16)[name = string("cast_92")]; tensor concat_167 = concat(axis = concat_167_axis_0, interleave = concat_167_interleave_0, values = (gather_161, gather_162, var_76, gather_163_cast_uint16_to_int32, gather_164))[name = string("concat_167")]; tensor real_div_17 = real_div(x = concat_167, y = shape_182_cast_fp16)[name = string("real_div_17")]; tensor hidden_states_255_cast_fp16 = tile(reps = real_div_17, x = var_1760_cast_fp16)[name = string("hidden_states_255_cast_fp16")]; tensor concat_168x = const()[name = string("concat_168x"), val = tensor([1, 14, -1, 64])]; tensor value_states_35_cast_fp16 = reshape(shape = concat_168x, x = hidden_states_255_cast_fp16)[name = string("value_states_35_cast_fp16")]; tensor var_1770_shape_cast_fp16 = shape(x = key_states_35_cast_fp16)[name = string("op_1770_shape_cast_fp16")]; int32 gather_165_axis_0 = const()[name = string("gather_165_axis_0"), val = int32(0)]; int32 gather_165_batch_dims_0 = const()[name = string("gather_165_batch_dims_0"), val = int32(0)]; bool gather_165_validate_indices_0 = const()[name = string("gather_165_validate_indices_0"), val = bool(false)]; string var_1770_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1770_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_165_to_uint16 = const()[name = string("select_165_to_uint16"), val = uint16(2)]; tensor var_1770_shape_cast_fp16_to_uint16 = cast(dtype = var_1770_shape_cast_fp16_to_uint16_dtype_0, x = var_1770_shape_cast_fp16)[name = string("cast_91")]; uint16 gather_165_cast_uint16 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165_to_uint16, validate_indices = gather_165_validate_indices_0, x = var_1770_shape_cast_fp16_to_uint16)[name = string("gather_165_cast_uint16")]; string gather_165_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_165_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_169_values0_0 = const()[name = string("concat_169_values0_0"), val = int32(1)]; int32 concat_169_values1_0 = const()[name = string("concat_169_values1_0"), val = int32(1)]; int32 concat_169_values2_0 = const()[name = string("concat_169_values2_0"), val = int32(0)]; int32 concat_169_axis_0 = const()[name = string("concat_169_axis_0"), val = int32(0)]; bool concat_169_interleave_0 = const()[name = string("concat_169_interleave_0"), val = bool(false)]; int32 gather_165_cast_uint16_to_int32 = cast(dtype = gather_165_cast_uint16_to_int32_dtype_0, x = gather_165_cast_uint16)[name = string("cast_90")]; tensor concat_169 = concat(axis = concat_169_axis_0, interleave = concat_169_interleave_0, values = (concat_169_values0_0, concat_169_values1_0, concat_169_values2_0, gather_165_cast_uint16_to_int32))[name = string("concat_169")]; tensor causal_mask_19_begin_0 = const()[name = string("causal_mask_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_19_end_mask_0 = const()[name = string("causal_mask_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_19_cast_fp16 = slice_by_index(begin = causal_mask_19_begin_0, end = concat_169, end_mask = causal_mask_19_end_mask_0, x = causal_mask)[name = string("causal_mask_19_cast_fp16")]; tensor attn_output_33_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_19_cast_fp16, key = key_states_35_cast_fp16, query = query_states_35_cast_fp16, value = value_states_35_cast_fp16)[name = string("attn_output_33_cast_fp16")]; tensor var_1776_perm_0 = const()[name = string("op_1776_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_170x = const()[name = string("concat_170x"), val = tensor([1, -1, 896])]; tensor var_1776_cast_fp16 = transpose(perm = var_1776_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_60")]; tensor input_65_cast_fp16 = reshape(shape = concat_170x, x = var_1776_cast_fp16)[name = string("input_65_cast_fp16")]; tensor model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144322368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144723840))))[name = string("model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_59_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_to_fp16_quantized, x = input_65_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor hidden_states_259_cast_fp16 = add(x = hidden_states_239_cast_fp16, y = linear_59_cast_fp16)[name = string("hidden_states_259_cast_fp16")]; fp16 var_70_promoted_17_to_fp16 = const()[name = string("op_70_promoted_17_to_fp16"), val = fp16(0x1p+1)]; tensor var_1785_cast_fp16 = pow(x = hidden_states_259_cast_fp16, y = var_70_promoted_17_to_fp16)[name = string("op_1785_cast_fp16")]; tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1785_cast_fp16)[name = string("variance_35_cast_fp16")]; fp16 var_1788_to_fp16 = const()[name = string("op_1788_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1789_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1788_to_fp16)[name = string("op_1789_cast_fp16")]; fp32 var_1790_epsilon_0 = const()[name = string("op_1790_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1790_cast_fp16 = rsqrt(epsilon = var_1790_epsilon_0, x = var_1789_cast_fp16)[name = string("op_1790_cast_fp16")]; tensor hidden_states_263_cast_fp16 = mul(x = hidden_states_259_cast_fp16, y = var_1790_cast_fp16)[name = string("hidden_states_263_cast_fp16")]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144774080)))]; tensor input_67_cast_fp16 = mul(x = model_model_layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_263_cast_fp16)[name = string("input_67_cast_fp16")]; tensor model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(144775936))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(146955072))))[name = string("model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_gate_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor var_1802_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_1802_cast_fp16")]; tensor model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(147227520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149406656))))[name = string("model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_up_proj_weight_to_fp16_quantized, x = input_67_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor input_71_cast_fp16 = mul(x = var_1802_cast_fp16, y = linear_61_cast_fp16)[name = string("input_71_cast_fp16")]; tensor model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(149679104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(151858240))))[name = string("model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_62_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_8_mlp_down_proj_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = string("linear_62_cast_fp16")]; tensor hidden_states_269_cast_fp16 = add(x = hidden_states_259_cast_fp16, y = linear_62_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; fp16 var_70_promoted_18_to_fp16 = const()[name = string("op_70_promoted_18_to_fp16"), val = fp16(0x1p+1)]; tensor var_1815_cast_fp16 = pow(x = hidden_states_269_cast_fp16, y = var_70_promoted_18_to_fp16)[name = string("op_1815_cast_fp16")]; tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1815_cast_fp16)[name = string("variance_37_cast_fp16")]; fp16 var_1818_to_fp16 = const()[name = string("op_1818_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1819_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1818_to_fp16)[name = string("op_1819_cast_fp16")]; fp32 var_1820_epsilon_0 = const()[name = string("op_1820_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1820_cast_fp16 = rsqrt(epsilon = var_1820_epsilon_0, x = var_1819_cast_fp16)[name = string("op_1820_cast_fp16")]; tensor hidden_states_273_cast_fp16 = mul(x = hidden_states_269_cast_fp16, y = var_1820_cast_fp16)[name = string("hidden_states_273_cast_fp16")]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152130688)))]; tensor hidden_states_277_cast_fp16 = mul(x = model_model_layers_9_input_layernorm_weight_to_fp16, y = hidden_states_273_cast_fp16)[name = string("hidden_states_277_cast_fp16")]; tensor model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152132544))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152534016))))[name = string("model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152584256)))]; tensor linear_63_cast_fp16 = linear(bias = model_model_layers_9_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_9_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152586112))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152643520))))[name = string("model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_9_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_9_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152650752)))]; tensor linear_64_cast_fp16 = linear(bias = model_model_layers_9_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_9_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152651072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152708480))))[name = string("model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152715712)))]; tensor linear_65_cast_fp16 = linear(bias = model_model_layers_9_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_9_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_277_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor concat_171x = const()[name = string("concat_171x"), val = tensor([1, -1, 14, 64])]; tensor var_1843_cast_fp16 = reshape(shape = concat_171x, x = linear_63_cast_fp16)[name = string("op_1843_cast_fp16")]; tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_172x = const()[name = string("concat_172x"), val = tensor([1, -1, 2, 64])]; tensor var_1846_cast_fp16 = reshape(shape = concat_172x, x = linear_64_cast_fp16)[name = string("op_1846_cast_fp16")]; tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_173x = const()[name = string("concat_173x"), val = tensor([1, -1, 2, 64])]; tensor var_1849_cast_fp16 = reshape(shape = concat_173x, x = linear_65_cast_fp16)[name = string("op_1849_cast_fp16")]; tensor v_state_19_perm_0 = const()[name = string("v_state_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_19_cast_fp16 = transpose(perm = q_19_perm_0, x = var_1843_cast_fp16)[name = string("transpose_59")]; tensor var_1853_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1853_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1864_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1864_cast_fp16")]; bool var_1866_interleave_0 = const()[name = string("op_1866_interleave_0"), val = bool(false)]; tensor var_1866_cast_fp16 = concat(axis = var_64, interleave = var_1866_interleave_0, values = (var_1864_cast_fp16, x1_37_cast_fp16))[name = string("op_1866_cast_fp16")]; tensor var_1867_cast_fp16 = mul(x = var_1866_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1867_cast_fp16")]; tensor query_states_39_cast_fp16 = add(x = var_1853_cast_fp16, y = var_1867_cast_fp16)[name = string("query_states_39_cast_fp16")]; tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = var_1846_cast_fp16)[name = string("transpose_58")]; tensor var_1869_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_7_cast_fp16)[name = string("op_1869_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1880_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_1880_cast_fp16")]; bool var_1882_interleave_0 = const()[name = string("op_1882_interleave_0"), val = bool(false)]; tensor var_1882_cast_fp16 = concat(axis = var_64, interleave = var_1882_interleave_0, values = (var_1880_cast_fp16, x1_39_cast_fp16))[name = string("op_1882_cast_fp16")]; tensor var_1883_cast_fp16 = mul(x = var_1882_cast_fp16, y = sin_7_cast_fp16)[name = string("op_1883_cast_fp16")]; tensor k_state_19_cast_fp16 = add(x = var_1869_cast_fp16, y = var_1883_cast_fp16)[name = string("k_state_19_cast_fp16")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([0])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor concat_176_values0_0 = const()[name = string("concat_176_values0_0"), val = tensor([9])]; int32 concat_176_axis_0 = const()[name = string("concat_176_axis_0"), val = int32(0)]; bool concat_176_interleave_0 = const()[name = string("concat_176_interleave_0"), val = bool(false)]; tensor concat_176 = concat(axis = concat_176_axis_0, interleave = concat_176_interleave_0, values = (concat_176_values0_0, expand_dims_108, expand_dims_109, expand_dims_2, expand_dims_111))[name = string("concat_176")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_66")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_19_cast_fp16 = transpose(perm = v_state_19_perm_0, x = var_1849_cast_fp16)[name = string("transpose_57")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_176, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_67")]; tensor var_1906_begin_0 = const()[name = string("op_1906_begin_0"), val = tensor([9, 0, 0, 0, 0])]; tensor var_1906_end_0 = const()[name = string("op_1906_end_0"), val = tensor([10, 1, 2, 2048, 64])]; tensor var_1906_end_mask_0 = const()[name = string("op_1906_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1906_squeeze_mask_0 = const()[name = string("op_1906_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1906_cast_fp16 = slice_by_index(begin = var_1906_begin_0, end = var_1906_end_0, end_mask = var_1906_end_mask_0, squeeze_mask = var_1906_squeeze_mask_0, x = coreml_update_state_66)[name = string("op_1906_cast_fp16")]; tensor var_1909_begin_0 = const()[name = string("op_1909_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1909_end_mask_0 = const()[name = string("op_1909_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1909_cast_fp16 = slice_by_index(begin = var_1909_begin_0, end = concat_11, end_mask = var_1909_end_mask_0, x = var_1906_cast_fp16)[name = string("op_1909_cast_fp16")]; tensor var_1911_begin_0 = const()[name = string("op_1911_begin_0"), val = tensor([9, 0, 0, 0, 0])]; tensor var_1911_end_0 = const()[name = string("op_1911_end_0"), val = tensor([10, 1, 2, 2048, 64])]; tensor var_1911_end_mask_0 = const()[name = string("op_1911_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1911_squeeze_mask_0 = const()[name = string("op_1911_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1911_cast_fp16 = slice_by_index(begin = var_1911_begin_0, end = var_1911_end_0, end_mask = var_1911_end_mask_0, squeeze_mask = var_1911_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_1911_cast_fp16")]; tensor var_1914_begin_0 = const()[name = string("op_1914_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1914_end_mask_0 = const()[name = string("op_1914_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1914_cast_fp16 = slice_by_index(begin = var_1914_begin_0, end = concat_11, end_mask = var_1914_end_mask_0, x = var_1911_cast_fp16)[name = string("op_1914_cast_fp16")]; tensor var_1916_shape_cast_fp16 = shape(x = var_1909_cast_fp16)[name = string("op_1916_shape_cast_fp16")]; int32 gather_175 = const()[name = string("gather_175"), val = int32(1)]; int32 gather_176 = const()[name = string("gather_176"), val = int32(2)]; int32 gather_177_axis_0 = const()[name = string("gather_177_axis_0"), val = int32(0)]; int32 gather_177_batch_dims_0 = const()[name = string("gather_177_batch_dims_0"), val = int32(0)]; bool gather_177_validate_indices_0 = const()[name = string("gather_177_validate_indices_0"), val = bool(false)]; string var_1916_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1916_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_177_to_uint16 = const()[name = string("select_177_to_uint16"), val = uint16(2)]; tensor var_1916_shape_cast_fp16_to_uint16 = cast(dtype = var_1916_shape_cast_fp16_to_uint16_dtype_0, x = var_1916_shape_cast_fp16)[name = string("cast_89")]; uint16 gather_177_cast_uint16 = gather(axis = gather_177_axis_0, batch_dims = gather_177_batch_dims_0, indices = select_177_to_uint16, validate_indices = gather_177_validate_indices_0, x = var_1916_shape_cast_fp16_to_uint16)[name = string("gather_177_cast_uint16")]; string gather_177_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_177_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_178 = const()[name = string("gather_178"), val = int32(64)]; tensor var_1923_axes_0 = const()[name = string("op_1923_axes_0"), val = tensor([2])]; tensor var_1923_cast_fp16 = expand_dims(axes = var_1923_axes_0, x = var_1909_cast_fp16)[name = string("op_1923_cast_fp16")]; tensor shape_197_cast_fp16 = shape(x = var_1923_cast_fp16)[name = string("shape_197_cast_fp16")]; int32 concat_184_axis_0 = const()[name = string("concat_184_axis_0"), val = int32(0)]; bool concat_184_interleave_0 = const()[name = string("concat_184_interleave_0"), val = bool(false)]; int32 gather_177_cast_uint16_to_int32 = cast(dtype = gather_177_cast_uint16_to_int32_dtype_0, x = gather_177_cast_uint16)[name = string("cast_88")]; tensor concat_184 = concat(axis = concat_184_axis_0, interleave = concat_184_interleave_0, values = (gather_175, gather_176, var_76, gather_177_cast_uint16_to_int32, gather_178))[name = string("concat_184")]; tensor real_div_18 = real_div(x = concat_184, y = shape_197_cast_fp16)[name = string("real_div_18")]; tensor hidden_states_281_cast_fp16 = tile(reps = real_div_18, x = var_1923_cast_fp16)[name = string("hidden_states_281_cast_fp16")]; tensor concat_185x = const()[name = string("concat_185x"), val = tensor([1, 14, -1, 64])]; tensor key_states_39_cast_fp16 = reshape(shape = concat_185x, x = hidden_states_281_cast_fp16)[name = string("key_states_39_cast_fp16")]; tensor var_1933_shape_cast_fp16 = shape(x = var_1914_cast_fp16)[name = string("op_1933_shape_cast_fp16")]; int32 gather_179 = const()[name = string("gather_179"), val = int32(1)]; int32 gather_180 = const()[name = string("gather_180"), val = int32(2)]; int32 gather_181_axis_0 = const()[name = string("gather_181_axis_0"), val = int32(0)]; int32 gather_181_batch_dims_0 = const()[name = string("gather_181_batch_dims_0"), val = int32(0)]; bool gather_181_validate_indices_0 = const()[name = string("gather_181_validate_indices_0"), val = bool(false)]; string var_1933_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1933_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_181_to_uint16 = const()[name = string("select_181_to_uint16"), val = uint16(2)]; tensor var_1933_shape_cast_fp16_to_uint16 = cast(dtype = var_1933_shape_cast_fp16_to_uint16_dtype_0, x = var_1933_shape_cast_fp16)[name = string("cast_87")]; uint16 gather_181_cast_uint16 = gather(axis = gather_181_axis_0, batch_dims = gather_181_batch_dims_0, indices = select_181_to_uint16, validate_indices = gather_181_validate_indices_0, x = var_1933_shape_cast_fp16_to_uint16)[name = string("gather_181_cast_uint16")]; string gather_181_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_181_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_182 = const()[name = string("gather_182"), val = int32(64)]; tensor var_1940_axes_0 = const()[name = string("op_1940_axes_0"), val = tensor([2])]; tensor var_1940_cast_fp16 = expand_dims(axes = var_1940_axes_0, x = var_1914_cast_fp16)[name = string("op_1940_cast_fp16")]; tensor shape_202_cast_fp16 = shape(x = var_1940_cast_fp16)[name = string("shape_202_cast_fp16")]; int32 concat_186_axis_0 = const()[name = string("concat_186_axis_0"), val = int32(0)]; bool concat_186_interleave_0 = const()[name = string("concat_186_interleave_0"), val = bool(false)]; int32 gather_181_cast_uint16_to_int32 = cast(dtype = gather_181_cast_uint16_to_int32_dtype_0, x = gather_181_cast_uint16)[name = string("cast_86")]; tensor concat_186 = concat(axis = concat_186_axis_0, interleave = concat_186_interleave_0, values = (gather_179, gather_180, var_76, gather_181_cast_uint16_to_int32, gather_182))[name = string("concat_186")]; tensor real_div_19 = real_div(x = concat_186, y = shape_202_cast_fp16)[name = string("real_div_19")]; tensor hidden_states_285_cast_fp16 = tile(reps = real_div_19, x = var_1940_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; tensor concat_187x = const()[name = string("concat_187x"), val = tensor([1, 14, -1, 64])]; tensor value_states_39_cast_fp16 = reshape(shape = concat_187x, x = hidden_states_285_cast_fp16)[name = string("value_states_39_cast_fp16")]; tensor var_1950_shape_cast_fp16 = shape(x = key_states_39_cast_fp16)[name = string("op_1950_shape_cast_fp16")]; int32 gather_183_axis_0 = const()[name = string("gather_183_axis_0"), val = int32(0)]; int32 gather_183_batch_dims_0 = const()[name = string("gather_183_batch_dims_0"), val = int32(0)]; bool gather_183_validate_indices_0 = const()[name = string("gather_183_validate_indices_0"), val = bool(false)]; string var_1950_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1950_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_183_to_uint16 = const()[name = string("select_183_to_uint16"), val = uint16(2)]; tensor var_1950_shape_cast_fp16_to_uint16 = cast(dtype = var_1950_shape_cast_fp16_to_uint16_dtype_0, x = var_1950_shape_cast_fp16)[name = string("cast_85")]; uint16 gather_183_cast_uint16 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183_to_uint16, validate_indices = gather_183_validate_indices_0, x = var_1950_shape_cast_fp16_to_uint16)[name = string("gather_183_cast_uint16")]; string gather_183_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_183_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_188_values0_0 = const()[name = string("concat_188_values0_0"), val = int32(1)]; int32 concat_188_values1_0 = const()[name = string("concat_188_values1_0"), val = int32(1)]; int32 concat_188_values2_0 = const()[name = string("concat_188_values2_0"), val = int32(0)]; int32 concat_188_axis_0 = const()[name = string("concat_188_axis_0"), val = int32(0)]; bool concat_188_interleave_0 = const()[name = string("concat_188_interleave_0"), val = bool(false)]; int32 gather_183_cast_uint16_to_int32 = cast(dtype = gather_183_cast_uint16_to_int32_dtype_0, x = gather_183_cast_uint16)[name = string("cast_84")]; tensor concat_188 = concat(axis = concat_188_axis_0, interleave = concat_188_interleave_0, values = (concat_188_values0_0, concat_188_values1_0, concat_188_values2_0, gather_183_cast_uint16_to_int32))[name = string("concat_188")]; tensor causal_mask_21_begin_0 = const()[name = string("causal_mask_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_21_end_mask_0 = const()[name = string("causal_mask_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_21_cast_fp16 = slice_by_index(begin = causal_mask_21_begin_0, end = concat_188, end_mask = causal_mask_21_end_mask_0, x = causal_mask)[name = string("causal_mask_21_cast_fp16")]; tensor attn_output_37_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_21_cast_fp16, key = key_states_39_cast_fp16, query = query_states_39_cast_fp16, value = value_states_39_cast_fp16)[name = string("attn_output_37_cast_fp16")]; tensor var_1956_perm_0 = const()[name = string("op_1956_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_189x = const()[name = string("concat_189x"), val = tensor([1, -1, 896])]; tensor var_1956_cast_fp16 = transpose(perm = var_1956_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_56")]; tensor input_73_cast_fp16 = reshape(shape = concat_189x, x = var_1956_cast_fp16)[name = string("input_73_cast_fp16")]; tensor model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(152716032))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153117504))))[name = string("model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_66_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_to_fp16_quantized, x = input_73_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor hidden_states_289_cast_fp16 = add(x = hidden_states_269_cast_fp16, y = linear_66_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; fp16 var_70_promoted_19_to_fp16 = const()[name = string("op_70_promoted_19_to_fp16"), val = fp16(0x1p+1)]; tensor var_1965_cast_fp16 = pow(x = hidden_states_289_cast_fp16, y = var_70_promoted_19_to_fp16)[name = string("op_1965_cast_fp16")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1965_cast_fp16)[name = string("variance_39_cast_fp16")]; fp16 var_1968_to_fp16 = const()[name = string("op_1968_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1969_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1968_to_fp16)[name = string("op_1969_cast_fp16")]; fp32 var_1970_epsilon_0 = const()[name = string("op_1970_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1970_cast_fp16 = rsqrt(epsilon = var_1970_epsilon_0, x = var_1969_cast_fp16)[name = string("op_1970_cast_fp16")]; tensor hidden_states_293_cast_fp16 = mul(x = hidden_states_289_cast_fp16, y = var_1970_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153167744)))]; tensor input_75_cast_fp16 = mul(x = model_model_layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_293_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(153169600))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155348736))))[name = string("model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_gate_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor var_1982_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_1982_cast_fp16")]; tensor model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(155621184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(157800320))))[name = string("model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_up_proj_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor input_79_cast_fp16 = mul(x = var_1982_cast_fp16, y = linear_68_cast_fp16)[name = string("input_79_cast_fp16")]; tensor model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(158072768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160251904))))[name = string("model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_69_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_9_mlp_down_proj_weight_to_fp16_quantized, x = input_79_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor hidden_states_299_cast_fp16 = add(x = hidden_states_289_cast_fp16, y = linear_69_cast_fp16)[name = string("hidden_states_299_cast_fp16")]; fp16 var_70_promoted_20_to_fp16 = const()[name = string("op_70_promoted_20_to_fp16"), val = fp16(0x1p+1)]; tensor var_1995_cast_fp16 = pow(x = hidden_states_299_cast_fp16, y = var_70_promoted_20_to_fp16)[name = string("op_1995_cast_fp16")]; tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1995_cast_fp16)[name = string("variance_41_cast_fp16")]; fp16 var_1998_to_fp16 = const()[name = string("op_1998_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1999_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1998_to_fp16)[name = string("op_1999_cast_fp16")]; fp32 var_2000_epsilon_0 = const()[name = string("op_2000_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2000_cast_fp16 = rsqrt(epsilon = var_2000_epsilon_0, x = var_1999_cast_fp16)[name = string("op_2000_cast_fp16")]; tensor hidden_states_303_cast_fp16 = mul(x = hidden_states_299_cast_fp16, y = var_2000_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160524352)))]; tensor hidden_states_307_cast_fp16 = mul(x = model_model_layers_10_input_layernorm_weight_to_fp16, y = hidden_states_303_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; tensor model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160526208))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160927680))))[name = string("model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160977920)))]; tensor linear_70_cast_fp16 = linear(bias = model_model_layers_10_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_10_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_70_cast_fp16")]; tensor model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(160979776))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161037184))))[name = string("model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_10_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_10_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161044416)))]; tensor linear_71_cast_fp16 = linear(bias = model_model_layers_10_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_10_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161044736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161102144))))[name = string("model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161109376)))]; tensor linear_72_cast_fp16 = linear(bias = model_model_layers_10_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_10_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_307_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor concat_190x = const()[name = string("concat_190x"), val = tensor([1, -1, 14, 64])]; tensor var_2023_cast_fp16 = reshape(shape = concat_190x, x = linear_70_cast_fp16)[name = string("op_2023_cast_fp16")]; tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_191x = const()[name = string("concat_191x"), val = tensor([1, -1, 2, 64])]; tensor var_2026_cast_fp16 = reshape(shape = concat_191x, x = linear_71_cast_fp16)[name = string("op_2026_cast_fp16")]; tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_192x = const()[name = string("concat_192x"), val = tensor([1, -1, 2, 64])]; tensor var_2029_cast_fp16 = reshape(shape = concat_192x, x = linear_72_cast_fp16)[name = string("op_2029_cast_fp16")]; tensor v_state_21_perm_0 = const()[name = string("v_state_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = var_2023_cast_fp16)[name = string("transpose_55")]; tensor var_2033_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2033_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2044_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_2044_cast_fp16")]; bool var_2046_interleave_0 = const()[name = string("op_2046_interleave_0"), val = bool(false)]; tensor var_2046_cast_fp16 = concat(axis = var_64, interleave = var_2046_interleave_0, values = (var_2044_cast_fp16, x1_41_cast_fp16))[name = string("op_2046_cast_fp16")]; tensor var_2047_cast_fp16 = mul(x = var_2046_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2047_cast_fp16")]; tensor query_states_43_cast_fp16 = add(x = var_2033_cast_fp16, y = var_2047_cast_fp16)[name = string("query_states_43_cast_fp16")]; tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = var_2026_cast_fp16)[name = string("transpose_54")]; tensor var_2049_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2049_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2060_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_2060_cast_fp16")]; bool var_2062_interleave_0 = const()[name = string("op_2062_interleave_0"), val = bool(false)]; tensor var_2062_cast_fp16 = concat(axis = var_64, interleave = var_2062_interleave_0, values = (var_2060_cast_fp16, x1_43_cast_fp16))[name = string("op_2062_cast_fp16")]; tensor var_2063_cast_fp16 = mul(x = var_2062_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2063_cast_fp16")]; tensor k_state_21_cast_fp16 = add(x = var_2049_cast_fp16, y = var_2063_cast_fp16)[name = string("k_state_21_cast_fp16")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([0])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor concat_195_values0_0 = const()[name = string("concat_195_values0_0"), val = tensor([10])]; int32 concat_195_axis_0 = const()[name = string("concat_195_axis_0"), val = int32(0)]; bool concat_195_interleave_0 = const()[name = string("concat_195_interleave_0"), val = bool(false)]; tensor concat_195 = concat(axis = concat_195_axis_0, interleave = concat_195_interleave_0, values = (concat_195_values0_0, expand_dims_120, expand_dims_121, expand_dims_2, expand_dims_123))[name = string("concat_195")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_68")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_21_cast_fp16 = transpose(perm = v_state_21_perm_0, x = var_2029_cast_fp16)[name = string("transpose_53")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_195, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_69")]; tensor var_2086_begin_0 = const()[name = string("op_2086_begin_0"), val = tensor([10, 0, 0, 0, 0])]; tensor var_2086_end_0 = const()[name = string("op_2086_end_0"), val = tensor([11, 1, 2, 2048, 64])]; tensor var_2086_end_mask_0 = const()[name = string("op_2086_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2086_squeeze_mask_0 = const()[name = string("op_2086_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2086_cast_fp16 = slice_by_index(begin = var_2086_begin_0, end = var_2086_end_0, end_mask = var_2086_end_mask_0, squeeze_mask = var_2086_squeeze_mask_0, x = coreml_update_state_68)[name = string("op_2086_cast_fp16")]; tensor var_2089_begin_0 = const()[name = string("op_2089_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2089_end_mask_0 = const()[name = string("op_2089_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2089_cast_fp16 = slice_by_index(begin = var_2089_begin_0, end = concat_11, end_mask = var_2089_end_mask_0, x = var_2086_cast_fp16)[name = string("op_2089_cast_fp16")]; tensor var_2091_begin_0 = const()[name = string("op_2091_begin_0"), val = tensor([10, 0, 0, 0, 0])]; tensor var_2091_end_0 = const()[name = string("op_2091_end_0"), val = tensor([11, 1, 2, 2048, 64])]; tensor var_2091_end_mask_0 = const()[name = string("op_2091_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2091_squeeze_mask_0 = const()[name = string("op_2091_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2091_cast_fp16 = slice_by_index(begin = var_2091_begin_0, end = var_2091_end_0, end_mask = var_2091_end_mask_0, squeeze_mask = var_2091_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_2091_cast_fp16")]; tensor var_2094_begin_0 = const()[name = string("op_2094_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2094_end_mask_0 = const()[name = string("op_2094_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2094_cast_fp16 = slice_by_index(begin = var_2094_begin_0, end = concat_11, end_mask = var_2094_end_mask_0, x = var_2091_cast_fp16)[name = string("op_2094_cast_fp16")]; tensor var_2096_shape_cast_fp16 = shape(x = var_2089_cast_fp16)[name = string("op_2096_shape_cast_fp16")]; int32 gather_193 = const()[name = string("gather_193"), val = int32(1)]; int32 gather_194 = const()[name = string("gather_194"), val = int32(2)]; int32 gather_195_axis_0 = const()[name = string("gather_195_axis_0"), val = int32(0)]; int32 gather_195_batch_dims_0 = const()[name = string("gather_195_batch_dims_0"), val = int32(0)]; bool gather_195_validate_indices_0 = const()[name = string("gather_195_validate_indices_0"), val = bool(false)]; string var_2096_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2096_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_195_to_uint16 = const()[name = string("select_195_to_uint16"), val = uint16(2)]; tensor var_2096_shape_cast_fp16_to_uint16 = cast(dtype = var_2096_shape_cast_fp16_to_uint16_dtype_0, x = var_2096_shape_cast_fp16)[name = string("cast_83")]; uint16 gather_195_cast_uint16 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195_to_uint16, validate_indices = gather_195_validate_indices_0, x = var_2096_shape_cast_fp16_to_uint16)[name = string("gather_195_cast_uint16")]; string gather_195_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_195_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_196 = const()[name = string("gather_196"), val = int32(64)]; tensor var_2103_axes_0 = const()[name = string("op_2103_axes_0"), val = tensor([2])]; tensor var_2103_cast_fp16 = expand_dims(axes = var_2103_axes_0, x = var_2089_cast_fp16)[name = string("op_2103_cast_fp16")]; tensor shape_217_cast_fp16 = shape(x = var_2103_cast_fp16)[name = string("shape_217_cast_fp16")]; int32 concat_203_axis_0 = const()[name = string("concat_203_axis_0"), val = int32(0)]; bool concat_203_interleave_0 = const()[name = string("concat_203_interleave_0"), val = bool(false)]; int32 gather_195_cast_uint16_to_int32 = cast(dtype = gather_195_cast_uint16_to_int32_dtype_0, x = gather_195_cast_uint16)[name = string("cast_82")]; tensor concat_203 = concat(axis = concat_203_axis_0, interleave = concat_203_interleave_0, values = (gather_193, gather_194, var_76, gather_195_cast_uint16_to_int32, gather_196))[name = string("concat_203")]; tensor real_div_20 = real_div(x = concat_203, y = shape_217_cast_fp16)[name = string("real_div_20")]; tensor hidden_states_311_cast_fp16 = tile(reps = real_div_20, x = var_2103_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; tensor concat_204x = const()[name = string("concat_204x"), val = tensor([1, 14, -1, 64])]; tensor key_states_43_cast_fp16 = reshape(shape = concat_204x, x = hidden_states_311_cast_fp16)[name = string("key_states_43_cast_fp16")]; tensor var_2113_shape_cast_fp16 = shape(x = var_2094_cast_fp16)[name = string("op_2113_shape_cast_fp16")]; int32 gather_197 = const()[name = string("gather_197"), val = int32(1)]; int32 gather_198 = const()[name = string("gather_198"), val = int32(2)]; int32 gather_199_axis_0 = const()[name = string("gather_199_axis_0"), val = int32(0)]; int32 gather_199_batch_dims_0 = const()[name = string("gather_199_batch_dims_0"), val = int32(0)]; bool gather_199_validate_indices_0 = const()[name = string("gather_199_validate_indices_0"), val = bool(false)]; string var_2113_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2113_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_199_to_uint16 = const()[name = string("select_199_to_uint16"), val = uint16(2)]; tensor var_2113_shape_cast_fp16_to_uint16 = cast(dtype = var_2113_shape_cast_fp16_to_uint16_dtype_0, x = var_2113_shape_cast_fp16)[name = string("cast_81")]; uint16 gather_199_cast_uint16 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = select_199_to_uint16, validate_indices = gather_199_validate_indices_0, x = var_2113_shape_cast_fp16_to_uint16)[name = string("gather_199_cast_uint16")]; string gather_199_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_199_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_200 = const()[name = string("gather_200"), val = int32(64)]; tensor var_2120_axes_0 = const()[name = string("op_2120_axes_0"), val = tensor([2])]; tensor var_2120_cast_fp16 = expand_dims(axes = var_2120_axes_0, x = var_2094_cast_fp16)[name = string("op_2120_cast_fp16")]; tensor shape_222_cast_fp16 = shape(x = var_2120_cast_fp16)[name = string("shape_222_cast_fp16")]; int32 concat_205_axis_0 = const()[name = string("concat_205_axis_0"), val = int32(0)]; bool concat_205_interleave_0 = const()[name = string("concat_205_interleave_0"), val = bool(false)]; int32 gather_199_cast_uint16_to_int32 = cast(dtype = gather_199_cast_uint16_to_int32_dtype_0, x = gather_199_cast_uint16)[name = string("cast_80")]; tensor concat_205 = concat(axis = concat_205_axis_0, interleave = concat_205_interleave_0, values = (gather_197, gather_198, var_76, gather_199_cast_uint16_to_int32, gather_200))[name = string("concat_205")]; tensor real_div_21 = real_div(x = concat_205, y = shape_222_cast_fp16)[name = string("real_div_21")]; tensor hidden_states_315_cast_fp16 = tile(reps = real_div_21, x = var_2120_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; tensor concat_206x = const()[name = string("concat_206x"), val = tensor([1, 14, -1, 64])]; tensor value_states_43_cast_fp16 = reshape(shape = concat_206x, x = hidden_states_315_cast_fp16)[name = string("value_states_43_cast_fp16")]; tensor var_2130_shape_cast_fp16 = shape(x = key_states_43_cast_fp16)[name = string("op_2130_shape_cast_fp16")]; int32 gather_201_axis_0 = const()[name = string("gather_201_axis_0"), val = int32(0)]; int32 gather_201_batch_dims_0 = const()[name = string("gather_201_batch_dims_0"), val = int32(0)]; bool gather_201_validate_indices_0 = const()[name = string("gather_201_validate_indices_0"), val = bool(false)]; string var_2130_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2130_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_201_to_uint16 = const()[name = string("select_201_to_uint16"), val = uint16(2)]; tensor var_2130_shape_cast_fp16_to_uint16 = cast(dtype = var_2130_shape_cast_fp16_to_uint16_dtype_0, x = var_2130_shape_cast_fp16)[name = string("cast_79")]; uint16 gather_201_cast_uint16 = gather(axis = gather_201_axis_0, batch_dims = gather_201_batch_dims_0, indices = select_201_to_uint16, validate_indices = gather_201_validate_indices_0, x = var_2130_shape_cast_fp16_to_uint16)[name = string("gather_201_cast_uint16")]; string gather_201_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_201_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_207_values0_0 = const()[name = string("concat_207_values0_0"), val = int32(1)]; int32 concat_207_values1_0 = const()[name = string("concat_207_values1_0"), val = int32(1)]; int32 concat_207_values2_0 = const()[name = string("concat_207_values2_0"), val = int32(0)]; int32 concat_207_axis_0 = const()[name = string("concat_207_axis_0"), val = int32(0)]; bool concat_207_interleave_0 = const()[name = string("concat_207_interleave_0"), val = bool(false)]; int32 gather_201_cast_uint16_to_int32 = cast(dtype = gather_201_cast_uint16_to_int32_dtype_0, x = gather_201_cast_uint16)[name = string("cast_78")]; tensor concat_207 = concat(axis = concat_207_axis_0, interleave = concat_207_interleave_0, values = (concat_207_values0_0, concat_207_values1_0, concat_207_values2_0, gather_201_cast_uint16_to_int32))[name = string("concat_207")]; tensor causal_mask_23_begin_0 = const()[name = string("causal_mask_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_23_end_mask_0 = const()[name = string("causal_mask_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_23_cast_fp16 = slice_by_index(begin = causal_mask_23_begin_0, end = concat_207, end_mask = causal_mask_23_end_mask_0, x = causal_mask)[name = string("causal_mask_23_cast_fp16")]; tensor attn_output_41_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_23_cast_fp16, key = key_states_43_cast_fp16, query = query_states_43_cast_fp16, value = value_states_43_cast_fp16)[name = string("attn_output_41_cast_fp16")]; tensor var_2136_perm_0 = const()[name = string("op_2136_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_208x = const()[name = string("concat_208x"), val = tensor([1, -1, 896])]; tensor var_2136_cast_fp16 = transpose(perm = var_2136_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_52")]; tensor input_81_cast_fp16 = reshape(shape = concat_208x, x = var_2136_cast_fp16)[name = string("input_81_cast_fp16")]; tensor model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161109696))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161511168))))[name = string("model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_73_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor hidden_states_319_cast_fp16 = add(x = hidden_states_299_cast_fp16, y = linear_73_cast_fp16)[name = string("hidden_states_319_cast_fp16")]; fp16 var_70_promoted_21_to_fp16 = const()[name = string("op_70_promoted_21_to_fp16"), val = fp16(0x1p+1)]; tensor var_2145_cast_fp16 = pow(x = hidden_states_319_cast_fp16, y = var_70_promoted_21_to_fp16)[name = string("op_2145_cast_fp16")]; tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_2145_cast_fp16)[name = string("variance_43_cast_fp16")]; fp16 var_2148_to_fp16 = const()[name = string("op_2148_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2149_cast_fp16 = add(x = variance_43_cast_fp16, y = var_2148_to_fp16)[name = string("op_2149_cast_fp16")]; fp32 var_2150_epsilon_0 = const()[name = string("op_2150_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2150_cast_fp16 = rsqrt(epsilon = var_2150_epsilon_0, x = var_2149_cast_fp16)[name = string("op_2150_cast_fp16")]; tensor hidden_states_323_cast_fp16 = mul(x = hidden_states_319_cast_fp16, y = var_2150_cast_fp16)[name = string("hidden_states_323_cast_fp16")]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161561408)))]; tensor input_83_cast_fp16 = mul(x = model_model_layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_323_cast_fp16)[name = string("input_83_cast_fp16")]; tensor model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(161563264))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(163742400))))[name = string("model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_gate_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2162_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2162_cast_fp16")]; tensor model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(164014848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166193984))))[name = string("model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_up_proj_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor input_87_cast_fp16 = mul(x = var_2162_cast_fp16, y = linear_75_cast_fp16)[name = string("input_87_cast_fp16")]; tensor model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(166466432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168645568))))[name = string("model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_76_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_10_mlp_down_proj_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor hidden_states_329_cast_fp16 = add(x = hidden_states_319_cast_fp16, y = linear_76_cast_fp16)[name = string("hidden_states_329_cast_fp16")]; fp16 var_70_promoted_22_to_fp16 = const()[name = string("op_70_promoted_22_to_fp16"), val = fp16(0x1p+1)]; tensor var_2175_cast_fp16 = pow(x = hidden_states_329_cast_fp16, y = var_70_promoted_22_to_fp16)[name = string("op_2175_cast_fp16")]; tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_2175_cast_fp16)[name = string("variance_45_cast_fp16")]; fp16 var_2178_to_fp16 = const()[name = string("op_2178_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2179_cast_fp16 = add(x = variance_45_cast_fp16, y = var_2178_to_fp16)[name = string("op_2179_cast_fp16")]; fp32 var_2180_epsilon_0 = const()[name = string("op_2180_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2180_cast_fp16 = rsqrt(epsilon = var_2180_epsilon_0, x = var_2179_cast_fp16)[name = string("op_2180_cast_fp16")]; tensor hidden_states_333_cast_fp16 = mul(x = hidden_states_329_cast_fp16, y = var_2180_cast_fp16)[name = string("hidden_states_333_cast_fp16")]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168918016)))]; tensor hidden_states_337_cast_fp16 = mul(x = model_model_layers_11_input_layernorm_weight_to_fp16, y = hidden_states_333_cast_fp16)[name = string("hidden_states_337_cast_fp16")]; tensor model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(168919872))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169321344))))[name = string("model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169371584)))]; tensor linear_77_cast_fp16 = linear(bias = model_model_layers_11_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_11_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169373440))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169430848))))[name = string("model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_11_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_11_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169438080)))]; tensor linear_78_cast_fp16 = linear(bias = model_model_layers_11_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_11_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_78_cast_fp16")]; tensor model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169438400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169495808))))[name = string("model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169503040)))]; tensor linear_79_cast_fp16 = linear(bias = model_model_layers_11_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_11_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_337_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor concat_209x = const()[name = string("concat_209x"), val = tensor([1, -1, 14, 64])]; tensor var_2203_cast_fp16 = reshape(shape = concat_209x, x = linear_77_cast_fp16)[name = string("op_2203_cast_fp16")]; tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 2, 64])]; tensor var_2206_cast_fp16 = reshape(shape = concat_210x, x = linear_78_cast_fp16)[name = string("op_2206_cast_fp16")]; tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 2, 64])]; tensor var_2209_cast_fp16 = reshape(shape = concat_211x, x = linear_79_cast_fp16)[name = string("op_2209_cast_fp16")]; tensor v_state_23_perm_0 = const()[name = string("v_state_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_23_cast_fp16 = transpose(perm = q_23_perm_0, x = var_2203_cast_fp16)[name = string("transpose_51")]; tensor var_2213_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2213_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2224_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_2224_cast_fp16")]; bool var_2226_interleave_0 = const()[name = string("op_2226_interleave_0"), val = bool(false)]; tensor var_2226_cast_fp16 = concat(axis = var_64, interleave = var_2226_interleave_0, values = (var_2224_cast_fp16, x1_45_cast_fp16))[name = string("op_2226_cast_fp16")]; tensor var_2227_cast_fp16 = mul(x = var_2226_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2227_cast_fp16")]; tensor query_states_47_cast_fp16 = add(x = var_2213_cast_fp16, y = var_2227_cast_fp16)[name = string("query_states_47_cast_fp16")]; tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = var_2206_cast_fp16)[name = string("transpose_50")]; tensor var_2229_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2229_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2240_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2240_cast_fp16")]; bool var_2242_interleave_0 = const()[name = string("op_2242_interleave_0"), val = bool(false)]; tensor var_2242_cast_fp16 = concat(axis = var_64, interleave = var_2242_interleave_0, values = (var_2240_cast_fp16, x1_47_cast_fp16))[name = string("op_2242_cast_fp16")]; tensor var_2243_cast_fp16 = mul(x = var_2242_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2243_cast_fp16")]; tensor k_state_23_cast_fp16 = add(x = var_2229_cast_fp16, y = var_2243_cast_fp16)[name = string("k_state_23_cast_fp16")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor concat_214_values0_0 = const()[name = string("concat_214_values0_0"), val = tensor([11])]; int32 concat_214_axis_0 = const()[name = string("concat_214_axis_0"), val = int32(0)]; bool concat_214_interleave_0 = const()[name = string("concat_214_interleave_0"), val = bool(false)]; tensor concat_214 = concat(axis = concat_214_axis_0, interleave = concat_214_interleave_0, values = (concat_214_values0_0, expand_dims_132, expand_dims_133, expand_dims_2, expand_dims_135))[name = string("concat_214")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_70")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_23_cast_fp16 = transpose(perm = v_state_23_perm_0, x = var_2209_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_214, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_71")]; tensor var_2266_begin_0 = const()[name = string("op_2266_begin_0"), val = tensor([11, 0, 0, 0, 0])]; tensor var_2266_end_0 = const()[name = string("op_2266_end_0"), val = tensor([12, 1, 2, 2048, 64])]; tensor var_2266_end_mask_0 = const()[name = string("op_2266_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2266_squeeze_mask_0 = const()[name = string("op_2266_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2266_cast_fp16 = slice_by_index(begin = var_2266_begin_0, end = var_2266_end_0, end_mask = var_2266_end_mask_0, squeeze_mask = var_2266_squeeze_mask_0, x = coreml_update_state_70)[name = string("op_2266_cast_fp16")]; tensor var_2269_begin_0 = const()[name = string("op_2269_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2269_end_mask_0 = const()[name = string("op_2269_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2269_cast_fp16 = slice_by_index(begin = var_2269_begin_0, end = concat_11, end_mask = var_2269_end_mask_0, x = var_2266_cast_fp16)[name = string("op_2269_cast_fp16")]; tensor var_2271_begin_0 = const()[name = string("op_2271_begin_0"), val = tensor([11, 0, 0, 0, 0])]; tensor var_2271_end_0 = const()[name = string("op_2271_end_0"), val = tensor([12, 1, 2, 2048, 64])]; tensor var_2271_end_mask_0 = const()[name = string("op_2271_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2271_squeeze_mask_0 = const()[name = string("op_2271_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2271_cast_fp16 = slice_by_index(begin = var_2271_begin_0, end = var_2271_end_0, end_mask = var_2271_end_mask_0, squeeze_mask = var_2271_squeeze_mask_0, x = coreml_update_state_71)[name = string("op_2271_cast_fp16")]; tensor var_2274_begin_0 = const()[name = string("op_2274_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2274_end_mask_0 = const()[name = string("op_2274_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2274_cast_fp16 = slice_by_index(begin = var_2274_begin_0, end = concat_11, end_mask = var_2274_end_mask_0, x = var_2271_cast_fp16)[name = string("op_2274_cast_fp16")]; tensor var_2276_shape_cast_fp16 = shape(x = var_2269_cast_fp16)[name = string("op_2276_shape_cast_fp16")]; int32 gather_211 = const()[name = string("gather_211"), val = int32(1)]; int32 gather_212 = const()[name = string("gather_212"), val = int32(2)]; int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; string var_2276_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2276_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_213_to_uint16 = const()[name = string("select_213_to_uint16"), val = uint16(2)]; tensor var_2276_shape_cast_fp16_to_uint16 = cast(dtype = var_2276_shape_cast_fp16_to_uint16_dtype_0, x = var_2276_shape_cast_fp16)[name = string("cast_77")]; uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_2276_shape_cast_fp16_to_uint16)[name = string("gather_213_cast_uint16")]; string gather_213_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_213_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_214 = const()[name = string("gather_214"), val = int32(64)]; tensor var_2283_axes_0 = const()[name = string("op_2283_axes_0"), val = tensor([2])]; tensor var_2283_cast_fp16 = expand_dims(axes = var_2283_axes_0, x = var_2269_cast_fp16)[name = string("op_2283_cast_fp16")]; tensor shape_237_cast_fp16 = shape(x = var_2283_cast_fp16)[name = string("shape_237_cast_fp16")]; int32 concat_222_axis_0 = const()[name = string("concat_222_axis_0"), val = int32(0)]; bool concat_222_interleave_0 = const()[name = string("concat_222_interleave_0"), val = bool(false)]; int32 gather_213_cast_uint16_to_int32 = cast(dtype = gather_213_cast_uint16_to_int32_dtype_0, x = gather_213_cast_uint16)[name = string("cast_76")]; tensor concat_222 = concat(axis = concat_222_axis_0, interleave = concat_222_interleave_0, values = (gather_211, gather_212, var_76, gather_213_cast_uint16_to_int32, gather_214))[name = string("concat_222")]; tensor real_div_22 = real_div(x = concat_222, y = shape_237_cast_fp16)[name = string("real_div_22")]; tensor hidden_states_341_cast_fp16 = tile(reps = real_div_22, x = var_2283_cast_fp16)[name = string("hidden_states_341_cast_fp16")]; tensor concat_223x = const()[name = string("concat_223x"), val = tensor([1, 14, -1, 64])]; tensor key_states_47_cast_fp16 = reshape(shape = concat_223x, x = hidden_states_341_cast_fp16)[name = string("key_states_47_cast_fp16")]; tensor var_2293_shape_cast_fp16 = shape(x = var_2274_cast_fp16)[name = string("op_2293_shape_cast_fp16")]; int32 gather_215 = const()[name = string("gather_215"), val = int32(1)]; int32 gather_216 = const()[name = string("gather_216"), val = int32(2)]; int32 gather_217_axis_0 = const()[name = string("gather_217_axis_0"), val = int32(0)]; int32 gather_217_batch_dims_0 = const()[name = string("gather_217_batch_dims_0"), val = int32(0)]; bool gather_217_validate_indices_0 = const()[name = string("gather_217_validate_indices_0"), val = bool(false)]; string var_2293_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2293_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_217_to_uint16 = const()[name = string("select_217_to_uint16"), val = uint16(2)]; tensor var_2293_shape_cast_fp16_to_uint16 = cast(dtype = var_2293_shape_cast_fp16_to_uint16_dtype_0, x = var_2293_shape_cast_fp16)[name = string("cast_75")]; uint16 gather_217_cast_uint16 = gather(axis = gather_217_axis_0, batch_dims = gather_217_batch_dims_0, indices = select_217_to_uint16, validate_indices = gather_217_validate_indices_0, x = var_2293_shape_cast_fp16_to_uint16)[name = string("gather_217_cast_uint16")]; string gather_217_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_217_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_218 = const()[name = string("gather_218"), val = int32(64)]; tensor var_2300_axes_0 = const()[name = string("op_2300_axes_0"), val = tensor([2])]; tensor var_2300_cast_fp16 = expand_dims(axes = var_2300_axes_0, x = var_2274_cast_fp16)[name = string("op_2300_cast_fp16")]; tensor shape_242_cast_fp16 = shape(x = var_2300_cast_fp16)[name = string("shape_242_cast_fp16")]; int32 concat_224_axis_0 = const()[name = string("concat_224_axis_0"), val = int32(0)]; bool concat_224_interleave_0 = const()[name = string("concat_224_interleave_0"), val = bool(false)]; int32 gather_217_cast_uint16_to_int32 = cast(dtype = gather_217_cast_uint16_to_int32_dtype_0, x = gather_217_cast_uint16)[name = string("cast_74")]; tensor concat_224 = concat(axis = concat_224_axis_0, interleave = concat_224_interleave_0, values = (gather_215, gather_216, var_76, gather_217_cast_uint16_to_int32, gather_218))[name = string("concat_224")]; tensor real_div_23 = real_div(x = concat_224, y = shape_242_cast_fp16)[name = string("real_div_23")]; tensor hidden_states_345_cast_fp16 = tile(reps = real_div_23, x = var_2300_cast_fp16)[name = string("hidden_states_345_cast_fp16")]; tensor concat_225x = const()[name = string("concat_225x"), val = tensor([1, 14, -1, 64])]; tensor value_states_47_cast_fp16 = reshape(shape = concat_225x, x = hidden_states_345_cast_fp16)[name = string("value_states_47_cast_fp16")]; tensor var_2310_shape_cast_fp16 = shape(x = key_states_47_cast_fp16)[name = string("op_2310_shape_cast_fp16")]; int32 gather_219_axis_0 = const()[name = string("gather_219_axis_0"), val = int32(0)]; int32 gather_219_batch_dims_0 = const()[name = string("gather_219_batch_dims_0"), val = int32(0)]; bool gather_219_validate_indices_0 = const()[name = string("gather_219_validate_indices_0"), val = bool(false)]; string var_2310_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2310_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_219_to_uint16 = const()[name = string("select_219_to_uint16"), val = uint16(2)]; tensor var_2310_shape_cast_fp16_to_uint16 = cast(dtype = var_2310_shape_cast_fp16_to_uint16_dtype_0, x = var_2310_shape_cast_fp16)[name = string("cast_73")]; uint16 gather_219_cast_uint16 = gather(axis = gather_219_axis_0, batch_dims = gather_219_batch_dims_0, indices = select_219_to_uint16, validate_indices = gather_219_validate_indices_0, x = var_2310_shape_cast_fp16_to_uint16)[name = string("gather_219_cast_uint16")]; string gather_219_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_219_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_226_values0_0 = const()[name = string("concat_226_values0_0"), val = int32(1)]; int32 concat_226_values1_0 = const()[name = string("concat_226_values1_0"), val = int32(1)]; int32 concat_226_values2_0 = const()[name = string("concat_226_values2_0"), val = int32(0)]; int32 concat_226_axis_0 = const()[name = string("concat_226_axis_0"), val = int32(0)]; bool concat_226_interleave_0 = const()[name = string("concat_226_interleave_0"), val = bool(false)]; int32 gather_219_cast_uint16_to_int32 = cast(dtype = gather_219_cast_uint16_to_int32_dtype_0, x = gather_219_cast_uint16)[name = string("cast_72")]; tensor concat_226 = concat(axis = concat_226_axis_0, interleave = concat_226_interleave_0, values = (concat_226_values0_0, concat_226_values1_0, concat_226_values2_0, gather_219_cast_uint16_to_int32))[name = string("concat_226")]; tensor causal_mask_25_begin_0 = const()[name = string("causal_mask_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_25_end_mask_0 = const()[name = string("causal_mask_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_25_cast_fp16 = slice_by_index(begin = causal_mask_25_begin_0, end = concat_226, end_mask = causal_mask_25_end_mask_0, x = causal_mask)[name = string("causal_mask_25_cast_fp16")]; tensor attn_output_45_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_25_cast_fp16, key = key_states_47_cast_fp16, query = query_states_47_cast_fp16, value = value_states_47_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_2316_perm_0 = const()[name = string("op_2316_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_227x = const()[name = string("concat_227x"), val = tensor([1, -1, 896])]; tensor var_2316_cast_fp16 = transpose(perm = var_2316_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_48")]; tensor input_89_cast_fp16 = reshape(shape = concat_227x, x = var_2316_cast_fp16)[name = string("input_89_cast_fp16")]; tensor model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169503360))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169904832))))[name = string("model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_80_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_to_fp16_quantized, x = input_89_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor hidden_states_349_cast_fp16 = add(x = hidden_states_329_cast_fp16, y = linear_80_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; fp16 var_70_promoted_23_to_fp16 = const()[name = string("op_70_promoted_23_to_fp16"), val = fp16(0x1p+1)]; tensor var_2325_cast_fp16 = pow(x = hidden_states_349_cast_fp16, y = var_70_promoted_23_to_fp16)[name = string("op_2325_cast_fp16")]; tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_2325_cast_fp16)[name = string("variance_47_cast_fp16")]; fp16 var_2328_to_fp16 = const()[name = string("op_2328_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2329_cast_fp16 = add(x = variance_47_cast_fp16, y = var_2328_to_fp16)[name = string("op_2329_cast_fp16")]; fp32 var_2330_epsilon_0 = const()[name = string("op_2330_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2330_cast_fp16 = rsqrt(epsilon = var_2330_epsilon_0, x = var_2329_cast_fp16)[name = string("op_2330_cast_fp16")]; tensor hidden_states_353_cast_fp16 = mul(x = hidden_states_349_cast_fp16, y = var_2330_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169955072)))]; tensor input_91_cast_fp16 = mul(x = model_model_layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_353_cast_fp16)[name = string("input_91_cast_fp16")]; tensor model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(169956928))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172136064))))[name = string("model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_gate_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2342_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2342_cast_fp16")]; tensor model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(172408512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174587648))))[name = string("model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_up_proj_weight_to_fp16_quantized, x = input_91_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor input_95_cast_fp16 = mul(x = var_2342_cast_fp16, y = linear_82_cast_fp16)[name = string("input_95_cast_fp16")]; tensor model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(174860096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177039232))))[name = string("model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_83_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_11_mlp_down_proj_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor hidden_states_359_cast_fp16 = add(x = hidden_states_349_cast_fp16, y = linear_83_cast_fp16)[name = string("hidden_states_359_cast_fp16")]; fp16 var_70_promoted_24_to_fp16 = const()[name = string("op_70_promoted_24_to_fp16"), val = fp16(0x1p+1)]; tensor var_2355_cast_fp16 = pow(x = hidden_states_359_cast_fp16, y = var_70_promoted_24_to_fp16)[name = string("op_2355_cast_fp16")]; tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_2355_cast_fp16)[name = string("variance_49_cast_fp16")]; fp16 var_2358_to_fp16 = const()[name = string("op_2358_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2359_cast_fp16 = add(x = variance_49_cast_fp16, y = var_2358_to_fp16)[name = string("op_2359_cast_fp16")]; fp32 var_2360_epsilon_0 = const()[name = string("op_2360_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2360_cast_fp16 = rsqrt(epsilon = var_2360_epsilon_0, x = var_2359_cast_fp16)[name = string("op_2360_cast_fp16")]; tensor hidden_states_363_cast_fp16 = mul(x = hidden_states_359_cast_fp16, y = var_2360_cast_fp16)[name = string("hidden_states_363_cast_fp16")]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177311680)))]; tensor hidden_states_367_cast_fp16 = mul(x = model_model_layers_12_input_layernorm_weight_to_fp16, y = hidden_states_363_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; tensor model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177313536))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177715008))))[name = string("model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_12_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_12_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177765248)))]; tensor linear_84_cast_fp16 = linear(bias = model_model_layers_12_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_12_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177767104))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177824512))))[name = string("model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_12_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_12_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177831744)))]; tensor linear_85_cast_fp16 = linear(bias = model_model_layers_12_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_12_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177832064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177889472))))[name = string("model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_12_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_12_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177896704)))]; tensor linear_86_cast_fp16 = linear(bias = model_model_layers_12_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_12_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_367_cast_fp16)[name = string("linear_86_cast_fp16")]; tensor concat_228x = const()[name = string("concat_228x"), val = tensor([1, -1, 14, 64])]; tensor var_2383_cast_fp16 = reshape(shape = concat_228x, x = linear_84_cast_fp16)[name = string("op_2383_cast_fp16")]; tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_229x = const()[name = string("concat_229x"), val = tensor([1, -1, 2, 64])]; tensor var_2386_cast_fp16 = reshape(shape = concat_229x, x = linear_85_cast_fp16)[name = string("op_2386_cast_fp16")]; tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_230x = const()[name = string("concat_230x"), val = tensor([1, -1, 2, 64])]; tensor var_2389_cast_fp16 = reshape(shape = concat_230x, x = linear_86_cast_fp16)[name = string("op_2389_cast_fp16")]; tensor v_state_25_perm_0 = const()[name = string("v_state_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_2383_cast_fp16)[name = string("transpose_47")]; tensor var_2393_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2393_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2404_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_2404_cast_fp16")]; bool var_2406_interleave_0 = const()[name = string("op_2406_interleave_0"), val = bool(false)]; tensor var_2406_cast_fp16 = concat(axis = var_64, interleave = var_2406_interleave_0, values = (var_2404_cast_fp16, x1_49_cast_fp16))[name = string("op_2406_cast_fp16")]; tensor var_2407_cast_fp16 = mul(x = var_2406_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2407_cast_fp16")]; tensor query_states_51_cast_fp16 = add(x = var_2393_cast_fp16, y = var_2407_cast_fp16)[name = string("query_states_51_cast_fp16")]; tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_2386_cast_fp16)[name = string("transpose_46")]; tensor var_2409_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2409_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2420_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_2420_cast_fp16")]; bool var_2422_interleave_0 = const()[name = string("op_2422_interleave_0"), val = bool(false)]; tensor var_2422_cast_fp16 = concat(axis = var_64, interleave = var_2422_interleave_0, values = (var_2420_cast_fp16, x1_51_cast_fp16))[name = string("op_2422_cast_fp16")]; tensor var_2423_cast_fp16 = mul(x = var_2422_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2423_cast_fp16")]; tensor k_state_25_cast_fp16 = add(x = var_2409_cast_fp16, y = var_2423_cast_fp16)[name = string("k_state_25_cast_fp16")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor concat_233_values0_0 = const()[name = string("concat_233_values0_0"), val = tensor([12])]; int32 concat_233_axis_0 = const()[name = string("concat_233_axis_0"), val = int32(0)]; bool concat_233_interleave_0 = const()[name = string("concat_233_interleave_0"), val = bool(false)]; tensor concat_233 = concat(axis = concat_233_axis_0, interleave = concat_233_interleave_0, values = (concat_233_values0_0, expand_dims_144, expand_dims_145, expand_dims_2, expand_dims_147))[name = string("concat_233")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_72")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_25_cast_fp16 = transpose(perm = v_state_25_perm_0, x = var_2389_cast_fp16)[name = string("transpose_45")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_233, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_73")]; tensor var_2446_begin_0 = const()[name = string("op_2446_begin_0"), val = tensor([12, 0, 0, 0, 0])]; tensor var_2446_end_0 = const()[name = string("op_2446_end_0"), val = tensor([13, 1, 2, 2048, 64])]; tensor var_2446_end_mask_0 = const()[name = string("op_2446_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2446_squeeze_mask_0 = const()[name = string("op_2446_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2446_cast_fp16 = slice_by_index(begin = var_2446_begin_0, end = var_2446_end_0, end_mask = var_2446_end_mask_0, squeeze_mask = var_2446_squeeze_mask_0, x = coreml_update_state_72)[name = string("op_2446_cast_fp16")]; tensor var_2449_begin_0 = const()[name = string("op_2449_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2449_end_mask_0 = const()[name = string("op_2449_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2449_cast_fp16 = slice_by_index(begin = var_2449_begin_0, end = concat_11, end_mask = var_2449_end_mask_0, x = var_2446_cast_fp16)[name = string("op_2449_cast_fp16")]; tensor var_2451_begin_0 = const()[name = string("op_2451_begin_0"), val = tensor([12, 0, 0, 0, 0])]; tensor var_2451_end_0 = const()[name = string("op_2451_end_0"), val = tensor([13, 1, 2, 2048, 64])]; tensor var_2451_end_mask_0 = const()[name = string("op_2451_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2451_squeeze_mask_0 = const()[name = string("op_2451_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2451_cast_fp16 = slice_by_index(begin = var_2451_begin_0, end = var_2451_end_0, end_mask = var_2451_end_mask_0, squeeze_mask = var_2451_squeeze_mask_0, x = coreml_update_state_73)[name = string("op_2451_cast_fp16")]; tensor var_2454_begin_0 = const()[name = string("op_2454_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2454_end_mask_0 = const()[name = string("op_2454_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2454_cast_fp16 = slice_by_index(begin = var_2454_begin_0, end = concat_11, end_mask = var_2454_end_mask_0, x = var_2451_cast_fp16)[name = string("op_2454_cast_fp16")]; tensor var_2456_shape_cast_fp16 = shape(x = var_2449_cast_fp16)[name = string("op_2456_shape_cast_fp16")]; int32 gather_229 = const()[name = string("gather_229"), val = int32(1)]; int32 gather_230 = const()[name = string("gather_230"), val = int32(2)]; int32 gather_231_axis_0 = const()[name = string("gather_231_axis_0"), val = int32(0)]; int32 gather_231_batch_dims_0 = const()[name = string("gather_231_batch_dims_0"), val = int32(0)]; bool gather_231_validate_indices_0 = const()[name = string("gather_231_validate_indices_0"), val = bool(false)]; string var_2456_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2456_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_231_to_uint16 = const()[name = string("select_231_to_uint16"), val = uint16(2)]; tensor var_2456_shape_cast_fp16_to_uint16 = cast(dtype = var_2456_shape_cast_fp16_to_uint16_dtype_0, x = var_2456_shape_cast_fp16)[name = string("cast_71")]; uint16 gather_231_cast_uint16 = gather(axis = gather_231_axis_0, batch_dims = gather_231_batch_dims_0, indices = select_231_to_uint16, validate_indices = gather_231_validate_indices_0, x = var_2456_shape_cast_fp16_to_uint16)[name = string("gather_231_cast_uint16")]; string gather_231_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_231_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_232 = const()[name = string("gather_232"), val = int32(64)]; tensor var_2463_axes_0 = const()[name = string("op_2463_axes_0"), val = tensor([2])]; tensor var_2463_cast_fp16 = expand_dims(axes = var_2463_axes_0, x = var_2449_cast_fp16)[name = string("op_2463_cast_fp16")]; tensor shape_257_cast_fp16 = shape(x = var_2463_cast_fp16)[name = string("shape_257_cast_fp16")]; int32 concat_241_axis_0 = const()[name = string("concat_241_axis_0"), val = int32(0)]; bool concat_241_interleave_0 = const()[name = string("concat_241_interleave_0"), val = bool(false)]; int32 gather_231_cast_uint16_to_int32 = cast(dtype = gather_231_cast_uint16_to_int32_dtype_0, x = gather_231_cast_uint16)[name = string("cast_70")]; tensor concat_241 = concat(axis = concat_241_axis_0, interleave = concat_241_interleave_0, values = (gather_229, gather_230, var_76, gather_231_cast_uint16_to_int32, gather_232))[name = string("concat_241")]; tensor real_div_24 = real_div(x = concat_241, y = shape_257_cast_fp16)[name = string("real_div_24")]; tensor hidden_states_371_cast_fp16 = tile(reps = real_div_24, x = var_2463_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; tensor concat_242x = const()[name = string("concat_242x"), val = tensor([1, 14, -1, 64])]; tensor key_states_51_cast_fp16 = reshape(shape = concat_242x, x = hidden_states_371_cast_fp16)[name = string("key_states_51_cast_fp16")]; tensor var_2473_shape_cast_fp16 = shape(x = var_2454_cast_fp16)[name = string("op_2473_shape_cast_fp16")]; int32 gather_233 = const()[name = string("gather_233"), val = int32(1)]; int32 gather_234 = const()[name = string("gather_234"), val = int32(2)]; int32 gather_235_axis_0 = const()[name = string("gather_235_axis_0"), val = int32(0)]; int32 gather_235_batch_dims_0 = const()[name = string("gather_235_batch_dims_0"), val = int32(0)]; bool gather_235_validate_indices_0 = const()[name = string("gather_235_validate_indices_0"), val = bool(false)]; string var_2473_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2473_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_235_to_uint16 = const()[name = string("select_235_to_uint16"), val = uint16(2)]; tensor var_2473_shape_cast_fp16_to_uint16 = cast(dtype = var_2473_shape_cast_fp16_to_uint16_dtype_0, x = var_2473_shape_cast_fp16)[name = string("cast_69")]; uint16 gather_235_cast_uint16 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235_to_uint16, validate_indices = gather_235_validate_indices_0, x = var_2473_shape_cast_fp16_to_uint16)[name = string("gather_235_cast_uint16")]; string gather_235_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_235_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_236 = const()[name = string("gather_236"), val = int32(64)]; tensor var_2480_axes_0 = const()[name = string("op_2480_axes_0"), val = tensor([2])]; tensor var_2480_cast_fp16 = expand_dims(axes = var_2480_axes_0, x = var_2454_cast_fp16)[name = string("op_2480_cast_fp16")]; tensor shape_262_cast_fp16 = shape(x = var_2480_cast_fp16)[name = string("shape_262_cast_fp16")]; int32 concat_243_axis_0 = const()[name = string("concat_243_axis_0"), val = int32(0)]; bool concat_243_interleave_0 = const()[name = string("concat_243_interleave_0"), val = bool(false)]; int32 gather_235_cast_uint16_to_int32 = cast(dtype = gather_235_cast_uint16_to_int32_dtype_0, x = gather_235_cast_uint16)[name = string("cast_68")]; tensor concat_243 = concat(axis = concat_243_axis_0, interleave = concat_243_interleave_0, values = (gather_233, gather_234, var_76, gather_235_cast_uint16_to_int32, gather_236))[name = string("concat_243")]; tensor real_div_25 = real_div(x = concat_243, y = shape_262_cast_fp16)[name = string("real_div_25")]; tensor hidden_states_375_cast_fp16 = tile(reps = real_div_25, x = var_2480_cast_fp16)[name = string("hidden_states_375_cast_fp16")]; tensor concat_244x = const()[name = string("concat_244x"), val = tensor([1, 14, -1, 64])]; tensor value_states_51_cast_fp16 = reshape(shape = concat_244x, x = hidden_states_375_cast_fp16)[name = string("value_states_51_cast_fp16")]; tensor var_2490_shape_cast_fp16 = shape(x = key_states_51_cast_fp16)[name = string("op_2490_shape_cast_fp16")]; int32 gather_237_axis_0 = const()[name = string("gather_237_axis_0"), val = int32(0)]; int32 gather_237_batch_dims_0 = const()[name = string("gather_237_batch_dims_0"), val = int32(0)]; bool gather_237_validate_indices_0 = const()[name = string("gather_237_validate_indices_0"), val = bool(false)]; string var_2490_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2490_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_237_to_uint16 = const()[name = string("select_237_to_uint16"), val = uint16(2)]; tensor var_2490_shape_cast_fp16_to_uint16 = cast(dtype = var_2490_shape_cast_fp16_to_uint16_dtype_0, x = var_2490_shape_cast_fp16)[name = string("cast_67")]; uint16 gather_237_cast_uint16 = gather(axis = gather_237_axis_0, batch_dims = gather_237_batch_dims_0, indices = select_237_to_uint16, validate_indices = gather_237_validate_indices_0, x = var_2490_shape_cast_fp16_to_uint16)[name = string("gather_237_cast_uint16")]; string gather_237_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_237_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_245_values0_0 = const()[name = string("concat_245_values0_0"), val = int32(1)]; int32 concat_245_values1_0 = const()[name = string("concat_245_values1_0"), val = int32(1)]; int32 concat_245_values2_0 = const()[name = string("concat_245_values2_0"), val = int32(0)]; int32 concat_245_axis_0 = const()[name = string("concat_245_axis_0"), val = int32(0)]; bool concat_245_interleave_0 = const()[name = string("concat_245_interleave_0"), val = bool(false)]; int32 gather_237_cast_uint16_to_int32 = cast(dtype = gather_237_cast_uint16_to_int32_dtype_0, x = gather_237_cast_uint16)[name = string("cast_66")]; tensor concat_245 = concat(axis = concat_245_axis_0, interleave = concat_245_interleave_0, values = (concat_245_values0_0, concat_245_values1_0, concat_245_values2_0, gather_237_cast_uint16_to_int32))[name = string("concat_245")]; tensor causal_mask_27_begin_0 = const()[name = string("causal_mask_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_27_end_mask_0 = const()[name = string("causal_mask_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_27_cast_fp16 = slice_by_index(begin = causal_mask_27_begin_0, end = concat_245, end_mask = causal_mask_27_end_mask_0, x = causal_mask)[name = string("causal_mask_27_cast_fp16")]; tensor attn_output_49_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_27_cast_fp16, key = key_states_51_cast_fp16, query = query_states_51_cast_fp16, value = value_states_51_cast_fp16)[name = string("attn_output_49_cast_fp16")]; tensor var_2496_perm_0 = const()[name = string("op_2496_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_246x = const()[name = string("concat_246x"), val = tensor([1, -1, 896])]; tensor var_2496_cast_fp16 = transpose(perm = var_2496_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_44")]; tensor input_97_cast_fp16 = reshape(shape = concat_246x, x = var_2496_cast_fp16)[name = string("input_97_cast_fp16")]; tensor model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(177897024))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178298496))))[name = string("model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_87_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_to_fp16_quantized, x = input_97_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor hidden_states_379_cast_fp16 = add(x = hidden_states_359_cast_fp16, y = linear_87_cast_fp16)[name = string("hidden_states_379_cast_fp16")]; fp16 var_70_promoted_25_to_fp16 = const()[name = string("op_70_promoted_25_to_fp16"), val = fp16(0x1p+1)]; tensor var_2505_cast_fp16 = pow(x = hidden_states_379_cast_fp16, y = var_70_promoted_25_to_fp16)[name = string("op_2505_cast_fp16")]; tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_2505_cast_fp16)[name = string("variance_51_cast_fp16")]; fp16 var_2508_to_fp16 = const()[name = string("op_2508_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2509_cast_fp16 = add(x = variance_51_cast_fp16, y = var_2508_to_fp16)[name = string("op_2509_cast_fp16")]; fp32 var_2510_epsilon_0 = const()[name = string("op_2510_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2510_cast_fp16 = rsqrt(epsilon = var_2510_epsilon_0, x = var_2509_cast_fp16)[name = string("op_2510_cast_fp16")]; tensor hidden_states_383_cast_fp16 = mul(x = hidden_states_379_cast_fp16, y = var_2510_cast_fp16)[name = string("hidden_states_383_cast_fp16")]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178348736)))]; tensor input_99_cast_fp16 = mul(x = model_model_layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_383_cast_fp16)[name = string("input_99_cast_fp16")]; tensor model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(178350592))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180529728))))[name = string("model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_gate_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2522_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2522_cast_fp16")]; tensor model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(180802176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(182981312))))[name = string("model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_up_proj_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor input_103_cast_fp16 = mul(x = var_2522_cast_fp16, y = linear_89_cast_fp16)[name = string("input_103_cast_fp16")]; tensor model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(183253760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185432896))))[name = string("model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_90_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_12_mlp_down_proj_weight_to_fp16_quantized, x = input_103_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor hidden_states_389_cast_fp16 = add(x = hidden_states_379_cast_fp16, y = linear_90_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; fp16 var_70_promoted_26_to_fp16 = const()[name = string("op_70_promoted_26_to_fp16"), val = fp16(0x1p+1)]; tensor var_2535_cast_fp16 = pow(x = hidden_states_389_cast_fp16, y = var_70_promoted_26_to_fp16)[name = string("op_2535_cast_fp16")]; tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_2535_cast_fp16)[name = string("variance_53_cast_fp16")]; fp16 var_2538_to_fp16 = const()[name = string("op_2538_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2539_cast_fp16 = add(x = variance_53_cast_fp16, y = var_2538_to_fp16)[name = string("op_2539_cast_fp16")]; fp32 var_2540_epsilon_0 = const()[name = string("op_2540_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2540_cast_fp16 = rsqrt(epsilon = var_2540_epsilon_0, x = var_2539_cast_fp16)[name = string("op_2540_cast_fp16")]; tensor hidden_states_393_cast_fp16 = mul(x = hidden_states_389_cast_fp16, y = var_2540_cast_fp16)[name = string("hidden_states_393_cast_fp16")]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185705344)))]; tensor hidden_states_397_cast_fp16 = mul(x = model_model_layers_13_input_layernorm_weight_to_fp16, y = hidden_states_393_cast_fp16)[name = string("hidden_states_397_cast_fp16")]; tensor model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(185707200))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186108672))))[name = string("model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_13_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_13_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186158912)))]; tensor linear_91_cast_fp16 = linear(bias = model_model_layers_13_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_13_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186160768))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186218176))))[name = string("model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_13_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_13_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186225408)))]; tensor linear_92_cast_fp16 = linear(bias = model_model_layers_13_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_13_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186225728))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186283136))))[name = string("model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_13_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_13_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186290368)))]; tensor linear_93_cast_fp16 = linear(bias = model_model_layers_13_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_13_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_397_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor concat_247x = const()[name = string("concat_247x"), val = tensor([1, -1, 14, 64])]; tensor var_2563_cast_fp16 = reshape(shape = concat_247x, x = linear_91_cast_fp16)[name = string("op_2563_cast_fp16")]; tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_248x = const()[name = string("concat_248x"), val = tensor([1, -1, 2, 64])]; tensor var_2566_cast_fp16 = reshape(shape = concat_248x, x = linear_92_cast_fp16)[name = string("op_2566_cast_fp16")]; tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_249x = const()[name = string("concat_249x"), val = tensor([1, -1, 2, 64])]; tensor var_2569_cast_fp16 = reshape(shape = concat_249x, x = linear_93_cast_fp16)[name = string("op_2569_cast_fp16")]; tensor v_state_27_perm_0 = const()[name = string("v_state_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_27_cast_fp16 = transpose(perm = q_27_perm_0, x = var_2563_cast_fp16)[name = string("transpose_43")]; tensor var_2573_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2573_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2584_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2584_cast_fp16")]; bool var_2586_interleave_0 = const()[name = string("op_2586_interleave_0"), val = bool(false)]; tensor var_2586_cast_fp16 = concat(axis = var_64, interleave = var_2586_interleave_0, values = (var_2584_cast_fp16, x1_53_cast_fp16))[name = string("op_2586_cast_fp16")]; tensor var_2587_cast_fp16 = mul(x = var_2586_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2587_cast_fp16")]; tensor query_states_55_cast_fp16 = add(x = var_2573_cast_fp16, y = var_2587_cast_fp16)[name = string("query_states_55_cast_fp16")]; tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = var_2566_cast_fp16)[name = string("transpose_42")]; tensor var_2589_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2589_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2600_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2600_cast_fp16")]; bool var_2602_interleave_0 = const()[name = string("op_2602_interleave_0"), val = bool(false)]; tensor var_2602_cast_fp16 = concat(axis = var_64, interleave = var_2602_interleave_0, values = (var_2600_cast_fp16, x1_55_cast_fp16))[name = string("op_2602_cast_fp16")]; tensor var_2603_cast_fp16 = mul(x = var_2602_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2603_cast_fp16")]; tensor k_state_27_cast_fp16 = add(x = var_2589_cast_fp16, y = var_2603_cast_fp16)[name = string("k_state_27_cast_fp16")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([0])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor concat_252_values0_0 = const()[name = string("concat_252_values0_0"), val = tensor([13])]; int32 concat_252_axis_0 = const()[name = string("concat_252_axis_0"), val = int32(0)]; bool concat_252_interleave_0 = const()[name = string("concat_252_interleave_0"), val = bool(false)]; tensor concat_252 = concat(axis = concat_252_axis_0, interleave = concat_252_interleave_0, values = (concat_252_values0_0, expand_dims_156, expand_dims_157, expand_dims_2, expand_dims_159))[name = string("concat_252")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_74")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_27_cast_fp16 = transpose(perm = v_state_27_perm_0, x = var_2569_cast_fp16)[name = string("transpose_41")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_252, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_75")]; tensor var_2626_begin_0 = const()[name = string("op_2626_begin_0"), val = tensor([13, 0, 0, 0, 0])]; tensor var_2626_end_0 = const()[name = string("op_2626_end_0"), val = tensor([14, 1, 2, 2048, 64])]; tensor var_2626_end_mask_0 = const()[name = string("op_2626_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2626_squeeze_mask_0 = const()[name = string("op_2626_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2626_cast_fp16 = slice_by_index(begin = var_2626_begin_0, end = var_2626_end_0, end_mask = var_2626_end_mask_0, squeeze_mask = var_2626_squeeze_mask_0, x = coreml_update_state_74)[name = string("op_2626_cast_fp16")]; tensor var_2629_begin_0 = const()[name = string("op_2629_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2629_end_mask_0 = const()[name = string("op_2629_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2629_cast_fp16 = slice_by_index(begin = var_2629_begin_0, end = concat_11, end_mask = var_2629_end_mask_0, x = var_2626_cast_fp16)[name = string("op_2629_cast_fp16")]; tensor var_2631_begin_0 = const()[name = string("op_2631_begin_0"), val = tensor([13, 0, 0, 0, 0])]; tensor var_2631_end_0 = const()[name = string("op_2631_end_0"), val = tensor([14, 1, 2, 2048, 64])]; tensor var_2631_end_mask_0 = const()[name = string("op_2631_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2631_squeeze_mask_0 = const()[name = string("op_2631_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2631_cast_fp16 = slice_by_index(begin = var_2631_begin_0, end = var_2631_end_0, end_mask = var_2631_end_mask_0, squeeze_mask = var_2631_squeeze_mask_0, x = coreml_update_state_75)[name = string("op_2631_cast_fp16")]; tensor var_2634_begin_0 = const()[name = string("op_2634_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2634_end_mask_0 = const()[name = string("op_2634_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2634_cast_fp16 = slice_by_index(begin = var_2634_begin_0, end = concat_11, end_mask = var_2634_end_mask_0, x = var_2631_cast_fp16)[name = string("op_2634_cast_fp16")]; tensor var_2636_shape_cast_fp16 = shape(x = var_2629_cast_fp16)[name = string("op_2636_shape_cast_fp16")]; int32 gather_247 = const()[name = string("gather_247"), val = int32(1)]; int32 gather_248 = const()[name = string("gather_248"), val = int32(2)]; int32 gather_249_axis_0 = const()[name = string("gather_249_axis_0"), val = int32(0)]; int32 gather_249_batch_dims_0 = const()[name = string("gather_249_batch_dims_0"), val = int32(0)]; bool gather_249_validate_indices_0 = const()[name = string("gather_249_validate_indices_0"), val = bool(false)]; string var_2636_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2636_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_249_to_uint16 = const()[name = string("select_249_to_uint16"), val = uint16(2)]; tensor var_2636_shape_cast_fp16_to_uint16 = cast(dtype = var_2636_shape_cast_fp16_to_uint16_dtype_0, x = var_2636_shape_cast_fp16)[name = string("cast_65")]; uint16 gather_249_cast_uint16 = gather(axis = gather_249_axis_0, batch_dims = gather_249_batch_dims_0, indices = select_249_to_uint16, validate_indices = gather_249_validate_indices_0, x = var_2636_shape_cast_fp16_to_uint16)[name = string("gather_249_cast_uint16")]; string gather_249_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_249_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_250 = const()[name = string("gather_250"), val = int32(64)]; tensor var_2643_axes_0 = const()[name = string("op_2643_axes_0"), val = tensor([2])]; tensor var_2643_cast_fp16 = expand_dims(axes = var_2643_axes_0, x = var_2629_cast_fp16)[name = string("op_2643_cast_fp16")]; tensor shape_277_cast_fp16 = shape(x = var_2643_cast_fp16)[name = string("shape_277_cast_fp16")]; int32 concat_260_axis_0 = const()[name = string("concat_260_axis_0"), val = int32(0)]; bool concat_260_interleave_0 = const()[name = string("concat_260_interleave_0"), val = bool(false)]; int32 gather_249_cast_uint16_to_int32 = cast(dtype = gather_249_cast_uint16_to_int32_dtype_0, x = gather_249_cast_uint16)[name = string("cast_64")]; tensor concat_260 = concat(axis = concat_260_axis_0, interleave = concat_260_interleave_0, values = (gather_247, gather_248, var_76, gather_249_cast_uint16_to_int32, gather_250))[name = string("concat_260")]; tensor real_div_26 = real_div(x = concat_260, y = shape_277_cast_fp16)[name = string("real_div_26")]; tensor hidden_states_401_cast_fp16 = tile(reps = real_div_26, x = var_2643_cast_fp16)[name = string("hidden_states_401_cast_fp16")]; tensor concat_261x = const()[name = string("concat_261x"), val = tensor([1, 14, -1, 64])]; tensor key_states_55_cast_fp16 = reshape(shape = concat_261x, x = hidden_states_401_cast_fp16)[name = string("key_states_55_cast_fp16")]; tensor var_2653_shape_cast_fp16 = shape(x = var_2634_cast_fp16)[name = string("op_2653_shape_cast_fp16")]; int32 gather_251 = const()[name = string("gather_251"), val = int32(1)]; int32 gather_252 = const()[name = string("gather_252"), val = int32(2)]; int32 gather_253_axis_0 = const()[name = string("gather_253_axis_0"), val = int32(0)]; int32 gather_253_batch_dims_0 = const()[name = string("gather_253_batch_dims_0"), val = int32(0)]; bool gather_253_validate_indices_0 = const()[name = string("gather_253_validate_indices_0"), val = bool(false)]; string var_2653_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2653_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_253_to_uint16 = const()[name = string("select_253_to_uint16"), val = uint16(2)]; tensor var_2653_shape_cast_fp16_to_uint16 = cast(dtype = var_2653_shape_cast_fp16_to_uint16_dtype_0, x = var_2653_shape_cast_fp16)[name = string("cast_63")]; uint16 gather_253_cast_uint16 = gather(axis = gather_253_axis_0, batch_dims = gather_253_batch_dims_0, indices = select_253_to_uint16, validate_indices = gather_253_validate_indices_0, x = var_2653_shape_cast_fp16_to_uint16)[name = string("gather_253_cast_uint16")]; string gather_253_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_253_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_254 = const()[name = string("gather_254"), val = int32(64)]; tensor var_2660_axes_0 = const()[name = string("op_2660_axes_0"), val = tensor([2])]; tensor var_2660_cast_fp16 = expand_dims(axes = var_2660_axes_0, x = var_2634_cast_fp16)[name = string("op_2660_cast_fp16")]; tensor shape_282_cast_fp16 = shape(x = var_2660_cast_fp16)[name = string("shape_282_cast_fp16")]; int32 concat_262_axis_0 = const()[name = string("concat_262_axis_0"), val = int32(0)]; bool concat_262_interleave_0 = const()[name = string("concat_262_interleave_0"), val = bool(false)]; int32 gather_253_cast_uint16_to_int32 = cast(dtype = gather_253_cast_uint16_to_int32_dtype_0, x = gather_253_cast_uint16)[name = string("cast_62")]; tensor concat_262 = concat(axis = concat_262_axis_0, interleave = concat_262_interleave_0, values = (gather_251, gather_252, var_76, gather_253_cast_uint16_to_int32, gather_254))[name = string("concat_262")]; tensor real_div_27 = real_div(x = concat_262, y = shape_282_cast_fp16)[name = string("real_div_27")]; tensor hidden_states_405_cast_fp16 = tile(reps = real_div_27, x = var_2660_cast_fp16)[name = string("hidden_states_405_cast_fp16")]; tensor concat_263x = const()[name = string("concat_263x"), val = tensor([1, 14, -1, 64])]; tensor value_states_55_cast_fp16 = reshape(shape = concat_263x, x = hidden_states_405_cast_fp16)[name = string("value_states_55_cast_fp16")]; tensor var_2670_shape_cast_fp16 = shape(x = key_states_55_cast_fp16)[name = string("op_2670_shape_cast_fp16")]; int32 gather_255_axis_0 = const()[name = string("gather_255_axis_0"), val = int32(0)]; int32 gather_255_batch_dims_0 = const()[name = string("gather_255_batch_dims_0"), val = int32(0)]; bool gather_255_validate_indices_0 = const()[name = string("gather_255_validate_indices_0"), val = bool(false)]; string var_2670_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2670_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_255_to_uint16 = const()[name = string("select_255_to_uint16"), val = uint16(2)]; tensor var_2670_shape_cast_fp16_to_uint16 = cast(dtype = var_2670_shape_cast_fp16_to_uint16_dtype_0, x = var_2670_shape_cast_fp16)[name = string("cast_61")]; uint16 gather_255_cast_uint16 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = select_255_to_uint16, validate_indices = gather_255_validate_indices_0, x = var_2670_shape_cast_fp16_to_uint16)[name = string("gather_255_cast_uint16")]; string gather_255_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_255_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_264_values0_0 = const()[name = string("concat_264_values0_0"), val = int32(1)]; int32 concat_264_values1_0 = const()[name = string("concat_264_values1_0"), val = int32(1)]; int32 concat_264_values2_0 = const()[name = string("concat_264_values2_0"), val = int32(0)]; int32 concat_264_axis_0 = const()[name = string("concat_264_axis_0"), val = int32(0)]; bool concat_264_interleave_0 = const()[name = string("concat_264_interleave_0"), val = bool(false)]; int32 gather_255_cast_uint16_to_int32 = cast(dtype = gather_255_cast_uint16_to_int32_dtype_0, x = gather_255_cast_uint16)[name = string("cast_60")]; tensor concat_264 = concat(axis = concat_264_axis_0, interleave = concat_264_interleave_0, values = (concat_264_values0_0, concat_264_values1_0, concat_264_values2_0, gather_255_cast_uint16_to_int32))[name = string("concat_264")]; tensor causal_mask_29_begin_0 = const()[name = string("causal_mask_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_29_end_mask_0 = const()[name = string("causal_mask_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_29_cast_fp16 = slice_by_index(begin = causal_mask_29_begin_0, end = concat_264, end_mask = causal_mask_29_end_mask_0, x = causal_mask)[name = string("causal_mask_29_cast_fp16")]; tensor attn_output_53_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_29_cast_fp16, key = key_states_55_cast_fp16, query = query_states_55_cast_fp16, value = value_states_55_cast_fp16)[name = string("attn_output_53_cast_fp16")]; tensor var_2676_perm_0 = const()[name = string("op_2676_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_265x = const()[name = string("concat_265x"), val = tensor([1, -1, 896])]; tensor var_2676_cast_fp16 = transpose(perm = var_2676_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_40")]; tensor input_105_cast_fp16 = reshape(shape = concat_265x, x = var_2676_cast_fp16)[name = string("input_105_cast_fp16")]; tensor model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186290688))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186692160))))[name = string("model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_94_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = string("linear_94_cast_fp16")]; tensor hidden_states_409_cast_fp16 = add(x = hidden_states_389_cast_fp16, y = linear_94_cast_fp16)[name = string("hidden_states_409_cast_fp16")]; fp16 var_70_promoted_27_to_fp16 = const()[name = string("op_70_promoted_27_to_fp16"), val = fp16(0x1p+1)]; tensor var_2685_cast_fp16 = pow(x = hidden_states_409_cast_fp16, y = var_70_promoted_27_to_fp16)[name = string("op_2685_cast_fp16")]; tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_2685_cast_fp16)[name = string("variance_55_cast_fp16")]; fp16 var_2688_to_fp16 = const()[name = string("op_2688_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2689_cast_fp16 = add(x = variance_55_cast_fp16, y = var_2688_to_fp16)[name = string("op_2689_cast_fp16")]; fp32 var_2690_epsilon_0 = const()[name = string("op_2690_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2690_cast_fp16 = rsqrt(epsilon = var_2690_epsilon_0, x = var_2689_cast_fp16)[name = string("op_2690_cast_fp16")]; tensor hidden_states_413_cast_fp16 = mul(x = hidden_states_409_cast_fp16, y = var_2690_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186742400)))]; tensor input_107_cast_fp16 = mul(x = model_model_layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_413_cast_fp16)[name = string("input_107_cast_fp16")]; tensor model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(186744256))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(188923392))))[name = string("model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_gate_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor var_2702_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_2702_cast_fp16")]; tensor model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(189195840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191374976))))[name = string("model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_up_proj_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor input_111_cast_fp16 = mul(x = var_2702_cast_fp16, y = linear_96_cast_fp16)[name = string("input_111_cast_fp16")]; tensor model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(191647424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(193826560))))[name = string("model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_97_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_13_mlp_down_proj_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor hidden_states_419_cast_fp16 = add(x = hidden_states_409_cast_fp16, y = linear_97_cast_fp16)[name = string("hidden_states_419_cast_fp16")]; fp16 var_70_promoted_28_to_fp16 = const()[name = string("op_70_promoted_28_to_fp16"), val = fp16(0x1p+1)]; tensor var_2715_cast_fp16 = pow(x = hidden_states_419_cast_fp16, y = var_70_promoted_28_to_fp16)[name = string("op_2715_cast_fp16")]; tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_2715_cast_fp16)[name = string("variance_57_cast_fp16")]; fp16 var_2718_to_fp16 = const()[name = string("op_2718_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2719_cast_fp16 = add(x = variance_57_cast_fp16, y = var_2718_to_fp16)[name = string("op_2719_cast_fp16")]; fp32 var_2720_epsilon_0 = const()[name = string("op_2720_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2720_cast_fp16 = rsqrt(epsilon = var_2720_epsilon_0, x = var_2719_cast_fp16)[name = string("op_2720_cast_fp16")]; tensor hidden_states_423_cast_fp16 = mul(x = hidden_states_419_cast_fp16, y = var_2720_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194099008)))]; tensor hidden_states_427_cast_fp16 = mul(x = model_model_layers_14_input_layernorm_weight_to_fp16, y = hidden_states_423_cast_fp16)[name = string("hidden_states_427_cast_fp16")]; tensor model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194100864))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194502336))))[name = string("model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_14_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_14_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194552576)))]; tensor linear_98_cast_fp16 = linear(bias = model_model_layers_14_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_14_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194554432))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194611840))))[name = string("model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_14_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_14_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194619072)))]; tensor linear_99_cast_fp16 = linear(bias = model_model_layers_14_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_14_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194619392))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194676800))))[name = string("model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_14_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_14_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194684032)))]; tensor linear_100_cast_fp16 = linear(bias = model_model_layers_14_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_14_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_427_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor concat_266x = const()[name = string("concat_266x"), val = tensor([1, -1, 14, 64])]; tensor var_2743_cast_fp16 = reshape(shape = concat_266x, x = linear_98_cast_fp16)[name = string("op_2743_cast_fp16")]; tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_267x = const()[name = string("concat_267x"), val = tensor([1, -1, 2, 64])]; tensor var_2746_cast_fp16 = reshape(shape = concat_267x, x = linear_99_cast_fp16)[name = string("op_2746_cast_fp16")]; tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_268x = const()[name = string("concat_268x"), val = tensor([1, -1, 2, 64])]; tensor var_2749_cast_fp16 = reshape(shape = concat_268x, x = linear_100_cast_fp16)[name = string("op_2749_cast_fp16")]; tensor v_state_29_perm_0 = const()[name = string("v_state_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = var_2743_cast_fp16)[name = string("transpose_39")]; tensor var_2753_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2753_cast_fp16")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2764_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2764_cast_fp16")]; bool var_2766_interleave_0 = const()[name = string("op_2766_interleave_0"), val = bool(false)]; tensor var_2766_cast_fp16 = concat(axis = var_64, interleave = var_2766_interleave_0, values = (var_2764_cast_fp16, x1_57_cast_fp16))[name = string("op_2766_cast_fp16")]; tensor var_2767_cast_fp16 = mul(x = var_2766_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2767_cast_fp16")]; tensor query_states_59_cast_fp16 = add(x = var_2753_cast_fp16, y = var_2767_cast_fp16)[name = string("query_states_59_cast_fp16")]; tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = var_2746_cast_fp16)[name = string("transpose_38")]; tensor var_2769_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2769_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2780_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_2780_cast_fp16")]; bool var_2782_interleave_0 = const()[name = string("op_2782_interleave_0"), val = bool(false)]; tensor var_2782_cast_fp16 = concat(axis = var_64, interleave = var_2782_interleave_0, values = (var_2780_cast_fp16, x1_59_cast_fp16))[name = string("op_2782_cast_fp16")]; tensor var_2783_cast_fp16 = mul(x = var_2782_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2783_cast_fp16")]; tensor k_state_29_cast_fp16 = add(x = var_2769_cast_fp16, y = var_2783_cast_fp16)[name = string("k_state_29_cast_fp16")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([0])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor concat_271_values0_0 = const()[name = string("concat_271_values0_0"), val = tensor([14])]; int32 concat_271_axis_0 = const()[name = string("concat_271_axis_0"), val = int32(0)]; bool concat_271_interleave_0 = const()[name = string("concat_271_interleave_0"), val = bool(false)]; tensor concat_271 = concat(axis = concat_271_axis_0, interleave = concat_271_interleave_0, values = (concat_271_values0_0, expand_dims_168, expand_dims_169, expand_dims_2, expand_dims_171))[name = string("concat_271")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_76")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_29_cast_fp16 = transpose(perm = v_state_29_perm_0, x = var_2749_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_271, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_77")]; tensor var_2806_begin_0 = const()[name = string("op_2806_begin_0"), val = tensor([14, 0, 0, 0, 0])]; tensor var_2806_end_0 = const()[name = string("op_2806_end_0"), val = tensor([15, 1, 2, 2048, 64])]; tensor var_2806_end_mask_0 = const()[name = string("op_2806_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2806_squeeze_mask_0 = const()[name = string("op_2806_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2806_cast_fp16 = slice_by_index(begin = var_2806_begin_0, end = var_2806_end_0, end_mask = var_2806_end_mask_0, squeeze_mask = var_2806_squeeze_mask_0, x = coreml_update_state_76)[name = string("op_2806_cast_fp16")]; tensor var_2809_begin_0 = const()[name = string("op_2809_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2809_end_mask_0 = const()[name = string("op_2809_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2809_cast_fp16 = slice_by_index(begin = var_2809_begin_0, end = concat_11, end_mask = var_2809_end_mask_0, x = var_2806_cast_fp16)[name = string("op_2809_cast_fp16")]; tensor var_2811_begin_0 = const()[name = string("op_2811_begin_0"), val = tensor([14, 0, 0, 0, 0])]; tensor var_2811_end_0 = const()[name = string("op_2811_end_0"), val = tensor([15, 1, 2, 2048, 64])]; tensor var_2811_end_mask_0 = const()[name = string("op_2811_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2811_squeeze_mask_0 = const()[name = string("op_2811_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2811_cast_fp16 = slice_by_index(begin = var_2811_begin_0, end = var_2811_end_0, end_mask = var_2811_end_mask_0, squeeze_mask = var_2811_squeeze_mask_0, x = coreml_update_state_77)[name = string("op_2811_cast_fp16")]; tensor var_2814_begin_0 = const()[name = string("op_2814_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2814_end_mask_0 = const()[name = string("op_2814_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2814_cast_fp16 = slice_by_index(begin = var_2814_begin_0, end = concat_11, end_mask = var_2814_end_mask_0, x = var_2811_cast_fp16)[name = string("op_2814_cast_fp16")]; tensor var_2816_shape_cast_fp16 = shape(x = var_2809_cast_fp16)[name = string("op_2816_shape_cast_fp16")]; int32 gather_265 = const()[name = string("gather_265"), val = int32(1)]; int32 gather_266 = const()[name = string("gather_266"), val = int32(2)]; int32 gather_267_axis_0 = const()[name = string("gather_267_axis_0"), val = int32(0)]; int32 gather_267_batch_dims_0 = const()[name = string("gather_267_batch_dims_0"), val = int32(0)]; bool gather_267_validate_indices_0 = const()[name = string("gather_267_validate_indices_0"), val = bool(false)]; string var_2816_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2816_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_267_to_uint16 = const()[name = string("select_267_to_uint16"), val = uint16(2)]; tensor var_2816_shape_cast_fp16_to_uint16 = cast(dtype = var_2816_shape_cast_fp16_to_uint16_dtype_0, x = var_2816_shape_cast_fp16)[name = string("cast_59")]; uint16 gather_267_cast_uint16 = gather(axis = gather_267_axis_0, batch_dims = gather_267_batch_dims_0, indices = select_267_to_uint16, validate_indices = gather_267_validate_indices_0, x = var_2816_shape_cast_fp16_to_uint16)[name = string("gather_267_cast_uint16")]; string gather_267_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_267_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_268 = const()[name = string("gather_268"), val = int32(64)]; tensor var_2823_axes_0 = const()[name = string("op_2823_axes_0"), val = tensor([2])]; tensor var_2823_cast_fp16 = expand_dims(axes = var_2823_axes_0, x = var_2809_cast_fp16)[name = string("op_2823_cast_fp16")]; tensor shape_297_cast_fp16 = shape(x = var_2823_cast_fp16)[name = string("shape_297_cast_fp16")]; int32 concat_279_axis_0 = const()[name = string("concat_279_axis_0"), val = int32(0)]; bool concat_279_interleave_0 = const()[name = string("concat_279_interleave_0"), val = bool(false)]; int32 gather_267_cast_uint16_to_int32 = cast(dtype = gather_267_cast_uint16_to_int32_dtype_0, x = gather_267_cast_uint16)[name = string("cast_58")]; tensor concat_279 = concat(axis = concat_279_axis_0, interleave = concat_279_interleave_0, values = (gather_265, gather_266, var_76, gather_267_cast_uint16_to_int32, gather_268))[name = string("concat_279")]; tensor real_div_28 = real_div(x = concat_279, y = shape_297_cast_fp16)[name = string("real_div_28")]; tensor hidden_states_431_cast_fp16 = tile(reps = real_div_28, x = var_2823_cast_fp16)[name = string("hidden_states_431_cast_fp16")]; tensor concat_280x = const()[name = string("concat_280x"), val = tensor([1, 14, -1, 64])]; tensor key_states_59_cast_fp16 = reshape(shape = concat_280x, x = hidden_states_431_cast_fp16)[name = string("key_states_59_cast_fp16")]; tensor var_2833_shape_cast_fp16 = shape(x = var_2814_cast_fp16)[name = string("op_2833_shape_cast_fp16")]; int32 gather_269 = const()[name = string("gather_269"), val = int32(1)]; int32 gather_270 = const()[name = string("gather_270"), val = int32(2)]; int32 gather_271_axis_0 = const()[name = string("gather_271_axis_0"), val = int32(0)]; int32 gather_271_batch_dims_0 = const()[name = string("gather_271_batch_dims_0"), val = int32(0)]; bool gather_271_validate_indices_0 = const()[name = string("gather_271_validate_indices_0"), val = bool(false)]; string var_2833_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2833_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_271_to_uint16 = const()[name = string("select_271_to_uint16"), val = uint16(2)]; tensor var_2833_shape_cast_fp16_to_uint16 = cast(dtype = var_2833_shape_cast_fp16_to_uint16_dtype_0, x = var_2833_shape_cast_fp16)[name = string("cast_57")]; uint16 gather_271_cast_uint16 = gather(axis = gather_271_axis_0, batch_dims = gather_271_batch_dims_0, indices = select_271_to_uint16, validate_indices = gather_271_validate_indices_0, x = var_2833_shape_cast_fp16_to_uint16)[name = string("gather_271_cast_uint16")]; string gather_271_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_271_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_272 = const()[name = string("gather_272"), val = int32(64)]; tensor var_2840_axes_0 = const()[name = string("op_2840_axes_0"), val = tensor([2])]; tensor var_2840_cast_fp16 = expand_dims(axes = var_2840_axes_0, x = var_2814_cast_fp16)[name = string("op_2840_cast_fp16")]; tensor shape_302_cast_fp16 = shape(x = var_2840_cast_fp16)[name = string("shape_302_cast_fp16")]; int32 concat_281_axis_0 = const()[name = string("concat_281_axis_0"), val = int32(0)]; bool concat_281_interleave_0 = const()[name = string("concat_281_interleave_0"), val = bool(false)]; int32 gather_271_cast_uint16_to_int32 = cast(dtype = gather_271_cast_uint16_to_int32_dtype_0, x = gather_271_cast_uint16)[name = string("cast_56")]; tensor concat_281 = concat(axis = concat_281_axis_0, interleave = concat_281_interleave_0, values = (gather_269, gather_270, var_76, gather_271_cast_uint16_to_int32, gather_272))[name = string("concat_281")]; tensor real_div_29 = real_div(x = concat_281, y = shape_302_cast_fp16)[name = string("real_div_29")]; tensor hidden_states_435_cast_fp16 = tile(reps = real_div_29, x = var_2840_cast_fp16)[name = string("hidden_states_435_cast_fp16")]; tensor concat_282x = const()[name = string("concat_282x"), val = tensor([1, 14, -1, 64])]; tensor value_states_59_cast_fp16 = reshape(shape = concat_282x, x = hidden_states_435_cast_fp16)[name = string("value_states_59_cast_fp16")]; tensor var_2850_shape_cast_fp16 = shape(x = key_states_59_cast_fp16)[name = string("op_2850_shape_cast_fp16")]; int32 gather_273_axis_0 = const()[name = string("gather_273_axis_0"), val = int32(0)]; int32 gather_273_batch_dims_0 = const()[name = string("gather_273_batch_dims_0"), val = int32(0)]; bool gather_273_validate_indices_0 = const()[name = string("gather_273_validate_indices_0"), val = bool(false)]; string var_2850_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2850_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_273_to_uint16 = const()[name = string("select_273_to_uint16"), val = uint16(2)]; tensor var_2850_shape_cast_fp16_to_uint16 = cast(dtype = var_2850_shape_cast_fp16_to_uint16_dtype_0, x = var_2850_shape_cast_fp16)[name = string("cast_55")]; uint16 gather_273_cast_uint16 = gather(axis = gather_273_axis_0, batch_dims = gather_273_batch_dims_0, indices = select_273_to_uint16, validate_indices = gather_273_validate_indices_0, x = var_2850_shape_cast_fp16_to_uint16)[name = string("gather_273_cast_uint16")]; string gather_273_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_273_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_283_values0_0 = const()[name = string("concat_283_values0_0"), val = int32(1)]; int32 concat_283_values1_0 = const()[name = string("concat_283_values1_0"), val = int32(1)]; int32 concat_283_values2_0 = const()[name = string("concat_283_values2_0"), val = int32(0)]; int32 concat_283_axis_0 = const()[name = string("concat_283_axis_0"), val = int32(0)]; bool concat_283_interleave_0 = const()[name = string("concat_283_interleave_0"), val = bool(false)]; int32 gather_273_cast_uint16_to_int32 = cast(dtype = gather_273_cast_uint16_to_int32_dtype_0, x = gather_273_cast_uint16)[name = string("cast_54")]; tensor concat_283 = concat(axis = concat_283_axis_0, interleave = concat_283_interleave_0, values = (concat_283_values0_0, concat_283_values1_0, concat_283_values2_0, gather_273_cast_uint16_to_int32))[name = string("concat_283")]; tensor causal_mask_31_begin_0 = const()[name = string("causal_mask_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_31_end_mask_0 = const()[name = string("causal_mask_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_31_cast_fp16 = slice_by_index(begin = causal_mask_31_begin_0, end = concat_283, end_mask = causal_mask_31_end_mask_0, x = causal_mask)[name = string("causal_mask_31_cast_fp16")]; tensor attn_output_57_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_31_cast_fp16, key = key_states_59_cast_fp16, query = query_states_59_cast_fp16, value = value_states_59_cast_fp16)[name = string("attn_output_57_cast_fp16")]; tensor var_2856_perm_0 = const()[name = string("op_2856_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_284x = const()[name = string("concat_284x"), val = tensor([1, -1, 896])]; tensor var_2856_cast_fp16 = transpose(perm = var_2856_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_36")]; tensor input_113_cast_fp16 = reshape(shape = concat_284x, x = var_2856_cast_fp16)[name = string("input_113_cast_fp16")]; tensor model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(194684352))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195085824))))[name = string("model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_101_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_to_fp16_quantized, x = input_113_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor hidden_states_439_cast_fp16 = add(x = hidden_states_419_cast_fp16, y = linear_101_cast_fp16)[name = string("hidden_states_439_cast_fp16")]; fp16 var_70_promoted_29_to_fp16 = const()[name = string("op_70_promoted_29_to_fp16"), val = fp16(0x1p+1)]; tensor var_2865_cast_fp16 = pow(x = hidden_states_439_cast_fp16, y = var_70_promoted_29_to_fp16)[name = string("op_2865_cast_fp16")]; tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_2865_cast_fp16)[name = string("variance_59_cast_fp16")]; fp16 var_2868_to_fp16 = const()[name = string("op_2868_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2869_cast_fp16 = add(x = variance_59_cast_fp16, y = var_2868_to_fp16)[name = string("op_2869_cast_fp16")]; fp32 var_2870_epsilon_0 = const()[name = string("op_2870_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2870_cast_fp16 = rsqrt(epsilon = var_2870_epsilon_0, x = var_2869_cast_fp16)[name = string("op_2870_cast_fp16")]; tensor hidden_states_443_cast_fp16 = mul(x = hidden_states_439_cast_fp16, y = var_2870_cast_fp16)[name = string("hidden_states_443_cast_fp16")]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195136064)))]; tensor input_115_cast_fp16 = mul(x = model_model_layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_443_cast_fp16)[name = string("input_115_cast_fp16")]; tensor model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(195137920))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197317056))))[name = string("model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_gate_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_102_cast_fp16")]; tensor var_2882_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_2882_cast_fp16")]; tensor model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(197589504))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(199768640))))[name = string("model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_up_proj_weight_to_fp16_quantized, x = input_115_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor input_119_cast_fp16 = mul(x = var_2882_cast_fp16, y = linear_103_cast_fp16)[name = string("input_119_cast_fp16")]; tensor model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(200041088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202220224))))[name = string("model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_104_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_14_mlp_down_proj_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor hidden_states_449_cast_fp16 = add(x = hidden_states_439_cast_fp16, y = linear_104_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; fp16 var_70_promoted_30_to_fp16 = const()[name = string("op_70_promoted_30_to_fp16"), val = fp16(0x1p+1)]; tensor var_2895_cast_fp16 = pow(x = hidden_states_449_cast_fp16, y = var_70_promoted_30_to_fp16)[name = string("op_2895_cast_fp16")]; tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_2895_cast_fp16)[name = string("variance_61_cast_fp16")]; fp16 var_2898_to_fp16 = const()[name = string("op_2898_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2899_cast_fp16 = add(x = variance_61_cast_fp16, y = var_2898_to_fp16)[name = string("op_2899_cast_fp16")]; fp32 var_2900_epsilon_0 = const()[name = string("op_2900_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2900_cast_fp16 = rsqrt(epsilon = var_2900_epsilon_0, x = var_2899_cast_fp16)[name = string("op_2900_cast_fp16")]; tensor hidden_states_453_cast_fp16 = mul(x = hidden_states_449_cast_fp16, y = var_2900_cast_fp16)[name = string("hidden_states_453_cast_fp16")]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202492672)))]; tensor hidden_states_457_cast_fp16 = mul(x = model_model_layers_15_input_layernorm_weight_to_fp16, y = hidden_states_453_cast_fp16)[name = string("hidden_states_457_cast_fp16")]; tensor model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202494528))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202896000))))[name = string("model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_15_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_15_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202946240)))]; tensor linear_105_cast_fp16 = linear(bias = model_model_layers_15_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_15_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(202948096))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203005504))))[name = string("model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_15_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_15_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203012736)))]; tensor linear_106_cast_fp16 = linear(bias = model_model_layers_15_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_15_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203013056))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203070464))))[name = string("model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_15_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_15_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203077696)))]; tensor linear_107_cast_fp16 = linear(bias = model_model_layers_15_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_15_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_457_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor concat_285x = const()[name = string("concat_285x"), val = tensor([1, -1, 14, 64])]; tensor var_2923_cast_fp16 = reshape(shape = concat_285x, x = linear_105_cast_fp16)[name = string("op_2923_cast_fp16")]; tensor q_31_perm_0 = const()[name = string("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 2, 64])]; tensor var_2926_cast_fp16 = reshape(shape = concat_286x, x = linear_106_cast_fp16)[name = string("op_2926_cast_fp16")]; tensor k_31_perm_0 = const()[name = string("k_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 2, 64])]; tensor var_2929_cast_fp16 = reshape(shape = concat_287x, x = linear_107_cast_fp16)[name = string("op_2929_cast_fp16")]; tensor v_state_31_perm_0 = const()[name = string("v_state_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_31_cast_fp16 = transpose(perm = q_31_perm_0, x = var_2923_cast_fp16)[name = string("transpose_35")]; tensor var_2933_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2933_cast_fp16")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2944_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_2944_cast_fp16")]; bool var_2946_interleave_0 = const()[name = string("op_2946_interleave_0"), val = bool(false)]; tensor var_2946_cast_fp16 = concat(axis = var_64, interleave = var_2946_interleave_0, values = (var_2944_cast_fp16, x1_61_cast_fp16))[name = string("op_2946_cast_fp16")]; tensor var_2947_cast_fp16 = mul(x = var_2946_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2947_cast_fp16")]; tensor query_states_63_cast_fp16 = add(x = var_2933_cast_fp16, y = var_2947_cast_fp16)[name = string("query_states_63_cast_fp16")]; tensor k_31_cast_fp16 = transpose(perm = k_31_perm_0, x = var_2926_cast_fp16)[name = string("transpose_34")]; tensor var_2949_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_7_cast_fp16)[name = string("op_2949_cast_fp16")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2960_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_2960_cast_fp16")]; bool var_2962_interleave_0 = const()[name = string("op_2962_interleave_0"), val = bool(false)]; tensor var_2962_cast_fp16 = concat(axis = var_64, interleave = var_2962_interleave_0, values = (var_2960_cast_fp16, x1_63_cast_fp16))[name = string("op_2962_cast_fp16")]; tensor var_2963_cast_fp16 = mul(x = var_2962_cast_fp16, y = sin_7_cast_fp16)[name = string("op_2963_cast_fp16")]; tensor k_state_31_cast_fp16 = add(x = var_2949_cast_fp16, y = var_2963_cast_fp16)[name = string("k_state_31_cast_fp16")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([0])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor concat_290_values0_0 = const()[name = string("concat_290_values0_0"), val = tensor([15])]; int32 concat_290_axis_0 = const()[name = string("concat_290_axis_0"), val = int32(0)]; bool concat_290_interleave_0 = const()[name = string("concat_290_interleave_0"), val = bool(false)]; tensor concat_290 = concat(axis = concat_290_axis_0, interleave = concat_290_interleave_0, values = (concat_290_values0_0, expand_dims_180, expand_dims_181, expand_dims_2, expand_dims_183))[name = string("concat_290")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state_31_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_78")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_31_cast_fp16 = transpose(perm = v_state_31_perm_0, x = var_2929_cast_fp16)[name = string("transpose_33")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_290, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state_31_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_79")]; tensor var_2986_begin_0 = const()[name = string("op_2986_begin_0"), val = tensor([15, 0, 0, 0, 0])]; tensor var_2986_end_0 = const()[name = string("op_2986_end_0"), val = tensor([16, 1, 2, 2048, 64])]; tensor var_2986_end_mask_0 = const()[name = string("op_2986_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2986_squeeze_mask_0 = const()[name = string("op_2986_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2986_cast_fp16 = slice_by_index(begin = var_2986_begin_0, end = var_2986_end_0, end_mask = var_2986_end_mask_0, squeeze_mask = var_2986_squeeze_mask_0, x = coreml_update_state_78)[name = string("op_2986_cast_fp16")]; tensor var_2989_begin_0 = const()[name = string("op_2989_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2989_end_mask_0 = const()[name = string("op_2989_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2989_cast_fp16 = slice_by_index(begin = var_2989_begin_0, end = concat_11, end_mask = var_2989_end_mask_0, x = var_2986_cast_fp16)[name = string("op_2989_cast_fp16")]; tensor var_2991_begin_0 = const()[name = string("op_2991_begin_0"), val = tensor([15, 0, 0, 0, 0])]; tensor var_2991_end_0 = const()[name = string("op_2991_end_0"), val = tensor([16, 1, 2, 2048, 64])]; tensor var_2991_end_mask_0 = const()[name = string("op_2991_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2991_squeeze_mask_0 = const()[name = string("op_2991_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2991_cast_fp16 = slice_by_index(begin = var_2991_begin_0, end = var_2991_end_0, end_mask = var_2991_end_mask_0, squeeze_mask = var_2991_squeeze_mask_0, x = coreml_update_state_79)[name = string("op_2991_cast_fp16")]; tensor var_2994_begin_0 = const()[name = string("op_2994_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2994_end_mask_0 = const()[name = string("op_2994_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2994_cast_fp16 = slice_by_index(begin = var_2994_begin_0, end = concat_11, end_mask = var_2994_end_mask_0, x = var_2991_cast_fp16)[name = string("op_2994_cast_fp16")]; tensor var_2996_shape_cast_fp16 = shape(x = var_2989_cast_fp16)[name = string("op_2996_shape_cast_fp16")]; int32 gather_283 = const()[name = string("gather_283"), val = int32(1)]; int32 gather_284 = const()[name = string("gather_284"), val = int32(2)]; int32 gather_285_axis_0 = const()[name = string("gather_285_axis_0"), val = int32(0)]; int32 gather_285_batch_dims_0 = const()[name = string("gather_285_batch_dims_0"), val = int32(0)]; bool gather_285_validate_indices_0 = const()[name = string("gather_285_validate_indices_0"), val = bool(false)]; string var_2996_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2996_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_285_to_uint16 = const()[name = string("select_285_to_uint16"), val = uint16(2)]; tensor var_2996_shape_cast_fp16_to_uint16 = cast(dtype = var_2996_shape_cast_fp16_to_uint16_dtype_0, x = var_2996_shape_cast_fp16)[name = string("cast_53")]; uint16 gather_285_cast_uint16 = gather(axis = gather_285_axis_0, batch_dims = gather_285_batch_dims_0, indices = select_285_to_uint16, validate_indices = gather_285_validate_indices_0, x = var_2996_shape_cast_fp16_to_uint16)[name = string("gather_285_cast_uint16")]; string gather_285_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_285_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_286 = const()[name = string("gather_286"), val = int32(64)]; tensor var_3003_axes_0 = const()[name = string("op_3003_axes_0"), val = tensor([2])]; tensor var_3003_cast_fp16 = expand_dims(axes = var_3003_axes_0, x = var_2989_cast_fp16)[name = string("op_3003_cast_fp16")]; tensor shape_317_cast_fp16 = shape(x = var_3003_cast_fp16)[name = string("shape_317_cast_fp16")]; int32 concat_298_axis_0 = const()[name = string("concat_298_axis_0"), val = int32(0)]; bool concat_298_interleave_0 = const()[name = string("concat_298_interleave_0"), val = bool(false)]; int32 gather_285_cast_uint16_to_int32 = cast(dtype = gather_285_cast_uint16_to_int32_dtype_0, x = gather_285_cast_uint16)[name = string("cast_52")]; tensor concat_298 = concat(axis = concat_298_axis_0, interleave = concat_298_interleave_0, values = (gather_283, gather_284, var_76, gather_285_cast_uint16_to_int32, gather_286))[name = string("concat_298")]; tensor real_div_30 = real_div(x = concat_298, y = shape_317_cast_fp16)[name = string("real_div_30")]; tensor hidden_states_461_cast_fp16 = tile(reps = real_div_30, x = var_3003_cast_fp16)[name = string("hidden_states_461_cast_fp16")]; tensor concat_299x = const()[name = string("concat_299x"), val = tensor([1, 14, -1, 64])]; tensor key_states_63_cast_fp16 = reshape(shape = concat_299x, x = hidden_states_461_cast_fp16)[name = string("key_states_63_cast_fp16")]; tensor var_3013_shape_cast_fp16 = shape(x = var_2994_cast_fp16)[name = string("op_3013_shape_cast_fp16")]; int32 gather_287 = const()[name = string("gather_287"), val = int32(1)]; int32 gather_288 = const()[name = string("gather_288"), val = int32(2)]; int32 gather_289_axis_0 = const()[name = string("gather_289_axis_0"), val = int32(0)]; int32 gather_289_batch_dims_0 = const()[name = string("gather_289_batch_dims_0"), val = int32(0)]; bool gather_289_validate_indices_0 = const()[name = string("gather_289_validate_indices_0"), val = bool(false)]; string var_3013_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3013_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_289_to_uint16 = const()[name = string("select_289_to_uint16"), val = uint16(2)]; tensor var_3013_shape_cast_fp16_to_uint16 = cast(dtype = var_3013_shape_cast_fp16_to_uint16_dtype_0, x = var_3013_shape_cast_fp16)[name = string("cast_51")]; uint16 gather_289_cast_uint16 = gather(axis = gather_289_axis_0, batch_dims = gather_289_batch_dims_0, indices = select_289_to_uint16, validate_indices = gather_289_validate_indices_0, x = var_3013_shape_cast_fp16_to_uint16)[name = string("gather_289_cast_uint16")]; string gather_289_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_289_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_290 = const()[name = string("gather_290"), val = int32(64)]; tensor var_3020_axes_0 = const()[name = string("op_3020_axes_0"), val = tensor([2])]; tensor var_3020_cast_fp16 = expand_dims(axes = var_3020_axes_0, x = var_2994_cast_fp16)[name = string("op_3020_cast_fp16")]; tensor shape_322_cast_fp16 = shape(x = var_3020_cast_fp16)[name = string("shape_322_cast_fp16")]; int32 concat_300_axis_0 = const()[name = string("concat_300_axis_0"), val = int32(0)]; bool concat_300_interleave_0 = const()[name = string("concat_300_interleave_0"), val = bool(false)]; int32 gather_289_cast_uint16_to_int32 = cast(dtype = gather_289_cast_uint16_to_int32_dtype_0, x = gather_289_cast_uint16)[name = string("cast_50")]; tensor concat_300 = concat(axis = concat_300_axis_0, interleave = concat_300_interleave_0, values = (gather_287, gather_288, var_76, gather_289_cast_uint16_to_int32, gather_290))[name = string("concat_300")]; tensor real_div_31 = real_div(x = concat_300, y = shape_322_cast_fp16)[name = string("real_div_31")]; tensor hidden_states_465_cast_fp16 = tile(reps = real_div_31, x = var_3020_cast_fp16)[name = string("hidden_states_465_cast_fp16")]; tensor concat_301x = const()[name = string("concat_301x"), val = tensor([1, 14, -1, 64])]; tensor value_states_63_cast_fp16 = reshape(shape = concat_301x, x = hidden_states_465_cast_fp16)[name = string("value_states_63_cast_fp16")]; tensor var_3030_shape_cast_fp16 = shape(x = key_states_63_cast_fp16)[name = string("op_3030_shape_cast_fp16")]; int32 gather_291_axis_0 = const()[name = string("gather_291_axis_0"), val = int32(0)]; int32 gather_291_batch_dims_0 = const()[name = string("gather_291_batch_dims_0"), val = int32(0)]; bool gather_291_validate_indices_0 = const()[name = string("gather_291_validate_indices_0"), val = bool(false)]; string var_3030_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3030_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_291_to_uint16 = const()[name = string("select_291_to_uint16"), val = uint16(2)]; tensor var_3030_shape_cast_fp16_to_uint16 = cast(dtype = var_3030_shape_cast_fp16_to_uint16_dtype_0, x = var_3030_shape_cast_fp16)[name = string("cast_49")]; uint16 gather_291_cast_uint16 = gather(axis = gather_291_axis_0, batch_dims = gather_291_batch_dims_0, indices = select_291_to_uint16, validate_indices = gather_291_validate_indices_0, x = var_3030_shape_cast_fp16_to_uint16)[name = string("gather_291_cast_uint16")]; string gather_291_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_291_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_302_values0_0 = const()[name = string("concat_302_values0_0"), val = int32(1)]; int32 concat_302_values1_0 = const()[name = string("concat_302_values1_0"), val = int32(1)]; int32 concat_302_values2_0 = const()[name = string("concat_302_values2_0"), val = int32(0)]; int32 concat_302_axis_0 = const()[name = string("concat_302_axis_0"), val = int32(0)]; bool concat_302_interleave_0 = const()[name = string("concat_302_interleave_0"), val = bool(false)]; int32 gather_291_cast_uint16_to_int32 = cast(dtype = gather_291_cast_uint16_to_int32_dtype_0, x = gather_291_cast_uint16)[name = string("cast_48")]; tensor concat_302 = concat(axis = concat_302_axis_0, interleave = concat_302_interleave_0, values = (concat_302_values0_0, concat_302_values1_0, concat_302_values2_0, gather_291_cast_uint16_to_int32))[name = string("concat_302")]; tensor causal_mask_33_begin_0 = const()[name = string("causal_mask_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_33_end_mask_0 = const()[name = string("causal_mask_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_33_cast_fp16 = slice_by_index(begin = causal_mask_33_begin_0, end = concat_302, end_mask = causal_mask_33_end_mask_0, x = causal_mask)[name = string("causal_mask_33_cast_fp16")]; tensor attn_output_61_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_33_cast_fp16, key = key_states_63_cast_fp16, query = query_states_63_cast_fp16, value = value_states_63_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_3036_perm_0 = const()[name = string("op_3036_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_303x = const()[name = string("concat_303x"), val = tensor([1, -1, 896])]; tensor var_3036_cast_fp16 = transpose(perm = var_3036_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_32")]; tensor input_121_cast_fp16 = reshape(shape = concat_303x, x = var_3036_cast_fp16)[name = string("input_121_cast_fp16")]; tensor model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203078016))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203479488))))[name = string("model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_108_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_to_fp16_quantized, x = input_121_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor hidden_states_469_cast_fp16 = add(x = hidden_states_449_cast_fp16, y = linear_108_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; fp16 var_70_promoted_31_to_fp16 = const()[name = string("op_70_promoted_31_to_fp16"), val = fp16(0x1p+1)]; tensor var_3045_cast_fp16 = pow(x = hidden_states_469_cast_fp16, y = var_70_promoted_31_to_fp16)[name = string("op_3045_cast_fp16")]; tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_3045_cast_fp16)[name = string("variance_63_cast_fp16")]; fp16 var_3048_to_fp16 = const()[name = string("op_3048_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3049_cast_fp16 = add(x = variance_63_cast_fp16, y = var_3048_to_fp16)[name = string("op_3049_cast_fp16")]; fp32 var_3050_epsilon_0 = const()[name = string("op_3050_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3050_cast_fp16 = rsqrt(epsilon = var_3050_epsilon_0, x = var_3049_cast_fp16)[name = string("op_3050_cast_fp16")]; tensor hidden_states_473_cast_fp16 = mul(x = hidden_states_469_cast_fp16, y = var_3050_cast_fp16)[name = string("hidden_states_473_cast_fp16")]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203529728)))]; tensor input_123_cast_fp16 = mul(x = model_model_layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_473_cast_fp16)[name = string("input_123_cast_fp16")]; tensor model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(203531584))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205710720))))[name = string("model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_gate_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor var_3062_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_3062_cast_fp16")]; tensor model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(205983168))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208162304))))[name = string("model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_up_proj_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = string("linear_110_cast_fp16")]; tensor input_127_cast_fp16 = mul(x = var_3062_cast_fp16, y = linear_110_cast_fp16)[name = string("input_127_cast_fp16")]; tensor model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(208434752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210613888))))[name = string("model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_111_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_15_mlp_down_proj_weight_to_fp16_quantized, x = input_127_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor hidden_states_479_cast_fp16 = add(x = hidden_states_469_cast_fp16, y = linear_111_cast_fp16)[name = string("hidden_states_479_cast_fp16")]; fp16 var_70_promoted_32_to_fp16 = const()[name = string("op_70_promoted_32_to_fp16"), val = fp16(0x1p+1)]; tensor var_3075_cast_fp16 = pow(x = hidden_states_479_cast_fp16, y = var_70_promoted_32_to_fp16)[name = string("op_3075_cast_fp16")]; tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_3075_cast_fp16)[name = string("variance_65_cast_fp16")]; fp16 var_3078_to_fp16 = const()[name = string("op_3078_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3079_cast_fp16 = add(x = variance_65_cast_fp16, y = var_3078_to_fp16)[name = string("op_3079_cast_fp16")]; fp32 var_3080_epsilon_0 = const()[name = string("op_3080_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3080_cast_fp16 = rsqrt(epsilon = var_3080_epsilon_0, x = var_3079_cast_fp16)[name = string("op_3080_cast_fp16")]; tensor hidden_states_483_cast_fp16 = mul(x = hidden_states_479_cast_fp16, y = var_3080_cast_fp16)[name = string("hidden_states_483_cast_fp16")]; tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210886336)))]; tensor hidden_states_487_cast_fp16 = mul(x = model_model_layers_16_input_layernorm_weight_to_fp16, y = hidden_states_483_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; tensor model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(210888192))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211289664))))[name = string("model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_16_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_16_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211339904)))]; tensor linear_112_cast_fp16 = linear(bias = model_model_layers_16_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_16_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211341760))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211399168))))[name = string("model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_16_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_16_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211406400)))]; tensor linear_113_cast_fp16 = linear(bias = model_model_layers_16_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_16_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211406720))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211464128))))[name = string("model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_16_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_16_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211471360)))]; tensor linear_114_cast_fp16 = linear(bias = model_model_layers_16_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_16_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_487_cast_fp16)[name = string("linear_114_cast_fp16")]; tensor concat_304x = const()[name = string("concat_304x"), val = tensor([1, -1, 14, 64])]; tensor var_3103_cast_fp16 = reshape(shape = concat_304x, x = linear_112_cast_fp16)[name = string("op_3103_cast_fp16")]; tensor q_33_perm_0 = const()[name = string("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_305x = const()[name = string("concat_305x"), val = tensor([1, -1, 2, 64])]; tensor var_3106_cast_fp16 = reshape(shape = concat_305x, x = linear_113_cast_fp16)[name = string("op_3106_cast_fp16")]; tensor k_33_perm_0 = const()[name = string("k_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_306x = const()[name = string("concat_306x"), val = tensor([1, -1, 2, 64])]; tensor var_3109_cast_fp16 = reshape(shape = concat_306x, x = linear_114_cast_fp16)[name = string("op_3109_cast_fp16")]; tensor v_state_33_perm_0 = const()[name = string("v_state_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_33_cast_fp16 = transpose(perm = q_33_perm_0, x = var_3103_cast_fp16)[name = string("transpose_31")]; tensor var_3113_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3113_cast_fp16")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3124_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_3124_cast_fp16")]; bool var_3126_interleave_0 = const()[name = string("op_3126_interleave_0"), val = bool(false)]; tensor var_3126_cast_fp16 = concat(axis = var_64, interleave = var_3126_interleave_0, values = (var_3124_cast_fp16, x1_65_cast_fp16))[name = string("op_3126_cast_fp16")]; tensor var_3127_cast_fp16 = mul(x = var_3126_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3127_cast_fp16")]; tensor query_states_67_cast_fp16 = add(x = var_3113_cast_fp16, y = var_3127_cast_fp16)[name = string("query_states_67_cast_fp16")]; tensor k_33_cast_fp16 = transpose(perm = k_33_perm_0, x = var_3106_cast_fp16)[name = string("transpose_30")]; tensor var_3129_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3129_cast_fp16")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3140_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_3140_cast_fp16")]; bool var_3142_interleave_0 = const()[name = string("op_3142_interleave_0"), val = bool(false)]; tensor var_3142_cast_fp16 = concat(axis = var_64, interleave = var_3142_interleave_0, values = (var_3140_cast_fp16, x1_67_cast_fp16))[name = string("op_3142_cast_fp16")]; tensor var_3143_cast_fp16 = mul(x = var_3142_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3143_cast_fp16")]; tensor k_state_33_cast_fp16 = add(x = var_3129_cast_fp16, y = var_3143_cast_fp16)[name = string("k_state_33_cast_fp16")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor concat_309_values0_0 = const()[name = string("concat_309_values0_0"), val = tensor([16])]; int32 concat_309_axis_0 = const()[name = string("concat_309_axis_0"), val = int32(0)]; bool concat_309_interleave_0 = const()[name = string("concat_309_interleave_0"), val = bool(false)]; tensor concat_309 = concat(axis = concat_309_axis_0, interleave = concat_309_interleave_0, values = (concat_309_values0_0, expand_dims_192, expand_dims_193, expand_dims_2, expand_dims_195))[name = string("concat_309")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_309, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = k_state_33_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_80")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_33_cast_fp16 = transpose(perm = v_state_33_perm_0, x = var_3109_cast_fp16)[name = string("transpose_29")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_309, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = v_state_33_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_81")]; tensor var_3166_begin_0 = const()[name = string("op_3166_begin_0"), val = tensor([16, 0, 0, 0, 0])]; tensor var_3166_end_0 = const()[name = string("op_3166_end_0"), val = tensor([17, 1, 2, 2048, 64])]; tensor var_3166_end_mask_0 = const()[name = string("op_3166_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3166_squeeze_mask_0 = const()[name = string("op_3166_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3166_cast_fp16 = slice_by_index(begin = var_3166_begin_0, end = var_3166_end_0, end_mask = var_3166_end_mask_0, squeeze_mask = var_3166_squeeze_mask_0, x = coreml_update_state_80)[name = string("op_3166_cast_fp16")]; tensor var_3169_begin_0 = const()[name = string("op_3169_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3169_end_mask_0 = const()[name = string("op_3169_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3169_cast_fp16 = slice_by_index(begin = var_3169_begin_0, end = concat_11, end_mask = var_3169_end_mask_0, x = var_3166_cast_fp16)[name = string("op_3169_cast_fp16")]; tensor var_3171_begin_0 = const()[name = string("op_3171_begin_0"), val = tensor([16, 0, 0, 0, 0])]; tensor var_3171_end_0 = const()[name = string("op_3171_end_0"), val = tensor([17, 1, 2, 2048, 64])]; tensor var_3171_end_mask_0 = const()[name = string("op_3171_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3171_squeeze_mask_0 = const()[name = string("op_3171_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3171_cast_fp16 = slice_by_index(begin = var_3171_begin_0, end = var_3171_end_0, end_mask = var_3171_end_mask_0, squeeze_mask = var_3171_squeeze_mask_0, x = coreml_update_state_81)[name = string("op_3171_cast_fp16")]; tensor var_3174_begin_0 = const()[name = string("op_3174_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3174_end_mask_0 = const()[name = string("op_3174_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3174_cast_fp16 = slice_by_index(begin = var_3174_begin_0, end = concat_11, end_mask = var_3174_end_mask_0, x = var_3171_cast_fp16)[name = string("op_3174_cast_fp16")]; tensor var_3176_shape_cast_fp16 = shape(x = var_3169_cast_fp16)[name = string("op_3176_shape_cast_fp16")]; int32 gather_301 = const()[name = string("gather_301"), val = int32(1)]; int32 gather_302 = const()[name = string("gather_302"), val = int32(2)]; int32 gather_303_axis_0 = const()[name = string("gather_303_axis_0"), val = int32(0)]; int32 gather_303_batch_dims_0 = const()[name = string("gather_303_batch_dims_0"), val = int32(0)]; bool gather_303_validate_indices_0 = const()[name = string("gather_303_validate_indices_0"), val = bool(false)]; string var_3176_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3176_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_303_to_uint16 = const()[name = string("select_303_to_uint16"), val = uint16(2)]; tensor var_3176_shape_cast_fp16_to_uint16 = cast(dtype = var_3176_shape_cast_fp16_to_uint16_dtype_0, x = var_3176_shape_cast_fp16)[name = string("cast_47")]; uint16 gather_303_cast_uint16 = gather(axis = gather_303_axis_0, batch_dims = gather_303_batch_dims_0, indices = select_303_to_uint16, validate_indices = gather_303_validate_indices_0, x = var_3176_shape_cast_fp16_to_uint16)[name = string("gather_303_cast_uint16")]; string gather_303_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_303_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_304 = const()[name = string("gather_304"), val = int32(64)]; tensor var_3183_axes_0 = const()[name = string("op_3183_axes_0"), val = tensor([2])]; tensor var_3183_cast_fp16 = expand_dims(axes = var_3183_axes_0, x = var_3169_cast_fp16)[name = string("op_3183_cast_fp16")]; tensor shape_337_cast_fp16 = shape(x = var_3183_cast_fp16)[name = string("shape_337_cast_fp16")]; int32 concat_317_axis_0 = const()[name = string("concat_317_axis_0"), val = int32(0)]; bool concat_317_interleave_0 = const()[name = string("concat_317_interleave_0"), val = bool(false)]; int32 gather_303_cast_uint16_to_int32 = cast(dtype = gather_303_cast_uint16_to_int32_dtype_0, x = gather_303_cast_uint16)[name = string("cast_46")]; tensor concat_317 = concat(axis = concat_317_axis_0, interleave = concat_317_interleave_0, values = (gather_301, gather_302, var_76, gather_303_cast_uint16_to_int32, gather_304))[name = string("concat_317")]; tensor real_div_32 = real_div(x = concat_317, y = shape_337_cast_fp16)[name = string("real_div_32")]; tensor hidden_states_491_cast_fp16 = tile(reps = real_div_32, x = var_3183_cast_fp16)[name = string("hidden_states_491_cast_fp16")]; tensor concat_318x = const()[name = string("concat_318x"), val = tensor([1, 14, -1, 64])]; tensor key_states_67_cast_fp16 = reshape(shape = concat_318x, x = hidden_states_491_cast_fp16)[name = string("key_states_67_cast_fp16")]; tensor var_3193_shape_cast_fp16 = shape(x = var_3174_cast_fp16)[name = string("op_3193_shape_cast_fp16")]; int32 gather_305 = const()[name = string("gather_305"), val = int32(1)]; int32 gather_306 = const()[name = string("gather_306"), val = int32(2)]; int32 gather_307_axis_0 = const()[name = string("gather_307_axis_0"), val = int32(0)]; int32 gather_307_batch_dims_0 = const()[name = string("gather_307_batch_dims_0"), val = int32(0)]; bool gather_307_validate_indices_0 = const()[name = string("gather_307_validate_indices_0"), val = bool(false)]; string var_3193_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3193_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_307_to_uint16 = const()[name = string("select_307_to_uint16"), val = uint16(2)]; tensor var_3193_shape_cast_fp16_to_uint16 = cast(dtype = var_3193_shape_cast_fp16_to_uint16_dtype_0, x = var_3193_shape_cast_fp16)[name = string("cast_45")]; uint16 gather_307_cast_uint16 = gather(axis = gather_307_axis_0, batch_dims = gather_307_batch_dims_0, indices = select_307_to_uint16, validate_indices = gather_307_validate_indices_0, x = var_3193_shape_cast_fp16_to_uint16)[name = string("gather_307_cast_uint16")]; string gather_307_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_307_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_308 = const()[name = string("gather_308"), val = int32(64)]; tensor var_3200_axes_0 = const()[name = string("op_3200_axes_0"), val = tensor([2])]; tensor var_3200_cast_fp16 = expand_dims(axes = var_3200_axes_0, x = var_3174_cast_fp16)[name = string("op_3200_cast_fp16")]; tensor shape_342_cast_fp16 = shape(x = var_3200_cast_fp16)[name = string("shape_342_cast_fp16")]; int32 concat_319_axis_0 = const()[name = string("concat_319_axis_0"), val = int32(0)]; bool concat_319_interleave_0 = const()[name = string("concat_319_interleave_0"), val = bool(false)]; int32 gather_307_cast_uint16_to_int32 = cast(dtype = gather_307_cast_uint16_to_int32_dtype_0, x = gather_307_cast_uint16)[name = string("cast_44")]; tensor concat_319 = concat(axis = concat_319_axis_0, interleave = concat_319_interleave_0, values = (gather_305, gather_306, var_76, gather_307_cast_uint16_to_int32, gather_308))[name = string("concat_319")]; tensor real_div_33 = real_div(x = concat_319, y = shape_342_cast_fp16)[name = string("real_div_33")]; tensor hidden_states_495_cast_fp16 = tile(reps = real_div_33, x = var_3200_cast_fp16)[name = string("hidden_states_495_cast_fp16")]; tensor concat_320x = const()[name = string("concat_320x"), val = tensor([1, 14, -1, 64])]; tensor value_states_67_cast_fp16 = reshape(shape = concat_320x, x = hidden_states_495_cast_fp16)[name = string("value_states_67_cast_fp16")]; tensor var_3210_shape_cast_fp16 = shape(x = key_states_67_cast_fp16)[name = string("op_3210_shape_cast_fp16")]; int32 gather_309_axis_0 = const()[name = string("gather_309_axis_0"), val = int32(0)]; int32 gather_309_batch_dims_0 = const()[name = string("gather_309_batch_dims_0"), val = int32(0)]; bool gather_309_validate_indices_0 = const()[name = string("gather_309_validate_indices_0"), val = bool(false)]; string var_3210_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3210_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_309_to_uint16 = const()[name = string("select_309_to_uint16"), val = uint16(2)]; tensor var_3210_shape_cast_fp16_to_uint16 = cast(dtype = var_3210_shape_cast_fp16_to_uint16_dtype_0, x = var_3210_shape_cast_fp16)[name = string("cast_43")]; uint16 gather_309_cast_uint16 = gather(axis = gather_309_axis_0, batch_dims = gather_309_batch_dims_0, indices = select_309_to_uint16, validate_indices = gather_309_validate_indices_0, x = var_3210_shape_cast_fp16_to_uint16)[name = string("gather_309_cast_uint16")]; string gather_309_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_309_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_321_values0_0 = const()[name = string("concat_321_values0_0"), val = int32(1)]; int32 concat_321_values1_0 = const()[name = string("concat_321_values1_0"), val = int32(1)]; int32 concat_321_values2_0 = const()[name = string("concat_321_values2_0"), val = int32(0)]; int32 concat_321_axis_0 = const()[name = string("concat_321_axis_0"), val = int32(0)]; bool concat_321_interleave_0 = const()[name = string("concat_321_interleave_0"), val = bool(false)]; int32 gather_309_cast_uint16_to_int32 = cast(dtype = gather_309_cast_uint16_to_int32_dtype_0, x = gather_309_cast_uint16)[name = string("cast_42")]; tensor concat_321 = concat(axis = concat_321_axis_0, interleave = concat_321_interleave_0, values = (concat_321_values0_0, concat_321_values1_0, concat_321_values2_0, gather_309_cast_uint16_to_int32))[name = string("concat_321")]; tensor causal_mask_35_begin_0 = const()[name = string("causal_mask_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_35_end_mask_0 = const()[name = string("causal_mask_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_35_cast_fp16 = slice_by_index(begin = causal_mask_35_begin_0, end = concat_321, end_mask = causal_mask_35_end_mask_0, x = causal_mask)[name = string("causal_mask_35_cast_fp16")]; tensor attn_output_65_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_35_cast_fp16, key = key_states_67_cast_fp16, query = query_states_67_cast_fp16, value = value_states_67_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_3216_perm_0 = const()[name = string("op_3216_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_322x = const()[name = string("concat_322x"), val = tensor([1, -1, 896])]; tensor var_3216_cast_fp16 = transpose(perm = var_3216_perm_0, x = attn_output_65_cast_fp16)[name = string("transpose_28")]; tensor input_129_cast_fp16 = reshape(shape = concat_322x, x = var_3216_cast_fp16)[name = string("input_129_cast_fp16")]; tensor model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211471680))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211873152))))[name = string("model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_115_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_to_fp16_quantized, x = input_129_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor hidden_states_499_cast_fp16 = add(x = hidden_states_479_cast_fp16, y = linear_115_cast_fp16)[name = string("hidden_states_499_cast_fp16")]; fp16 var_70_promoted_33_to_fp16 = const()[name = string("op_70_promoted_33_to_fp16"), val = fp16(0x1p+1)]; tensor var_3225_cast_fp16 = pow(x = hidden_states_499_cast_fp16, y = var_70_promoted_33_to_fp16)[name = string("op_3225_cast_fp16")]; tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_3225_cast_fp16)[name = string("variance_67_cast_fp16")]; fp16 var_3228_to_fp16 = const()[name = string("op_3228_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3229_cast_fp16 = add(x = variance_67_cast_fp16, y = var_3228_to_fp16)[name = string("op_3229_cast_fp16")]; fp32 var_3230_epsilon_0 = const()[name = string("op_3230_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3230_cast_fp16 = rsqrt(epsilon = var_3230_epsilon_0, x = var_3229_cast_fp16)[name = string("op_3230_cast_fp16")]; tensor hidden_states_503_cast_fp16 = mul(x = hidden_states_499_cast_fp16, y = var_3230_cast_fp16)[name = string("hidden_states_503_cast_fp16")]; tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211923392)))]; tensor input_131_cast_fp16 = mul(x = model_model_layers_16_post_attention_layernorm_weight_to_fp16, y = hidden_states_503_cast_fp16)[name = string("input_131_cast_fp16")]; tensor model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(211925248))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214104384))))[name = string("model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_gate_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor var_3242_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_3242_cast_fp16")]; tensor model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(214376832))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216555968))))[name = string("model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_up_proj_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor input_135_cast_fp16 = mul(x = var_3242_cast_fp16, y = linear_117_cast_fp16)[name = string("input_135_cast_fp16")]; tensor model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(216828416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219007552))))[name = string("model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_118_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_16_mlp_down_proj_weight_to_fp16_quantized, x = input_135_cast_fp16)[name = string("linear_118_cast_fp16")]; tensor hidden_states_509_cast_fp16 = add(x = hidden_states_499_cast_fp16, y = linear_118_cast_fp16)[name = string("hidden_states_509_cast_fp16")]; fp16 var_70_promoted_34_to_fp16 = const()[name = string("op_70_promoted_34_to_fp16"), val = fp16(0x1p+1)]; tensor var_3255_cast_fp16 = pow(x = hidden_states_509_cast_fp16, y = var_70_promoted_34_to_fp16)[name = string("op_3255_cast_fp16")]; tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_3255_cast_fp16)[name = string("variance_69_cast_fp16")]; fp16 var_3258_to_fp16 = const()[name = string("op_3258_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3259_cast_fp16 = add(x = variance_69_cast_fp16, y = var_3258_to_fp16)[name = string("op_3259_cast_fp16")]; fp32 var_3260_epsilon_0 = const()[name = string("op_3260_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3260_cast_fp16 = rsqrt(epsilon = var_3260_epsilon_0, x = var_3259_cast_fp16)[name = string("op_3260_cast_fp16")]; tensor hidden_states_513_cast_fp16 = mul(x = hidden_states_509_cast_fp16, y = var_3260_cast_fp16)[name = string("hidden_states_513_cast_fp16")]; tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219280000)))]; tensor hidden_states_517_cast_fp16 = mul(x = model_model_layers_17_input_layernorm_weight_to_fp16, y = hidden_states_513_cast_fp16)[name = string("hidden_states_517_cast_fp16")]; tensor model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219281856))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219683328))))[name = string("model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_17_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_17_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219733568)))]; tensor linear_119_cast_fp16 = linear(bias = model_model_layers_17_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_17_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219735424))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219792832))))[name = string("model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_17_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_17_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219800064)))]; tensor linear_120_cast_fp16 = linear(bias = model_model_layers_17_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_17_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219800384))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219857792))))[name = string("model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_17_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_17_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219865024)))]; tensor linear_121_cast_fp16 = linear(bias = model_model_layers_17_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_17_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_517_cast_fp16)[name = string("linear_121_cast_fp16")]; tensor concat_323x = const()[name = string("concat_323x"), val = tensor([1, -1, 14, 64])]; tensor var_3283_cast_fp16 = reshape(shape = concat_323x, x = linear_119_cast_fp16)[name = string("op_3283_cast_fp16")]; tensor q_35_perm_0 = const()[name = string("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_324x = const()[name = string("concat_324x"), val = tensor([1, -1, 2, 64])]; tensor var_3286_cast_fp16 = reshape(shape = concat_324x, x = linear_120_cast_fp16)[name = string("op_3286_cast_fp16")]; tensor k_35_perm_0 = const()[name = string("k_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 2, 64])]; tensor var_3289_cast_fp16 = reshape(shape = concat_325x, x = linear_121_cast_fp16)[name = string("op_3289_cast_fp16")]; tensor v_state_35_perm_0 = const()[name = string("v_state_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_35_cast_fp16 = transpose(perm = q_35_perm_0, x = var_3283_cast_fp16)[name = string("transpose_27")]; tensor var_3293_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3293_cast_fp16")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3304_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_3304_cast_fp16")]; bool var_3306_interleave_0 = const()[name = string("op_3306_interleave_0"), val = bool(false)]; tensor var_3306_cast_fp16 = concat(axis = var_64, interleave = var_3306_interleave_0, values = (var_3304_cast_fp16, x1_69_cast_fp16))[name = string("op_3306_cast_fp16")]; tensor var_3307_cast_fp16 = mul(x = var_3306_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3307_cast_fp16")]; tensor query_states_71_cast_fp16 = add(x = var_3293_cast_fp16, y = var_3307_cast_fp16)[name = string("query_states_71_cast_fp16")]; tensor k_35_cast_fp16 = transpose(perm = k_35_perm_0, x = var_3286_cast_fp16)[name = string("transpose_26")]; tensor var_3309_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3309_cast_fp16")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3320_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_3320_cast_fp16")]; bool var_3322_interleave_0 = const()[name = string("op_3322_interleave_0"), val = bool(false)]; tensor var_3322_cast_fp16 = concat(axis = var_64, interleave = var_3322_interleave_0, values = (var_3320_cast_fp16, x1_71_cast_fp16))[name = string("op_3322_cast_fp16")]; tensor var_3323_cast_fp16 = mul(x = var_3322_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3323_cast_fp16")]; tensor k_state_35_cast_fp16 = add(x = var_3309_cast_fp16, y = var_3323_cast_fp16)[name = string("k_state_35_cast_fp16")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([0])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor concat_328_values0_0 = const()[name = string("concat_328_values0_0"), val = tensor([17])]; int32 concat_328_axis_0 = const()[name = string("concat_328_axis_0"), val = int32(0)]; bool concat_328_interleave_0 = const()[name = string("concat_328_interleave_0"), val = bool(false)]; tensor concat_328 = concat(axis = concat_328_axis_0, interleave = concat_328_interleave_0, values = (concat_328_values0_0, expand_dims_204, expand_dims_205, expand_dims_2, expand_dims_207))[name = string("concat_328")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_328, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = k_state_35_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_82")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_35_cast_fp16 = transpose(perm = v_state_35_perm_0, x = var_3289_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_328, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = v_state_35_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_83")]; tensor var_3346_begin_0 = const()[name = string("op_3346_begin_0"), val = tensor([17, 0, 0, 0, 0])]; tensor var_3346_end_0 = const()[name = string("op_3346_end_0"), val = tensor([18, 1, 2, 2048, 64])]; tensor var_3346_end_mask_0 = const()[name = string("op_3346_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3346_squeeze_mask_0 = const()[name = string("op_3346_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3346_cast_fp16 = slice_by_index(begin = var_3346_begin_0, end = var_3346_end_0, end_mask = var_3346_end_mask_0, squeeze_mask = var_3346_squeeze_mask_0, x = coreml_update_state_82)[name = string("op_3346_cast_fp16")]; tensor var_3349_begin_0 = const()[name = string("op_3349_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3349_end_mask_0 = const()[name = string("op_3349_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3349_cast_fp16 = slice_by_index(begin = var_3349_begin_0, end = concat_11, end_mask = var_3349_end_mask_0, x = var_3346_cast_fp16)[name = string("op_3349_cast_fp16")]; tensor var_3351_begin_0 = const()[name = string("op_3351_begin_0"), val = tensor([17, 0, 0, 0, 0])]; tensor var_3351_end_0 = const()[name = string("op_3351_end_0"), val = tensor([18, 1, 2, 2048, 64])]; tensor var_3351_end_mask_0 = const()[name = string("op_3351_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3351_squeeze_mask_0 = const()[name = string("op_3351_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3351_cast_fp16 = slice_by_index(begin = var_3351_begin_0, end = var_3351_end_0, end_mask = var_3351_end_mask_0, squeeze_mask = var_3351_squeeze_mask_0, x = coreml_update_state_83)[name = string("op_3351_cast_fp16")]; tensor var_3354_begin_0 = const()[name = string("op_3354_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3354_end_mask_0 = const()[name = string("op_3354_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3354_cast_fp16 = slice_by_index(begin = var_3354_begin_0, end = concat_11, end_mask = var_3354_end_mask_0, x = var_3351_cast_fp16)[name = string("op_3354_cast_fp16")]; tensor var_3356_shape_cast_fp16 = shape(x = var_3349_cast_fp16)[name = string("op_3356_shape_cast_fp16")]; int32 gather_319 = const()[name = string("gather_319"), val = int32(1)]; int32 gather_320 = const()[name = string("gather_320"), val = int32(2)]; int32 gather_321_axis_0 = const()[name = string("gather_321_axis_0"), val = int32(0)]; int32 gather_321_batch_dims_0 = const()[name = string("gather_321_batch_dims_0"), val = int32(0)]; bool gather_321_validate_indices_0 = const()[name = string("gather_321_validate_indices_0"), val = bool(false)]; string var_3356_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3356_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_321_to_uint16 = const()[name = string("select_321_to_uint16"), val = uint16(2)]; tensor var_3356_shape_cast_fp16_to_uint16 = cast(dtype = var_3356_shape_cast_fp16_to_uint16_dtype_0, x = var_3356_shape_cast_fp16)[name = string("cast_41")]; uint16 gather_321_cast_uint16 = gather(axis = gather_321_axis_0, batch_dims = gather_321_batch_dims_0, indices = select_321_to_uint16, validate_indices = gather_321_validate_indices_0, x = var_3356_shape_cast_fp16_to_uint16)[name = string("gather_321_cast_uint16")]; string gather_321_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_321_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_322 = const()[name = string("gather_322"), val = int32(64)]; tensor var_3363_axes_0 = const()[name = string("op_3363_axes_0"), val = tensor([2])]; tensor var_3363_cast_fp16 = expand_dims(axes = var_3363_axes_0, x = var_3349_cast_fp16)[name = string("op_3363_cast_fp16")]; tensor shape_357_cast_fp16 = shape(x = var_3363_cast_fp16)[name = string("shape_357_cast_fp16")]; int32 concat_336_axis_0 = const()[name = string("concat_336_axis_0"), val = int32(0)]; bool concat_336_interleave_0 = const()[name = string("concat_336_interleave_0"), val = bool(false)]; int32 gather_321_cast_uint16_to_int32 = cast(dtype = gather_321_cast_uint16_to_int32_dtype_0, x = gather_321_cast_uint16)[name = string("cast_40")]; tensor concat_336 = concat(axis = concat_336_axis_0, interleave = concat_336_interleave_0, values = (gather_319, gather_320, var_76, gather_321_cast_uint16_to_int32, gather_322))[name = string("concat_336")]; tensor real_div_34 = real_div(x = concat_336, y = shape_357_cast_fp16)[name = string("real_div_34")]; tensor hidden_states_521_cast_fp16 = tile(reps = real_div_34, x = var_3363_cast_fp16)[name = string("hidden_states_521_cast_fp16")]; tensor concat_337x = const()[name = string("concat_337x"), val = tensor([1, 14, -1, 64])]; tensor key_states_71_cast_fp16 = reshape(shape = concat_337x, x = hidden_states_521_cast_fp16)[name = string("key_states_71_cast_fp16")]; tensor var_3373_shape_cast_fp16 = shape(x = var_3354_cast_fp16)[name = string("op_3373_shape_cast_fp16")]; int32 gather_323 = const()[name = string("gather_323"), val = int32(1)]; int32 gather_324 = const()[name = string("gather_324"), val = int32(2)]; int32 gather_325_axis_0 = const()[name = string("gather_325_axis_0"), val = int32(0)]; int32 gather_325_batch_dims_0 = const()[name = string("gather_325_batch_dims_0"), val = int32(0)]; bool gather_325_validate_indices_0 = const()[name = string("gather_325_validate_indices_0"), val = bool(false)]; string var_3373_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3373_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_325_to_uint16 = const()[name = string("select_325_to_uint16"), val = uint16(2)]; tensor var_3373_shape_cast_fp16_to_uint16 = cast(dtype = var_3373_shape_cast_fp16_to_uint16_dtype_0, x = var_3373_shape_cast_fp16)[name = string("cast_39")]; uint16 gather_325_cast_uint16 = gather(axis = gather_325_axis_0, batch_dims = gather_325_batch_dims_0, indices = select_325_to_uint16, validate_indices = gather_325_validate_indices_0, x = var_3373_shape_cast_fp16_to_uint16)[name = string("gather_325_cast_uint16")]; string gather_325_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_325_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_326 = const()[name = string("gather_326"), val = int32(64)]; tensor var_3380_axes_0 = const()[name = string("op_3380_axes_0"), val = tensor([2])]; tensor var_3380_cast_fp16 = expand_dims(axes = var_3380_axes_0, x = var_3354_cast_fp16)[name = string("op_3380_cast_fp16")]; tensor shape_362_cast_fp16 = shape(x = var_3380_cast_fp16)[name = string("shape_362_cast_fp16")]; int32 concat_338_axis_0 = const()[name = string("concat_338_axis_0"), val = int32(0)]; bool concat_338_interleave_0 = const()[name = string("concat_338_interleave_0"), val = bool(false)]; int32 gather_325_cast_uint16_to_int32 = cast(dtype = gather_325_cast_uint16_to_int32_dtype_0, x = gather_325_cast_uint16)[name = string("cast_38")]; tensor concat_338 = concat(axis = concat_338_axis_0, interleave = concat_338_interleave_0, values = (gather_323, gather_324, var_76, gather_325_cast_uint16_to_int32, gather_326))[name = string("concat_338")]; tensor real_div_35 = real_div(x = concat_338, y = shape_362_cast_fp16)[name = string("real_div_35")]; tensor hidden_states_525_cast_fp16 = tile(reps = real_div_35, x = var_3380_cast_fp16)[name = string("hidden_states_525_cast_fp16")]; tensor concat_339x = const()[name = string("concat_339x"), val = tensor([1, 14, -1, 64])]; tensor value_states_71_cast_fp16 = reshape(shape = concat_339x, x = hidden_states_525_cast_fp16)[name = string("value_states_71_cast_fp16")]; tensor var_3390_shape_cast_fp16 = shape(x = key_states_71_cast_fp16)[name = string("op_3390_shape_cast_fp16")]; int32 gather_327_axis_0 = const()[name = string("gather_327_axis_0"), val = int32(0)]; int32 gather_327_batch_dims_0 = const()[name = string("gather_327_batch_dims_0"), val = int32(0)]; bool gather_327_validate_indices_0 = const()[name = string("gather_327_validate_indices_0"), val = bool(false)]; string var_3390_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3390_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_327_to_uint16 = const()[name = string("select_327_to_uint16"), val = uint16(2)]; tensor var_3390_shape_cast_fp16_to_uint16 = cast(dtype = var_3390_shape_cast_fp16_to_uint16_dtype_0, x = var_3390_shape_cast_fp16)[name = string("cast_37")]; uint16 gather_327_cast_uint16 = gather(axis = gather_327_axis_0, batch_dims = gather_327_batch_dims_0, indices = select_327_to_uint16, validate_indices = gather_327_validate_indices_0, x = var_3390_shape_cast_fp16_to_uint16)[name = string("gather_327_cast_uint16")]; string gather_327_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_327_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_340_values0_0 = const()[name = string("concat_340_values0_0"), val = int32(1)]; int32 concat_340_values1_0 = const()[name = string("concat_340_values1_0"), val = int32(1)]; int32 concat_340_values2_0 = const()[name = string("concat_340_values2_0"), val = int32(0)]; int32 concat_340_axis_0 = const()[name = string("concat_340_axis_0"), val = int32(0)]; bool concat_340_interleave_0 = const()[name = string("concat_340_interleave_0"), val = bool(false)]; int32 gather_327_cast_uint16_to_int32 = cast(dtype = gather_327_cast_uint16_to_int32_dtype_0, x = gather_327_cast_uint16)[name = string("cast_36")]; tensor concat_340 = concat(axis = concat_340_axis_0, interleave = concat_340_interleave_0, values = (concat_340_values0_0, concat_340_values1_0, concat_340_values2_0, gather_327_cast_uint16_to_int32))[name = string("concat_340")]; tensor causal_mask_37_begin_0 = const()[name = string("causal_mask_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_37_end_mask_0 = const()[name = string("causal_mask_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_37_cast_fp16 = slice_by_index(begin = causal_mask_37_begin_0, end = concat_340, end_mask = causal_mask_37_end_mask_0, x = causal_mask)[name = string("causal_mask_37_cast_fp16")]; tensor attn_output_69_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_37_cast_fp16, key = key_states_71_cast_fp16, query = query_states_71_cast_fp16, value = value_states_71_cast_fp16)[name = string("attn_output_69_cast_fp16")]; tensor var_3396_perm_0 = const()[name = string("op_3396_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_341x = const()[name = string("concat_341x"), val = tensor([1, -1, 896])]; tensor var_3396_cast_fp16 = transpose(perm = var_3396_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_24")]; tensor input_137_cast_fp16 = reshape(shape = concat_341x, x = var_3396_cast_fp16)[name = string("input_137_cast_fp16")]; tensor model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(219865344))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220266816))))[name = string("model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_122_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_to_fp16_quantized, x = input_137_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor hidden_states_529_cast_fp16 = add(x = hidden_states_509_cast_fp16, y = linear_122_cast_fp16)[name = string("hidden_states_529_cast_fp16")]; fp16 var_70_promoted_35_to_fp16 = const()[name = string("op_70_promoted_35_to_fp16"), val = fp16(0x1p+1)]; tensor var_3405_cast_fp16 = pow(x = hidden_states_529_cast_fp16, y = var_70_promoted_35_to_fp16)[name = string("op_3405_cast_fp16")]; tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_3405_cast_fp16)[name = string("variance_71_cast_fp16")]; fp16 var_3408_to_fp16 = const()[name = string("op_3408_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3409_cast_fp16 = add(x = variance_71_cast_fp16, y = var_3408_to_fp16)[name = string("op_3409_cast_fp16")]; fp32 var_3410_epsilon_0 = const()[name = string("op_3410_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3410_cast_fp16 = rsqrt(epsilon = var_3410_epsilon_0, x = var_3409_cast_fp16)[name = string("op_3410_cast_fp16")]; tensor hidden_states_533_cast_fp16 = mul(x = hidden_states_529_cast_fp16, y = var_3410_cast_fp16)[name = string("hidden_states_533_cast_fp16")]; tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220317056)))]; tensor input_139_cast_fp16 = mul(x = model_model_layers_17_post_attention_layernorm_weight_to_fp16, y = hidden_states_533_cast_fp16)[name = string("input_139_cast_fp16")]; tensor model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(220318912))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222498048))))[name = string("model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_gate_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor var_3422_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_3422_cast_fp16")]; tensor model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(222770496))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(224949632))))[name = string("model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_up_proj_weight_to_fp16_quantized, x = input_139_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor input_143_cast_fp16 = mul(x = var_3422_cast_fp16, y = linear_124_cast_fp16)[name = string("input_143_cast_fp16")]; tensor model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(225222080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227401216))))[name = string("model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_125_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_17_mlp_down_proj_weight_to_fp16_quantized, x = input_143_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor hidden_states_539_cast_fp16 = add(x = hidden_states_529_cast_fp16, y = linear_125_cast_fp16)[name = string("hidden_states_539_cast_fp16")]; fp16 var_70_promoted_36_to_fp16 = const()[name = string("op_70_promoted_36_to_fp16"), val = fp16(0x1p+1)]; tensor var_3435_cast_fp16 = pow(x = hidden_states_539_cast_fp16, y = var_70_promoted_36_to_fp16)[name = string("op_3435_cast_fp16")]; tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_3435_cast_fp16)[name = string("variance_73_cast_fp16")]; fp16 var_3438_to_fp16 = const()[name = string("op_3438_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3439_cast_fp16 = add(x = variance_73_cast_fp16, y = var_3438_to_fp16)[name = string("op_3439_cast_fp16")]; fp32 var_3440_epsilon_0 = const()[name = string("op_3440_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3440_cast_fp16 = rsqrt(epsilon = var_3440_epsilon_0, x = var_3439_cast_fp16)[name = string("op_3440_cast_fp16")]; tensor hidden_states_543_cast_fp16 = mul(x = hidden_states_539_cast_fp16, y = var_3440_cast_fp16)[name = string("hidden_states_543_cast_fp16")]; tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227673664)))]; tensor hidden_states_547_cast_fp16 = mul(x = model_model_layers_18_input_layernorm_weight_to_fp16, y = hidden_states_543_cast_fp16)[name = string("hidden_states_547_cast_fp16")]; tensor model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(227675520))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228076992))))[name = string("model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_18_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_18_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228127232)))]; tensor linear_126_cast_fp16 = linear(bias = model_model_layers_18_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_18_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_126_cast_fp16")]; tensor model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228129088))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228186496))))[name = string("model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_18_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_18_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228193728)))]; tensor linear_127_cast_fp16 = linear(bias = model_model_layers_18_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_18_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228194048))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228251456))))[name = string("model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_18_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_18_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228258688)))]; tensor linear_128_cast_fp16 = linear(bias = model_model_layers_18_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_18_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_547_cast_fp16)[name = string("linear_128_cast_fp16")]; tensor concat_342x = const()[name = string("concat_342x"), val = tensor([1, -1, 14, 64])]; tensor var_3463_cast_fp16 = reshape(shape = concat_342x, x = linear_126_cast_fp16)[name = string("op_3463_cast_fp16")]; tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 2, 64])]; tensor var_3466_cast_fp16 = reshape(shape = concat_343x, x = linear_127_cast_fp16)[name = string("op_3466_cast_fp16")]; tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 2, 64])]; tensor var_3469_cast_fp16 = reshape(shape = concat_344x, x = linear_128_cast_fp16)[name = string("op_3469_cast_fp16")]; tensor v_state_37_perm_0 = const()[name = string("v_state_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_37_cast_fp16 = transpose(perm = q_37_perm_0, x = var_3463_cast_fp16)[name = string("transpose_23")]; tensor var_3473_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3473_cast_fp16")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3484_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_3484_cast_fp16")]; bool var_3486_interleave_0 = const()[name = string("op_3486_interleave_0"), val = bool(false)]; tensor var_3486_cast_fp16 = concat(axis = var_64, interleave = var_3486_interleave_0, values = (var_3484_cast_fp16, x1_73_cast_fp16))[name = string("op_3486_cast_fp16")]; tensor var_3487_cast_fp16 = mul(x = var_3486_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3487_cast_fp16")]; tensor query_states_75_cast_fp16 = add(x = var_3473_cast_fp16, y = var_3487_cast_fp16)[name = string("query_states_75_cast_fp16")]; tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = var_3466_cast_fp16)[name = string("transpose_22")]; tensor var_3489_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3489_cast_fp16")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3500_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_3500_cast_fp16")]; bool var_3502_interleave_0 = const()[name = string("op_3502_interleave_0"), val = bool(false)]; tensor var_3502_cast_fp16 = concat(axis = var_64, interleave = var_3502_interleave_0, values = (var_3500_cast_fp16, x1_75_cast_fp16))[name = string("op_3502_cast_fp16")]; tensor var_3503_cast_fp16 = mul(x = var_3502_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3503_cast_fp16")]; tensor k_state_37_cast_fp16 = add(x = var_3489_cast_fp16, y = var_3503_cast_fp16)[name = string("k_state_37_cast_fp16")]; tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([0])]; tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; tensor concat_347_values0_0 = const()[name = string("concat_347_values0_0"), val = tensor([18])]; int32 concat_347_axis_0 = const()[name = string("concat_347_axis_0"), val = int32(0)]; bool concat_347_interleave_0 = const()[name = string("concat_347_interleave_0"), val = bool(false)]; tensor concat_347 = concat(axis = concat_347_axis_0, interleave = concat_347_interleave_0, values = (concat_347_values0_0, expand_dims_216, expand_dims_217, expand_dims_2, expand_dims_219))[name = string("concat_347")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_347, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = k_state_37_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_84")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_37_cast_fp16 = transpose(perm = v_state_37_perm_0, x = var_3469_cast_fp16)[name = string("transpose_21")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_347, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = v_state_37_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_85")]; tensor var_3526_begin_0 = const()[name = string("op_3526_begin_0"), val = tensor([18, 0, 0, 0, 0])]; tensor var_3526_end_0 = const()[name = string("op_3526_end_0"), val = tensor([19, 1, 2, 2048, 64])]; tensor var_3526_end_mask_0 = const()[name = string("op_3526_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3526_squeeze_mask_0 = const()[name = string("op_3526_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3526_cast_fp16 = slice_by_index(begin = var_3526_begin_0, end = var_3526_end_0, end_mask = var_3526_end_mask_0, squeeze_mask = var_3526_squeeze_mask_0, x = coreml_update_state_84)[name = string("op_3526_cast_fp16")]; tensor var_3529_begin_0 = const()[name = string("op_3529_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3529_end_mask_0 = const()[name = string("op_3529_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3529_cast_fp16 = slice_by_index(begin = var_3529_begin_0, end = concat_11, end_mask = var_3529_end_mask_0, x = var_3526_cast_fp16)[name = string("op_3529_cast_fp16")]; tensor var_3531_begin_0 = const()[name = string("op_3531_begin_0"), val = tensor([18, 0, 0, 0, 0])]; tensor var_3531_end_0 = const()[name = string("op_3531_end_0"), val = tensor([19, 1, 2, 2048, 64])]; tensor var_3531_end_mask_0 = const()[name = string("op_3531_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3531_squeeze_mask_0 = const()[name = string("op_3531_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3531_cast_fp16 = slice_by_index(begin = var_3531_begin_0, end = var_3531_end_0, end_mask = var_3531_end_mask_0, squeeze_mask = var_3531_squeeze_mask_0, x = coreml_update_state_85)[name = string("op_3531_cast_fp16")]; tensor var_3534_begin_0 = const()[name = string("op_3534_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3534_end_mask_0 = const()[name = string("op_3534_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3534_cast_fp16 = slice_by_index(begin = var_3534_begin_0, end = concat_11, end_mask = var_3534_end_mask_0, x = var_3531_cast_fp16)[name = string("op_3534_cast_fp16")]; tensor var_3536_shape_cast_fp16 = shape(x = var_3529_cast_fp16)[name = string("op_3536_shape_cast_fp16")]; int32 gather_337 = const()[name = string("gather_337"), val = int32(1)]; int32 gather_338 = const()[name = string("gather_338"), val = int32(2)]; int32 gather_339_axis_0 = const()[name = string("gather_339_axis_0"), val = int32(0)]; int32 gather_339_batch_dims_0 = const()[name = string("gather_339_batch_dims_0"), val = int32(0)]; bool gather_339_validate_indices_0 = const()[name = string("gather_339_validate_indices_0"), val = bool(false)]; string var_3536_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3536_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_339_to_uint16 = const()[name = string("select_339_to_uint16"), val = uint16(2)]; tensor var_3536_shape_cast_fp16_to_uint16 = cast(dtype = var_3536_shape_cast_fp16_to_uint16_dtype_0, x = var_3536_shape_cast_fp16)[name = string("cast_35")]; uint16 gather_339_cast_uint16 = gather(axis = gather_339_axis_0, batch_dims = gather_339_batch_dims_0, indices = select_339_to_uint16, validate_indices = gather_339_validate_indices_0, x = var_3536_shape_cast_fp16_to_uint16)[name = string("gather_339_cast_uint16")]; string gather_339_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_339_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_340 = const()[name = string("gather_340"), val = int32(64)]; tensor var_3543_axes_0 = const()[name = string("op_3543_axes_0"), val = tensor([2])]; tensor var_3543_cast_fp16 = expand_dims(axes = var_3543_axes_0, x = var_3529_cast_fp16)[name = string("op_3543_cast_fp16")]; tensor shape_377_cast_fp16 = shape(x = var_3543_cast_fp16)[name = string("shape_377_cast_fp16")]; int32 concat_355_axis_0 = const()[name = string("concat_355_axis_0"), val = int32(0)]; bool concat_355_interleave_0 = const()[name = string("concat_355_interleave_0"), val = bool(false)]; int32 gather_339_cast_uint16_to_int32 = cast(dtype = gather_339_cast_uint16_to_int32_dtype_0, x = gather_339_cast_uint16)[name = string("cast_34")]; tensor concat_355 = concat(axis = concat_355_axis_0, interleave = concat_355_interleave_0, values = (gather_337, gather_338, var_76, gather_339_cast_uint16_to_int32, gather_340))[name = string("concat_355")]; tensor real_div_36 = real_div(x = concat_355, y = shape_377_cast_fp16)[name = string("real_div_36")]; tensor hidden_states_551_cast_fp16 = tile(reps = real_div_36, x = var_3543_cast_fp16)[name = string("hidden_states_551_cast_fp16")]; tensor concat_356x = const()[name = string("concat_356x"), val = tensor([1, 14, -1, 64])]; tensor key_states_75_cast_fp16 = reshape(shape = concat_356x, x = hidden_states_551_cast_fp16)[name = string("key_states_75_cast_fp16")]; tensor var_3553_shape_cast_fp16 = shape(x = var_3534_cast_fp16)[name = string("op_3553_shape_cast_fp16")]; int32 gather_341 = const()[name = string("gather_341"), val = int32(1)]; int32 gather_342 = const()[name = string("gather_342"), val = int32(2)]; int32 gather_343_axis_0 = const()[name = string("gather_343_axis_0"), val = int32(0)]; int32 gather_343_batch_dims_0 = const()[name = string("gather_343_batch_dims_0"), val = int32(0)]; bool gather_343_validate_indices_0 = const()[name = string("gather_343_validate_indices_0"), val = bool(false)]; string var_3553_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3553_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_343_to_uint16 = const()[name = string("select_343_to_uint16"), val = uint16(2)]; tensor var_3553_shape_cast_fp16_to_uint16 = cast(dtype = var_3553_shape_cast_fp16_to_uint16_dtype_0, x = var_3553_shape_cast_fp16)[name = string("cast_33")]; uint16 gather_343_cast_uint16 = gather(axis = gather_343_axis_0, batch_dims = gather_343_batch_dims_0, indices = select_343_to_uint16, validate_indices = gather_343_validate_indices_0, x = var_3553_shape_cast_fp16_to_uint16)[name = string("gather_343_cast_uint16")]; string gather_343_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_343_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_344 = const()[name = string("gather_344"), val = int32(64)]; tensor var_3560_axes_0 = const()[name = string("op_3560_axes_0"), val = tensor([2])]; tensor var_3560_cast_fp16 = expand_dims(axes = var_3560_axes_0, x = var_3534_cast_fp16)[name = string("op_3560_cast_fp16")]; tensor shape_382_cast_fp16 = shape(x = var_3560_cast_fp16)[name = string("shape_382_cast_fp16")]; int32 concat_357_axis_0 = const()[name = string("concat_357_axis_0"), val = int32(0)]; bool concat_357_interleave_0 = const()[name = string("concat_357_interleave_0"), val = bool(false)]; int32 gather_343_cast_uint16_to_int32 = cast(dtype = gather_343_cast_uint16_to_int32_dtype_0, x = gather_343_cast_uint16)[name = string("cast_32")]; tensor concat_357 = concat(axis = concat_357_axis_0, interleave = concat_357_interleave_0, values = (gather_341, gather_342, var_76, gather_343_cast_uint16_to_int32, gather_344))[name = string("concat_357")]; tensor real_div_37 = real_div(x = concat_357, y = shape_382_cast_fp16)[name = string("real_div_37")]; tensor hidden_states_555_cast_fp16 = tile(reps = real_div_37, x = var_3560_cast_fp16)[name = string("hidden_states_555_cast_fp16")]; tensor concat_358x = const()[name = string("concat_358x"), val = tensor([1, 14, -1, 64])]; tensor value_states_75_cast_fp16 = reshape(shape = concat_358x, x = hidden_states_555_cast_fp16)[name = string("value_states_75_cast_fp16")]; tensor var_3570_shape_cast_fp16 = shape(x = key_states_75_cast_fp16)[name = string("op_3570_shape_cast_fp16")]; int32 gather_345_axis_0 = const()[name = string("gather_345_axis_0"), val = int32(0)]; int32 gather_345_batch_dims_0 = const()[name = string("gather_345_batch_dims_0"), val = int32(0)]; bool gather_345_validate_indices_0 = const()[name = string("gather_345_validate_indices_0"), val = bool(false)]; string var_3570_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3570_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_345_to_uint16 = const()[name = string("select_345_to_uint16"), val = uint16(2)]; tensor var_3570_shape_cast_fp16_to_uint16 = cast(dtype = var_3570_shape_cast_fp16_to_uint16_dtype_0, x = var_3570_shape_cast_fp16)[name = string("cast_31")]; uint16 gather_345_cast_uint16 = gather(axis = gather_345_axis_0, batch_dims = gather_345_batch_dims_0, indices = select_345_to_uint16, validate_indices = gather_345_validate_indices_0, x = var_3570_shape_cast_fp16_to_uint16)[name = string("gather_345_cast_uint16")]; string gather_345_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_345_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_359_values0_0 = const()[name = string("concat_359_values0_0"), val = int32(1)]; int32 concat_359_values1_0 = const()[name = string("concat_359_values1_0"), val = int32(1)]; int32 concat_359_values2_0 = const()[name = string("concat_359_values2_0"), val = int32(0)]; int32 concat_359_axis_0 = const()[name = string("concat_359_axis_0"), val = int32(0)]; bool concat_359_interleave_0 = const()[name = string("concat_359_interleave_0"), val = bool(false)]; int32 gather_345_cast_uint16_to_int32 = cast(dtype = gather_345_cast_uint16_to_int32_dtype_0, x = gather_345_cast_uint16)[name = string("cast_30")]; tensor concat_359 = concat(axis = concat_359_axis_0, interleave = concat_359_interleave_0, values = (concat_359_values0_0, concat_359_values1_0, concat_359_values2_0, gather_345_cast_uint16_to_int32))[name = string("concat_359")]; tensor causal_mask_39_begin_0 = const()[name = string("causal_mask_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_39_end_mask_0 = const()[name = string("causal_mask_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_39_cast_fp16 = slice_by_index(begin = causal_mask_39_begin_0, end = concat_359, end_mask = causal_mask_39_end_mask_0, x = causal_mask)[name = string("causal_mask_39_cast_fp16")]; tensor attn_output_73_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_39_cast_fp16, key = key_states_75_cast_fp16, query = query_states_75_cast_fp16, value = value_states_75_cast_fp16)[name = string("attn_output_73_cast_fp16")]; tensor var_3576_perm_0 = const()[name = string("op_3576_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_360x = const()[name = string("concat_360x"), val = tensor([1, -1, 896])]; tensor var_3576_cast_fp16 = transpose(perm = var_3576_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_20")]; tensor input_145_cast_fp16 = reshape(shape = concat_360x, x = var_3576_cast_fp16)[name = string("input_145_cast_fp16")]; tensor model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228259008))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228660480))))[name = string("model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_129_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_to_fp16_quantized, x = input_145_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor hidden_states_559_cast_fp16 = add(x = hidden_states_539_cast_fp16, y = linear_129_cast_fp16)[name = string("hidden_states_559_cast_fp16")]; fp16 var_70_promoted_37_to_fp16 = const()[name = string("op_70_promoted_37_to_fp16"), val = fp16(0x1p+1)]; tensor var_3585_cast_fp16 = pow(x = hidden_states_559_cast_fp16, y = var_70_promoted_37_to_fp16)[name = string("op_3585_cast_fp16")]; tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_3585_cast_fp16)[name = string("variance_75_cast_fp16")]; fp16 var_3588_to_fp16 = const()[name = string("op_3588_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3589_cast_fp16 = add(x = variance_75_cast_fp16, y = var_3588_to_fp16)[name = string("op_3589_cast_fp16")]; fp32 var_3590_epsilon_0 = const()[name = string("op_3590_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3590_cast_fp16 = rsqrt(epsilon = var_3590_epsilon_0, x = var_3589_cast_fp16)[name = string("op_3590_cast_fp16")]; tensor hidden_states_563_cast_fp16 = mul(x = hidden_states_559_cast_fp16, y = var_3590_cast_fp16)[name = string("hidden_states_563_cast_fp16")]; tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228710720)))]; tensor input_147_cast_fp16 = mul(x = model_model_layers_18_post_attention_layernorm_weight_to_fp16, y = hidden_states_563_cast_fp16)[name = string("input_147_cast_fp16")]; tensor model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(228712576))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(230891712))))[name = string("model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_gate_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_3602_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_3602_cast_fp16")]; tensor model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(231164160))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233343296))))[name = string("model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_up_proj_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor input_151_cast_fp16 = mul(x = var_3602_cast_fp16, y = linear_131_cast_fp16)[name = string("input_151_cast_fp16")]; tensor model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(233615744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(235794880))))[name = string("model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_132_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_18_mlp_down_proj_weight_to_fp16_quantized, x = input_151_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor hidden_states_569_cast_fp16 = add(x = hidden_states_559_cast_fp16, y = linear_132_cast_fp16)[name = string("hidden_states_569_cast_fp16")]; fp16 var_70_promoted_38_to_fp16 = const()[name = string("op_70_promoted_38_to_fp16"), val = fp16(0x1p+1)]; tensor var_3615_cast_fp16 = pow(x = hidden_states_569_cast_fp16, y = var_70_promoted_38_to_fp16)[name = string("op_3615_cast_fp16")]; tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_3615_cast_fp16)[name = string("variance_77_cast_fp16")]; fp16 var_3618_to_fp16 = const()[name = string("op_3618_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3619_cast_fp16 = add(x = variance_77_cast_fp16, y = var_3618_to_fp16)[name = string("op_3619_cast_fp16")]; fp32 var_3620_epsilon_0 = const()[name = string("op_3620_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3620_cast_fp16 = rsqrt(epsilon = var_3620_epsilon_0, x = var_3619_cast_fp16)[name = string("op_3620_cast_fp16")]; tensor hidden_states_573_cast_fp16 = mul(x = hidden_states_569_cast_fp16, y = var_3620_cast_fp16)[name = string("hidden_states_573_cast_fp16")]; tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236067328)))]; tensor hidden_states_577_cast_fp16 = mul(x = model_model_layers_19_input_layernorm_weight_to_fp16, y = hidden_states_573_cast_fp16)[name = string("hidden_states_577_cast_fp16")]; tensor model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236069184))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236470656))))[name = string("model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_19_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_19_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236520896)))]; tensor linear_133_cast_fp16 = linear(bias = model_model_layers_19_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_19_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236522752))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236580160))))[name = string("model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_19_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_19_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236587392)))]; tensor linear_134_cast_fp16 = linear(bias = model_model_layers_19_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_19_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_134_cast_fp16")]; tensor model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236587712))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236645120))))[name = string("model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_19_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_19_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236652352)))]; tensor linear_135_cast_fp16 = linear(bias = model_model_layers_19_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_19_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_577_cast_fp16)[name = string("linear_135_cast_fp16")]; tensor concat_361x = const()[name = string("concat_361x"), val = tensor([1, -1, 14, 64])]; tensor var_3643_cast_fp16 = reshape(shape = concat_361x, x = linear_133_cast_fp16)[name = string("op_3643_cast_fp16")]; tensor q_39_perm_0 = const()[name = string("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_362x = const()[name = string("concat_362x"), val = tensor([1, -1, 2, 64])]; tensor var_3646_cast_fp16 = reshape(shape = concat_362x, x = linear_134_cast_fp16)[name = string("op_3646_cast_fp16")]; tensor k_39_perm_0 = const()[name = string("k_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_363x = const()[name = string("concat_363x"), val = tensor([1, -1, 2, 64])]; tensor var_3649_cast_fp16 = reshape(shape = concat_363x, x = linear_135_cast_fp16)[name = string("op_3649_cast_fp16")]; tensor v_state_39_perm_0 = const()[name = string("v_state_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_39_cast_fp16 = transpose(perm = q_39_perm_0, x = var_3643_cast_fp16)[name = string("transpose_19")]; tensor var_3653_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3653_cast_fp16")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3664_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_41_promoted_to_fp16)[name = string("op_3664_cast_fp16")]; bool var_3666_interleave_0 = const()[name = string("op_3666_interleave_0"), val = bool(false)]; tensor var_3666_cast_fp16 = concat(axis = var_64, interleave = var_3666_interleave_0, values = (var_3664_cast_fp16, x1_77_cast_fp16))[name = string("op_3666_cast_fp16")]; tensor var_3667_cast_fp16 = mul(x = var_3666_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3667_cast_fp16")]; tensor query_states_79_cast_fp16 = add(x = var_3653_cast_fp16, y = var_3667_cast_fp16)[name = string("query_states_79_cast_fp16")]; tensor k_39_cast_fp16 = transpose(perm = k_39_perm_0, x = var_3646_cast_fp16)[name = string("transpose_18")]; tensor var_3669_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3669_cast_fp16")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3680_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_3680_cast_fp16")]; bool var_3682_interleave_0 = const()[name = string("op_3682_interleave_0"), val = bool(false)]; tensor var_3682_cast_fp16 = concat(axis = var_64, interleave = var_3682_interleave_0, values = (var_3680_cast_fp16, x1_79_cast_fp16))[name = string("op_3682_cast_fp16")]; tensor var_3683_cast_fp16 = mul(x = var_3682_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3683_cast_fp16")]; tensor k_state_39_cast_fp16 = add(x = var_3669_cast_fp16, y = var_3683_cast_fp16)[name = string("k_state_39_cast_fp16")]; tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([0])]; tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; tensor concat_366_values0_0 = const()[name = string("concat_366_values0_0"), val = tensor([19])]; int32 concat_366_axis_0 = const()[name = string("concat_366_axis_0"), val = int32(0)]; bool concat_366_interleave_0 = const()[name = string("concat_366_interleave_0"), val = bool(false)]; tensor concat_366 = concat(axis = concat_366_axis_0, interleave = concat_366_interleave_0, values = (concat_366_values0_0, expand_dims_228, expand_dims_229, expand_dims_2, expand_dims_231))[name = string("concat_366")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_366, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = k_state_39_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_86")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_39_cast_fp16 = transpose(perm = v_state_39_perm_0, x = var_3649_cast_fp16)[name = string("transpose_17")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_366, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = v_state_39_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_87")]; tensor var_3706_begin_0 = const()[name = string("op_3706_begin_0"), val = tensor([19, 0, 0, 0, 0])]; tensor var_3706_end_0 = const()[name = string("op_3706_end_0"), val = tensor([20, 1, 2, 2048, 64])]; tensor var_3706_end_mask_0 = const()[name = string("op_3706_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3706_squeeze_mask_0 = const()[name = string("op_3706_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3706_cast_fp16 = slice_by_index(begin = var_3706_begin_0, end = var_3706_end_0, end_mask = var_3706_end_mask_0, squeeze_mask = var_3706_squeeze_mask_0, x = coreml_update_state_86)[name = string("op_3706_cast_fp16")]; tensor var_3709_begin_0 = const()[name = string("op_3709_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3709_end_mask_0 = const()[name = string("op_3709_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3709_cast_fp16 = slice_by_index(begin = var_3709_begin_0, end = concat_11, end_mask = var_3709_end_mask_0, x = var_3706_cast_fp16)[name = string("op_3709_cast_fp16")]; tensor var_3711_begin_0 = const()[name = string("op_3711_begin_0"), val = tensor([19, 0, 0, 0, 0])]; tensor var_3711_end_0 = const()[name = string("op_3711_end_0"), val = tensor([20, 1, 2, 2048, 64])]; tensor var_3711_end_mask_0 = const()[name = string("op_3711_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3711_squeeze_mask_0 = const()[name = string("op_3711_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3711_cast_fp16 = slice_by_index(begin = var_3711_begin_0, end = var_3711_end_0, end_mask = var_3711_end_mask_0, squeeze_mask = var_3711_squeeze_mask_0, x = coreml_update_state_87)[name = string("op_3711_cast_fp16")]; tensor var_3714_begin_0 = const()[name = string("op_3714_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3714_end_mask_0 = const()[name = string("op_3714_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3714_cast_fp16 = slice_by_index(begin = var_3714_begin_0, end = concat_11, end_mask = var_3714_end_mask_0, x = var_3711_cast_fp16)[name = string("op_3714_cast_fp16")]; tensor var_3716_shape_cast_fp16 = shape(x = var_3709_cast_fp16)[name = string("op_3716_shape_cast_fp16")]; int32 gather_355 = const()[name = string("gather_355"), val = int32(1)]; int32 gather_356 = const()[name = string("gather_356"), val = int32(2)]; int32 gather_357_axis_0 = const()[name = string("gather_357_axis_0"), val = int32(0)]; int32 gather_357_batch_dims_0 = const()[name = string("gather_357_batch_dims_0"), val = int32(0)]; bool gather_357_validate_indices_0 = const()[name = string("gather_357_validate_indices_0"), val = bool(false)]; string var_3716_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3716_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_357_to_uint16 = const()[name = string("select_357_to_uint16"), val = uint16(2)]; tensor var_3716_shape_cast_fp16_to_uint16 = cast(dtype = var_3716_shape_cast_fp16_to_uint16_dtype_0, x = var_3716_shape_cast_fp16)[name = string("cast_29")]; uint16 gather_357_cast_uint16 = gather(axis = gather_357_axis_0, batch_dims = gather_357_batch_dims_0, indices = select_357_to_uint16, validate_indices = gather_357_validate_indices_0, x = var_3716_shape_cast_fp16_to_uint16)[name = string("gather_357_cast_uint16")]; string gather_357_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_357_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_358 = const()[name = string("gather_358"), val = int32(64)]; tensor var_3723_axes_0 = const()[name = string("op_3723_axes_0"), val = tensor([2])]; tensor var_3723_cast_fp16 = expand_dims(axes = var_3723_axes_0, x = var_3709_cast_fp16)[name = string("op_3723_cast_fp16")]; tensor shape_397_cast_fp16 = shape(x = var_3723_cast_fp16)[name = string("shape_397_cast_fp16")]; int32 concat_374_axis_0 = const()[name = string("concat_374_axis_0"), val = int32(0)]; bool concat_374_interleave_0 = const()[name = string("concat_374_interleave_0"), val = bool(false)]; int32 gather_357_cast_uint16_to_int32 = cast(dtype = gather_357_cast_uint16_to_int32_dtype_0, x = gather_357_cast_uint16)[name = string("cast_28")]; tensor concat_374 = concat(axis = concat_374_axis_0, interleave = concat_374_interleave_0, values = (gather_355, gather_356, var_76, gather_357_cast_uint16_to_int32, gather_358))[name = string("concat_374")]; tensor real_div_38 = real_div(x = concat_374, y = shape_397_cast_fp16)[name = string("real_div_38")]; tensor hidden_states_581_cast_fp16 = tile(reps = real_div_38, x = var_3723_cast_fp16)[name = string("hidden_states_581_cast_fp16")]; tensor concat_375x = const()[name = string("concat_375x"), val = tensor([1, 14, -1, 64])]; tensor key_states_79_cast_fp16 = reshape(shape = concat_375x, x = hidden_states_581_cast_fp16)[name = string("key_states_79_cast_fp16")]; tensor var_3733_shape_cast_fp16 = shape(x = var_3714_cast_fp16)[name = string("op_3733_shape_cast_fp16")]; int32 gather_359 = const()[name = string("gather_359"), val = int32(1)]; int32 gather_360 = const()[name = string("gather_360"), val = int32(2)]; int32 gather_361_axis_0 = const()[name = string("gather_361_axis_0"), val = int32(0)]; int32 gather_361_batch_dims_0 = const()[name = string("gather_361_batch_dims_0"), val = int32(0)]; bool gather_361_validate_indices_0 = const()[name = string("gather_361_validate_indices_0"), val = bool(false)]; string var_3733_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3733_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_361_to_uint16 = const()[name = string("select_361_to_uint16"), val = uint16(2)]; tensor var_3733_shape_cast_fp16_to_uint16 = cast(dtype = var_3733_shape_cast_fp16_to_uint16_dtype_0, x = var_3733_shape_cast_fp16)[name = string("cast_27")]; uint16 gather_361_cast_uint16 = gather(axis = gather_361_axis_0, batch_dims = gather_361_batch_dims_0, indices = select_361_to_uint16, validate_indices = gather_361_validate_indices_0, x = var_3733_shape_cast_fp16_to_uint16)[name = string("gather_361_cast_uint16")]; string gather_361_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_361_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_362 = const()[name = string("gather_362"), val = int32(64)]; tensor var_3740_axes_0 = const()[name = string("op_3740_axes_0"), val = tensor([2])]; tensor var_3740_cast_fp16 = expand_dims(axes = var_3740_axes_0, x = var_3714_cast_fp16)[name = string("op_3740_cast_fp16")]; tensor shape_402_cast_fp16 = shape(x = var_3740_cast_fp16)[name = string("shape_402_cast_fp16")]; int32 concat_376_axis_0 = const()[name = string("concat_376_axis_0"), val = int32(0)]; bool concat_376_interleave_0 = const()[name = string("concat_376_interleave_0"), val = bool(false)]; int32 gather_361_cast_uint16_to_int32 = cast(dtype = gather_361_cast_uint16_to_int32_dtype_0, x = gather_361_cast_uint16)[name = string("cast_26")]; tensor concat_376 = concat(axis = concat_376_axis_0, interleave = concat_376_interleave_0, values = (gather_359, gather_360, var_76, gather_361_cast_uint16_to_int32, gather_362))[name = string("concat_376")]; tensor real_div_39 = real_div(x = concat_376, y = shape_402_cast_fp16)[name = string("real_div_39")]; tensor hidden_states_585_cast_fp16 = tile(reps = real_div_39, x = var_3740_cast_fp16)[name = string("hidden_states_585_cast_fp16")]; tensor concat_377x = const()[name = string("concat_377x"), val = tensor([1, 14, -1, 64])]; tensor value_states_79_cast_fp16 = reshape(shape = concat_377x, x = hidden_states_585_cast_fp16)[name = string("value_states_79_cast_fp16")]; tensor var_3750_shape_cast_fp16 = shape(x = key_states_79_cast_fp16)[name = string("op_3750_shape_cast_fp16")]; int32 gather_363_axis_0 = const()[name = string("gather_363_axis_0"), val = int32(0)]; int32 gather_363_batch_dims_0 = const()[name = string("gather_363_batch_dims_0"), val = int32(0)]; bool gather_363_validate_indices_0 = const()[name = string("gather_363_validate_indices_0"), val = bool(false)]; string var_3750_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3750_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_363_to_uint16 = const()[name = string("select_363_to_uint16"), val = uint16(2)]; tensor var_3750_shape_cast_fp16_to_uint16 = cast(dtype = var_3750_shape_cast_fp16_to_uint16_dtype_0, x = var_3750_shape_cast_fp16)[name = string("cast_25")]; uint16 gather_363_cast_uint16 = gather(axis = gather_363_axis_0, batch_dims = gather_363_batch_dims_0, indices = select_363_to_uint16, validate_indices = gather_363_validate_indices_0, x = var_3750_shape_cast_fp16_to_uint16)[name = string("gather_363_cast_uint16")]; string gather_363_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_363_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_378_values0_0 = const()[name = string("concat_378_values0_0"), val = int32(1)]; int32 concat_378_values1_0 = const()[name = string("concat_378_values1_0"), val = int32(1)]; int32 concat_378_values2_0 = const()[name = string("concat_378_values2_0"), val = int32(0)]; int32 concat_378_axis_0 = const()[name = string("concat_378_axis_0"), val = int32(0)]; bool concat_378_interleave_0 = const()[name = string("concat_378_interleave_0"), val = bool(false)]; int32 gather_363_cast_uint16_to_int32 = cast(dtype = gather_363_cast_uint16_to_int32_dtype_0, x = gather_363_cast_uint16)[name = string("cast_24")]; tensor concat_378 = concat(axis = concat_378_axis_0, interleave = concat_378_interleave_0, values = (concat_378_values0_0, concat_378_values1_0, concat_378_values2_0, gather_363_cast_uint16_to_int32))[name = string("concat_378")]; tensor causal_mask_41_begin_0 = const()[name = string("causal_mask_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_41_end_mask_0 = const()[name = string("causal_mask_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_41_cast_fp16 = slice_by_index(begin = causal_mask_41_begin_0, end = concat_378, end_mask = causal_mask_41_end_mask_0, x = causal_mask)[name = string("causal_mask_41_cast_fp16")]; tensor attn_output_77_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_41_cast_fp16, key = key_states_79_cast_fp16, query = query_states_79_cast_fp16, value = value_states_79_cast_fp16)[name = string("attn_output_77_cast_fp16")]; tensor var_3756_perm_0 = const()[name = string("op_3756_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_379x = const()[name = string("concat_379x"), val = tensor([1, -1, 896])]; tensor var_3756_cast_fp16 = transpose(perm = var_3756_perm_0, x = attn_output_77_cast_fp16)[name = string("transpose_16")]; tensor input_153_cast_fp16 = reshape(shape = concat_379x, x = var_3756_cast_fp16)[name = string("input_153_cast_fp16")]; tensor model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(236652672))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237054144))))[name = string("model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_136_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor hidden_states_589_cast_fp16 = add(x = hidden_states_569_cast_fp16, y = linear_136_cast_fp16)[name = string("hidden_states_589_cast_fp16")]; fp16 var_70_promoted_39_to_fp16 = const()[name = string("op_70_promoted_39_to_fp16"), val = fp16(0x1p+1)]; tensor var_3765_cast_fp16 = pow(x = hidden_states_589_cast_fp16, y = var_70_promoted_39_to_fp16)[name = string("op_3765_cast_fp16")]; tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_3765_cast_fp16)[name = string("variance_79_cast_fp16")]; fp16 var_3768_to_fp16 = const()[name = string("op_3768_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3769_cast_fp16 = add(x = variance_79_cast_fp16, y = var_3768_to_fp16)[name = string("op_3769_cast_fp16")]; fp32 var_3770_epsilon_0 = const()[name = string("op_3770_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3770_cast_fp16 = rsqrt(epsilon = var_3770_epsilon_0, x = var_3769_cast_fp16)[name = string("op_3770_cast_fp16")]; tensor hidden_states_593_cast_fp16 = mul(x = hidden_states_589_cast_fp16, y = var_3770_cast_fp16)[name = string("hidden_states_593_cast_fp16")]; tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237104384)))]; tensor input_155_cast_fp16 = mul(x = model_model_layers_19_post_attention_layernorm_weight_to_fp16, y = hidden_states_593_cast_fp16)[name = string("input_155_cast_fp16")]; tensor model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(237106240))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239285376))))[name = string("model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_gate_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_3782_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_3782_cast_fp16")]; tensor model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(239557824))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(241736960))))[name = string("model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_up_proj_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor input_159_cast_fp16 = mul(x = var_3782_cast_fp16, y = linear_138_cast_fp16)[name = string("input_159_cast_fp16")]; tensor model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(242009408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244188544))))[name = string("model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_139_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_19_mlp_down_proj_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor hidden_states_599_cast_fp16 = add(x = hidden_states_589_cast_fp16, y = linear_139_cast_fp16)[name = string("hidden_states_599_cast_fp16")]; fp16 var_70_promoted_40_to_fp16 = const()[name = string("op_70_promoted_40_to_fp16"), val = fp16(0x1p+1)]; tensor var_3795_cast_fp16 = pow(x = hidden_states_599_cast_fp16, y = var_70_promoted_40_to_fp16)[name = string("op_3795_cast_fp16")]; tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_3795_cast_fp16)[name = string("variance_81_cast_fp16")]; fp16 var_3798_to_fp16 = const()[name = string("op_3798_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3799_cast_fp16 = add(x = variance_81_cast_fp16, y = var_3798_to_fp16)[name = string("op_3799_cast_fp16")]; fp32 var_3800_epsilon_0 = const()[name = string("op_3800_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3800_cast_fp16 = rsqrt(epsilon = var_3800_epsilon_0, x = var_3799_cast_fp16)[name = string("op_3800_cast_fp16")]; tensor hidden_states_603_cast_fp16 = mul(x = hidden_states_599_cast_fp16, y = var_3800_cast_fp16)[name = string("hidden_states_603_cast_fp16")]; tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244460992)))]; tensor hidden_states_607_cast_fp16 = mul(x = model_model_layers_20_input_layernorm_weight_to_fp16, y = hidden_states_603_cast_fp16)[name = string("hidden_states_607_cast_fp16")]; tensor model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244462848))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244864320))))[name = string("model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_20_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_20_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244914560)))]; tensor linear_140_cast_fp16 = linear(bias = model_model_layers_20_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_20_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244916416))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244973824))))[name = string("model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_20_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_20_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244981056)))]; tensor linear_141_cast_fp16 = linear(bias = model_model_layers_20_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_20_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(244981376))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245038784))))[name = string("model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_20_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_20_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245046016)))]; tensor linear_142_cast_fp16 = linear(bias = model_model_layers_20_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_20_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_607_cast_fp16)[name = string("linear_142_cast_fp16")]; tensor concat_380x = const()[name = string("concat_380x"), val = tensor([1, -1, 14, 64])]; tensor var_3823_cast_fp16 = reshape(shape = concat_380x, x = linear_140_cast_fp16)[name = string("op_3823_cast_fp16")]; tensor q_41_perm_0 = const()[name = string("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_381x = const()[name = string("concat_381x"), val = tensor([1, -1, 2, 64])]; tensor var_3826_cast_fp16 = reshape(shape = concat_381x, x = linear_141_cast_fp16)[name = string("op_3826_cast_fp16")]; tensor k_41_perm_0 = const()[name = string("k_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_382x = const()[name = string("concat_382x"), val = tensor([1, -1, 2, 64])]; tensor var_3829_cast_fp16 = reshape(shape = concat_382x, x = linear_142_cast_fp16)[name = string("op_3829_cast_fp16")]; tensor v_state_41_perm_0 = const()[name = string("v_state_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_41_cast_fp16 = transpose(perm = q_41_perm_0, x = var_3823_cast_fp16)[name = string("transpose_15")]; tensor var_3833_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3833_cast_fp16")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3844_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_3844_cast_fp16")]; bool var_3846_interleave_0 = const()[name = string("op_3846_interleave_0"), val = bool(false)]; tensor var_3846_cast_fp16 = concat(axis = var_64, interleave = var_3846_interleave_0, values = (var_3844_cast_fp16, x1_81_cast_fp16))[name = string("op_3846_cast_fp16")]; tensor var_3847_cast_fp16 = mul(x = var_3846_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3847_cast_fp16")]; tensor query_states_83_cast_fp16 = add(x = var_3833_cast_fp16, y = var_3847_cast_fp16)[name = string("query_states_83_cast_fp16")]; tensor k_41_cast_fp16 = transpose(perm = k_41_perm_0, x = var_3826_cast_fp16)[name = string("transpose_14")]; tensor var_3849_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_7_cast_fp16)[name = string("op_3849_cast_fp16")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3860_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_3860_cast_fp16")]; bool var_3862_interleave_0 = const()[name = string("op_3862_interleave_0"), val = bool(false)]; tensor var_3862_cast_fp16 = concat(axis = var_64, interleave = var_3862_interleave_0, values = (var_3860_cast_fp16, x1_83_cast_fp16))[name = string("op_3862_cast_fp16")]; tensor var_3863_cast_fp16 = mul(x = var_3862_cast_fp16, y = sin_7_cast_fp16)[name = string("op_3863_cast_fp16")]; tensor k_state_41_cast_fp16 = add(x = var_3849_cast_fp16, y = var_3863_cast_fp16)[name = string("k_state_41_cast_fp16")]; tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; tensor concat_385_values0_0 = const()[name = string("concat_385_values0_0"), val = tensor([20])]; int32 concat_385_axis_0 = const()[name = string("concat_385_axis_0"), val = int32(0)]; bool concat_385_interleave_0 = const()[name = string("concat_385_interleave_0"), val = bool(false)]; tensor concat_385 = concat(axis = concat_385_axis_0, interleave = concat_385_interleave_0, values = (concat_385_values0_0, expand_dims_240, expand_dims_241, expand_dims_2, expand_dims_243))[name = string("concat_385")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_385, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = k_state_41_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_88")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_41_cast_fp16 = transpose(perm = v_state_41_perm_0, x = var_3829_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_385, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = v_state_41_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_89")]; tensor var_3886_begin_0 = const()[name = string("op_3886_begin_0"), val = tensor([20, 0, 0, 0, 0])]; tensor var_3886_end_0 = const()[name = string("op_3886_end_0"), val = tensor([21, 1, 2, 2048, 64])]; tensor var_3886_end_mask_0 = const()[name = string("op_3886_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3886_squeeze_mask_0 = const()[name = string("op_3886_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3886_cast_fp16 = slice_by_index(begin = var_3886_begin_0, end = var_3886_end_0, end_mask = var_3886_end_mask_0, squeeze_mask = var_3886_squeeze_mask_0, x = coreml_update_state_88)[name = string("op_3886_cast_fp16")]; tensor var_3889_begin_0 = const()[name = string("op_3889_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3889_end_mask_0 = const()[name = string("op_3889_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3889_cast_fp16 = slice_by_index(begin = var_3889_begin_0, end = concat_11, end_mask = var_3889_end_mask_0, x = var_3886_cast_fp16)[name = string("op_3889_cast_fp16")]; tensor var_3891_begin_0 = const()[name = string("op_3891_begin_0"), val = tensor([20, 0, 0, 0, 0])]; tensor var_3891_end_0 = const()[name = string("op_3891_end_0"), val = tensor([21, 1, 2, 2048, 64])]; tensor var_3891_end_mask_0 = const()[name = string("op_3891_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3891_squeeze_mask_0 = const()[name = string("op_3891_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3891_cast_fp16 = slice_by_index(begin = var_3891_begin_0, end = var_3891_end_0, end_mask = var_3891_end_mask_0, squeeze_mask = var_3891_squeeze_mask_0, x = coreml_update_state_89)[name = string("op_3891_cast_fp16")]; tensor var_3894_begin_0 = const()[name = string("op_3894_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3894_end_mask_0 = const()[name = string("op_3894_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3894_cast_fp16 = slice_by_index(begin = var_3894_begin_0, end = concat_11, end_mask = var_3894_end_mask_0, x = var_3891_cast_fp16)[name = string("op_3894_cast_fp16")]; tensor var_3896_shape_cast_fp16 = shape(x = var_3889_cast_fp16)[name = string("op_3896_shape_cast_fp16")]; int32 gather_373 = const()[name = string("gather_373"), val = int32(1)]; int32 gather_374 = const()[name = string("gather_374"), val = int32(2)]; int32 gather_375_axis_0 = const()[name = string("gather_375_axis_0"), val = int32(0)]; int32 gather_375_batch_dims_0 = const()[name = string("gather_375_batch_dims_0"), val = int32(0)]; bool gather_375_validate_indices_0 = const()[name = string("gather_375_validate_indices_0"), val = bool(false)]; string var_3896_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3896_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_375_to_uint16 = const()[name = string("select_375_to_uint16"), val = uint16(2)]; tensor var_3896_shape_cast_fp16_to_uint16 = cast(dtype = var_3896_shape_cast_fp16_to_uint16_dtype_0, x = var_3896_shape_cast_fp16)[name = string("cast_23")]; uint16 gather_375_cast_uint16 = gather(axis = gather_375_axis_0, batch_dims = gather_375_batch_dims_0, indices = select_375_to_uint16, validate_indices = gather_375_validate_indices_0, x = var_3896_shape_cast_fp16_to_uint16)[name = string("gather_375_cast_uint16")]; string gather_375_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_375_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_376 = const()[name = string("gather_376"), val = int32(64)]; tensor var_3903_axes_0 = const()[name = string("op_3903_axes_0"), val = tensor([2])]; tensor var_3903_cast_fp16 = expand_dims(axes = var_3903_axes_0, x = var_3889_cast_fp16)[name = string("op_3903_cast_fp16")]; tensor shape_417_cast_fp16 = shape(x = var_3903_cast_fp16)[name = string("shape_417_cast_fp16")]; int32 concat_393_axis_0 = const()[name = string("concat_393_axis_0"), val = int32(0)]; bool concat_393_interleave_0 = const()[name = string("concat_393_interleave_0"), val = bool(false)]; int32 gather_375_cast_uint16_to_int32 = cast(dtype = gather_375_cast_uint16_to_int32_dtype_0, x = gather_375_cast_uint16)[name = string("cast_22")]; tensor concat_393 = concat(axis = concat_393_axis_0, interleave = concat_393_interleave_0, values = (gather_373, gather_374, var_76, gather_375_cast_uint16_to_int32, gather_376))[name = string("concat_393")]; tensor real_div_40 = real_div(x = concat_393, y = shape_417_cast_fp16)[name = string("real_div_40")]; tensor hidden_states_611_cast_fp16 = tile(reps = real_div_40, x = var_3903_cast_fp16)[name = string("hidden_states_611_cast_fp16")]; tensor concat_394x = const()[name = string("concat_394x"), val = tensor([1, 14, -1, 64])]; tensor key_states_83_cast_fp16 = reshape(shape = concat_394x, x = hidden_states_611_cast_fp16)[name = string("key_states_83_cast_fp16")]; tensor var_3913_shape_cast_fp16 = shape(x = var_3894_cast_fp16)[name = string("op_3913_shape_cast_fp16")]; int32 gather_377 = const()[name = string("gather_377"), val = int32(1)]; int32 gather_378 = const()[name = string("gather_378"), val = int32(2)]; int32 gather_379_axis_0 = const()[name = string("gather_379_axis_0"), val = int32(0)]; int32 gather_379_batch_dims_0 = const()[name = string("gather_379_batch_dims_0"), val = int32(0)]; bool gather_379_validate_indices_0 = const()[name = string("gather_379_validate_indices_0"), val = bool(false)]; string var_3913_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3913_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_379_to_uint16 = const()[name = string("select_379_to_uint16"), val = uint16(2)]; tensor var_3913_shape_cast_fp16_to_uint16 = cast(dtype = var_3913_shape_cast_fp16_to_uint16_dtype_0, x = var_3913_shape_cast_fp16)[name = string("cast_21")]; uint16 gather_379_cast_uint16 = gather(axis = gather_379_axis_0, batch_dims = gather_379_batch_dims_0, indices = select_379_to_uint16, validate_indices = gather_379_validate_indices_0, x = var_3913_shape_cast_fp16_to_uint16)[name = string("gather_379_cast_uint16")]; string gather_379_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_379_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_380 = const()[name = string("gather_380"), val = int32(64)]; tensor var_3920_axes_0 = const()[name = string("op_3920_axes_0"), val = tensor([2])]; tensor var_3920_cast_fp16 = expand_dims(axes = var_3920_axes_0, x = var_3894_cast_fp16)[name = string("op_3920_cast_fp16")]; tensor shape_422_cast_fp16 = shape(x = var_3920_cast_fp16)[name = string("shape_422_cast_fp16")]; int32 concat_395_axis_0 = const()[name = string("concat_395_axis_0"), val = int32(0)]; bool concat_395_interleave_0 = const()[name = string("concat_395_interleave_0"), val = bool(false)]; int32 gather_379_cast_uint16_to_int32 = cast(dtype = gather_379_cast_uint16_to_int32_dtype_0, x = gather_379_cast_uint16)[name = string("cast_20")]; tensor concat_395 = concat(axis = concat_395_axis_0, interleave = concat_395_interleave_0, values = (gather_377, gather_378, var_76, gather_379_cast_uint16_to_int32, gather_380))[name = string("concat_395")]; tensor real_div_41 = real_div(x = concat_395, y = shape_422_cast_fp16)[name = string("real_div_41")]; tensor hidden_states_615_cast_fp16 = tile(reps = real_div_41, x = var_3920_cast_fp16)[name = string("hidden_states_615_cast_fp16")]; tensor concat_396x = const()[name = string("concat_396x"), val = tensor([1, 14, -1, 64])]; tensor value_states_83_cast_fp16 = reshape(shape = concat_396x, x = hidden_states_615_cast_fp16)[name = string("value_states_83_cast_fp16")]; tensor var_3930_shape_cast_fp16 = shape(x = key_states_83_cast_fp16)[name = string("op_3930_shape_cast_fp16")]; int32 gather_381_axis_0 = const()[name = string("gather_381_axis_0"), val = int32(0)]; int32 gather_381_batch_dims_0 = const()[name = string("gather_381_batch_dims_0"), val = int32(0)]; bool gather_381_validate_indices_0 = const()[name = string("gather_381_validate_indices_0"), val = bool(false)]; string var_3930_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3930_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_381_to_uint16 = const()[name = string("select_381_to_uint16"), val = uint16(2)]; tensor var_3930_shape_cast_fp16_to_uint16 = cast(dtype = var_3930_shape_cast_fp16_to_uint16_dtype_0, x = var_3930_shape_cast_fp16)[name = string("cast_19")]; uint16 gather_381_cast_uint16 = gather(axis = gather_381_axis_0, batch_dims = gather_381_batch_dims_0, indices = select_381_to_uint16, validate_indices = gather_381_validate_indices_0, x = var_3930_shape_cast_fp16_to_uint16)[name = string("gather_381_cast_uint16")]; string gather_381_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_381_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_397_values0_0 = const()[name = string("concat_397_values0_0"), val = int32(1)]; int32 concat_397_values1_0 = const()[name = string("concat_397_values1_0"), val = int32(1)]; int32 concat_397_values2_0 = const()[name = string("concat_397_values2_0"), val = int32(0)]; int32 concat_397_axis_0 = const()[name = string("concat_397_axis_0"), val = int32(0)]; bool concat_397_interleave_0 = const()[name = string("concat_397_interleave_0"), val = bool(false)]; int32 gather_381_cast_uint16_to_int32 = cast(dtype = gather_381_cast_uint16_to_int32_dtype_0, x = gather_381_cast_uint16)[name = string("cast_18")]; tensor concat_397 = concat(axis = concat_397_axis_0, interleave = concat_397_interleave_0, values = (concat_397_values0_0, concat_397_values1_0, concat_397_values2_0, gather_381_cast_uint16_to_int32))[name = string("concat_397")]; tensor causal_mask_43_begin_0 = const()[name = string("causal_mask_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_43_end_mask_0 = const()[name = string("causal_mask_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_43_cast_fp16 = slice_by_index(begin = causal_mask_43_begin_0, end = concat_397, end_mask = causal_mask_43_end_mask_0, x = causal_mask)[name = string("causal_mask_43_cast_fp16")]; tensor attn_output_81_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_43_cast_fp16, key = key_states_83_cast_fp16, query = query_states_83_cast_fp16, value = value_states_83_cast_fp16)[name = string("attn_output_81_cast_fp16")]; tensor var_3936_perm_0 = const()[name = string("op_3936_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_398x = const()[name = string("concat_398x"), val = tensor([1, -1, 896])]; tensor var_3936_cast_fp16 = transpose(perm = var_3936_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_12")]; tensor input_161_cast_fp16 = reshape(shape = concat_398x, x = var_3936_cast_fp16)[name = string("input_161_cast_fp16")]; tensor model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245046336))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245447808))))[name = string("model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_143_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_to_fp16_quantized, x = input_161_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor hidden_states_619_cast_fp16 = add(x = hidden_states_599_cast_fp16, y = linear_143_cast_fp16)[name = string("hidden_states_619_cast_fp16")]; fp16 var_70_promoted_41_to_fp16 = const()[name = string("op_70_promoted_41_to_fp16"), val = fp16(0x1p+1)]; tensor var_3945_cast_fp16 = pow(x = hidden_states_619_cast_fp16, y = var_70_promoted_41_to_fp16)[name = string("op_3945_cast_fp16")]; tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_3945_cast_fp16)[name = string("variance_83_cast_fp16")]; fp16 var_3948_to_fp16 = const()[name = string("op_3948_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3949_cast_fp16 = add(x = variance_83_cast_fp16, y = var_3948_to_fp16)[name = string("op_3949_cast_fp16")]; fp32 var_3950_epsilon_0 = const()[name = string("op_3950_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3950_cast_fp16 = rsqrt(epsilon = var_3950_epsilon_0, x = var_3949_cast_fp16)[name = string("op_3950_cast_fp16")]; tensor hidden_states_623_cast_fp16 = mul(x = hidden_states_619_cast_fp16, y = var_3950_cast_fp16)[name = string("hidden_states_623_cast_fp16")]; tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245498048)))]; tensor input_163_cast_fp16 = mul(x = model_model_layers_20_post_attention_layernorm_weight_to_fp16, y = hidden_states_623_cast_fp16)[name = string("input_163_cast_fp16")]; tensor model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(245499904))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247679040))))[name = string("model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_gate_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_3962_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_3962_cast_fp16")]; tensor model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(247951488))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250130624))))[name = string("model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_up_proj_weight_to_fp16_quantized, x = input_163_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor input_167_cast_fp16 = mul(x = var_3962_cast_fp16, y = linear_145_cast_fp16)[name = string("input_167_cast_fp16")]; tensor model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(250403072))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252582208))))[name = string("model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_146_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_20_mlp_down_proj_weight_to_fp16_quantized, x = input_167_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor hidden_states_629_cast_fp16 = add(x = hidden_states_619_cast_fp16, y = linear_146_cast_fp16)[name = string("hidden_states_629_cast_fp16")]; fp16 var_70_promoted_42_to_fp16 = const()[name = string("op_70_promoted_42_to_fp16"), val = fp16(0x1p+1)]; tensor var_3975_cast_fp16 = pow(x = hidden_states_629_cast_fp16, y = var_70_promoted_42_to_fp16)[name = string("op_3975_cast_fp16")]; tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_3975_cast_fp16)[name = string("variance_85_cast_fp16")]; fp16 var_3978_to_fp16 = const()[name = string("op_3978_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3979_cast_fp16 = add(x = variance_85_cast_fp16, y = var_3978_to_fp16)[name = string("op_3979_cast_fp16")]; fp32 var_3980_epsilon_0 = const()[name = string("op_3980_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3980_cast_fp16 = rsqrt(epsilon = var_3980_epsilon_0, x = var_3979_cast_fp16)[name = string("op_3980_cast_fp16")]; tensor hidden_states_633_cast_fp16 = mul(x = hidden_states_629_cast_fp16, y = var_3980_cast_fp16)[name = string("hidden_states_633_cast_fp16")]; tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252854656)))]; tensor hidden_states_637_cast_fp16 = mul(x = model_model_layers_21_input_layernorm_weight_to_fp16, y = hidden_states_633_cast_fp16)[name = string("hidden_states_637_cast_fp16")]; tensor model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(252856512))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253257984))))[name = string("model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_21_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_21_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253308224)))]; tensor linear_147_cast_fp16 = linear(bias = model_model_layers_21_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_21_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253310080))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253367488))))[name = string("model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_21_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_21_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253374720)))]; tensor linear_148_cast_fp16 = linear(bias = model_model_layers_21_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_21_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253375040))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253432448))))[name = string("model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_21_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_21_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253439680)))]; tensor linear_149_cast_fp16 = linear(bias = model_model_layers_21_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_21_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_637_cast_fp16)[name = string("linear_149_cast_fp16")]; tensor concat_399x = const()[name = string("concat_399x"), val = tensor([1, -1, 14, 64])]; tensor var_4003_cast_fp16 = reshape(shape = concat_399x, x = linear_147_cast_fp16)[name = string("op_4003_cast_fp16")]; tensor q_43_perm_0 = const()[name = string("q_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_400x = const()[name = string("concat_400x"), val = tensor([1, -1, 2, 64])]; tensor var_4006_cast_fp16 = reshape(shape = concat_400x, x = linear_148_cast_fp16)[name = string("op_4006_cast_fp16")]; tensor k_43_perm_0 = const()[name = string("k_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_401x = const()[name = string("concat_401x"), val = tensor([1, -1, 2, 64])]; tensor var_4009_cast_fp16 = reshape(shape = concat_401x, x = linear_149_cast_fp16)[name = string("op_4009_cast_fp16")]; tensor v_state_43_perm_0 = const()[name = string("v_state_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_43_cast_fp16 = transpose(perm = q_43_perm_0, x = var_4003_cast_fp16)[name = string("transpose_11")]; tensor var_4013_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4013_cast_fp16")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4024_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_4024_cast_fp16")]; bool var_4026_interleave_0 = const()[name = string("op_4026_interleave_0"), val = bool(false)]; tensor var_4026_cast_fp16 = concat(axis = var_64, interleave = var_4026_interleave_0, values = (var_4024_cast_fp16, x1_85_cast_fp16))[name = string("op_4026_cast_fp16")]; tensor var_4027_cast_fp16 = mul(x = var_4026_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4027_cast_fp16")]; tensor query_states_87_cast_fp16 = add(x = var_4013_cast_fp16, y = var_4027_cast_fp16)[name = string("query_states_87_cast_fp16")]; tensor k_43_cast_fp16 = transpose(perm = k_43_perm_0, x = var_4006_cast_fp16)[name = string("transpose_10")]; tensor var_4029_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4029_cast_fp16")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4040_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_4040_cast_fp16")]; bool var_4042_interleave_0 = const()[name = string("op_4042_interleave_0"), val = bool(false)]; tensor var_4042_cast_fp16 = concat(axis = var_64, interleave = var_4042_interleave_0, values = (var_4040_cast_fp16, x1_87_cast_fp16))[name = string("op_4042_cast_fp16")]; tensor var_4043_cast_fp16 = mul(x = var_4042_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4043_cast_fp16")]; tensor k_state_43_cast_fp16 = add(x = var_4029_cast_fp16, y = var_4043_cast_fp16)[name = string("k_state_43_cast_fp16")]; tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([0])]; tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; tensor concat_404_values0_0 = const()[name = string("concat_404_values0_0"), val = tensor([21])]; int32 concat_404_axis_0 = const()[name = string("concat_404_axis_0"), val = int32(0)]; bool concat_404_interleave_0 = const()[name = string("concat_404_interleave_0"), val = bool(false)]; tensor concat_404 = concat(axis = concat_404_axis_0, interleave = concat_404_interleave_0, values = (concat_404_values0_0, expand_dims_252, expand_dims_253, expand_dims_2, expand_dims_255))[name = string("concat_404")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_404, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = k_state_43_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_90")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_43_cast_fp16 = transpose(perm = v_state_43_perm_0, x = var_4009_cast_fp16)[name = string("transpose_9")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_404, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = v_state_43_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_91")]; tensor var_4066_begin_0 = const()[name = string("op_4066_begin_0"), val = tensor([21, 0, 0, 0, 0])]; tensor var_4066_end_0 = const()[name = string("op_4066_end_0"), val = tensor([22, 1, 2, 2048, 64])]; tensor var_4066_end_mask_0 = const()[name = string("op_4066_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4066_squeeze_mask_0 = const()[name = string("op_4066_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4066_cast_fp16 = slice_by_index(begin = var_4066_begin_0, end = var_4066_end_0, end_mask = var_4066_end_mask_0, squeeze_mask = var_4066_squeeze_mask_0, x = coreml_update_state_90)[name = string("op_4066_cast_fp16")]; tensor var_4069_begin_0 = const()[name = string("op_4069_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4069_end_mask_0 = const()[name = string("op_4069_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4069_cast_fp16 = slice_by_index(begin = var_4069_begin_0, end = concat_11, end_mask = var_4069_end_mask_0, x = var_4066_cast_fp16)[name = string("op_4069_cast_fp16")]; tensor var_4071_begin_0 = const()[name = string("op_4071_begin_0"), val = tensor([21, 0, 0, 0, 0])]; tensor var_4071_end_0 = const()[name = string("op_4071_end_0"), val = tensor([22, 1, 2, 2048, 64])]; tensor var_4071_end_mask_0 = const()[name = string("op_4071_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4071_squeeze_mask_0 = const()[name = string("op_4071_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4071_cast_fp16 = slice_by_index(begin = var_4071_begin_0, end = var_4071_end_0, end_mask = var_4071_end_mask_0, squeeze_mask = var_4071_squeeze_mask_0, x = coreml_update_state_91)[name = string("op_4071_cast_fp16")]; tensor var_4074_begin_0 = const()[name = string("op_4074_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4074_end_mask_0 = const()[name = string("op_4074_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4074_cast_fp16 = slice_by_index(begin = var_4074_begin_0, end = concat_11, end_mask = var_4074_end_mask_0, x = var_4071_cast_fp16)[name = string("op_4074_cast_fp16")]; tensor var_4076_shape_cast_fp16 = shape(x = var_4069_cast_fp16)[name = string("op_4076_shape_cast_fp16")]; int32 gather_391 = const()[name = string("gather_391"), val = int32(1)]; int32 gather_392 = const()[name = string("gather_392"), val = int32(2)]; int32 gather_393_axis_0 = const()[name = string("gather_393_axis_0"), val = int32(0)]; int32 gather_393_batch_dims_0 = const()[name = string("gather_393_batch_dims_0"), val = int32(0)]; bool gather_393_validate_indices_0 = const()[name = string("gather_393_validate_indices_0"), val = bool(false)]; string var_4076_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4076_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_393_to_uint16 = const()[name = string("select_393_to_uint16"), val = uint16(2)]; tensor var_4076_shape_cast_fp16_to_uint16 = cast(dtype = var_4076_shape_cast_fp16_to_uint16_dtype_0, x = var_4076_shape_cast_fp16)[name = string("cast_17")]; uint16 gather_393_cast_uint16 = gather(axis = gather_393_axis_0, batch_dims = gather_393_batch_dims_0, indices = select_393_to_uint16, validate_indices = gather_393_validate_indices_0, x = var_4076_shape_cast_fp16_to_uint16)[name = string("gather_393_cast_uint16")]; string gather_393_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_393_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_394 = const()[name = string("gather_394"), val = int32(64)]; tensor var_4083_axes_0 = const()[name = string("op_4083_axes_0"), val = tensor([2])]; tensor var_4083_cast_fp16 = expand_dims(axes = var_4083_axes_0, x = var_4069_cast_fp16)[name = string("op_4083_cast_fp16")]; tensor shape_437_cast_fp16 = shape(x = var_4083_cast_fp16)[name = string("shape_437_cast_fp16")]; int32 concat_412_axis_0 = const()[name = string("concat_412_axis_0"), val = int32(0)]; bool concat_412_interleave_0 = const()[name = string("concat_412_interleave_0"), val = bool(false)]; int32 gather_393_cast_uint16_to_int32 = cast(dtype = gather_393_cast_uint16_to_int32_dtype_0, x = gather_393_cast_uint16)[name = string("cast_16")]; tensor concat_412 = concat(axis = concat_412_axis_0, interleave = concat_412_interleave_0, values = (gather_391, gather_392, var_76, gather_393_cast_uint16_to_int32, gather_394))[name = string("concat_412")]; tensor real_div_42 = real_div(x = concat_412, y = shape_437_cast_fp16)[name = string("real_div_42")]; tensor hidden_states_641_cast_fp16 = tile(reps = real_div_42, x = var_4083_cast_fp16)[name = string("hidden_states_641_cast_fp16")]; tensor concat_413x = const()[name = string("concat_413x"), val = tensor([1, 14, -1, 64])]; tensor key_states_87_cast_fp16 = reshape(shape = concat_413x, x = hidden_states_641_cast_fp16)[name = string("key_states_87_cast_fp16")]; tensor var_4093_shape_cast_fp16 = shape(x = var_4074_cast_fp16)[name = string("op_4093_shape_cast_fp16")]; int32 gather_395 = const()[name = string("gather_395"), val = int32(1)]; int32 gather_396 = const()[name = string("gather_396"), val = int32(2)]; int32 gather_397_axis_0 = const()[name = string("gather_397_axis_0"), val = int32(0)]; int32 gather_397_batch_dims_0 = const()[name = string("gather_397_batch_dims_0"), val = int32(0)]; bool gather_397_validate_indices_0 = const()[name = string("gather_397_validate_indices_0"), val = bool(false)]; string var_4093_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4093_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_397_to_uint16 = const()[name = string("select_397_to_uint16"), val = uint16(2)]; tensor var_4093_shape_cast_fp16_to_uint16 = cast(dtype = var_4093_shape_cast_fp16_to_uint16_dtype_0, x = var_4093_shape_cast_fp16)[name = string("cast_15")]; uint16 gather_397_cast_uint16 = gather(axis = gather_397_axis_0, batch_dims = gather_397_batch_dims_0, indices = select_397_to_uint16, validate_indices = gather_397_validate_indices_0, x = var_4093_shape_cast_fp16_to_uint16)[name = string("gather_397_cast_uint16")]; string gather_397_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_397_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_398 = const()[name = string("gather_398"), val = int32(64)]; tensor var_4100_axes_0 = const()[name = string("op_4100_axes_0"), val = tensor([2])]; tensor var_4100_cast_fp16 = expand_dims(axes = var_4100_axes_0, x = var_4074_cast_fp16)[name = string("op_4100_cast_fp16")]; tensor shape_442_cast_fp16 = shape(x = var_4100_cast_fp16)[name = string("shape_442_cast_fp16")]; int32 concat_414_axis_0 = const()[name = string("concat_414_axis_0"), val = int32(0)]; bool concat_414_interleave_0 = const()[name = string("concat_414_interleave_0"), val = bool(false)]; int32 gather_397_cast_uint16_to_int32 = cast(dtype = gather_397_cast_uint16_to_int32_dtype_0, x = gather_397_cast_uint16)[name = string("cast_14")]; tensor concat_414 = concat(axis = concat_414_axis_0, interleave = concat_414_interleave_0, values = (gather_395, gather_396, var_76, gather_397_cast_uint16_to_int32, gather_398))[name = string("concat_414")]; tensor real_div_43 = real_div(x = concat_414, y = shape_442_cast_fp16)[name = string("real_div_43")]; tensor hidden_states_645_cast_fp16 = tile(reps = real_div_43, x = var_4100_cast_fp16)[name = string("hidden_states_645_cast_fp16")]; tensor concat_415x = const()[name = string("concat_415x"), val = tensor([1, 14, -1, 64])]; tensor value_states_87_cast_fp16 = reshape(shape = concat_415x, x = hidden_states_645_cast_fp16)[name = string("value_states_87_cast_fp16")]; tensor var_4110_shape_cast_fp16 = shape(x = key_states_87_cast_fp16)[name = string("op_4110_shape_cast_fp16")]; int32 gather_399_axis_0 = const()[name = string("gather_399_axis_0"), val = int32(0)]; int32 gather_399_batch_dims_0 = const()[name = string("gather_399_batch_dims_0"), val = int32(0)]; bool gather_399_validate_indices_0 = const()[name = string("gather_399_validate_indices_0"), val = bool(false)]; string var_4110_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4110_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_399_to_uint16 = const()[name = string("select_399_to_uint16"), val = uint16(2)]; tensor var_4110_shape_cast_fp16_to_uint16 = cast(dtype = var_4110_shape_cast_fp16_to_uint16_dtype_0, x = var_4110_shape_cast_fp16)[name = string("cast_13")]; uint16 gather_399_cast_uint16 = gather(axis = gather_399_axis_0, batch_dims = gather_399_batch_dims_0, indices = select_399_to_uint16, validate_indices = gather_399_validate_indices_0, x = var_4110_shape_cast_fp16_to_uint16)[name = string("gather_399_cast_uint16")]; string gather_399_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_399_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_416_values0_0 = const()[name = string("concat_416_values0_0"), val = int32(1)]; int32 concat_416_values1_0 = const()[name = string("concat_416_values1_0"), val = int32(1)]; int32 concat_416_values2_0 = const()[name = string("concat_416_values2_0"), val = int32(0)]; int32 concat_416_axis_0 = const()[name = string("concat_416_axis_0"), val = int32(0)]; bool concat_416_interleave_0 = const()[name = string("concat_416_interleave_0"), val = bool(false)]; int32 gather_399_cast_uint16_to_int32 = cast(dtype = gather_399_cast_uint16_to_int32_dtype_0, x = gather_399_cast_uint16)[name = string("cast_12")]; tensor concat_416 = concat(axis = concat_416_axis_0, interleave = concat_416_interleave_0, values = (concat_416_values0_0, concat_416_values1_0, concat_416_values2_0, gather_399_cast_uint16_to_int32))[name = string("concat_416")]; tensor causal_mask_45_begin_0 = const()[name = string("causal_mask_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_45_end_mask_0 = const()[name = string("causal_mask_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_45_cast_fp16 = slice_by_index(begin = causal_mask_45_begin_0, end = concat_416, end_mask = causal_mask_45_end_mask_0, x = causal_mask)[name = string("causal_mask_45_cast_fp16")]; tensor attn_output_85_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_45_cast_fp16, key = key_states_87_cast_fp16, query = query_states_87_cast_fp16, value = value_states_87_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_4116_perm_0 = const()[name = string("op_4116_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_417x = const()[name = string("concat_417x"), val = tensor([1, -1, 896])]; tensor var_4116_cast_fp16 = transpose(perm = var_4116_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_8")]; tensor input_169_cast_fp16 = reshape(shape = concat_417x, x = var_4116_cast_fp16)[name = string("input_169_cast_fp16")]; tensor model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253440000))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253841472))))[name = string("model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_150_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_to_fp16_quantized, x = input_169_cast_fp16)[name = string("linear_150_cast_fp16")]; tensor hidden_states_649_cast_fp16 = add(x = hidden_states_629_cast_fp16, y = linear_150_cast_fp16)[name = string("hidden_states_649_cast_fp16")]; fp16 var_70_promoted_43_to_fp16 = const()[name = string("op_70_promoted_43_to_fp16"), val = fp16(0x1p+1)]; tensor var_4125_cast_fp16 = pow(x = hidden_states_649_cast_fp16, y = var_70_promoted_43_to_fp16)[name = string("op_4125_cast_fp16")]; tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_4125_cast_fp16)[name = string("variance_87_cast_fp16")]; fp16 var_4128_to_fp16 = const()[name = string("op_4128_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4129_cast_fp16 = add(x = variance_87_cast_fp16, y = var_4128_to_fp16)[name = string("op_4129_cast_fp16")]; fp32 var_4130_epsilon_0 = const()[name = string("op_4130_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4130_cast_fp16 = rsqrt(epsilon = var_4130_epsilon_0, x = var_4129_cast_fp16)[name = string("op_4130_cast_fp16")]; tensor hidden_states_653_cast_fp16 = mul(x = hidden_states_649_cast_fp16, y = var_4130_cast_fp16)[name = string("hidden_states_653_cast_fp16")]; tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253891712)))]; tensor input_171_cast_fp16 = mul(x = model_model_layers_21_post_attention_layernorm_weight_to_fp16, y = hidden_states_653_cast_fp16)[name = string("input_171_cast_fp16")]; tensor model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(253893568))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256072704))))[name = string("model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_gate_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor var_4142_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_4142_cast_fp16")]; tensor model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(256345152))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258524288))))[name = string("model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_up_proj_weight_to_fp16_quantized, x = input_171_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor input_175_cast_fp16 = mul(x = var_4142_cast_fp16, y = linear_152_cast_fp16)[name = string("input_175_cast_fp16")]; tensor model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(258796736))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(260975872))))[name = string("model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_153_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_21_mlp_down_proj_weight_to_fp16_quantized, x = input_175_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor hidden_states_659_cast_fp16 = add(x = hidden_states_649_cast_fp16, y = linear_153_cast_fp16)[name = string("hidden_states_659_cast_fp16")]; fp16 var_70_promoted_44_to_fp16 = const()[name = string("op_70_promoted_44_to_fp16"), val = fp16(0x1p+1)]; tensor var_4155_cast_fp16 = pow(x = hidden_states_659_cast_fp16, y = var_70_promoted_44_to_fp16)[name = string("op_4155_cast_fp16")]; tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_4155_cast_fp16)[name = string("variance_89_cast_fp16")]; fp16 var_4158_to_fp16 = const()[name = string("op_4158_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4159_cast_fp16 = add(x = variance_89_cast_fp16, y = var_4158_to_fp16)[name = string("op_4159_cast_fp16")]; fp32 var_4160_epsilon_0 = const()[name = string("op_4160_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4160_cast_fp16 = rsqrt(epsilon = var_4160_epsilon_0, x = var_4159_cast_fp16)[name = string("op_4160_cast_fp16")]; tensor hidden_states_663_cast_fp16 = mul(x = hidden_states_659_cast_fp16, y = var_4160_cast_fp16)[name = string("hidden_states_663_cast_fp16")]; tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261248320)))]; tensor hidden_states_667_cast_fp16 = mul(x = model_model_layers_22_input_layernorm_weight_to_fp16, y = hidden_states_663_cast_fp16)[name = string("hidden_states_667_cast_fp16")]; tensor model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261250176))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261651648))))[name = string("model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_22_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_22_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261701888)))]; tensor linear_154_cast_fp16 = linear(bias = model_model_layers_22_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_22_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261703744))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261761152))))[name = string("model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_22_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_22_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261768384)))]; tensor linear_155_cast_fp16 = linear(bias = model_model_layers_22_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_22_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261768704))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261826112))))[name = string("model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_22_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_22_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261833344)))]; tensor linear_156_cast_fp16 = linear(bias = model_model_layers_22_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_22_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_667_cast_fp16)[name = string("linear_156_cast_fp16")]; tensor concat_418x = const()[name = string("concat_418x"), val = tensor([1, -1, 14, 64])]; tensor var_4183_cast_fp16 = reshape(shape = concat_418x, x = linear_154_cast_fp16)[name = string("op_4183_cast_fp16")]; tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 2, 64])]; tensor var_4186_cast_fp16 = reshape(shape = concat_419x, x = linear_155_cast_fp16)[name = string("op_4186_cast_fp16")]; tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_420x = const()[name = string("concat_420x"), val = tensor([1, -1, 2, 64])]; tensor var_4189_cast_fp16 = reshape(shape = concat_420x, x = linear_156_cast_fp16)[name = string("op_4189_cast_fp16")]; tensor v_state_45_perm_0 = const()[name = string("v_state_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_45_cast_fp16 = transpose(perm = q_45_perm_0, x = var_4183_cast_fp16)[name = string("transpose_7")]; tensor var_4193_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4193_cast_fp16")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4204_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_4204_cast_fp16")]; bool var_4206_interleave_0 = const()[name = string("op_4206_interleave_0"), val = bool(false)]; tensor var_4206_cast_fp16 = concat(axis = var_64, interleave = var_4206_interleave_0, values = (var_4204_cast_fp16, x1_89_cast_fp16))[name = string("op_4206_cast_fp16")]; tensor var_4207_cast_fp16 = mul(x = var_4206_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4207_cast_fp16")]; tensor query_states_91_cast_fp16 = add(x = var_4193_cast_fp16, y = var_4207_cast_fp16)[name = string("query_states_91_cast_fp16")]; tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = var_4186_cast_fp16)[name = string("transpose_6")]; tensor var_4209_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4209_cast_fp16")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4220_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_4220_cast_fp16")]; bool var_4222_interleave_0 = const()[name = string("op_4222_interleave_0"), val = bool(false)]; tensor var_4222_cast_fp16 = concat(axis = var_64, interleave = var_4222_interleave_0, values = (var_4220_cast_fp16, x1_91_cast_fp16))[name = string("op_4222_cast_fp16")]; tensor var_4223_cast_fp16 = mul(x = var_4222_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4223_cast_fp16")]; tensor k_state_45_cast_fp16 = add(x = var_4209_cast_fp16, y = var_4223_cast_fp16)[name = string("k_state_45_cast_fp16")]; tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([0])]; tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; tensor concat_423_values0_0 = const()[name = string("concat_423_values0_0"), val = tensor([22])]; int32 concat_423_axis_0 = const()[name = string("concat_423_axis_0"), val = int32(0)]; bool concat_423_interleave_0 = const()[name = string("concat_423_interleave_0"), val = bool(false)]; tensor concat_423 = concat(axis = concat_423_axis_0, interleave = concat_423_interleave_0, values = (concat_423_values0_0, expand_dims_264, expand_dims_265, expand_dims_2, expand_dims_267))[name = string("concat_423")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_423, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = k_state_45_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_92")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_45_cast_fp16 = transpose(perm = v_state_45_perm_0, x = var_4189_cast_fp16)[name = string("transpose_5")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_423, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = v_state_45_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_93")]; tensor var_4246_begin_0 = const()[name = string("op_4246_begin_0"), val = tensor([22, 0, 0, 0, 0])]; tensor var_4246_end_0 = const()[name = string("op_4246_end_0"), val = tensor([23, 1, 2, 2048, 64])]; tensor var_4246_end_mask_0 = const()[name = string("op_4246_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4246_squeeze_mask_0 = const()[name = string("op_4246_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4246_cast_fp16 = slice_by_index(begin = var_4246_begin_0, end = var_4246_end_0, end_mask = var_4246_end_mask_0, squeeze_mask = var_4246_squeeze_mask_0, x = coreml_update_state_92)[name = string("op_4246_cast_fp16")]; tensor var_4249_begin_0 = const()[name = string("op_4249_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4249_end_mask_0 = const()[name = string("op_4249_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4249_cast_fp16 = slice_by_index(begin = var_4249_begin_0, end = concat_11, end_mask = var_4249_end_mask_0, x = var_4246_cast_fp16)[name = string("op_4249_cast_fp16")]; tensor var_4251_begin_0 = const()[name = string("op_4251_begin_0"), val = tensor([22, 0, 0, 0, 0])]; tensor var_4251_end_0 = const()[name = string("op_4251_end_0"), val = tensor([23, 1, 2, 2048, 64])]; tensor var_4251_end_mask_0 = const()[name = string("op_4251_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4251_squeeze_mask_0 = const()[name = string("op_4251_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4251_cast_fp16 = slice_by_index(begin = var_4251_begin_0, end = var_4251_end_0, end_mask = var_4251_end_mask_0, squeeze_mask = var_4251_squeeze_mask_0, x = coreml_update_state_93)[name = string("op_4251_cast_fp16")]; tensor var_4254_begin_0 = const()[name = string("op_4254_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4254_end_mask_0 = const()[name = string("op_4254_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4254_cast_fp16 = slice_by_index(begin = var_4254_begin_0, end = concat_11, end_mask = var_4254_end_mask_0, x = var_4251_cast_fp16)[name = string("op_4254_cast_fp16")]; tensor var_4256_shape_cast_fp16 = shape(x = var_4249_cast_fp16)[name = string("op_4256_shape_cast_fp16")]; int32 gather_409 = const()[name = string("gather_409"), val = int32(1)]; int32 gather_410 = const()[name = string("gather_410"), val = int32(2)]; int32 gather_411_axis_0 = const()[name = string("gather_411_axis_0"), val = int32(0)]; int32 gather_411_batch_dims_0 = const()[name = string("gather_411_batch_dims_0"), val = int32(0)]; bool gather_411_validate_indices_0 = const()[name = string("gather_411_validate_indices_0"), val = bool(false)]; string var_4256_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4256_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_411_to_uint16 = const()[name = string("select_411_to_uint16"), val = uint16(2)]; tensor var_4256_shape_cast_fp16_to_uint16 = cast(dtype = var_4256_shape_cast_fp16_to_uint16_dtype_0, x = var_4256_shape_cast_fp16)[name = string("cast_11")]; uint16 gather_411_cast_uint16 = gather(axis = gather_411_axis_0, batch_dims = gather_411_batch_dims_0, indices = select_411_to_uint16, validate_indices = gather_411_validate_indices_0, x = var_4256_shape_cast_fp16_to_uint16)[name = string("gather_411_cast_uint16")]; string gather_411_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_411_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_412 = const()[name = string("gather_412"), val = int32(64)]; tensor var_4263_axes_0 = const()[name = string("op_4263_axes_0"), val = tensor([2])]; tensor var_4263_cast_fp16 = expand_dims(axes = var_4263_axes_0, x = var_4249_cast_fp16)[name = string("op_4263_cast_fp16")]; tensor shape_457_cast_fp16 = shape(x = var_4263_cast_fp16)[name = string("shape_457_cast_fp16")]; int32 concat_431_axis_0 = const()[name = string("concat_431_axis_0"), val = int32(0)]; bool concat_431_interleave_0 = const()[name = string("concat_431_interleave_0"), val = bool(false)]; int32 gather_411_cast_uint16_to_int32 = cast(dtype = gather_411_cast_uint16_to_int32_dtype_0, x = gather_411_cast_uint16)[name = string("cast_10")]; tensor concat_431 = concat(axis = concat_431_axis_0, interleave = concat_431_interleave_0, values = (gather_409, gather_410, var_76, gather_411_cast_uint16_to_int32, gather_412))[name = string("concat_431")]; tensor real_div_44 = real_div(x = concat_431, y = shape_457_cast_fp16)[name = string("real_div_44")]; tensor hidden_states_671_cast_fp16 = tile(reps = real_div_44, x = var_4263_cast_fp16)[name = string("hidden_states_671_cast_fp16")]; tensor concat_432x = const()[name = string("concat_432x"), val = tensor([1, 14, -1, 64])]; tensor key_states_91_cast_fp16 = reshape(shape = concat_432x, x = hidden_states_671_cast_fp16)[name = string("key_states_91_cast_fp16")]; tensor var_4273_shape_cast_fp16 = shape(x = var_4254_cast_fp16)[name = string("op_4273_shape_cast_fp16")]; int32 gather_413 = const()[name = string("gather_413"), val = int32(1)]; int32 gather_414 = const()[name = string("gather_414"), val = int32(2)]; int32 gather_415_axis_0 = const()[name = string("gather_415_axis_0"), val = int32(0)]; int32 gather_415_batch_dims_0 = const()[name = string("gather_415_batch_dims_0"), val = int32(0)]; bool gather_415_validate_indices_0 = const()[name = string("gather_415_validate_indices_0"), val = bool(false)]; string var_4273_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4273_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_415_to_uint16 = const()[name = string("select_415_to_uint16"), val = uint16(2)]; tensor var_4273_shape_cast_fp16_to_uint16 = cast(dtype = var_4273_shape_cast_fp16_to_uint16_dtype_0, x = var_4273_shape_cast_fp16)[name = string("cast_9")]; uint16 gather_415_cast_uint16 = gather(axis = gather_415_axis_0, batch_dims = gather_415_batch_dims_0, indices = select_415_to_uint16, validate_indices = gather_415_validate_indices_0, x = var_4273_shape_cast_fp16_to_uint16)[name = string("gather_415_cast_uint16")]; string gather_415_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_415_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_416 = const()[name = string("gather_416"), val = int32(64)]; tensor var_4280_axes_0 = const()[name = string("op_4280_axes_0"), val = tensor([2])]; tensor var_4280_cast_fp16 = expand_dims(axes = var_4280_axes_0, x = var_4254_cast_fp16)[name = string("op_4280_cast_fp16")]; tensor shape_462_cast_fp16 = shape(x = var_4280_cast_fp16)[name = string("shape_462_cast_fp16")]; int32 concat_433_axis_0 = const()[name = string("concat_433_axis_0"), val = int32(0)]; bool concat_433_interleave_0 = const()[name = string("concat_433_interleave_0"), val = bool(false)]; int32 gather_415_cast_uint16_to_int32 = cast(dtype = gather_415_cast_uint16_to_int32_dtype_0, x = gather_415_cast_uint16)[name = string("cast_8")]; tensor concat_433 = concat(axis = concat_433_axis_0, interleave = concat_433_interleave_0, values = (gather_413, gather_414, var_76, gather_415_cast_uint16_to_int32, gather_416))[name = string("concat_433")]; tensor real_div_45 = real_div(x = concat_433, y = shape_462_cast_fp16)[name = string("real_div_45")]; tensor hidden_states_675_cast_fp16 = tile(reps = real_div_45, x = var_4280_cast_fp16)[name = string("hidden_states_675_cast_fp16")]; tensor concat_434x = const()[name = string("concat_434x"), val = tensor([1, 14, -1, 64])]; tensor value_states_91_cast_fp16 = reshape(shape = concat_434x, x = hidden_states_675_cast_fp16)[name = string("value_states_91_cast_fp16")]; tensor var_4290_shape_cast_fp16 = shape(x = key_states_91_cast_fp16)[name = string("op_4290_shape_cast_fp16")]; int32 gather_417_axis_0 = const()[name = string("gather_417_axis_0"), val = int32(0)]; int32 gather_417_batch_dims_0 = const()[name = string("gather_417_batch_dims_0"), val = int32(0)]; bool gather_417_validate_indices_0 = const()[name = string("gather_417_validate_indices_0"), val = bool(false)]; string var_4290_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4290_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_417_to_uint16 = const()[name = string("select_417_to_uint16"), val = uint16(2)]; tensor var_4290_shape_cast_fp16_to_uint16 = cast(dtype = var_4290_shape_cast_fp16_to_uint16_dtype_0, x = var_4290_shape_cast_fp16)[name = string("cast_7")]; uint16 gather_417_cast_uint16 = gather(axis = gather_417_axis_0, batch_dims = gather_417_batch_dims_0, indices = select_417_to_uint16, validate_indices = gather_417_validate_indices_0, x = var_4290_shape_cast_fp16_to_uint16)[name = string("gather_417_cast_uint16")]; string gather_417_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_417_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_435_values0_0 = const()[name = string("concat_435_values0_0"), val = int32(1)]; int32 concat_435_values1_0 = const()[name = string("concat_435_values1_0"), val = int32(1)]; int32 concat_435_values2_0 = const()[name = string("concat_435_values2_0"), val = int32(0)]; int32 concat_435_axis_0 = const()[name = string("concat_435_axis_0"), val = int32(0)]; bool concat_435_interleave_0 = const()[name = string("concat_435_interleave_0"), val = bool(false)]; int32 gather_417_cast_uint16_to_int32 = cast(dtype = gather_417_cast_uint16_to_int32_dtype_0, x = gather_417_cast_uint16)[name = string("cast_6")]; tensor concat_435 = concat(axis = concat_435_axis_0, interleave = concat_435_interleave_0, values = (concat_435_values0_0, concat_435_values1_0, concat_435_values2_0, gather_417_cast_uint16_to_int32))[name = string("concat_435")]; tensor causal_mask_47_begin_0 = const()[name = string("causal_mask_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_47_end_mask_0 = const()[name = string("causal_mask_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_47_cast_fp16 = slice_by_index(begin = causal_mask_47_begin_0, end = concat_435, end_mask = causal_mask_47_end_mask_0, x = causal_mask)[name = string("causal_mask_47_cast_fp16")]; tensor attn_output_89_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_47_cast_fp16, key = key_states_91_cast_fp16, query = query_states_91_cast_fp16, value = value_states_91_cast_fp16)[name = string("attn_output_89_cast_fp16")]; tensor var_4296_perm_0 = const()[name = string("op_4296_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_436x = const()[name = string("concat_436x"), val = tensor([1, -1, 896])]; tensor var_4296_cast_fp16 = transpose(perm = var_4296_perm_0, x = attn_output_89_cast_fp16)[name = string("transpose_4")]; tensor input_177_cast_fp16 = reshape(shape = concat_436x, x = var_4296_cast_fp16)[name = string("input_177_cast_fp16")]; tensor model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(261833664))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262235136))))[name = string("model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_157_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_22_self_attn_o_proj_weight_to_fp16_quantized, x = input_177_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor hidden_states_679_cast_fp16 = add(x = hidden_states_659_cast_fp16, y = linear_157_cast_fp16)[name = string("hidden_states_679_cast_fp16")]; fp16 var_70_promoted_45_to_fp16 = const()[name = string("op_70_promoted_45_to_fp16"), val = fp16(0x1p+1)]; tensor var_4305_cast_fp16 = pow(x = hidden_states_679_cast_fp16, y = var_70_promoted_45_to_fp16)[name = string("op_4305_cast_fp16")]; tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_4305_cast_fp16)[name = string("variance_91_cast_fp16")]; fp16 var_4308_to_fp16 = const()[name = string("op_4308_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4309_cast_fp16 = add(x = variance_91_cast_fp16, y = var_4308_to_fp16)[name = string("op_4309_cast_fp16")]; fp32 var_4310_epsilon_0 = const()[name = string("op_4310_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4310_cast_fp16 = rsqrt(epsilon = var_4310_epsilon_0, x = var_4309_cast_fp16)[name = string("op_4310_cast_fp16")]; tensor hidden_states_683_cast_fp16 = mul(x = hidden_states_679_cast_fp16, y = var_4310_cast_fp16)[name = string("hidden_states_683_cast_fp16")]; tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262285376)))]; tensor input_179_cast_fp16 = mul(x = model_model_layers_22_post_attention_layernorm_weight_to_fp16, y = hidden_states_683_cast_fp16)[name = string("input_179_cast_fp16")]; tensor model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(262287232))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264466368))))[name = string("model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_gate_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_158_cast_fp16")]; tensor var_4322_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_4322_cast_fp16")]; tensor model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(264738816))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(266917952))))[name = string("model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_up_proj_weight_to_fp16_quantized, x = input_179_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor input_183_cast_fp16 = mul(x = var_4322_cast_fp16, y = linear_159_cast_fp16)[name = string("input_183_cast_fp16")]; tensor model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(267190400))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269369536))))[name = string("model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_160_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_22_mlp_down_proj_weight_to_fp16_quantized, x = input_183_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor hidden_states_689_cast_fp16 = add(x = hidden_states_679_cast_fp16, y = linear_160_cast_fp16)[name = string("hidden_states_689_cast_fp16")]; fp16 var_70_promoted_46_to_fp16 = const()[name = string("op_70_promoted_46_to_fp16"), val = fp16(0x1p+1)]; tensor var_4335_cast_fp16 = pow(x = hidden_states_689_cast_fp16, y = var_70_promoted_46_to_fp16)[name = string("op_4335_cast_fp16")]; tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_4335_cast_fp16)[name = string("variance_93_cast_fp16")]; fp16 var_4338_to_fp16 = const()[name = string("op_4338_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4339_cast_fp16 = add(x = variance_93_cast_fp16, y = var_4338_to_fp16)[name = string("op_4339_cast_fp16")]; fp32 var_4340_epsilon_0 = const()[name = string("op_4340_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4340_cast_fp16 = rsqrt(epsilon = var_4340_epsilon_0, x = var_4339_cast_fp16)[name = string("op_4340_cast_fp16")]; tensor hidden_states_693_cast_fp16 = mul(x = hidden_states_689_cast_fp16, y = var_4340_cast_fp16)[name = string("hidden_states_693_cast_fp16")]; tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269641984)))]; tensor hidden_states_697_cast_fp16 = mul(x = model_model_layers_23_input_layernorm_weight_to_fp16, y = hidden_states_693_cast_fp16)[name = string("hidden_states_697_cast_fp16")]; tensor model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(269643840))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270045312))))[name = string("model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized")]; tensor model_model_layers_23_self_attn_q_proj_bias_to_fp16 = const()[name = string("model_model_layers_23_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270095552)))]; tensor linear_161_cast_fp16 = linear(bias = model_model_layers_23_self_attn_q_proj_bias_to_fp16, weight = model_model_layers_23_self_attn_q_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270097408))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270154816))))[name = string("model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized")]; tensor model_model_layers_23_self_attn_k_proj_bias_to_fp16 = const()[name = string("model_model_layers_23_self_attn_k_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270162048)))]; tensor linear_162_cast_fp16 = linear(bias = model_model_layers_23_self_attn_k_proj_bias_to_fp16, weight = model_model_layers_23_self_attn_k_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270162368))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270219776))))[name = string("model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized")]; tensor model_model_layers_23_self_attn_v_proj_bias_to_fp16 = const()[name = string("model_model_layers_23_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270227008)))]; tensor linear_163_cast_fp16 = linear(bias = model_model_layers_23_self_attn_v_proj_bias_to_fp16, weight = model_model_layers_23_self_attn_v_proj_weight_to_fp16_quantized, x = hidden_states_697_cast_fp16)[name = string("linear_163_cast_fp16")]; tensor concat_437x = const()[name = string("concat_437x"), val = tensor([1, -1, 14, 64])]; tensor var_4363_cast_fp16 = reshape(shape = concat_437x, x = linear_161_cast_fp16)[name = string("op_4363_cast_fp16")]; tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_438x = const()[name = string("concat_438x"), val = tensor([1, -1, 2, 64])]; tensor var_4366_cast_fp16 = reshape(shape = concat_438x, x = linear_162_cast_fp16)[name = string("op_4366_cast_fp16")]; tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_439x = const()[name = string("concat_439x"), val = tensor([1, -1, 2, 64])]; tensor var_4369_cast_fp16 = reshape(shape = concat_439x, x = linear_163_cast_fp16)[name = string("op_4369_cast_fp16")]; tensor v_state_perm_0 = const()[name = string("v_state_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_cast_fp16 = transpose(perm = q_perm_0, x = var_4363_cast_fp16)[name = string("transpose_3")]; tensor var_4373_cast_fp16 = mul(x = q_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4373_cast_fp16")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 14, 0, 32])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_cast_fp16)[name = string("x1_93_cast_fp16")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 14, 0, 64])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_cast_fp16)[name = string("x2_93_cast_fp16")]; fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4384_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_4384_cast_fp16")]; bool var_4386_interleave_0 = const()[name = string("op_4386_interleave_0"), val = bool(false)]; tensor var_4386_cast_fp16 = concat(axis = var_64, interleave = var_4386_interleave_0, values = (var_4384_cast_fp16, x1_93_cast_fp16))[name = string("op_4386_cast_fp16")]; tensor var_4387_cast_fp16 = mul(x = var_4386_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4387_cast_fp16")]; tensor query_states_cast_fp16 = add(x = var_4373_cast_fp16, y = var_4387_cast_fp16)[name = string("query_states_cast_fp16")]; tensor k_cast_fp16 = transpose(perm = k_perm_0, x = var_4366_cast_fp16)[name = string("transpose_2")]; tensor var_4389_cast_fp16 = mul(x = k_cast_fp16, y = cos_7_cast_fp16)[name = string("op_4389_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 2, 0, 32])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 32])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 2, 0, 64])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4400_cast_fp16 = mul(x = x2_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_4400_cast_fp16")]; bool var_4402_interleave_0 = const()[name = string("op_4402_interleave_0"), val = bool(false)]; tensor var_4402_cast_fp16 = concat(axis = var_64, interleave = var_4402_interleave_0, values = (var_4400_cast_fp16, x1_cast_fp16))[name = string("op_4402_cast_fp16")]; tensor var_4403_cast_fp16 = mul(x = var_4402_cast_fp16, y = sin_7_cast_fp16)[name = string("op_4403_cast_fp16")]; tensor k_state_cast_fp16 = add(x = var_4389_cast_fp16, y = var_4403_cast_fp16)[name = string("k_state_cast_fp16")]; tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([0])]; tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; tensor concat_442_values0_0 = const()[name = string("concat_442_values0_0"), val = tensor([23])]; int32 concat_442_axis_0 = const()[name = string("concat_442_axis_0"), val = int32(0)]; bool concat_442_interleave_0 = const()[name = string("concat_442_interleave_0"), val = bool(false)]; tensor concat_442 = concat(axis = concat_442_axis_0, interleave = concat_442_interleave_0, values = (concat_442_values0_0, expand_dims_276, expand_dims_277, expand_dims_2, expand_dims_279))[name = string("concat_442")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_442, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = k_state_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_94_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_94")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_cast_fp16 = transpose(perm = v_state_perm_0, x = var_4369_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_442, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_6, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = v_state_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_95_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_95")]; tensor var_4426_begin_0 = const()[name = string("op_4426_begin_0"), val = tensor([23, 0, 0, 0, 0])]; tensor var_4426_end_0 = const()[name = string("op_4426_end_0"), val = tensor([24, 1, 2, 2048, 64])]; tensor var_4426_end_mask_0 = const()[name = string("op_4426_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4426_squeeze_mask_0 = const()[name = string("op_4426_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4426_cast_fp16 = slice_by_index(begin = var_4426_begin_0, end = var_4426_end_0, end_mask = var_4426_end_mask_0, squeeze_mask = var_4426_squeeze_mask_0, x = coreml_update_state_94)[name = string("op_4426_cast_fp16")]; tensor var_4429_begin_0 = const()[name = string("op_4429_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4429_end_mask_0 = const()[name = string("op_4429_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4429_cast_fp16 = slice_by_index(begin = var_4429_begin_0, end = concat_11, end_mask = var_4429_end_mask_0, x = var_4426_cast_fp16)[name = string("op_4429_cast_fp16")]; tensor var_4431_begin_0 = const()[name = string("op_4431_begin_0"), val = tensor([23, 0, 0, 0, 0])]; tensor var_4431_end_0 = const()[name = string("op_4431_end_0"), val = tensor([24, 1, 2, 2048, 64])]; tensor var_4431_end_mask_0 = const()[name = string("op_4431_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4431_squeeze_mask_0 = const()[name = string("op_4431_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4431_cast_fp16 = slice_by_index(begin = var_4431_begin_0, end = var_4431_end_0, end_mask = var_4431_end_mask_0, squeeze_mask = var_4431_squeeze_mask_0, x = coreml_update_state_95)[name = string("op_4431_cast_fp16")]; tensor var_4434_begin_0 = const()[name = string("op_4434_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4434_end_mask_0 = const()[name = string("op_4434_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4434_cast_fp16 = slice_by_index(begin = var_4434_begin_0, end = concat_11, end_mask = var_4434_end_mask_0, x = var_4431_cast_fp16)[name = string("op_4434_cast_fp16")]; tensor var_4436_shape_cast_fp16 = shape(x = var_4429_cast_fp16)[name = string("op_4436_shape_cast_fp16")]; int32 gather_427 = const()[name = string("gather_427"), val = int32(1)]; int32 gather_428 = const()[name = string("gather_428"), val = int32(2)]; int32 gather_429_axis_0 = const()[name = string("gather_429_axis_0"), val = int32(0)]; int32 gather_429_batch_dims_0 = const()[name = string("gather_429_batch_dims_0"), val = int32(0)]; bool gather_429_validate_indices_0 = const()[name = string("gather_429_validate_indices_0"), val = bool(false)]; string var_4436_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4436_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_429_to_uint16 = const()[name = string("select_429_to_uint16"), val = uint16(2)]; tensor var_4436_shape_cast_fp16_to_uint16 = cast(dtype = var_4436_shape_cast_fp16_to_uint16_dtype_0, x = var_4436_shape_cast_fp16)[name = string("cast_5")]; uint16 gather_429_cast_uint16 = gather(axis = gather_429_axis_0, batch_dims = gather_429_batch_dims_0, indices = select_429_to_uint16, validate_indices = gather_429_validate_indices_0, x = var_4436_shape_cast_fp16_to_uint16)[name = string("gather_429_cast_uint16")]; string gather_429_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_429_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_430 = const()[name = string("gather_430"), val = int32(64)]; tensor var_4443_axes_0 = const()[name = string("op_4443_axes_0"), val = tensor([2])]; tensor var_4443_cast_fp16 = expand_dims(axes = var_4443_axes_0, x = var_4429_cast_fp16)[name = string("op_4443_cast_fp16")]; tensor shape_477_cast_fp16 = shape(x = var_4443_cast_fp16)[name = string("shape_477_cast_fp16")]; int32 concat_450_axis_0 = const()[name = string("concat_450_axis_0"), val = int32(0)]; bool concat_450_interleave_0 = const()[name = string("concat_450_interleave_0"), val = bool(false)]; int32 gather_429_cast_uint16_to_int32 = cast(dtype = gather_429_cast_uint16_to_int32_dtype_0, x = gather_429_cast_uint16)[name = string("cast_4")]; tensor concat_450 = concat(axis = concat_450_axis_0, interleave = concat_450_interleave_0, values = (gather_427, gather_428, var_76, gather_429_cast_uint16_to_int32, gather_430))[name = string("concat_450")]; tensor real_div_46 = real_div(x = concat_450, y = shape_477_cast_fp16)[name = string("real_div_46")]; tensor hidden_states_701_cast_fp16 = tile(reps = real_div_46, x = var_4443_cast_fp16)[name = string("hidden_states_701_cast_fp16")]; tensor concat_451x = const()[name = string("concat_451x"), val = tensor([1, 14, -1, 64])]; tensor key_states_cast_fp16 = reshape(shape = concat_451x, x = hidden_states_701_cast_fp16)[name = string("key_states_cast_fp16")]; tensor var_4453_shape_cast_fp16 = shape(x = var_4434_cast_fp16)[name = string("op_4453_shape_cast_fp16")]; int32 gather_431 = const()[name = string("gather_431"), val = int32(1)]; int32 gather_432 = const()[name = string("gather_432"), val = int32(2)]; int32 gather_433_axis_0 = const()[name = string("gather_433_axis_0"), val = int32(0)]; int32 gather_433_batch_dims_0 = const()[name = string("gather_433_batch_dims_0"), val = int32(0)]; bool gather_433_validate_indices_0 = const()[name = string("gather_433_validate_indices_0"), val = bool(false)]; string var_4453_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4453_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_433_to_uint16 = const()[name = string("select_433_to_uint16"), val = uint16(2)]; tensor var_4453_shape_cast_fp16_to_uint16 = cast(dtype = var_4453_shape_cast_fp16_to_uint16_dtype_0, x = var_4453_shape_cast_fp16)[name = string("cast_3")]; uint16 gather_433_cast_uint16 = gather(axis = gather_433_axis_0, batch_dims = gather_433_batch_dims_0, indices = select_433_to_uint16, validate_indices = gather_433_validate_indices_0, x = var_4453_shape_cast_fp16_to_uint16)[name = string("gather_433_cast_uint16")]; string gather_433_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_433_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_434 = const()[name = string("gather_434"), val = int32(64)]; tensor var_4460_axes_0 = const()[name = string("op_4460_axes_0"), val = tensor([2])]; tensor var_4460_cast_fp16 = expand_dims(axes = var_4460_axes_0, x = var_4434_cast_fp16)[name = string("op_4460_cast_fp16")]; tensor shape_482_cast_fp16 = shape(x = var_4460_cast_fp16)[name = string("shape_482_cast_fp16")]; int32 concat_452_axis_0 = const()[name = string("concat_452_axis_0"), val = int32(0)]; bool concat_452_interleave_0 = const()[name = string("concat_452_interleave_0"), val = bool(false)]; int32 gather_433_cast_uint16_to_int32 = cast(dtype = gather_433_cast_uint16_to_int32_dtype_0, x = gather_433_cast_uint16)[name = string("cast_2")]; tensor concat_452 = concat(axis = concat_452_axis_0, interleave = concat_452_interleave_0, values = (gather_431, gather_432, var_76, gather_433_cast_uint16_to_int32, gather_434))[name = string("concat_452")]; tensor real_div_47 = real_div(x = concat_452, y = shape_482_cast_fp16)[name = string("real_div_47")]; tensor hidden_states_705_cast_fp16 = tile(reps = real_div_47, x = var_4460_cast_fp16)[name = string("hidden_states_705_cast_fp16")]; tensor concat_453x = const()[name = string("concat_453x"), val = tensor([1, 14, -1, 64])]; tensor value_states_cast_fp16 = reshape(shape = concat_453x, x = hidden_states_705_cast_fp16)[name = string("value_states_cast_fp16")]; tensor var_4470_shape_cast_fp16 = shape(x = key_states_cast_fp16)[name = string("op_4470_shape_cast_fp16")]; int32 gather_435_axis_0 = const()[name = string("gather_435_axis_0"), val = int32(0)]; int32 gather_435_batch_dims_0 = const()[name = string("gather_435_batch_dims_0"), val = int32(0)]; bool gather_435_validate_indices_0 = const()[name = string("gather_435_validate_indices_0"), val = bool(false)]; string var_4470_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4470_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_435_to_uint16 = const()[name = string("select_435_to_uint16"), val = uint16(2)]; tensor var_4470_shape_cast_fp16_to_uint16 = cast(dtype = var_4470_shape_cast_fp16_to_uint16_dtype_0, x = var_4470_shape_cast_fp16)[name = string("cast_1")]; uint16 gather_435_cast_uint16 = gather(axis = gather_435_axis_0, batch_dims = gather_435_batch_dims_0, indices = select_435_to_uint16, validate_indices = gather_435_validate_indices_0, x = var_4470_shape_cast_fp16_to_uint16)[name = string("gather_435_cast_uint16")]; string gather_435_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_435_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_454_values0_0 = const()[name = string("concat_454_values0_0"), val = int32(1)]; int32 concat_454_values1_0 = const()[name = string("concat_454_values1_0"), val = int32(1)]; int32 concat_454_values2_0 = const()[name = string("concat_454_values2_0"), val = int32(0)]; int32 concat_454_axis_0 = const()[name = string("concat_454_axis_0"), val = int32(0)]; bool concat_454_interleave_0 = const()[name = string("concat_454_interleave_0"), val = bool(false)]; int32 gather_435_cast_uint16_to_int32 = cast(dtype = gather_435_cast_uint16_to_int32_dtype_0, x = gather_435_cast_uint16)[name = string("cast_0")]; tensor concat_454 = concat(axis = concat_454_axis_0, interleave = concat_454_interleave_0, values = (concat_454_values0_0, concat_454_values1_0, concat_454_values2_0, gather_435_cast_uint16_to_int32))[name = string("concat_454")]; tensor causal_mask_begin_0 = const()[name = string("causal_mask_begin_0"), val = tensor([0, 0, 0, 0])]; tensor causal_mask_end_mask_0 = const()[name = string("causal_mask_end_mask_0"), val = tensor([true, true, true, false])]; tensor causal_mask_cast_fp16 = slice_by_index(begin = causal_mask_begin_0, end = concat_454, end_mask = causal_mask_end_mask_0, x = causal_mask)[name = string("causal_mask_cast_fp16")]; tensor attn_output_93_cast_fp16 = scaled_dot_product_attention(attn_mask = causal_mask_cast_fp16, key = key_states_cast_fp16, query = query_states_cast_fp16, value = value_states_cast_fp16)[name = string("attn_output_93_cast_fp16")]; tensor var_4476_perm_0 = const()[name = string("op_4476_perm_0"), val = tensor([0, 2, 1, 3])]; tensor concat_455x = const()[name = string("concat_455x"), val = tensor([1, -1, 896])]; tensor var_4476_cast_fp16 = transpose(perm = var_4476_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_0")]; tensor input_185_cast_fp16 = reshape(shape = concat_455x, x = var_4476_cast_fp16)[name = string("input_185_cast_fp16")]; tensor model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270227328))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270628800))))[name = string("model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized")]; tensor linear_164_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_23_self_attn_o_proj_weight_to_fp16_quantized, x = input_185_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor hidden_states_709_cast_fp16 = add(x = hidden_states_689_cast_fp16, y = linear_164_cast_fp16)[name = string("hidden_states_709_cast_fp16")]; fp16 var_70_promoted_47_to_fp16 = const()[name = string("op_70_promoted_47_to_fp16"), val = fp16(0x1p+1)]; tensor var_4485_cast_fp16 = pow(x = hidden_states_709_cast_fp16, y = var_70_promoted_47_to_fp16)[name = string("op_4485_cast_fp16")]; tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_4485_cast_fp16)[name = string("variance_95_cast_fp16")]; fp16 var_4488_to_fp16 = const()[name = string("op_4488_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4489_cast_fp16 = add(x = variance_95_cast_fp16, y = var_4488_to_fp16)[name = string("op_4489_cast_fp16")]; fp32 var_4490_epsilon_0 = const()[name = string("op_4490_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4490_cast_fp16 = rsqrt(epsilon = var_4490_epsilon_0, x = var_4489_cast_fp16)[name = string("op_4490_cast_fp16")]; tensor hidden_states_713_cast_fp16 = mul(x = hidden_states_709_cast_fp16, y = var_4490_cast_fp16)[name = string("hidden_states_713_cast_fp16")]; tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270679040)))]; tensor input_187_cast_fp16 = mul(x = model_model_layers_23_post_attention_layernorm_weight_to_fp16, y = hidden_states_713_cast_fp16)[name = string("input_187_cast_fp16")]; tensor model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(270680896))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(272860032))))[name = string("model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized")]; tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_gate_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor var_4502_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_4502_cast_fp16")]; tensor model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(273132480))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275311616))))[name = string("model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized")]; tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_up_proj_weight_to_fp16_quantized, x = input_187_cast_fp16)[name = string("linear_166_cast_fp16")]; tensor input_191_cast_fp16 = mul(x = var_4502_cast_fp16, y = linear_166_cast_fp16)[name = string("input_191_cast_fp16")]; tensor model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized = constexpr_blockwise_shift_scale(data = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(275584064))), scale = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(277763200))))[name = string("model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized")]; tensor linear_167_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = model_model_layers_23_mlp_down_proj_weight_to_fp16_quantized, x = input_191_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor hidden_states_719_cast_fp16 = add(x = hidden_states_709_cast_fp16, y = linear_167_cast_fp16)[name = string("hidden_states_719_cast_fp16")]; fp16 var_70_promoted_48_to_fp16 = const()[name = string("op_70_promoted_48_to_fp16"), val = fp16(0x1p+1)]; tensor var_4511_cast_fp16 = pow(x = hidden_states_719_cast_fp16, y = var_70_promoted_48_to_fp16)[name = string("op_4511_cast_fp16")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_4511_cast_fp16)[name = string("variance_cast_fp16")]; fp16 var_4514_to_fp16 = const()[name = string("op_4514_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4515_cast_fp16 = add(x = variance_cast_fp16, y = var_4514_to_fp16)[name = string("op_4515_cast_fp16")]; fp32 var_4516_epsilon_0 = const()[name = string("op_4516_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4516_cast_fp16 = rsqrt(epsilon = var_4516_epsilon_0, x = var_4515_cast_fp16)[name = string("op_4516_cast_fp16")]; tensor hidden_states_723_cast_fp16 = mul(x = hidden_states_719_cast_fp16, y = var_4516_cast_fp16)[name = string("hidden_states_723_cast_fp16")]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278035648)))]; tensor hidden_states_cast_fp16 = mul(x = model_model_norm_weight_to_fp16, y = hidden_states_723_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor linear_168_bias_0_to_fp16 = const()[name = string("linear_168_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(278037504)))]; tensor logits = linear(bias = linear_168_bias_0_to_fp16, weight = model_model_embed_tokens_weight_to_fp16_quantized, x = hidden_states_cast_fp16)[name = string("linear_168_cast_fp16")]; } -> (logits); }